diff --git a/CMakeLists.txt b/CMakeLists.txt
index 71a74260ab3b..c8c3038eba06 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -91,13 +91,8 @@ set(LLVM_ALL_TARGETS
 # List of targets with JIT support:
 set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM Mips)
 
-if( MSVC )
-  set(LLVM_TARGETS_TO_BUILD X86
-    CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
-else( MSVC )
-  set(LLVM_TARGETS_TO_BUILD "all"
+set(LLVM_TARGETS_TO_BUILD "all"
     CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
-endif( MSVC )
 
 set(LLVM_EXPERIMENTAL_TARGETS_TO_BUILD ""
   CACHE STRING "Semicolon-separated list of experimental targets to build.")
@@ -193,6 +188,9 @@ if( LLVM_USE_OPROFILE )
   endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
 endif( LLVM_USE_OPROFILE )
 
+set(LLVM_USE_SANITIZER "" CACHE STRING
+  "Define the sanitizer used to build binaries and tests.")
+
 # Define an option controlling whether we should build for 32-bit on 64-bit
 # platforms, where supported.
 if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
@@ -387,6 +385,10 @@ if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include llvm/Support/Solaris.h")
 endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
 
+# Make sure we don't get -rdynamic in every binary. For those that need it,
+# use set_target_properties(target PROPERTIES ENABLE_EXPORTS 1)
+set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
+
 include(AddLLVM)
 include(TableGen)
 
@@ -412,8 +414,6 @@ add_subdirectory(utils/count)
 add_subdirectory(utils/not)
 add_subdirectory(utils/llvm-lit)
 add_subdirectory(utils/yaml-bench)
-add_subdirectory(utils/obj2yaml)
-add_subdirectory(utils/yaml2obj)
 
 add_subdirectory(projects)
 
diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT
index f289ef6f697c..5f57840a03ea 100644
--- a/CODE_OWNERS.TXT
+++ b/CODE_OWNERS.TXT
@@ -23,7 +23,7 @@ D: Gold plugin (tools/gold/*)
 N: Chandler Carruth
 E: chandlerc@gmail.com
 E: chandlerc@google.com
-D: Config, ADT, Support, inlining & related passse, SROA/mem2reg & related passes, CMake, library layering
+D: Config, ADT, Support, inlining & related passes, SROA/mem2reg & related passes, CMake, library layering
 
 N: Evan Cheng
 E: evan.cheng@apple.com
@@ -125,3 +125,7 @@ D: R600 Backend
 N: Andrew Trick
 E: atrick@apple.com
 D: IndVar Simplify, Loop Strength Reduction, Instruction Scheduling
+
+N: Bill Wendling
+E: wendling@apple.com
+D: libLTO, IR Linker
diff --git a/CREDITS.TXT b/CREDITS.TXT
index 344367923ca0..844ad39d03f1 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -251,6 +251,12 @@ D: The initial llvm-ar tool, converted regression testsuite to dejagnu
 D: Modulo scheduling in the SparcV9 backend
 D: Release manager (1.7+)
 
+N: Sylvestre Ledru
+E: sylvestre@debian.org
+W: http://sylvesre.ledru.info/
+D: Debian and Ubuntu packaging
+D: Continous integration with jenkins
+
 N: Andrew Lenharth
 E: alenhar2@cs.uiuc.edu
 W: http://www.lenharth.org/~andrewl/
@@ -423,7 +429,8 @@ D: Thread Local Storage implementation
 
 N: Bill Wendling
 E: wendling@apple.com
-D: Bug fixes
+D: Release manager
+D: Bunches of stuff
 
 N: Bob Wilson
 E: bob.wilson@acm.org
diff --git a/Makefile.rules b/Makefile.rules
index 2c834aac63fc..0a07be533713 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -1967,7 +1967,7 @@ endif
 # CHECK: Running the test suite
 ###############################################################################
 
-check::
+check:: all
 	$(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \
 	  if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \
 	    $(EchoCmd) Running test suite ; \
diff --git a/README.txt b/README.txt
index 193330f774aa..e957a4dcd02c 100644
--- a/README.txt
+++ b/README.txt
@@ -15,3 +15,4 @@ documentation setup.
 
 If you're writing a package for LLVM, see docs/Packaging.rst for our
 suggestions.
+
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 9d61c24e7ab4..0097db3d681d 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -1493,6 +1493,7 @@ AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h])
 AC_CHECK_HEADERS([sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h])
 AC_CHECK_HEADERS([valgrind/valgrind.h])
 AC_CHECK_HEADERS([fenv.h])
+AC_CHECK_DECLS([FE_ALL_EXCEPT, FE_INEXACT], [], [], [[#include <fenv.h>]])
 if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
   AC_CHECK_HEADERS(pthread.h,
                    AC_SUBST(HAVE_PTHREAD, 1),
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 7496622542e6..7cad190c11a0 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -80,6 +80,9 @@ check_include_file(utime.h HAVE_UTIME_H)
 check_include_file(valgrind/valgrind.h HAVE_VALGRIND_VALGRIND_H)
 check_include_file(windows.h HAVE_WINDOWS_H)
 check_include_file(fenv.h HAVE_FENV_H)
+check_symbol_exists(FE_ALL_EXCEPT "fenv.h" HAVE_DECL_FE_ALL_EXCEPT)
+check_symbol_exists(FE_INEXACT "fenv.h" HAVE_DECL_FE_INEXACT)
+
 check_include_file(mach/mach.h HAVE_MACH_MACH_H)
 check_include_file(mach-o/dyld.h HAVE_MACH_O_DYLD_H)
 
diff --git a/cmake/modules/GetSVN.cmake b/cmake/modules/GetSVN.cmake
new file mode 100644
index 000000000000..acccc12a94e8
--- /dev/null
+++ b/cmake/modules/GetSVN.cmake
@@ -0,0 +1,25 @@
+# CMake project that writes Subversion revision information to a header.
+#
+# Input variables:
+#   FIRST_SOURCE_DIR  - First source directory
+#   FIRST_REPOSITORY  - The macro to define to the first revision number.
+#   SECOND_SOURCE_DIR - Second source directory
+#   SECOND_REPOSITORY - The macro to define to the second revision number.
+#   HEADER_FILE       - The header file to write
+include(FindSubversion)
+if (Subversion_FOUND AND EXISTS "${FIRST_SOURCE_DIR}/.svn")
+  # Repository information for the first repository.
+  Subversion_WC_INFO(${FIRST_SOURCE_DIR} MY)
+  file(WRITE ${HEADER_FILE}.txt "#define ${FIRST_REPOSITORY} \"${MY_WC_REVISION}\"\n")
+
+  # Repository information for the second repository.
+  if (EXISTS "${SECOND_SOURCE_DIR}/.svn")
+    Subversion_WC_INFO(${SECOND_SOURCE_DIR} MY)
+    file(APPEND ${HEADER_FILE}.txt 
+      "#define ${SECOND_REPOSITORY} \"${MY_WC_REVISION}\"\n")
+  endif ()
+
+  # Copy the file only if it has changed.
+  execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
+    ${HEADER_FILE}.txt ${HEADER_FILE})
+endif()
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index 54c660644704..f8a017dcaef2 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -21,6 +21,9 @@ if( LLVM_ENABLE_ASSERTIONS )
   # explicitly undefine it:
   if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
     add_definitions( -UNDEBUG )
+    # Also remove /D NDEBUG to avoid MSVC warnings about conflicting defines.
+    string (REGEX REPLACE "(^| )[/-]D *NDEBUG($| )" " "
+      CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
   endif()
 else()
   if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
@@ -60,9 +63,9 @@ else(WIN32)
 endif(WIN32)
 
 function(add_flag_or_print_warning flag)
-  check_c_compiler_flag(${flag} C_SUPPORTS_${flag})
-  check_cxx_compiler_flag(${flag} CXX_SUPPORTS_${flag})
-  if (C_SUPPORTS_${flag} AND CXX_SUPPORTS_${flag})
+  check_c_compiler_flag(${flag} C_SUPPORTS_FLAG)
+  check_cxx_compiler_flag(${flag} CXX_SUPPORTS_FLAG)
+  if (C_SUPPORTS_FLAG AND CXX_SUPPORTS_FLAG)
     message(STATUS "Building with ${flag}")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE)
@@ -71,12 +74,27 @@ function(add_flag_or_print_warning flag)
   endif()
 endfunction()
 
-function(append_if variable value condition)
-  if (${condition})
+function(append value)
+  foreach(variable ${ARGN})
     set(${variable} "${${variable}} ${value}" PARENT_SCOPE)
+  endforeach(variable)
+endfunction()
+
+function(append_if condition value)
+  if (${condition})
+    foreach(variable ${ARGN})
+      set(${variable} "${${variable}} ${value}" PARENT_SCOPE)
+    endforeach(variable)
   endif()
 endfunction()
 
+macro(add_flag_if_supported flag)
+  check_c_compiler_flag(${flag} C_SUPPORTS_FLAG)
+  append_if(C_SUPPORTS_FLAG "${flag}" CMAKE_C_FLAGS)
+  check_cxx_compiler_flag(${flag} CXX_SUPPORTS_FLAG)
+  append_if(CXX_SUPPORTS_FLAG "${flag}" CMAKE_CXX_FLAGS)
+endmacro()
+
 if( LLVM_ENABLE_PIC )
   if( XCODE )
     # Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't
@@ -91,7 +109,7 @@ if( LLVM_ENABLE_PIC )
       # MinGW warns if -fvisibility-inlines-hidden is used.
     else()
       check_cxx_compiler_flag("-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG)
-      append_if(CMAKE_CXX_FLAGS "-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG)
+      append_if(SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG "-fvisibility-inlines-hidden" CMAKE_CXX_FLAGS)
     endif()
   endif()
 endif()
@@ -186,7 +204,7 @@ if( MSVC )
   endif (LLVM_ENABLE_WERROR)
 elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
   if (LLVM_ENABLE_WARNINGS)
-    add_llvm_definitions( -Wall -W -Wno-unused-parameter -Wwrite-strings )
+    append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 
     # Turn off missing field initializer warnings for gcc to avoid noise from
     # false positives with empty {}. Turn them on otherwise (they're off by
@@ -194,27 +212,57 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
     check_cxx_compiler_flag("-Wmissing-field-initializers" CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
     if (CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
       if (CMAKE_COMPILER_IS_GNUCXX)
-        add_llvm_definitions( -Wno-missing-field-initializers )
+        append("-Wno-missing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
       else()
-        add_llvm_definitions( -Wmissing-field-initializers )
+        append("-Wmissing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
       endif()
     endif()
 
-    if (LLVM_ENABLE_PEDANTIC)
-      add_llvm_definitions( -pedantic -Wno-long-long )
-    endif (LLVM_ENABLE_PEDANTIC)
+    append_if(LLVM_ENABLE_PEDANTIC "-pedantic -Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
     check_cxx_compiler_flag("-Werror -Wcovered-switch-default" CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG)
-    append_if(CMAKE_CXX_FLAGS "-Wcovered-switch-default" CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG)
+    append_if(CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG "-Wcovered-switch-default" CMAKE_CXX_FLAGS)
     check_c_compiler_flag("-Werror -Wcovered-switch-default" C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG)
-    append_if(CMAKE_C_FLAGS "-Wcovered-switch-default" C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG)
-    append_if(CMAKE_CXX_FLAGS "-Wno-uninitialized" USE_NO_UNINITIALIZED)
-    append_if(CMAKE_CXX_FLAGS "-Wno-maybe-uninitialized" USE_NO_MAYBE_UNINITIALIZED)
+    append_if(C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG "-Wcovered-switch-default" CMAKE_C_FLAGS)
+    append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS)
+    append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
+    check_cxx_compiler_flag("-Werror -Wnon-virtual-dtor" CXX_SUPPORTS_NON_VIRTUAL_DTOR_FLAG)
+    append_if(CXX_SUPPORTS_NON_VIRTUAL_DTOR_FLAG "-Wnon-virtual-dtor" CMAKE_CXX_FLAGS)
   endif (LLVM_ENABLE_WARNINGS)
   if (LLVM_ENABLE_WERROR)
     add_llvm_definitions( -Werror )
   endif (LLVM_ENABLE_WERROR)
 endif( MSVC )
 
+macro(append_common_sanitizer_flags)
+  # Append -fno-omit-frame-pointer and turn on debug info to get better
+  # stack traces.
+  add_flag_if_supported("-fno-omit-frame-pointer")
+  if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" AND
+      NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELWITHDEBINFO")
+    add_flag_if_supported("-gline-tables-only")
+  endif()
+endmacro()
+
+# Turn on sanitizers if necessary.
+if(LLVM_USE_SANITIZER)
+  if (LLVM_ON_UNIX)
+    if (LLVM_USE_SANITIZER STREQUAL "Address")
+      append_common_sanitizer_flags()
+      add_flag_or_print_warning("-fsanitize=address")
+    elseif (LLVM_USE_SANITIZER MATCHES "Memory(WithOrigins)?")
+      append_common_sanitizer_flags()
+      add_flag_or_print_warning("-fsanitize=memory")
+      if(LLVM_USE_SANITIZER STREQUAL "MemoryWithOrigins")
+        add_flag_or_print_warning("-fsanitize-memory-track-origins")
+      endif()
+    else()
+      message(WARNING "Unsupported value of LLVM_USE_SANITIZER: ${LLVM_USE_SANITIZER}")
+    endif()
+  else()
+    message(WARNING "LLVM_USE_SANITIZER is not supported on this platform.")
+  endif()
+endif()
+
 add_llvm_definitions( -D__STDC_CONSTANT_MACROS )
 add_llvm_definitions( -D__STDC_FORMAT_MACROS )
 add_llvm_definitions( -D__STDC_LIMIT_MACROS )
diff --git a/configure b/configure
index 8145025119d4..e70b13100a70 100755
--- a/configure
+++ b/configure
@@ -15944,6 +15944,178 @@ fi
 
 done
 
+{ echo "$as_me:$LINENO: checking whether FE_ALL_EXCEPT is declared" >&5
+echo $ECHO_N "checking whether FE_ALL_EXCEPT is declared... $ECHO_C" >&6; }
+if test "${ac_cv_have_decl_FE_ALL_EXCEPT+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <fenv.h>
+
+int
+main ()
+{
+#ifndef FE_ALL_EXCEPT
+  char *p = (char *) FE_ALL_EXCEPT;
+  return !p;
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_have_decl_FE_ALL_EXCEPT=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_have_decl_FE_ALL_EXCEPT=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_have_decl_FE_ALL_EXCEPT" >&5
+echo "${ECHO_T}$ac_cv_have_decl_FE_ALL_EXCEPT" >&6; }
+if test $ac_cv_have_decl_FE_ALL_EXCEPT = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_FE_ALL_EXCEPT 1
+_ACEOF
+
+
+else
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_FE_ALL_EXCEPT 0
+_ACEOF
+
+
+fi
+{ echo "$as_me:$LINENO: checking whether FE_INEXACT is declared" >&5
+echo $ECHO_N "checking whether FE_INEXACT is declared... $ECHO_C" >&6; }
+if test "${ac_cv_have_decl_FE_INEXACT+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <fenv.h>
+
+int
+main ()
+{
+#ifndef FE_INEXACT
+  char *p = (char *) FE_INEXACT;
+  return !p;
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_have_decl_FE_INEXACT=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_have_decl_FE_INEXACT=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_have_decl_FE_INEXACT" >&5
+echo "${ECHO_T}$ac_cv_have_decl_FE_INEXACT" >&6; }
+if test $ac_cv_have_decl_FE_INEXACT = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_FE_INEXACT 1
+_ACEOF
+
+
+else
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_FE_INEXACT 0
+_ACEOF
+
+
+fi
+
+
 if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
 
 for ac_header in pthread.h
diff --git a/docs/CMake.rst b/docs/CMake.rst
index 6eab04b9706b..991e4ba6da97 100644
--- a/docs/CMake.rst
+++ b/docs/CMake.rst
@@ -168,8 +168,8 @@ LLVM-specific variables
 
 **LLVM_TARGETS_TO_BUILD**:STRING
   Semicolon-separated list of targets to build, or *all* for building all
-  targets. Case-sensitive. For Visual C++ defaults to *X86*. On the other cases
-  defaults to *all*. Example: ``-DLLVM_TARGETS_TO_BUILD="X86;PowerPC"``.
+  targets. Case-sensitive. Defaults to *all*. Example:
+  ``-DLLVM_TARGETS_TO_BUILD="X86;PowerPC"``.
 
 **LLVM_BUILD_TOOLS**:BOOL
   Build LLVM tools. Defaults to ON. Targets for building each tool are generated
@@ -204,7 +204,7 @@ LLVM-specific variables
   tests.
 
 **LLVM_APPEND_VC_REV**:BOOL
-  Append version control revision info (svn revision number or git revision id)
+  Append version control revision info (svn revision number or Git revision id)
   to LLVM version string (stored in the PACKAGE_VERSION macro). For this to work
   cmake must be invoked before the build. Defaults to OFF.
 
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index b5d41809744f..75415ab9ccda 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -1038,6 +1038,24 @@ for your target.  It has the following strengths:
   are used to manipulate the input immediate (in this case, take the high or low
   16-bits of the immediate).
 
+* When using the 'Pat' class to map a pattern to an instruction that has one
+  or more complex operands (like e.g. `X86 addressing mode`_), the pattern may
+  either specify the operand as a whole using a ``ComplexPattern``, or else it
+  may specify the components of the complex operand separately.  The latter is
+  done e.g. for pre-increment instructions by the PowerPC back end:
+
+  ::
+
+    def STWU  : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, memri:$dst),
+                    "stwu $rS, $dst", LdStStoreUpd, []>,
+                    RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+
+    def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff),
+              (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc:$ptrreg)>;
+
+  Here, the pair of ``ptroff`` and ``ptrreg`` operands is matched onto the
+  complex operand ``dst`` of class ``memri`` in the ``STWU`` instruction.
+
 * While the system does automate a lot, it still allows you to write custom C++
   code to match special cases if there is something that is hard to
   express.
diff --git a/docs/CommandGuide/index.rst b/docs/CommandGuide/index.rst
index ac8a944a2e76..b3b4bc389edd 100644
--- a/docs/CommandGuide/index.rst
+++ b/docs/CommandGuide/index.rst
@@ -50,3 +50,4 @@ Developer Tools
    tblgen
    lit
    llvm-build
+   llvm-readobj
diff --git a/docs/CommandGuide/lli.rst b/docs/CommandGuide/lli.rst
index 7cc128444dac..a9aaf310e1f3 100644
--- a/docs/CommandGuide/lli.rst
+++ b/docs/CommandGuide/lli.rst
@@ -50,7 +50,7 @@ GENERAL OPTIONS
 
 
 
-**-load**\ =\ *puginfilename*
+**-load**\ =\ *pluginfilename*
 
  Causes **lli** to load the plugin (shared object) named *pluginfilename* and use
  it for optimization.
diff --git a/docs/CommandGuide/llvm-link.rst b/docs/CommandGuide/llvm-link.rst
index e4f22288414b..3bcfa68c2599 100644
--- a/docs/CommandGuide/llvm-link.rst
+++ b/docs/CommandGuide/llvm-link.rst
@@ -1,5 +1,5 @@
-llvm-link - LLVM linker
-=======================
+llvm-link - LLVM bitcode linker
+===============================
 
 SYNOPSIS
 --------
@@ -13,23 +13,9 @@ DESCRIPTION
 into a single LLVM bitcode file.  It writes the output file to standard output,
 unless the :option:`-o` option is used to specify a filename.
 
-:program:`llvm-link` attempts to load the input files from the current
-directory.  If that fails, it looks for each file in each of the directories
-specified by the :option:`-L` options on the command line.  The library search
-paths are global; each one is searched for every input file if necessary.  The
-directories are searched in the order they were specified on the command line.
-
 OPTIONS
 -------
 
-.. option:: -L directory
-
- Add the specified ``directory`` to the library search path.  When looking for
- libraries, :program:`llvm-link` will look in path name for libraries.  This
- option can be specified multiple times; :program:`llvm-link` will search
- inside these directories in the order in which they were specified on the
- command line.
-
 .. option:: -f
 
  Enable binary output on terminals.  Normally, :program:`llvm-link` will refuse
@@ -48,8 +34,8 @@ OPTIONS
 
 .. option:: -d
 
- If specified, :program:`llvm-link` prints a human-readable version of the output
- bitcode file to standard error.
+ If specified, :program:`llvm-link` prints a human-readable version of the
+ output bitcode file to standard error.
 
 .. option:: -help
 
@@ -67,8 +53,4 @@ EXIT STATUS
 If :program:`llvm-link` succeeds, it will exit with 0.  Otherwise, if an error
 occurs, it will exit with a non-zero value.
 
-SEE ALSO
---------
-
-gccld
 
diff --git a/docs/CommandGuide/llvm-readobj.rst b/docs/CommandGuide/llvm-readobj.rst
new file mode 100644
index 000000000000..b1918b548f85
--- /dev/null
+++ b/docs/CommandGuide/llvm-readobj.rst
@@ -0,0 +1,86 @@
+llvm-readobj - LLVM Object Reader
+=================================
+
+SYNOPSIS
+--------
+
+:program:`llvm-readobj` [*options*] [*input...*]
+
+DESCRIPTION
+-----------
+
+The :program:`llvm-readobj` tool displays low-level format-specific information
+about one or more object files. The tool and its output is primarily designed
+for use in FileCheck-based tests.
+
+OPTIONS
+-------
+
+If ``input`` is "``-``" or omitted, :program:`llvm-readobj` reads from standard
+input. Otherwise, it will read from the specified ``filenames``.
+
+.. option:: -help
+
+ Print a summary of command line options.
+
+.. option:: -version
+
+ Display the version of this program
+
+.. option:: -file-headers, -h
+
+ Display file headers.
+
+.. option:: -sections, -s
+
+ Display all sections.
+
+.. option:: -section-data, -sd
+
+ When used with ``-sections``, display section data for each section shown.
+
+.. option:: -section-relocations, -sr
+
+ When used with ``-sections``, display relocations for each section shown.
+
+.. option:: -section-symbols, -st
+
+ When used with ``-sections``, display symbols for each section shown.
+
+.. option:: -relocations, -r
+
+ Display the relocation entries in the file.
+
+.. option:: -symbols, -t
+
+ Display the symbol table.
+
+.. option:: -dyn-symbols
+
+ Display the dynamic symbol table (only for ELF object files).
+
+.. option:: -unwind, -u
+
+ Display unwind information.
+
+.. option:: -expand-relocs
+
+ When used with ``-relocations``, display each relocation in an expanded
+ multi-line format.
+
+.. option:: -dynamic-table
+
+ Display the ELF .dynamic section table (only for ELF object files).
+
+.. option:: -needed-libs
+
+ Display the needed libraries (only for ELF object files).
+
+.. option:: -program-headers
+
+ Display the ELF program headers (only for ELF object files).
+
+EXIT STATUS
+-----------
+
+:program:`llvm-readobj` returns 0.
diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst
index 87add670afb2..e9a7bc876aeb 100644
--- a/docs/CompilerWriterInfo.rst
+++ b/docs/CompilerWriterInfo.rst
@@ -20,11 +20,15 @@ ARM
 
 * `ABI <http://www.arm.com/products/DevTools/ABI.html>`_
 
+* `ARM C Language Extensions <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053a/IHI0053A_acle.pdf>`_
+
 AArch64
 -------
 
 * `ARMv8 Instruction Set Overview <http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.genc010197a/index.html>`_
 
+* `ARM C Language Extensions <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053a/IHI0053A_acle.pdf>`_
+
 Itanium (ia64)
 --------------
 
@@ -107,6 +111,12 @@ OS X
 * `Mach-O Runtime Architecture <http://developer.apple.com/documentation/Darwin/RuntimeArchitecture-date.html>`_
 * `Notes on Mach-O ABI <http://www.unsanity.org/archives/000044.php>`_
 
+NVPTX
+=====
+
+* `CUDA Documentation <http://docs.nvidia.com/cuda/index.html>`_ includes the PTX
+  ISA and Driver API documentation
+
 Miscellaneous Resources
 =======================
 
diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst
index 43bdc8598531..0655559cee10 100644
--- a/docs/DeveloperPolicy.rst
+++ b/docs/DeveloperPolicy.rst
@@ -260,7 +260,7 @@ quality patches.  If you would like commit access, please send an email to
    from, e.g. "J. Random Hacker <hacker@yoyodyne.com>".
 
 #. A "password hash" of the password you want to use, e.g. "``2ACR96qjUqsyM``".
-   Note that you don't ever tell us what your password is, you just give it to
+   Note that you don't ever tell us what your password is; you just give it to
    us in an encrypted form.  To get this, run "``htpasswd``" (a utility that
    comes with apache) in crypt mode (often enabled with "``-d``"), or find a web
    page that will do it for you.
@@ -269,17 +269,17 @@ Once you've been granted commit access, you should be able to check out an LLVM
 tree with an SVN URL of "https://username@llvm.org/..." instead of the normal
 anonymous URL of "http://llvm.org/...".  The first time you commit you'll have
 to type in your password.  Note that you may get a warning from SVN about an
-untrusted key, you can ignore this.  To verify that your commit access works,
+untrusted key; you can ignore this.  To verify that your commit access works,
 please do a test commit (e.g. change a comment or add a blank line).  Your first
 commit to a repository may require the autogenerated email to be approved by a
-mailing list.  This is normal, and will be done when the mailing list owner has
+mailing list.  This is normal and will be done when the mailing list owner has
 time.
 
 If you have recently been granted commit access, these policies apply:
 
 #. You are granted *commit-after-approval* to all parts of LLVM.  To get
    approval, submit a `patch`_ to `llvm-commits
-   <http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_. When approved
+   <http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_. When approved,
    you may commit it yourself.
 
 #. You are allowed to commit patches without approval which you think are
@@ -291,7 +291,7 @@ If you have recently been granted commit access, these policies apply:
 #. You are allowed to commit patches without approval to those portions of LLVM
    that you have contributed or maintain (i.e., have been assigned
    responsibility for), with the proviso that such commits must not break the
-   build.  This is a "trust but verify" policy and commits of this nature are
+   build.  This is a "trust but verify" policy, and commits of this nature are
    reviewed after they are committed.
 
 #. Multiple violations of these policies or a single egregious violation may
@@ -300,7 +300,7 @@ If you have recently been granted commit access, these policies apply:
 In any case, your changes are still subject to `code review`_ (either before or
 after they are committed, depending on the nature of the change).  You are
 encouraged to review other peoples' patches as well, but you aren't required
-to.
+to do so.
 
 .. _discuss the change/gather consensus:
 
diff --git a/docs/ExtendingLLVM.rst b/docs/ExtendingLLVM.rst
index 3d8e9ee79a46..3ae676a1b6e7 100644
--- a/docs/ExtendingLLVM.rst
+++ b/docs/ExtendingLLVM.rst
@@ -45,7 +45,7 @@ function and then be turned into an instruction if warranted.
    what the restrictions are.  Talk to other people about it so that you are
    sure it's a good idea.
 
-#. ``llvm/include/llvm/Intrinsics*.td``:
+#. ``llvm/include/llvm/IR/Intrinsics*.td``:
 
    Add an entry for your intrinsic.  Describe its memory access characteristics
    for optimization (this controls whether it will be DCE'd, CSE'd, etc). Note
diff --git a/docs/Extensions.rst b/docs/Extensions.rst
new file mode 100644
index 000000000000..062804a9fc74
--- /dev/null
+++ b/docs/Extensions.rst
@@ -0,0 +1,39 @@
+===============
+LLVM Extensions
+===============
+
+.. contents::
+   :local:
+   :depth: 1
+
+.. toctree::
+   :hidden:
+
+Introduction
+============
+
+This document describes extensions to tools and formats LLVM seeks compatibility
+with.
+
+Machine-specific Assembly Syntax
+================================
+
+X86/COFF-Dependent
+------------------
+
+The following additional relocation type is supported:
+
+**@IMGREL** (AT&T syntax only) generates an image-relative relocation that
+corresponds to the COFF relocation types ``IMAGE_REL_I386_DIR32NB`` (32-bit) or
+``IMAGE_REL_AMD64_ADDR32NB`` (64-bit).
+
+.. code-block:: gas
+
+  .text
+  fun:
+    mov foo@IMGREL(%ebx, %ecx, 4), %eax
+
+  .section .pdata
+    .long fun@IMGREL
+    .long (fun@imgrel + 0x3F)
+    .long $unwind$fun@imgrel
diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst
index 539c75e2d716..0bbbafc6e690 100644
--- a/docs/GettingStarted.rst
+++ b/docs/GettingStarted.rst
@@ -521,13 +521,13 @@ By placing it in the ``llvm/projects``, it will be automatically configured by
 the LLVM configure script as well as automatically updated when you run ``svn
 update``.
 
-GIT mirror
+Git Mirror
 ----------
 
-GIT mirrors are available for a number of LLVM subprojects. These mirrors sync
+Git mirrors are available for a number of LLVM subprojects. These mirrors sync
 automatically with each Subversion commit and contain all necessary git-svn
 marks (so, you can recreate git-svn metadata locally). Note that right now
-mirrors reflect only ``trunk`` for each project. You can do the read-only GIT
+mirrors reflect only ``trunk`` for each project. You can do the read-only Git
 clone of LLVM via:
 
 .. code-block:: console
@@ -538,10 +538,23 @@ If you want to check out clang too, run:
 
 .. code-block:: console
 
-  % git clone http://llvm.org/git/llvm.git
   % cd llvm/tools
   % git clone http://llvm.org/git/clang.git
 
+If you want to check out compiler-rt too, run:
+
+.. code-block:: console
+
+  % cd llvm/projects
+  % git clone http://llvm.org/git/compiler-rt.git
+
+If you want to check out the Test Suite Source Code (optional), run:
+
+.. code-block:: console
+
+  % cd llvm/projects
+  % git clone http://llvm.org/git/test-suite.git
+
 Since the upstream repository is in Subversion, you should use ``git
 pull --rebase`` instead of ``git pull`` to avoid generating a non-linear history
 in your clone.  To configure ``git pull`` to pass ``--rebase`` by default on the
@@ -626,8 +639,10 @@ To set up clone from which you can submit code using ``git-svn``, run:
   % git config svn-remote.svn.fetch :refs/remotes/origin/master
   % git svn rebase -l
 
+Likewise for compiler-rt and test-suite.
+
 To update this clone without generating git-svn tags that conflict with the
-upstream git repo, run:
+upstream Git repo, run:
 
 .. code-block:: console
 
@@ -638,12 +653,14 @@ upstream git repo, run:
      git checkout master &&
      git svn rebase -l)
 
+Likewise for compiler-rt and test-suite.
+
 This leaves your working directories on their master branches, so you'll need to
 ``checkout`` each working branch individually and ``rebase`` it on top of its
 parent branch.
 
 For those who wish to be able to update an llvm repo in a simpler fashion,
-consider placing the following git script in your path under the name
+consider placing the following Git script in your path under the name
 ``git-svnup``:
 
 .. code-block:: bash
@@ -991,7 +1008,7 @@ Optional Configuration Items
 ----------------------------
 
 If you're running on a Linux system that supports the `binfmt_misc
-<http://www.tat.physik.uni-tuebingen.de/~rguenth/linux/binfmt_misc.html>`_
+<http://en.wikipedia.org/wiki/binfmt_misc>`_
 module, and you have root access on the system, you can set your system up to
 execute LLVM bitcode files directly. To do this, use commands like this (the
 first command may not be required if you are already using the module):
diff --git a/docs/LLVMBuild.rst b/docs/LLVMBuild.rst
index d9215dd8eb52..040b04480ee6 100644
--- a/docs/LLVMBuild.rst
+++ b/docs/LLVMBuild.rst
@@ -123,8 +123,8 @@ the file format is below:
    boolean_property_name = 1 (or 0)
 
 LLVMBuild files are expected to define a strict set of sections and
-properties. An typical component description file for a library
-component would look typically look like the following example:
+properties. A typical component description file for a library
+component would look like the following example:
 
 .. code-block:: ini
 
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 03004f66df4f..905053fef0b7 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -720,11 +720,6 @@ Currently, only the following parameter attributes are defined:
     This indicates that the pointer parameter can be excised using the
     :ref:`trampoline intrinsics <int_trampoline>`. This is not a valid
     attribute for return values.
-``nobuiltin``
-    This indicates that the callee function at a call site is not
-    recognized as a built-in function. LLVM will retain the original call
-    and not replace it with equivalent code based on the semantics of the
-    built-in function.
 
 .. _gc:
 
@@ -764,10 +759,10 @@ inlined, has a stack alignment of 4, and which shouldn't use SSE instructions:
 .. code-block:: llvm
 
    ; Target-independent attributes:
-   #0 = attributes { alwaysinline alignstack=4 }
+   attributes #0 = { alwaysinline alignstack=4 }
 
    ; Target-dependent attributes:
-   #1 = attributes { "no-sse" }
+   attributes #1 = { "no-sse" }
 
    ; Function @f has attributes: alwaysinline, alignstack=4, and "no-sse".
    define void @f() #0 #1 { ... }
@@ -814,6 +809,12 @@ example:
 ``naked``
     This attribute disables prologue / epilogue emission for the
     function. This can have very system-specific consequences.
+``nobuiltin``
+    This indicates that the callee function at a call site is not
+    recognized as a built-in function. LLVM will retain the original call
+    and not replace it with equivalent code based on the semantics of the
+    built-in function. This is only valid at call sites, not on function
+    declarations or definitions.
 ``noduplicate``
     This attribute indicates that calls to the function cannot be
     duplicated. A call to a ``noduplicate`` function may be moved
@@ -4534,7 +4535,7 @@ The '``load``' instruction is used to read from memory.
 Arguments:
 """"""""""
 
-The argument to the '``load``' instruction specifies the memory address
+The argument to the ``load`` instruction specifies the memory address
 from which to load. The pointer must point to a :ref:`first
 class <t_firstclass>` type. If the ``load`` is marked as ``volatile``,
 then the optimizer is not allowed to modify the number or order of
@@ -4555,14 +4556,14 @@ any defined semantics for atomic loads.
 
 The optional constant ``align`` argument specifies the alignment of the
 operation (that is, the alignment of the memory address). A value of 0
-or an omitted ``align`` argument means that the operation has the abi
+or an omitted ``align`` argument means that the operation has the ABI
 alignment for the target. It is the responsibility of the code emitter
 to ensure that the alignment information is correct. Overestimating the
 alignment results in undefined behavior. Underestimating the alignment
 may produce less efficient code. An alignment of 1 is always safe.
 
 The optional ``!nontemporal`` metadata must reference a single
-metatadata name <index> corresponding to a metadata node with one
+metatadata name ``<index>`` corresponding to a metadata node with one
 ``i32`` entry of value 1. The existence of the ``!nontemporal``
 metatadata on the instruction tells the optimizer and code generator
 that this load is not expected to be reused in the cache. The code
@@ -4570,7 +4571,7 @@ generator may select special instructions to save cache bandwidth, such
 as the ``MOVNT`` instruction on x86.
 
 The optional ``!invariant.load`` metadata must reference a single
-metatadata name <index> corresponding to a metadata node with no
+metatadata name ``<index>`` corresponding to a metadata node with no
 entries. The existence of the ``!invariant.load`` metatadata on the
 instruction tells the optimizer and code generator that this load
 address points to memory which does not change value during program
@@ -4618,10 +4619,10 @@ The '``store``' instruction is used to write to memory.
 Arguments:
 """"""""""
 
-There are two arguments to the '``store``' instruction: a value to store
-and an address at which to store it. The type of the '``<pointer>``'
+There are two arguments to the ``store`` instruction: a value to store
+and an address at which to store it. The type of the ``<pointer>``
 operand must be a pointer to the :ref:`first class <t_firstclass>` type of
-the '``<value>``' operand. If the ``store`` is marked as ``volatile``,
+the ``<value>`` operand. If the ``store`` is marked as ``volatile``,
 then the optimizer is not allowed to modify the number or order of
 execution of this ``store`` with other :ref:`volatile
 operations <volatile>`.
@@ -4638,18 +4639,18 @@ has undefined behavior if the alignment is not set to a value which is
 at least the size in bytes of the pointee. ``!nontemporal`` does not
 have any defined semantics for atomic stores.
 
-The optional constant "align" argument specifies the alignment of the
+The optional constant ``align`` argument specifies the alignment of the
 operation (that is, the alignment of the memory address). A value of 0
-or an omitted "align" argument means that the operation has the abi
+or an omitted ``align`` argument means that the operation has the ABI
 alignment for the target. It is the responsibility of the code emitter
 to ensure that the alignment information is correct. Overestimating the
-alignment results in an undefined behavior. Underestimating the
+alignment results in undefined behavior. Underestimating the
 alignment may produce less efficient code. An alignment of 1 is always
 safe.
 
-The optional !nontemporal metadata must reference a single metatadata
-name <index> corresponding to a metadata node with one i32 entry of
-value 1. The existence of the !nontemporal metatadata on the instruction
+The optional ``!nontemporal`` metadata must reference a single metatadata
+name ``<index>`` corresponding to a metadata node with one ``i32`` entry of
+value 1. The existence of the ``!nontemporal`` metatadata on the instruction
 tells the optimizer and code generator that this load is not expected to
 be reused in the cache. The code generator may select special
 instructions to save cache bandwidth, such as the MOVNT instruction on
@@ -4658,8 +4659,8 @@ x86.
 Semantics:
 """"""""""
 
-The contents of memory are updated to contain '``<value>``' at the
-location specified by the '``<pointer>``' operand. If '``<value>``' is
+The contents of memory are updated to contain ``<value>`` at the
+location specified by the ``<pointer>`` operand. If ``<value>`` is
 of scalar type then the number of bytes written does not exceed the
 minimum number of bytes needed to hold all bits of the type. For
 example, storing an ``i24`` writes at most three bytes. When writing a
@@ -8342,6 +8343,46 @@ strings. This can be useful for special purpose optimizations that want
 to look for these annotations. These have no other defined use; they are
 ignored by code generation and optimization.
 
+'``llvm.ptr.annotation.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use '``llvm.ptr.annotation``' on a
+pointer to an integer of any width. *NOTE* you must specify an address space for
+the pointer. The identifier for the default address space is the integer
+'``0``'.
+
+::
+
+      declare i8*   @llvm.ptr.annotation.p<address space>i8(i8* <val>, i8* <str>, i8* <str>, i32  <int>)
+      declare i16*  @llvm.ptr.annotation.p<address space>i16(i16* <val>, i8* <str>, i8* <str>, i32  <int>)
+      declare i32*  @llvm.ptr.annotation.p<address space>i32(i32* <val>, i8* <str>, i8* <str>, i32  <int>)
+      declare i64*  @llvm.ptr.annotation.p<address space>i64(i64* <val>, i8* <str>, i8* <str>, i32  <int>)
+      declare i256* @llvm.ptr.annotation.p<address space>i256(i256* <val>, i8* <str>, i8* <str>, i32  <int>)
+
+Overview:
+"""""""""
+
+The '``llvm.ptr.annotation``' intrinsic.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to an integer value of arbitrary bitwidth
+(result of some expression), the second is a pointer to a global string, the
+third is a pointer to a global string which is the source file name, and the
+last argument is the line number. It returns the value of the first argument.
+
+Semantics:
+""""""""""
+
+This intrinsic allows annotation of a pointer to an integer with arbitrary
+strings. This can be useful for special purpose optimizations that want to look
+for these annotations. These have no other defined use; they are ignored by code
+generation and optimization.
+
 '``llvm.annotation.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/docs/NVPTXUsage.rst b/docs/NVPTXUsage.rst
new file mode 100644
index 000000000000..5451619686d9
--- /dev/null
+++ b/docs/NVPTXUsage.rst
@@ -0,0 +1,276 @@
+=============================
+User Guide for NVPTX Back-end
+=============================
+
+.. contents::
+   :local:
+   :depth: 3
+
+
+Introduction
+============
+
+To support GPU programming, the NVPTX back-end supports a subset of LLVM IR
+along with a defined set of conventions used to represent GPU programming
+concepts. This document provides an overview of the general usage of the back-
+end, including a description of the conventions used and the set of accepted
+LLVM IR.
+
+.. note:: 
+   
+   This document assumes a basic familiarity with CUDA and the PTX
+   assembly language. Information about the CUDA Driver API and the PTX assembly
+   language can be found in the `CUDA documentation
+   <http://docs.nvidia.com/cuda/index.html>`_.
+
+
+
+Conventions
+===========
+
+Marking Functions as Kernels
+----------------------------
+
+In PTX, there are two types of functions: *device functions*, which are only
+callable by device code, and *kernel functions*, which are callable by host
+code. By default, the back-end will emit device functions. Metadata is used to
+declare a function as a kernel function. This metadata is attached to the
+``nvvm.annotations`` named metadata object, and has the following format:
+
+.. code-block:: llvm
+
+   !0 = metadata !{<function-ref>, metadata !"kernel", i32 1}
+
+The first parameter is a reference to the kernel function. The following
+example shows a kernel function calling a device function in LLVM IR. The
+function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not.
+
+.. code-block:: llvm
+
+    define float @my_fmad(float %x, float %y, float %z) {
+      %mul = fmul float %x, %y
+      %add = fadd float %mul, %z
+      ret float %add
+    }
+
+    define void @my_kernel(float* %ptr) {
+      %val = load float* %ptr
+      %ret = call float @my_fmad(float %val, float %val, float %val)
+      store float %ret, float* %ptr
+      ret void
+    }
+
+    !nvvm.annotations = !{!1}
+    !1 = metadata !{void (float*)* @my_kernel, metadata !"kernel", i32 1}
+
+When compiled, the PTX kernel functions are callable by host-side code.
+
+
+Address Spaces
+--------------
+
+The NVPTX back-end uses the following address space mapping:
+
+   ============= ======================
+   Address Space Memory Space
+   ============= ======================
+   0             Generic
+   1             Global
+   2             Internal Use
+   3             Shared
+   4             Constant
+   5             Local
+   ============= ======================
+
+Every global variable and pointer type is assigned to one of these address
+spaces, with 0 being the default address space. Intrinsics are provided which
+can be used to convert pointers between the generic and non-generic address
+spaces.
+
+As an example, the following IR will define an array ``@g`` that resides in
+global device memory.
+
+.. code-block:: llvm
+
+    @g = internal addrspace(1) global [4 x i32] [ i32 0, i32 1, i32 2, i32 3 ]
+
+LLVM IR functions can read and write to this array, and host-side code can
+copy data to it by name with the CUDA Driver API.
+
+Note that since address space 0 is the generic space, it is illegal to have
+global variables in address space 0.  Address space 0 is the default address
+space in LLVM, so the ``addrspace(N)`` annotation is *required* for global
+variables.
+
+
+NVPTX Intrinsics
+================
+
+Address Space Conversion
+------------------------
+
+'``llvm.nvvm.ptr.*.to.gen``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+These are overloaded intrinsics.  You can use these on any pointer types.
+
+.. code-block:: llvm
+
+    declare i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)*)
+    declare i8* @llvm.nvvm.ptr.shared.to.gen.p0i8.p3i8(i8 addrspace(3)*)
+    declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
+    declare i8* @llvm.nvvm.ptr.local.to.gen.p0i8.p5i8(i8 addrspace(5)*)
+
+Overview:
+"""""""""
+
+The '``llvm.nvvm.ptr.*.to.gen``' intrinsics convert a pointer in a non-generic
+address space to a generic address space pointer.
+
+Semantics:
+""""""""""
+
+These intrinsics modify the pointer value to be a valid generic address space
+pointer.
+
+
+'``llvm.nvvm.ptr.gen.to.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+These are overloaded intrinsics.  You can use these on any pointer types.
+
+.. code-block:: llvm
+
+    declare i8* @llvm.nvvm.ptr.gen.to.global.p1i8.p0i8(i8 addrspace(1)*)
+    declare i8* @llvm.nvvm.ptr.gen.to.shared.p3i8.p0i8(i8 addrspace(3)*)
+    declare i8* @llvm.nvvm.ptr.gen.to.constant.p4i8.p0i8(i8 addrspace(4)*)
+    declare i8* @llvm.nvvm.ptr.gen.to.local.p5i8.p0i8(i8 addrspace(5)*)
+
+Overview:
+"""""""""
+
+The '``llvm.nvvm.ptr.gen.to.*``' intrinsics convert a pointer in the generic
+address space to a pointer in the target address space.  Note that these
+intrinsics are only useful if the address space of the target address space of
+the pointer is known.  It is not legal to use address space conversion
+intrinsics to convert a pointer from one non-generic address space to another
+non-generic address space.
+
+Semantics:
+""""""""""
+
+These intrinsics modify the pointer value to be a valid pointer in the target
+non-generic address space.
+
+
+Reading PTX Special Registers
+-----------------------------
+
+'``llvm.nvvm.read.ptx.sreg.*``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+    declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+    declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+    declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+    declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+    declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+    declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+    declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+    declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+    declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+    declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+    declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+    declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+    declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+
+Overview:
+"""""""""
+
+The '``@llvm.nvvm.read.ptx.sreg.*``' intrinsics provide access to the PTX
+special registers, in particular the kernel launch bounds.  These registers
+map in the following way to CUDA builtins:
+
+   ============ =====================================
+   CUDA Builtin PTX Special Register Intrinsic
+   ============ =====================================
+   ``threadId`` ``@llvm.nvvm.read.ptx.sreg.tid.*``
+   ``blockIdx`` ``@llvm.nvvm.read.ptx.sreg.ctaid.*``
+   ``blockDim`` ``@llvm.nvvm.read.ptx.sreg.ntid.*``
+   ``gridDim``  ``@llvm.nvvm.read.ptx.sreg.nctaid.*``
+   ============ =====================================
+
+
+Barriers
+--------
+
+'``llvm.nvvm.barrier0``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+  declare void @llvm.nvvm.barrier0()
+
+Overview:
+"""""""""
+
+The '``@llvm.nvvm.barrier0()``' intrinsic emits a PTX ``bar.sync 0``
+instruction, equivalent to the ``__syncthreads()`` call in CUDA.
+
+
+Other Intrinsics
+----------------
+
+For the full set of NVPTX intrinsics, please see the
+``include/llvm/IR/IntrinsicsNVVM.td`` file in the LLVM source tree.
+
+
+Executing PTX
+=============
+
+The most common way to execute PTX assembly on a GPU device is to use the CUDA
+Driver API. This API is a low-level interface to the GPU driver and allows for
+JIT compilation of PTX code to native GPU machine code.
+
+Initializing the Driver API:
+
+.. code-block:: c++
+
+    CUdevice device;
+    CUcontext context;
+
+    // Initialize the driver API
+    cuInit(0);
+    // Get a handle to the first compute device
+    cuDeviceGet(&device, 0);
+    // Create a compute device context
+    cuCtxCreate(&context, 0, device);
+
+JIT compiling a PTX string to a device binary:
+
+.. code-block:: c++
+
+    CUmodule module;
+    CUfunction funcion;
+
+    // JIT compile a null-terminated PTX string
+    cuModuleLoadData(&module, (void*)PTXString);
+
+    // Get a handle to the "myfunction" kernel function
+    cuModuleGetFunction(&function, module, "myfunction");
+
+For full examples of executing PTX assembly, please see the `CUDA Samples
+<https://developer.nvidia.com/cuda-downloads>`_ distribution.
diff --git a/docs/Passes.rst b/docs/Passes.rst
index 9cb8ba0c34bc..d279eca3afb6 100644
--- a/docs/Passes.rst
+++ b/docs/Passes.rst
@@ -1018,8 +1018,8 @@ possible, it transforms the individual ``alloca`` instructions into nice clean
 scalar SSA form.
 
 This combines a simple scalar replacement of aggregates algorithm with the
-:ref:`mem2reg <passes-mem2reg>` algorithm because often interact, especially
-for C++ programs.  As such, iterating between ``scalarrepl``, then
+:ref:`mem2reg <passes-mem2reg>` algorithm because they often interact,
+especially for C++ programs.  As such, iterating between ``scalarrepl``, then
 :ref:`mem2reg <passes-mem2reg>` until we run out of things to promote works
 well.
 
diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst
index 4fc459793301..7864165617a0 100644
--- a/docs/ProgrammersManual.rst
+++ b/docs/ProgrammersManual.rst
@@ -626,6 +626,33 @@ SmallVectors are most useful when on the stack.
 SmallVector also provides a nice portable and efficient replacement for
 ``alloca``.
 
+.. note::
+
+   Prefer to use ``SmallVectorImpl<T>`` as a parameter type.
+
+   In APIs that don't care about the "small size" (most?), prefer to use
+   the ``SmallVectorImpl<T>`` class, which is basically just the "vector
+   header" (and methods) without the elements allocated after it. Note that
+   ``SmallVector<T, N>`` inherits from ``SmallVectorImpl<T>`` so the
+   conversion is implicit and costs nothing. E.g.
+
+   .. code-block:: c++
+
+      // BAD: Clients cannot pass e.g. SmallVector<Foo, 4>.
+      hardcodedSmallSize(SmallVector<Foo, 2> &Out);
+      // GOOD: Clients can pass any SmallVector<Foo, N>.
+      allowsAnySmallSize(SmallVectorImpl<Foo> &Out);
+
+      void someFunc() {
+        SmallVector<Foo, 8> Vec;
+        hardcodedSmallSize(Vec); // Error.
+        allowsAnySmallSize(Vec); // Works.
+      }
+
+   Even though it has "``Impl``" in the name, this is so widely used that
+   it really isn't "private to the implementation" anymore. A name like
+   ``SmallVectorHeader`` would be more appropriate.
+
 .. _dss_vector:
 
 <vector>
@@ -989,7 +1016,9 @@ coupled with a good choice of :ref:`sequential container <ds_sequential>`.
 This combination provides the several nice properties: the result data is
 contiguous in memory (good for cache locality), has few allocations, is easy to
 address (iterators in the final vector are just indices or pointers), and can be
-efficiently queried with a standard binary or radix search.
+efficiently queried with a standard binary search (e.g.
+``std::lower_bound``; if you want the whole range of elements comparing
+equal, use ``std::equal_range``).
 
 .. _dss_smallset:
 
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 5ee67b943349..d8068831773f 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -64,6 +64,12 @@ Non-comprehensive list of changes in this release
   attributes, which are useful for passing information to code generation. See
   :doc:`HowToUseAttributes` for more details.
 
+* TableGen's syntax for instruction selection patterns has been simplified.
+  Instead of specifying types indirectly with register classes, you should now
+  specify types directly in the input patterns. See ``SparcInstrInfo.td`` for
+  examples of the new syntax. The old syntax using register classes still
+  works, but it will be removed in a future LLVM release.
+
 * ... next change ...
 
 .. NOTE
@@ -93,7 +99,9 @@ GNU-style thread local storage and inline assembly.
 Hexagon Target
 --------------
 
-- Removed support for hexagonv2 and hexagonv3 processor architectures.
+- Removed support for legacy hexagonv2 and hexagonv3 processor
+  architectures which are no longer in use. Currently supported
+  architectures are hexagonv4 and hexagonv5.
 
 Loop Vectorizer
 ---------------
@@ -111,6 +119,15 @@ has the following features:
 - Vectorization of function calls
 - Partial unrolling during vectorization
 
+The loop vectorizer is now enabled by default for -O3.
+
+SLP Vectorizer
+--------------
+
+LLVM now has a new SLP vectorizer. The new SLP vectorizer is not enabled by
+default but can be enabled using the clang flag -fslp-vectorize. The BB-vectorizer
+can also be enabled using the command line flag -fslp-vectorize-aggressive.
+
 R600 Backend
 ------------
 
diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst
index 78ce4e0e532e..857479508a5e 100644
--- a/docs/SourceLevelDebugging.rst
+++ b/docs/SourceLevelDebugging.rst
@@ -1811,11 +1811,11 @@ values, we can clarify the contents of the ``BUCKETS``, ``HASHES`` and
   |  HEADER.header_data_len | uint32_t
   |  HEADER_DATA            | HeaderData
   |-------------------------|
-  |  BUCKETS                | uint32_t[bucket_count] // 32 bit hash indexes
+  |  BUCKETS                | uint32_t[n_buckets] // 32 bit hash indexes
   |-------------------------|
-  |  HASHES                 | uint32_t[hashes_count] // 32 bit hash values
+  |  HASHES                 | uint32_t[n_hashes] // 32 bit hash values
   |-------------------------|
-  |  OFFSETS                | uint32_t[hashes_count] // 32 bit offsets to hash value data
+  |  OFFSETS                | uint32_t[n_hashes] // 32 bit offsets to hash value data
   |-------------------------|
   |  ALL HASH DATA          |
   `-------------------------'
@@ -2080,23 +2080,23 @@ array to be:
   HeaderData.atoms[0].form = DW_FORM_data4;
 
 This defines the contents to be the DIE offset (eAtomTypeDIEOffset) that is
-  encoded as a 32 bit value (DW_FORM_data4).  This allows a single name to have
-  multiple matching DIEs in a single file, which could come up with an inlined
-  function for instance.  Future tables could include more information about the
-  DIE such as flags indicating if the DIE is a function, method, block,
-  or inlined.
+encoded as a 32 bit value (DW_FORM_data4).  This allows a single name to have
+multiple matching DIEs in a single file, which could come up with an inlined
+function for instance.  Future tables could include more information about the
+DIE such as flags indicating if the DIE is a function, method, block,
+or inlined.
 
 The KeyType for the DWARF table is a 32 bit string table offset into the
-  ".debug_str" table.  The ".debug_str" is the string table for the DWARF which
-  may already contain copies of all of the strings.  This helps make sure, with
-  help from the compiler, that we reuse the strings between all of the DWARF
-  sections and keeps the hash table size down.  Another benefit to having the
-  compiler generate all strings as DW_FORM_strp in the debug info, is that
-  DWARF parsing can be made much faster.
+".debug_str" table.  The ".debug_str" is the string table for the DWARF which
+may already contain copies of all of the strings.  This helps make sure, with
+help from the compiler, that we reuse the strings between all of the DWARF
+sections and keeps the hash table size down.  Another benefit to having the
+compiler generate all strings as DW_FORM_strp in the debug info, is that
+DWARF parsing can be made much faster.
 
 After a lookup is made, we get an offset into the hash data.  The hash data
-  needs to be able to deal with 32 bit hash collisions, so the chunk of data
-  at the offset in the hash data consists of a triple:
+needs to be able to deal with 32 bit hash collisions, so the chunk of data
+at the offset in the hash data consists of a triple:
 
 .. code-block:: c
 
@@ -2105,7 +2105,7 @@ After a lookup is made, we get an offset into the hash data.  The hash data
   HashData[hash_data_count]
 
 If "str_offset" is zero, then the bucket contents are done. 99.9% of the
-  hash data chunks contain a single item (no 32 bit hash collision):
+hash data chunks contain a single item (no 32 bit hash collision):
 
 .. code-block:: none
 
diff --git a/docs/TableGen/LangRef.rst b/docs/TableGen/LangRef.rst
index c9e1efba03fb..bd28a9031d74 100644
--- a/docs/TableGen/LangRef.rst
+++ b/docs/TableGen/LangRef.rst
@@ -286,7 +286,7 @@ given values.
 .. productionlist::
    SimpleValue: "(" `DagArg` `DagArgList` ")"
    DagArgList: `DagArg` ("," `DagArg`)*
-   DagArg: `Value` [":" `TokVarName`]
+   DagArg: `Value` [":" `TokVarName`] | `TokVarName`
 
 The initial :token:`DagArg` is called the "operator" of the dag.
 
diff --git a/docs/TestingGuide.rst b/docs/TestingGuide.rst
index 4d8c8ce30730..79cedee764f7 100644
--- a/docs/TestingGuide.rst
+++ b/docs/TestingGuide.rst
@@ -224,16 +224,7 @@ Below is an example of legal RUN lines in a ``.ll`` file:
     ; RUN: diff %t1 %t2
 
 As with a Unix shell, the RUN lines permit pipelines and I/O
-redirection to be used. However, the usage is slightly different than
-for Bash. In general, it's useful to read the code of other tests to figure out
-what you can use in yours. The major differences are:
-
--  You can't do ``2>&1``. That will cause :program:`lit` to write to a file
-   named ``&1``. Usually this is done to get stderr to go through a pipe. You
-   can do that with ``|&`` so replace this idiom:
-   ``... 2>&1 | FileCheck`` with ``... |& FileCheck``
--  You can only redirect to a file, not to another descriptor and not
-   from a here document.
+redirection to be used.
 
 There are some quoting rules that you must pay attention to when writing
 your RUN lines. In general nothing needs to be quoted. :program:`lit` won't
@@ -243,7 +234,7 @@ everything enclosed as one value.
 
 In general, you should strive to keep your RUN lines as simple as possible,
 using them only to run tools that generate textual output you can then examine.
-The recommended way to examine output to figure out if the test passes it using
+The recommended way to examine output to figure out if the test passes is using
 the :doc:`FileCheck tool <CommandGuide/FileCheck>`. *[The usage of grep in RUN
 lines is deprecated - please do not send or commit patches that use it.]*
 
diff --git a/docs/Vectorizers.rst b/docs/Vectorizers.rst
index e2d3667bc116..d565c2122c9a 100644
--- a/docs/Vectorizers.rst
+++ b/docs/Vectorizers.rst
@@ -6,10 +6,10 @@ Auto-Vectorization in LLVM
    :local:
 
 LLVM has two vectorizers: The :ref:`Loop Vectorizer <loop-vectorizer>`,
-which operates on Loops, and the :ref:`Basic Block Vectorizer
-<bb-vectorizer>`, which optimizes straight-line code. These vectorizers
+which operates on Loops, and the :ref:`SLP Vectorizer
+<slp-vectorizer>`, which optimizes straight-line code. These vectorizers
 focus on different optimization opportunities and use different techniques.
-The BB vectorizer merges multiple scalars that are found in the code into
+The SLP vectorizer merges multiple scalars that are found in the code into
 vectors while the Loop Vectorizer widens instructions in the original loop
 to operate on multiple consecutive loop iterations.
 
@@ -21,19 +21,13 @@ The Loop Vectorizer
 Usage
 -----
 
-LLVM's Loop Vectorizer is now available and will be useful for many people.
-It is not enabled by default, but can be enabled through clang using the
-command line flag:
+LLVM's Loop Vectorizer is now enabled by default for -O3.
+We plan to enable parts of the Loop Vectorizer on -O2 and -Os in future releases.
+The vectorizer can be disabled using the command line:
 
 .. code-block:: console
 
-   $ clang -fvectorize -O3 file.c
-
-If the ``-fvectorize`` flag is used then the loop vectorizer will be enabled
-when running with ``-O3``, ``-O2``. When ``-Os`` is used, the loop vectorizer
-will only vectorize loops that do not require a major increase in code size.
-
-We plan to enable the Loop Vectorizer by default as part of the LLVM 3.3 release.
+   $ clang ... -fno-vectorize  file.c
 
 Command line flags
 ^^^^^^^^^^^^^^^^^^
@@ -299,25 +293,15 @@ And Linpack-pc with the same configuration. Result is Mflops, higher is better.
 
 .. image:: linpack-pc.png
 
-.. _bb-vectorizer:
+.. _slp-vectorizer:
 
-The Basic Block Vectorizer
-==========================
-
-Usage
-------
-
-The Basic Block Vectorizer is not enabled by default, but it can be enabled
-through clang using the command line flag:
-
-.. code-block:: console
-
-   $ clang -fslp-vectorize file.c
+The SLP Vectorizer
+==================
 
 Details
 -------
 
-The goal of basic-block vectorization (a.k.a. superword-level parallelism) is
+The goal of SLP vectorization (a.k.a. superword-level parallelism) is
 to combine similar independent instructions within simple control-flow regions
 into vector instructions. Memory accesses, arithemetic operations, comparison
 operations and some math functions can all be vectorized using this technique
@@ -329,10 +313,50 @@ into vector operations.
 
 .. code-block:: c++
 
-  int foo(int a1, int a2, int b1, int b2) {
-    int r1 = a1*(a1 + b1)/b1 + 50*b1/a1;
-    int r2 = a2*(a2 + b2)/b2 + 50*b2/a2;
-    return r1 + r2;
+  void foo(int a1, int a2, int b1, int b2, int *A) {
+    A[0] = a1*(a1 + b1)/b1 + 50*b1/a1;
+    A[1] = a2*(a2 + b2)/b2 + 50*b2/a2;
   }
 
+The SLP-vectorizer has two phases, bottom-up, and top-down. The top-down vectorization
+phase is more aggressive, but takes more time to run.
+
+Usage
+------
+
+The SLP Vectorizer is not enabled by default, but it can be enabled
+through clang using the command line flag:
+
+.. code-block:: console
+
+   $ clang -fslp-vectorize file.c
+
+LLVM has a second basic block vectorization phase
+which is more compile-time intensive (The BB vectorizer). This optimization
+can be enabled through clang using the command line flag:
+
+.. code-block:: console
+
+   $ clang -fslp-vectorize-aggressive file.c
+
+
+The SLP vectorizer is in early development stages but can already vectorize
+and accelerate many programs in the LLVM test suite.
+
+=======================   ============
+Benchmark Name              Gain
+=======================   ============
+Misc/flops-7               -32.70%
+Misc/matmul_f64_4x4        -23.23%
+Olden/power                -21.45%
+Misc/flops-4               -14.90%
+ASC_Sequoia/AMGmk          -13.85%
+TSVC/LoopRerolling-flt     -11.76%
+Misc/flops-6               -9.70%
+Misc/flops-5               -8.54%
+Misc/flops                 -8.12%
+TSVC/NodeSplitting-dbl     -6.96%
+Misc-C++/sphereflake       -6.74%
+Ptrdist/yacr2              -6.31%
+=======================   ============
 
diff --git a/docs/WritingAnLLVMBackend.rst b/docs/WritingAnLLVMBackend.rst
index 6d6c2a107081..a03a5e42c22d 100644
--- a/docs/WritingAnLLVMBackend.rst
+++ b/docs/WritingAnLLVMBackend.rst
@@ -760,7 +760,7 @@ target description file (``IntRegs``).
 
   def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr),
                    "ld [$addr], $dst",
-                   [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+                   [(set i32:$dst, (load ADDRrr:$addr))]>;
 
 The fourth parameter is the input source, which uses the address operand
 ``MEMrr`` that is defined earlier in ``SparcInstrInfo.td``:
@@ -788,7 +788,7 @@ class is defined:
 
   def LDri : F3_2 <3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr),
                    "ld [$addr], $dst",
-                   [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+                   [(set i32:$dst, (load ADDRri:$addr))]>;
 
 Writing these definitions for so many similar instructions can involve a lot of
 cut and paste.  In ``.td`` files, the ``multiclass`` directive enables the
@@ -803,11 +803,11 @@ pattern ``F3_12`` is defined to create 2 instruction classes each time
     def rr  : F3_1 <2, Op3Val,
                    (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                    !strconcat(OpcStr, " $b, $c, $dst"),
-                   [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+                   [(set i32:$dst, (OpNode i32:$b, i32:$c))]>;
     def ri  : F3_2 <2, Op3Val,
                    (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
                    !strconcat(OpcStr, " $b, $c, $dst"),
-                   [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+                   [(set i32:$dst, (OpNode i32:$b, simm13:$c))]>;
   }
 
 So when the ``defm`` directive is used for the ``XOR`` and ``ADD``
@@ -856,7 +856,7 @@ format instruction having three operands.
   def XNORrr  : F3_1<2, 0b000111,
                      (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                      "xnor $b, $c, $dst",
-                     [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+                     [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>;
 
 The instruction templates in ``SparcInstrFormats.td`` show the base class for
 ``F3_1`` is ``InstSP``.
@@ -1124,7 +1124,7 @@ a pattern with the store DAG operator.
 .. code-block:: llvm
 
   def STrr  : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src),
-                   "st $src, [$addr]", [(store IntRegs:$src, ADDRrr:$addr)]>;
+                   "st $src, [$addr]", [(store i32:$src, ADDRrr:$addr)]>;
 
 ``ADDRrr`` is a memory mode that is also defined in ``SparcInstrInfo.td``:
 
@@ -1185,7 +1185,7 @@ instruction.
       SDValue CPTmp0;
       SDValue CPTmp1;
 
-      // Pattern: (st:void IntRegs:i32:$src,
+      // Pattern: (st:void i32:i32:$src,
       //           ADDRrr:i32:$addr)<<P:Predicate_store>>
       // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src)
       // Pattern complexity = 13  cost = 1  size = 0
diff --git a/docs/index.rst b/docs/index.rst
index 8f22ef2a774c..6b182dac5607 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -22,7 +22,6 @@ Several introductory papers and presentations.
    :hidden:
 
    LangRef
-   GetElementPtr
 
 :doc:`LangRef`
   Defines the LLVM intermediate representation.
@@ -48,10 +47,6 @@ Several introductory papers and presentations.
 
   .. __: http://llvm.org/pubs/2002-12-LattnerMSThesis.html
 
-:doc:`GetElementPtr`
-  Answers to some very frequent questions about LLVM's most frequently
-  misunderstood instruction.
-
 `Publications mentioning LLVM <http://llvm.org/pubs>`_
    ..
 
@@ -72,7 +67,6 @@ representation.
    CMake
    HowToBuildOnARM
    CommandGuide/index
-   DeveloperPolicy
    GettingStarted
    GettingStartedVS
    FAQ
@@ -87,6 +81,7 @@ representation.
    ReleaseNotes
    Passes
    YamlIO
+   GetElementPtr
 
 :doc:`GettingStarted`
    Discusses how to get up and running quickly with the LLVM infrastructure.
@@ -108,9 +103,6 @@ representation.
    Tutorials about using LLVM. Includes a tutorial about making a custom
    language with LLVM.
 
-:doc:`DeveloperPolicy`
-   The LLVM project's policy towards developers and their contributions.
-
 :doc:`LLVM Command Guide <CommandGuide/index>`
    A reference manual for the LLVM command line utilities ("man" pages for LLVM
    tools).
@@ -149,25 +141,9 @@ representation.
 :doc:`YamlIO`
    A reference guide for using LLVM's YAML I/O library.
 
-IRC
-===
-
-Users and developers of the LLVM project (including subprojects such as Clang)
-can be found in #llvm on `irc.oftc.net <irc://irc.oftc.net/llvm>`_.
-
-This channel has several bots.
-
-* Buildbot reporters
-
-  * llvmbb - Bot for the main LLVM buildbot master.
-    http://lab.llvm.org:8011/console
-  * bb-chapuni - An individually run buildbot master. http://bb.pgr.jp/console
-  * smooshlab - Apple's internal buildbot master.
-
-* robot - Bugzilla linker. %bug <number>
-
-* clang-bot - A `geordi <http://www.eelis.net/geordi/>`_ instance running
-  near-trunk clang instead of gcc.
+:doc:`GetElementPtr`
+  Answers to some very frequent questions about LLVM's most frequently
+  misunderstood instruction.
 
 Programming Documentation
 =========================
@@ -184,6 +160,7 @@ For developers of applications which use LLVM as a library.
    ExtendingLLVM
    HowToSetUpLLVMStyleRTTI
    ProgrammersManual
+   Extensions
 
 :doc:`LLVM Language Reference Manual <LangRef>`
   Defines the LLVM intermediate representation and the assembly form of the
@@ -196,6 +173,9 @@ For developers of applications which use LLVM as a library.
   Introduction to the general layout of the LLVM sourcebase, important classes
   and APIs, and some tips & tricks.
 
+:doc:`Extensions`
+  LLVM-specific extensions to tools and formats LLVM seeks compatibility with.
+
 :doc:`CommandLine`
   Provides information on using the command line parsing library.
 
@@ -248,6 +228,7 @@ For API clients and LLVM developers.
    WritingAnLLVMPass
    TableGen/LangRef
    HowToUseAttributes
+   NVPTXUsage
 
 :doc:`WritingAnLLVMPass`
    Information on how to write LLVM transformations and analyses.
@@ -316,6 +297,10 @@ For API clients and LLVM developers.
 :doc:`HowToUseAttributes`
   Answers some questions about the new Attributes infrastructure.
 
+:doc:`NVPTXUsage`
+   This document describes using the NVPTX back-end to compile GPU kernels.
+
+
 Development Process Documentation
 =================================
 
@@ -324,12 +309,16 @@ Information about LLVM's development process.
 .. toctree::
    :hidden:
 
+   DeveloperPolicy
    MakefileGuide
    Projects
    LLVMBuild
    HowToReleaseLLVM
    Packaging
 
+:doc:`DeveloperPolicy`
+   The LLVM project's policy towards developers and their contributions.
+
 :doc:`Projects`
   How-to guide and templates for new projects that *use* the LLVM
   infrastructure.  The templates (directory organization, Makefiles, and test
@@ -349,46 +338,75 @@ Information about LLVM's development process.
 :doc:`Packaging`
    Advice on packaging LLVM into a distribution.
 
+Community
+=========
+
+LLVM has a thriving community of friendly and helpful developers.
+The two primary communication mechanisms in the LLVM community are mailing
+lists and IRC.
+
 Mailing Lists
-=============
+-------------
 
 If you can't find what you need in these docs, try consulting the mailing
 lists.
 
-`LLVM Announcements List`__
-  This is a low volume list that provides important announcements regarding
-  LLVM.  It gets email about once a month.
-
-  .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvm-announce
-
-`Developer's List`__
+`Developer's List (llvmdev)`__
   This list is for people who want to be included in technical discussions of
   LLVM. People post to this list when they have questions about writing code
   for or using the LLVM tools. It is relatively low volume.
 
   .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
 
-`Bugs & Patches Archive`__
-  This list gets emailed every time a bug is opened and closed, and when people
-  submit patches to be included in LLVM.  It is higher volume than the LLVMdev
-  list.
-
-  .. __: http://lists.cs.uiuc.edu/pipermail/llvmbugs/
-
-`Commits Archive`__
+`Commits Archive (llvm-commits)`__
   This list contains all commit messages that are made when LLVM developers
-  commit code changes to the repository. It is useful for those who want to
-  stay on the bleeding edge of LLVM development. This list is very high volume.
+  commit code changes to the repository. It also serves as a forum for
+  patch review (i.e. send patches here). It is useful for those who want to
+  stay on the bleeding edge of LLVM development. This list is very high
+  volume.
 
   .. __: http://lists.cs.uiuc.edu/pipermail/llvm-commits/
 
-`Test Results Archive`__
+`Bugs & Patches Archive (llvmbugs)`__
+  This list gets emailed every time a bug is opened and closed. It is
+  higher volume than the LLVMdev list.
+
+  .. __: http://lists.cs.uiuc.edu/pipermail/llvmbugs/
+
+`Test Results Archive (llvm-testresults)`__
   A message is automatically sent to this list by every active nightly tester
   when it completes.  As such, this list gets email several times each day,
   making it a high volume list.
 
   .. __: http://lists.cs.uiuc.edu/pipermail/llvm-testresults/
 
+`LLVM Announcements List (llvm-announce)`__
+  This is a low volume list that provides important announcements regarding
+  LLVM.  It gets email about once a month.
+
+  .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvm-announce
+
+IRC
+---
+
+Users and developers of the LLVM project (including subprojects such as Clang)
+can be found in #llvm on `irc.oftc.net <irc://irc.oftc.net/llvm>`_.
+
+This channel has several bots.
+
+* Buildbot reporters
+
+  * llvmbb - Bot for the main LLVM buildbot master.
+    http://lab.llvm.org:8011/console
+  * bb-chapuni - An individually run buildbot master. http://bb.pgr.jp/console
+  * smooshlab - Apple's internal buildbot master.
+
+* robot - Bugzilla linker. %bug <number>
+
+* clang-bot - A `geordi <http://www.eelis.net/geordi/>`_ instance running
+  near-trunk clang instead of gcc.
+
+
 Indices and tables
 ==================
 
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index e85fb9750503..0dbb13453362 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -352,6 +352,14 @@ typedef enum {
   LLVMLandingPadFilter    /**< A filter clause  */
 } LLVMLandingPadClauseTy;
 
+typedef enum {
+  LLVMNotThreadLocal = 0,
+  LLVMGeneralDynamicTLSModel,
+  LLVMLocalDynamicTLSModel,
+  LLVMInitialExecTLSModel,
+  LLVMLocalExecTLSModel
+} LLVMThreadLocalMode;
+
 /**
  * @}
  */
@@ -1606,6 +1614,10 @@ LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar);
 void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal);
 LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar);
 void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant);
+LLVMThreadLocalMode LLVMGetThreadLocalMode(LLVMValueRef GlobalVar);
+void LLVMSetThreadLocalMode(LLVMValueRef GlobalVar, LLVMThreadLocalMode Mode);
+LLVMBool LLVMIsExternallyInitialized(LLVMValueRef GlobalVar);
+void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit);
 
 /**
  * @}
@@ -1691,7 +1703,14 @@ void LLVMSetGC(LLVMValueRef Fn, const char *Name);
  *
  * @see llvm::Function::addAttribute()
  */
-void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
+void LLVMAddFunctionAttr(LLVMValueRef Fn, unsigned PA, unsigned HighPA);
+
+/**
+ * Add a target-dependent attribute to a fuction
+ * @see llvm::AttrBuilder::addAttribute()
+ */
+void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
+                                        const char *V);
 
 /**
  * Obtain an attribute from a function.
@@ -2560,6 +2579,8 @@ LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange(const char *InputData,
 LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(const char *InputData,
                                                               size_t InputDataLength,
                                                               const char *BufferName);
+const char *LLVMGetBufferStart(LLVMMemoryBufferRef MemBuf);
+size_t LLVMGetBufferSize(LLVMMemoryBufferRef MemBuf);
 void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf);
 
 /**
diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h
index 691abdfcb47a..a02161aaa4e4 100644
--- a/include/llvm-c/TargetMachine.h
+++ b/include/llvm-c/TargetMachine.h
@@ -114,6 +114,9 @@ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T);
 LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
   char *Filename, LLVMCodeGenFileType codegen, char **ErrorMessage);
 
+/** Compile the LLVM IR stored in \p M and store the result in \p OutMemBuf. */
+LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T, LLVMModuleRef M,
+  LLVMCodeGenFileType codegen, char** ErrorMessage, LLVMMemoryBufferRef *OutMemBuf);
 
 
 
diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h
index 68a9bdd38854..c9102da60297 100644
--- a/include/llvm-c/Transforms/Vectorize.h
+++ b/include/llvm-c/Transforms/Vectorize.h
@@ -39,6 +39,9 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM);
 /** See llvm::createLoopVectorizePass function. */
 void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM);
 
+/** See llvm::createSLPVectorizerPass function. */
+void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM);
+
 /**
  * @}
  */
diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h
index 42e123930d25..11be4c513e2c 100644
--- a/include/llvm/ADT/APSInt.h
+++ b/include/llvm/ADT/APSInt.h
@@ -161,11 +161,11 @@ class APSInt : public APInt {
   }
 
   APSInt& operator++() {
-    static_cast<APInt&>(*this)++;
+    ++(static_cast<APInt&>(*this));
     return *this;
   }
   APSInt& operator--() {
-    static_cast<APInt&>(*this)--;
+    --(static_cast<APInt&>(*this));
     return *this;
   }
   APSInt operator++(int) {
diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h
index cda31a261df2..e434417da7c5 100644
--- a/include/llvm/ADT/Hashing.h
+++ b/include/llvm/ADT/Hashing.h
@@ -151,7 +151,7 @@ namespace detail {
 inline uint64_t fetch64(const char *p) {
   uint64_t result;
   memcpy(&result, p, sizeof(result));
-  if (sys::isBigEndianHost())
+  if (sys::IsBigEndianHost)
     return sys::SwapByteOrder(result);
   return result;
 }
@@ -159,7 +159,7 @@ inline uint64_t fetch64(const char *p) {
 inline uint32_t fetch32(const char *p) {
   uint32_t result;
   memcpy(&result, p, sizeof(result));
-  if (sys::isBigEndianHost())
+  if (sys::IsBigEndianHost)
     return sys::SwapByteOrder(result);
   return result;
 }
diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h
index 9e5ab021a50c..b8b88619957e 100644
--- a/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -226,13 +226,13 @@ namespace llvm {
 
   template<class T> struct simplify_type<IntrusiveRefCntPtr<T> > {
     typedef T* SimpleType;
-    static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr<T>& Val) {
+    static SimpleType getSimplifiedValue(IntrusiveRefCntPtr<T>& Val) {
       return Val.getPtr();
     }
   };
 
   template<class T> struct simplify_type<const IntrusiveRefCntPtr<T> > {
-    typedef T* SimpleType;
+    typedef /*const*/ T* SimpleType;
     static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr<T>& Val) {
       return Val.getPtr();
     }
diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h
index 81d73ed8b997..194e53fac213 100644
--- a/include/llvm/ADT/Optional.h
+++ b/include/llvm/ADT/Optional.h
@@ -128,20 +128,6 @@ class Optional {
 #endif
 };
 
-template<typename T> struct simplify_type;
-
-template <typename T>
-struct simplify_type<const Optional<T> > {
-  typedef const T* SimpleType;
-  static SimpleType getSimplifiedValue(const Optional<T> &Val) {
-    return Val.getPointer();
-  }
-};
-
-template <typename T>
-struct simplify_type<Optional<T> >
-  : public simplify_type<const Optional<T> > {};
-
 template <typename T> struct isPodLike;
 template <typename T> struct isPodLike<Optional<T> > {
   // An Optional<T> is pod-like if T is.
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index cce2efb6ac99..0299a83c4411 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -29,7 +29,7 @@ struct DenseMapInfo;
 /// on the number of bits available according to PointerLikeTypeTraits for the
 /// type.
 ///
-/// Note that PointerIntPair always puts the Int part in the highest bits
+/// Note that PointerIntPair always puts the IntVal part in the highest bits
 /// possible.  For example, PointerIntPair<void*, 1, bool> will put the bit for
 /// the bool into bit #2, not bit #0, which allows the low two bits to be used
 /// for something else.  For example, this allows:
@@ -57,13 +57,13 @@ class PointerIntPair {
   };
 public:
   PointerIntPair() : Value(0) {}
-  PointerIntPair(PointerTy Ptr, IntType Int) {
+  PointerIntPair(PointerTy PtrVal, IntType IntVal) {
     assert(IntBits <= PtrTraits::NumLowBitsAvailable &&
            "PointerIntPair formed with integer size too large for pointer");
-    setPointerAndInt(Ptr, Int);
+    setPointerAndInt(PtrVal, IntVal);
   }
-  explicit PointerIntPair(PointerTy Ptr) {
-    initWithPointer(Ptr);
+  explicit PointerIntPair(PointerTy PtrVal) {
+    initWithPointer(PtrVal);
   }
 
   PointerTy getPointer() const {
@@ -75,41 +75,41 @@ class PointerIntPair {
     return (IntType)((Value >> IntShift) & IntMask);
   }
 
-  void setPointer(PointerTy Ptr) {
-    intptr_t PtrVal
-      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
-    assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
+  void setPointer(PointerTy PtrVal) {
+    intptr_t PtrWord
+      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(PtrVal));
+    assert((PtrWord & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
            "Pointer is not sufficiently aligned");
     // Preserve all low bits, just update the pointer.
-    Value = PtrVal | (Value & ~PointerBitMask);
+    Value = PtrWord | (Value & ~PointerBitMask);
   }
 
-  void setInt(IntType Int) {
-    intptr_t IntVal = Int;
-    assert(IntVal < (1 << IntBits) && "Integer too large for field");
+  void setInt(IntType IntVal) {
+    intptr_t IntWord = static_cast<intptr_t>(IntVal);
+    assert(IntWord < (1 << IntBits) && "Integer too large for field");
     
     // Preserve all bits other than the ones we are updating.
     Value &= ~ShiftedIntMask;     // Remove integer field.
-    Value |= IntVal << IntShift;  // Set new integer.
+    Value |= IntWord << IntShift;  // Set new integer.
   }
 
-  void initWithPointer(PointerTy Ptr) {
-    intptr_t PtrVal
-      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
-    assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
+  void initWithPointer(PointerTy PtrVal) {
+    intptr_t PtrWord
+      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(PtrVal));
+    assert((PtrWord & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
            "Pointer is not sufficiently aligned");
-    Value = PtrVal;
+    Value = PtrWord;
   }
 
-  void setPointerAndInt(PointerTy Ptr, IntType Int) {
-    intptr_t PtrVal
-      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
-    assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
+  void setPointerAndInt(PointerTy PtrVal, IntType IntVal) {
+    intptr_t PtrWord
+      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(PtrVal));
+    assert((PtrWord & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
            "Pointer is not sufficiently aligned");
-    intptr_t IntVal = Int;
-    assert(IntVal < (1 << IntBits) && "Integer too large for field");
+    intptr_t IntWord = static_cast<intptr_t>(IntVal);
+    assert(IntWord < (1 << IntBits) && "Integer too large for field");
 
-    Value = PtrVal | (IntVal << IntShift);
+    Value = PtrWord | (IntWord << IntShift);
   }
 
   PointerTy const *getAddrOfPointer() const {
diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index 7f6350e4443e..59fa3f39c91e 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -260,7 +260,7 @@ class ReversePostOrderTraversal {
   typedef typename GT::NodeType NodeType;
   std::vector<NodeType*> Blocks;       // Block list in normal PO order
   inline void Initialize(NodeType *BB) {
-    copy(po_begin(BB), po_end(BB), back_inserter(Blocks));
+    std::copy(po_begin(BB), po_end(BB), std::back_inserter(Blocks));
   }
 public:
   typedef typename std::vector<NodeType*>::reverse_iterator rpo_iterator;
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index 3bb883088c59..8c7304197f34 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -54,8 +54,6 @@ class SmallPtrSetImpl {
   /// then the set is in 'small mode'.
   const void **CurArray;
   /// CurArraySize - The allocated size of CurArray, always a power of two.
-  /// Note that CurArray points to an array that has CurArraySize+1 elements in
-  /// it, so that the end iterator actually points to valid memory.
   unsigned CurArraySize;
 
   // If small, this is # elts allocated consecutively
@@ -68,9 +66,6 @@ class SmallPtrSetImpl {
     SmallArray(SmallStorage), CurArray(SmallStorage), CurArraySize(SmallSize) {
     assert(SmallSize && (SmallSize & (SmallSize-1)) == 0 &&
            "Initial size must be a power of two!");
-    // The end pointer, always valid, is set to a valid element to help the
-    // iterator.
-    CurArray[SmallSize] = 0;
     clear();
   }
   ~SmallPtrSetImpl();
@@ -147,9 +142,11 @@ class SmallPtrSetImpl {
 class SmallPtrSetIteratorImpl {
 protected:
   const void *const *Bucket;
+  const void *const *End;
 public:
-  explicit SmallPtrSetIteratorImpl(const void *const *BP) : Bucket(BP) {
-    AdvanceIfNotValid();
+  explicit SmallPtrSetIteratorImpl(const void *const *BP, const void*const *E)
+    : Bucket(BP), End(E) {
+      AdvanceIfNotValid();
   }
 
   bool operator==(const SmallPtrSetIteratorImpl &RHS) const {
@@ -164,8 +161,10 @@ class SmallPtrSetIteratorImpl {
   /// that is.   This is guaranteed to stop because the end() bucket is marked
   /// valid.
   void AdvanceIfNotValid() {
-    while (*Bucket == SmallPtrSetImpl::getEmptyMarker() ||
-           *Bucket == SmallPtrSetImpl::getTombstoneMarker())
+    assert(Bucket <= End);
+    while (Bucket != End &&
+           (*Bucket == SmallPtrSetImpl::getEmptyMarker() ||
+            *Bucket == SmallPtrSetImpl::getTombstoneMarker()))
       ++Bucket;
   }
 };
@@ -182,12 +181,13 @@ class SmallPtrSetIterator : public SmallPtrSetIteratorImpl {
   typedef std::ptrdiff_t            difference_type;
   typedef std::forward_iterator_tag iterator_category;
   
-  explicit SmallPtrSetIterator(const void *const *BP)
-    : SmallPtrSetIteratorImpl(BP) {}
+  explicit SmallPtrSetIterator(const void *const *BP, const void *const *E)
+    : SmallPtrSetIteratorImpl(BP, E) {}
 
   // Most methods provided by baseclass.
 
   const PtrTy operator*() const {
+    assert(Bucket < End);
     return PtrTraits::getFromVoidPointer(const_cast<void*>(*Bucket));
   }
 
@@ -236,9 +236,8 @@ template<class PtrType, unsigned SmallSize>
 class SmallPtrSet : public SmallPtrSetImpl {
   // Make sure that SmallSize is a power of two, round up if not.
   enum { SmallSizePowTwo = RoundUpToPowerOfTwo<SmallSize>::Val };
-  /// SmallStorage - Fixed size storage used in 'small mode'.  The extra element
-  /// ensures that the end iterator actually points to valid memory.
-  const void *SmallStorage[SmallSizePowTwo+1];
+  /// SmallStorage - Fixed size storage used in 'small mode'.
+  const void *SmallStorage[SmallSizePowTwo];
   typedef PointerLikeTypeTraits<PtrType> PtrTraits;
 public:
   SmallPtrSet() : SmallPtrSetImpl(SmallStorage, SmallSizePowTwo) {}
@@ -275,10 +274,10 @@ class SmallPtrSet : public SmallPtrSetImpl {
   typedef SmallPtrSetIterator<PtrType> iterator;
   typedef SmallPtrSetIterator<PtrType> const_iterator;
   inline iterator begin() const {
-    return iterator(CurArray);
+    return iterator(CurArray, CurArray+CurArraySize);
   }
   inline iterator end() const {
-    return iterator(CurArray+CurArraySize);
+    return iterator(CurArray+CurArraySize, CurArray+CurArraySize);
   }
 
   // Allow assignment from any smallptrset with the same element type even if it
diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h
index 4eb7de895a68..5dfe924f6d78 100644
--- a/include/llvm/ADT/SmallSet.h
+++ b/include/llvm/ADT/SmallSet.h
@@ -55,6 +55,7 @@ class SmallSet {
   }
 
   /// insert - Insert an element into the set if it isn't already there.
+  /// Returns true if the element is inserted (it was not in the set before).
   bool insert(const T &V) {
     if (!isSmall())
       return Set.insert(V).second;
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index 9167f872185d..7ba0a714bfc7 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -16,6 +16,7 @@
 
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/type_traits.h"
 #include <algorithm>
 #include <cassert>
@@ -267,7 +268,8 @@ template <typename T, bool isPodLike>
 void SmallVectorTemplateBase<T, isPodLike>::grow(size_t MinSize) {
   size_t CurCapacity = this->capacity();
   size_t CurSize = this->size();
-  size_t NewCapacity = 2*CurCapacity + 1; // Always grow, even from zero.
+  // Always grow, even from zero.  
+  size_t NewCapacity = size_t(NextPowerOf2(CurCapacity+2));
   if (NewCapacity < MinSize)
     NewCapacity = MinSize;
   T *NewElts = static_cast<T*>(malloc(NewCapacity*sizeof(T)));
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index cc290d51d272..e16c6b491386 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -236,7 +236,7 @@ namespace llvm {
     /// getLHSKind - Get the NodeKind of the left-hand side.
     NodeKind getLHSKind() const { return (NodeKind) LHSKind; }
 
-    /// getRHSKind - Get the NodeKind of the left-hand side.
+    /// getRHSKind - Get the NodeKind of the right-hand side.
     NodeKind getRHSKind() const { return (NodeKind) RHSKind; }
 
     /// printOneChild - Print one child from a twine.
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index aeb78484c61b..71dab2ef551c 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -274,12 +274,12 @@ template<typename From> struct simplify_type;
 template<typename NodeTy> struct simplify_type<ilist_iterator<NodeTy> > {
   typedef NodeTy* SimpleType;
 
-  static SimpleType getSimplifiedValue(const ilist_iterator<NodeTy> &Node) {
+  static SimpleType getSimplifiedValue(ilist_iterator<NodeTy> &Node) {
     return &*Node;
   }
 };
 template<typename NodeTy> struct simplify_type<const ilist_iterator<NodeTy> > {
-  typedef NodeTy* SimpleType;
+  typedef /*const*/ NodeTy* SimpleType;
 
   static SimpleType getSimplifiedValue(const ilist_iterator<NodeTy> &Node) {
     return &*Node;
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 163331c262c1..da007072e559 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -20,7 +20,6 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/ValueHandle.h"
 #include <vector>
 
diff --git a/include/llvm/Analysis/BlockFrequencyImpl.h b/include/llvm/Analysis/BlockFrequencyImpl.h
index f220c582449f..b3e2d18eb2c6 100644
--- a/include/llvm/Analysis/BlockFrequencyImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyImpl.h
@@ -271,7 +271,7 @@ class BlockFrequencyImpl {
 
     BlockT *EntryBlock = fn->begin();
 
-    copy(po_begin(EntryBlock), po_end(EntryBlock), back_inserter(POT));
+    std::copy(po_begin(EntryBlock), po_end(EntryBlock), std::back_inserter(POT));
 
     unsigned RPOidx = 0;
     for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) {
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index 63262eb9a364..488338302ada 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -146,14 +146,6 @@ static inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) {
 bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
                    const TargetLibraryInfo *TLI, bool RoundToAlign = false);
 
-/// \brief Compute the size of the underlying object pointed by Ptr. Returns
-/// true and the object size in Size if successful, and false otherwise.
-/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
-/// byval arguments, and global variables.
-bool getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size,
-                             const DataLayout *TD, const TargetLibraryInfo *TLI,
-                             bool RoundToAlign = false);
-
 
 
 typedef std::pair<APInt, APInt> SizeOffsetType;
@@ -163,14 +155,12 @@ typedef std::pair<APInt, APInt> SizeOffsetType;
 class ObjectSizeOffsetVisitor
   : public InstVisitor<ObjectSizeOffsetVisitor, SizeOffsetType> {
 
-  typedef DenseMap<const Value*, SizeOffsetType> CacheMapTy;
-
   const DataLayout *TD;
   const TargetLibraryInfo *TLI;
   bool RoundToAlign;
   unsigned IntTyBits;
   APInt Zero;
-  CacheMapTy CacheMap;
+  SmallPtrSet<Instruction *, 8> SeenInsts;
 
   APInt align(APInt Size, uint64_t Align);
 
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index 69cc29381136..e87319516cd1 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -266,6 +266,24 @@ class Region : public RegionNode {
   /// @param BB  The new exit basic block of the region.
   void replaceExit(BasicBlock *BB);
 
+  /// @brief Recursively replace the entry basic block of the region.
+  ///
+  /// This function replaces the entry basic block with a new basic block. It
+  /// also updates all child regions that have the same entry basic block as
+  /// this region.
+  ///
+  /// @param NewEntry The new entry basic block.
+  void replaceEntryRecursive(BasicBlock *NewEntry);
+
+  /// @brief Recursively replace the exit basic block of the region.
+  ///
+  /// This function replaces the exit basic block with a new basic block. It
+  /// also updates all child regions that have the same exit basic block as
+  /// this region.
+  ///
+  /// @param NewExit The new exit basic block.
+  void replaceExitRecursive(BasicBlock *NewExit);
+
   /// @brief Get the exit BasicBlock of the Region.
   /// @return The exit BasicBlock of the Region, NULL if this is the TopLevel
   ///         Region.
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 42fd3927c852..306549fba46c 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -338,6 +338,10 @@ namespace llvm {
       /// getMax - Get the max backedge taken count for the loop.
       const SCEV *getMax(ScalarEvolution *SE) const;
 
+      /// Return true if any backedge taken count expressions refer to the given
+      /// subexpression.
+      bool hasOperand(const SCEV *S, ScalarEvolution *SE) const;
+
       /// clear - Invalidate this result and free associated memory.
       void clear();
     };
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index ccab19495f6e..a9d6725d86b0 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -263,6 +263,13 @@ class TargetTransformInfo {
     SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset.
   };
 
+  /// \brief Additonal information about an operand's possible values.
+  enum OperandValueKind {
+    OK_AnyValue,            // Operand can have any value.
+    OK_UniformValue,        // Operand is uniform (splat of a value).
+    OK_UniformConstantValue // Operand is uniform constant.
+  };
+
   /// \return The number of scalar or vector registers that the target has.
   /// If 'Vectors' is true, it returns the number of vector registers. If it is
   /// set to false, it returns the number of scalar registers.
@@ -277,7 +284,9 @@ class TargetTransformInfo {
   virtual unsigned getMaximumUnrollFactor() const;
 
   /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
-  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                  OperandValueKind Opd1Info = OK_AnyValue,
+                                  OperandValueKind Opd2Info = OK_AnyValue) const;
 
   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
   /// The index and subtype parameters are used by the subvector insertion and
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 2d2976cde13c..f3139739cd18 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -126,7 +126,7 @@ class BitstreamReader {
   }
 };
 
-  
+
 /// BitstreamEntry - When advancing through a bitstream cursor, each advance can
 /// discover a few different kinds of entries:
 ///   Error    - Malformed bitcode was found.
@@ -142,7 +142,7 @@ struct BitstreamEntry {
     SubBlock,
     Record
   } Kind;
-  
+
   unsigned ID;
 
   static BitstreamEntry getError() {
@@ -170,7 +170,7 @@ class BitstreamCursor {
   BitstreamReader *BitStream;
   size_t NextChar;
 
-  
+
   /// CurWord/word_t - This is the current data we have pulled from the stream
   /// but have not returned to the client.  This is specifically and
   /// intentionally defined to follow the word size of the host machine for
@@ -199,7 +199,7 @@ class BitstreamCursor {
   /// BlockScope - This tracks the codesize of parent blocks.
   SmallVector<Block, 8> BlockScope;
 
-  
+
 public:
   BitstreamCursor() : BitStream(0), NextChar(0) {
   }
@@ -231,7 +231,7 @@ class BitstreamCursor {
   void operator=(const BitstreamCursor &RHS);
 
   void freeState();
-  
+
   bool isEndPos(size_t pos) {
     return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
   }
@@ -278,7 +278,7 @@ class BitstreamCursor {
     /// returned just like normal records.
     AF_DontAutoprocessAbbrevs = 2
   };
-  
+
   /// advance - Advance the current bitstream, returning the next entry in the
   /// stream.
   BitstreamEntry advance(unsigned Flags = 0) {
@@ -290,10 +290,10 @@ class BitstreamCursor {
           return BitstreamEntry::getError();
         return BitstreamEntry::getEndBlock();
       }
-      
+
       if (Code == bitc::ENTER_SUBBLOCK)
         return BitstreamEntry::getSubBlock(ReadSubBlockID());
-      
+
       if (Code == bitc::DEFINE_ABBREV &&
           !(Flags & AF_DontAutoprocessAbbrevs)) {
         // We read and accumulate abbrev's, the client can't do anything with
@@ -314,7 +314,7 @@ class BitstreamCursor {
       BitstreamEntry Entry = advance(Flags);
       if (Entry.Kind != BitstreamEntry::SubBlock)
         return Entry;
-      
+
       // If we found a sub-block, just skip over it and check the next entry.
       if (SkipBlock())
         return BitstreamEntry::getError();
@@ -345,7 +345,7 @@ class BitstreamCursor {
   uint32_t Read(unsigned NumBits) {
     assert(NumBits && NumBits <= 32 &&
            "Cannot return zero or more than 32 bits!");
-    
+
     // If the field is fully contained by CurWord, return it quickly.
     if (BitsInCurWord >= NumBits) {
       uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits));
@@ -365,15 +365,15 @@ class BitstreamCursor {
 
     // Read the next word from the stream.
     uint8_t Array[sizeof(word_t)] = {0};
-    
+
     BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array),
                                            Array, NULL);
-    
+
     // Handle big-endian byte-swapping if necessary.
     support::detail::packed_endian_specific_integral
       <word_t, support::little, support::unaligned> EndianValue;
     memcpy(&EndianValue, Array, sizeof(Array));
-    
+
     CurWord = EndianValue;
 
     NextChar += sizeof(word_t);
@@ -450,7 +450,7 @@ class BitstreamCursor {
       BitsInCurWord = 32;
       return;
     }
-    
+
     BitsInCurWord = 0;
     CurWord = 0;
   }
@@ -493,7 +493,7 @@ class BitstreamCursor {
   /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
   /// the block, and return true if the block has an error.
   bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0);
-  
+
   bool ReadBlockEnd() {
     if (BlockScope.empty()) return true;
 
@@ -529,7 +529,7 @@ class BitstreamCursor {
   void readAbbreviatedField(const BitCodeAbbrevOp &Op,
                             SmallVectorImpl<uint64_t> &Vals);
   void skipAbbreviatedField(const BitCodeAbbrevOp &Op);
-  
+
 public:
 
   /// getAbbrev - Return the abbreviation for the specified AbbrevId.
@@ -541,7 +541,7 @@ class BitstreamCursor {
 
   /// skipRecord - Read the current record and discard it.
   void skipRecord(unsigned AbbrevID);
-  
+
   unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
                       StringRef *Blob = 0);
 
@@ -549,7 +549,7 @@ class BitstreamCursor {
   // Abbrev Processing
   //===--------------------------------------------------------------------===//
   void ReadAbbrevRecord();
-  
+
   bool ReadBlockInfoBlock();
 };
 
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index 7b68f8761afa..a837211875f5 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -458,7 +458,7 @@ class BitstreamWriter {
   template<typename uintty>
   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
                           const char *ArrayData, unsigned ArrayLen) {
-    return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, 
+    return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
                                                             ArrayLen));
   }
 
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 705db7e64340..471e9bfc40bb 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -123,12 +123,28 @@ class FastISel {
   /// index value.
   std::pair<unsigned, bool> getRegForGEPIndex(const Value *V);
 
-  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// \brief We're checking to see if we can fold \p LI into \p FoldInst.
+  /// Note that we could have a sequence where multiple LLVM IR instructions
+  /// are folded into the same machineinstr.  For example we could have:
+  ///   A: x = load i32 *P
+  ///   B: y = icmp A, 42
+  ///   C: br y, ...
+  ///
+  /// In this scenario, \p LI is "A", and \p FoldInst is "C".  We know
+  /// about "B" (and any other folded instructions) because it is between
+  /// A and C.
+  ///
+  /// If we succeed folding, return true.
+  ///
+  bool tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst);
+
+  /// \brief The specified machine instr operand is a vreg, and that
   /// vreg is being provided by the specified load instruction.  If possible,
   /// try to fold the load as an operand to the instruction, returning true if
   /// possible.
-  virtual bool TryToFoldLoad(MachineInstr * /*MI*/, unsigned /*OpNo*/,
-                             const LoadInst * /*LI*/) {
+  /// This method should be implemented by targets.
+  virtual bool tryToFoldLoadIntoMI(MachineInstr * /*MI*/, unsigned /*OpNo*/,
+                                   const LoadInst * /*LI*/) {
     return false;
   }
 
diff --git a/include/llvm/CodeGen/GCs.h b/include/llvm/CodeGen/GCs.h
index c407b6167485..456d2dcb51a6 100644
--- a/include/llvm/CodeGen/GCs.h
+++ b/include/llvm/CodeGen/GCs.h
@@ -26,6 +26,12 @@ namespace llvm {
   
   /// Creates an ocaml-compatible metadata printer.
   void linkOcamlGCPrinter();
+
+  /// Creates an erlang-compatible garbage collector.
+  void linkErlangGC();
+
+  /// Creates an erlang-compatible metadata printer.
+  void linkErlangGCPrinter();
   
   /// Creates a shadow stack garbage collector. This collector requires no code
   /// generator support.
diff --git a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index 7d1b1fe477a5..c3046da90b8d 100644
--- a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -29,6 +29,7 @@ namespace {
         return;
 
       llvm::linkOcamlGCPrinter();
+      llvm::linkErlangGCPrinter();
 
     }
   } ForceAsmWriterLinking; // Force link by creating a global definition.
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 2145eb80bdaa..916c0f233ef8 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -37,6 +37,7 @@ namespace {
       (void) llvm::createDefaultPBQPRegisterAllocator();
 
       llvm::linkOcamlGC();
+      llvm::linkErlangGC();
       llvm::linkShadowStackGC();
 
       (void) llvm::createBURRListDAGScheduler(NULL, llvm::CodeGenOpt::Default);
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 463236815772..7d72f37255b4 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -20,9 +20,7 @@
 #ifndef LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
 #define LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
 
-#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -36,6 +34,7 @@
 namespace llvm {
 
   class AliasAnalysis;
+  class BitVector;
   class LiveRangeCalc;
   class LiveVariables;
   class MachineDominatorTree;
diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h
index def7b00ce761..8a32a3c11a82 100644
--- a/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/include/llvm/CodeGen/LiveRangeEdit.h
@@ -83,7 +83,7 @@ class LiveRangeEdit {
   /// allUsesAvailableAt - Return true if all registers used by OrigMI at
   /// OrigIdx are also available with the same value at UseIdx.
   bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
-                          SlotIndex UseIdx);
+                          SlotIndex UseIdx) const;
 
   /// foldAsLoad - If LI has a single use and a single def that can be folded as
   /// a load, eliminate the register by folding the def into the use.
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index 3c5ed9297e8b..6628fd278e45 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -29,21 +29,19 @@
 #ifndef LLVM_CODEGEN_LIVEVARIABLES_H
 #define LLVM_CODEGEN_LIVEVARIABLES_H
 
-#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseBitVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 
 namespace llvm {
 
+class MachineBasicBlock;
 class MachineRegisterInfo;
-class TargetRegisterInfo;
 
 class LiveVariables : public MachineFunctionPass {
 public:
diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 1c9bdd954c33..98dd03b45cf7 100644
--- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -1,4 +1,3 @@
-
 //==- MachineBranchProbabilityInfo.h - Machine Branch Probability Analysis -==//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 1db6252ef435..a3acec809547 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -295,7 +295,7 @@ class MachineModuleInfo : public ImmutablePass {
   /// isUsedFunction - Return true if the functions in the llvm.used list.  This
   /// does not return true for things in llvm.compiler.used unless they are also
   /// in llvm.used.
-  bool isUsedFunction(const Function *F) {
+  bool isUsedFunction(const Function *F) const {
     return UsedFunctions.count(F);
   }
 
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 4b43cc10951a..24ba7bb1ac58 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -157,6 +157,12 @@ class MachineRegisterInfo {
   // Strictly for use by MachineInstr.cpp.
   void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps);
 
+  /// Verify the sanity of the use list for Reg.
+  void verifyUseList(unsigned Reg) const;
+
+  /// Verify the use list of all registers.
+  void verifyUseLists() const;
+
   /// reg_begin/reg_end - Provide iteration support to walk over all definitions
   /// and uses of a register within the MachineFunction that corresponds to this
   /// MachineRegisterInfo object.
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 57febe77464c..99cbd870ec37 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -297,6 +297,10 @@ class ScheduleDAGMI : public ScheduleDAGInstrs {
   /// reorderable instructions.
   virtual void schedule();
 
+  /// Change the position of an instruction within the basic block and update
+  /// live ranges and region boundary iterators.
+  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
+
   /// Get current register pressure for the top scheduled instructions.
   const IntervalPressure &getTopPressure() const { return TopPressure; }
   const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
@@ -362,7 +366,6 @@ class ScheduleDAGMI : public ScheduleDAGInstrs {
 
   void updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure);
 
-  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
   bool checkSchedLimit();
 
   void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
diff --git a/include/llvm/CodeGen/MachineTraceMetrics.h b/include/llvm/CodeGen/MachineTraceMetrics.h
index eaaa70a67d8c..2775a0485821 100644
--- a/include/llvm/CodeGen/MachineTraceMetrics.h
+++ b/include/llvm/CodeGen/MachineTraceMetrics.h
@@ -107,6 +107,13 @@ class MachineTraceMetrics : public MachineFunctionPass {
   /// Get the fixed resource information about MBB. Compute it on demand.
   const FixedBlockInfo *getResources(const MachineBasicBlock*);
 
+  /// Get the scaled number of cycles used per processor resource in MBB.
+  /// This is an array with SchedModel.getNumProcResourceKinds() entries.
+  /// The getResources() function above must have been called first.
+  ///
+  /// These numbers have already been scaled by SchedModel.getResourceFactor().
+  ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
+
   /// A virtual register or regunit required by a basic block or its trace
   /// successors.
   struct LiveInReg {
@@ -284,6 +291,8 @@ class MachineTraceMetrics : public MachineFunctionPass {
   class Ensemble {
     SmallVector<TraceBlockInfo, 4> BlockInfo;
     DenseMap<const MachineInstr*, InstrCycles> Cycles;
+    SmallVector<unsigned, 0> ProcResourceDepths;
+    SmallVector<unsigned, 0> ProcResourceHeights;
     friend class Trace;
 
     void computeTrace(const MachineBasicBlock*);
@@ -303,6 +312,8 @@ class MachineTraceMetrics : public MachineFunctionPass {
     const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
     const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
     const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
+    ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
+    ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
 
   public:
     virtual ~Ensemble();
@@ -343,8 +354,22 @@ class MachineTraceMetrics : public MachineFunctionPass {
   // One entry per basic block, indexed by block number.
   SmallVector<FixedBlockInfo, 4> BlockInfo;
 
+  // Cycles consumed on each processor resource per block.
+  // The number of processor resource kinds is constant for a given subtarget,
+  // but it is not known at compile time. The number of cycles consumed by
+  // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
+  // where Kinds = SchedModel.getNumProcResourceKinds().
+  SmallVector<unsigned, 0> ProcResourceCycles;
+
   // One ensemble per strategy.
   Ensemble* Ensembles[TS_NumStrategies];
+
+  // Convert scaled resource usage to a cycle count that can be compared with
+  // latencies.
+  unsigned getCycles(unsigned Scaled) {
+    unsigned Factor = SchedModel.getLatencyFactor();
+    return (Scaled + Factor - 1) / Factor;
+  }
 };
 
 inline raw_ostream &operator<<(raw_ostream &OS,
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 4d559b5799a2..b02f63e70b9f 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -35,6 +35,48 @@ namespace llvm {
 
 class PassConfigImpl;
 
+/// Discriminated union of Pass ID types.
+///
+/// The PassConfig API prefers dealing with IDs because they are safer and more
+/// efficient. IDs decouple configuration from instantiation. This way, when a
+/// pass is overriden, it isn't unnecessarily instantiated. It is also unsafe to
+/// refer to a Pass pointer after adding it to a pass manager, which deletes
+/// redundant pass instances.
+///
+/// However, it is convient to directly instantiate target passes with
+/// non-default ctors. These often don't have a registered PassInfo. Rather than
+/// force all target passes to implement the pass registry boilerplate, allow
+/// the PassConfig API to handle either type.
+///
+/// AnalysisID is sadly char*, so PointerIntPair won't work.
+class IdentifyingPassPtr {
+  union {
+    AnalysisID ID;
+    Pass *P;
+  };
+  bool IsInstance;
+public:
+  IdentifyingPassPtr() : P(0), IsInstance(false) {}
+  IdentifyingPassPtr(AnalysisID IDPtr) : ID(IDPtr), IsInstance(false) {}
+  IdentifyingPassPtr(Pass *InstancePtr) : P(InstancePtr), IsInstance(true) {}
+
+  bool isValid() const { return P; }
+  bool isInstance() const { return IsInstance; }
+
+  AnalysisID getID() const {
+    assert(!IsInstance && "Not a Pass ID");
+    return ID;
+  }
+  Pass *getInstance() const {
+    assert(IsInstance && "Not a Pass Instance");
+    return P;
+  }
+};
+
+template <> struct isPodLike<IdentifyingPassPtr> {
+  static const bool value = true;
+};
+
 /// Target-Independent Code Generator Pass Configuration Options.
 ///
 /// This is an ImmutablePass solely for the purpose of exposing CodeGen options
@@ -117,20 +159,22 @@ class TargetPassConfig : public ImmutablePass {
   /// Allow the target to override a specific pass without overriding the pass
   /// pipeline. When passes are added to the standard pipeline at the
   /// point where StandardID is expected, add TargetID in its place.
-  void substitutePass(AnalysisID StandardID, AnalysisID TargetID);
+  void substitutePass(AnalysisID StandardID, IdentifyingPassPtr TargetID);
 
   /// Insert InsertedPassID pass after TargetPassID pass.
-  void insertPass(AnalysisID TargetPassID, AnalysisID InsertedPassID);
+  void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID);
 
   /// Allow the target to enable a specific standard pass by default.
   void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); }
 
   /// Allow the target to disable a specific standard pass by default.
-  void disablePass(AnalysisID PassID) { substitutePass(PassID, 0); }
+  void disablePass(AnalysisID PassID) {
+    substitutePass(PassID, IdentifyingPassPtr());
+  }
 
   /// Return the pass substituted for StandardID by the target.
   /// If no substitution exists, return StandardID.
-  AnalysisID getPassSubstitution(AnalysisID StandardID) const;
+  IdentifyingPassPtr getPassSubstitution(AnalysisID StandardID) const;
 
   /// Return true if the optimized regalloc pipeline is enabled.
   bool getOptimizeRegAlloc() const;
@@ -222,17 +266,6 @@ class TargetPassConfig : public ImmutablePass {
     return false;
   }
 
-  /// addFinalizeRegAlloc - This method may be implemented by targets that want
-  /// to run passes within the regalloc pipeline, immediately after the register
-  /// allocation pass itself. These passes run as soon as virtual regisiters
-  /// have been rewritten to physical registers but before and other postRA
-  /// optimization happens. Targets that have marked instructions for bundling
-  /// must have finalized those bundles by the time these passes have run,
-  /// because subsequent passes are not guaranteed to be bundle-aware.
-  virtual bool addFinalizeRegAlloc() {
-    return false;
-  }
-
   /// addPostRegAlloc - This method may be implemented by targets that want to
   /// run passes after register allocation pass pipeline but before
   /// prolog-epilog insertion.  This should return true if -print-machineinstrs
@@ -444,10 +477,6 @@ namespace llvm {
   /// information.
   extern char &MachineBlockPlacementStatsID;
 
-  /// Code Placement - This pass optimize code placement and aligns loop
-  /// headers to target specific alignment boundary.
-  extern char &CodePlacementOptID;
-
   /// GCLowering Pass - Performs target-independent LLVM IR transformations for
   /// highly portable strategies.
   ///
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
index b617c145585c..8b8e3d90f73e 100644
--- a/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -29,6 +29,7 @@ namespace llvm {
   class MachineFunction;
   class MachineLoopInfo;
   class TargetRegisterInfo;
+  template<class T> class OwningPtr;
 
   /// This class wraps up a PBQP instance representing a register allocation
   /// problem, plus the structures necessary to map back from the PBQP solution
@@ -123,11 +124,9 @@ namespace llvm {
 
     /// Build a PBQP instance to represent the register allocation problem for
     /// the given MachineFunction.
-    virtual std::auto_ptr<PBQPRAProblem> build(
-                                              MachineFunction *mf,
-                                              const LiveIntervals *lis,
-                                              const MachineLoopInfo *loopInfo,
-                                              const RegSet &vregs);
+    virtual PBQPRAProblem *build(MachineFunction *mf, const LiveIntervals *lis,
+                                 const MachineLoopInfo *loopInfo,
+                                 const RegSet &vregs);
   private:
 
     void addSpillCosts(PBQP::Vector &costVec, PBQP::PBQPNum spillCost);
@@ -144,11 +143,9 @@ namespace llvm {
  
     /// Build a PBQP instance to represent the register allocation problem for
     /// the given MachineFunction.
-    virtual std::auto_ptr<PBQPRAProblem> build(
-                                              MachineFunction *mf,
-                                              const LiveIntervals *lis,
-                                              const MachineLoopInfo *loopInfo,
-                                              const RegSet &vregs);   
+    virtual PBQPRAProblem *build(MachineFunction *mf, const LiveIntervals *lis,
+                                 const MachineLoopInfo *loopInfo,
+                                 const RegSet &vregs);   
 
   private:
 
@@ -161,7 +158,7 @@ namespace llvm {
                             PBQP::PBQPNum benefit);
   };
 
-  FunctionPass* createPBQPRegisterAllocator(std::auto_ptr<PBQPBuilder> builder,
+  FunctionPass* createPBQPRegisterAllocator(OwningPtr<PBQPBuilder> &builder,
                                             char *customPassID=0);
 }
 
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 01199205b593..95bf29167c20 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -40,21 +40,23 @@ class RegScavenger {
   /// registers.
   bool Tracking;
 
-  /// ScavengingFrameIndex - Special spill slot used for scavenging a register
-  /// post register allocation.
-  int ScavengingFrameIndex;
+  /// Information on scavenged registers (held in a spill slot).
+  struct ScavengedInfo {
+    ScavengedInfo(int FI = -1) : FrameIndex(FI), Reg(0), Restore(NULL) {}
 
-  /// ScavengedReg - If none zero, the specific register is currently being
-  /// scavenged. That is, it is spilled to the special scavenging stack slot.
-  unsigned ScavengedReg;
+    /// A spill slot used for scavenging a register post register allocation.
+    int FrameIndex;
 
-  /// ScavengedRC - Register class of the scavenged register.
-  ///
-  const TargetRegisterClass *ScavengedRC;
+    /// If non-zero, the specific register is currently being
+    /// scavenged. That is, it is spilled to this scavenging stack slot.
+    unsigned Reg;
+
+    /// The instruction that restores the scavenged register from stack.
+    const MachineInstr *Restore;
+  };
 
-  /// ScavengeRestore - Instruction that restores the scavenged register from
-  /// stack.
-  const MachineInstr *ScavengeRestore;
+  /// A vector of information on scavenged registers.
+  SmallVector<ScavengedInfo, 2> Scavenged;
 
   /// CalleeSavedrRegs - A bitvector of callee saved registers for the target.
   ///
@@ -71,8 +73,7 @@ class RegScavenger {
 
 public:
   RegScavenger()
-    : MBB(NULL), NumPhysRegs(0), Tracking(false),
-      ScavengingFrameIndex(-1), ScavengedReg(0), ScavengedRC(NULL) {}
+    : MBB(NULL), NumPhysRegs(0), Tracking(false) {}
 
   /// enterBasicBlock - Start tracking liveness from the begin of the specific
   /// basic block.
@@ -92,9 +93,25 @@ class RegScavenger {
     while (MBBI != I) forward();
   }
 
+  /// Invert the behavior of forward() on the current instruction (undo the
+  /// changes to the available registers made by forward()).
+  void unprocess();
+
+  /// Unprocess instructions until you reach the provided iterator.
+  void unprocess(MachineBasicBlock::iterator I) {
+    while (MBBI != I) unprocess();
+  }
+
   /// skipTo - Move the internal MBB iterator but do not update register states.
-  ///
-  void skipTo(MachineBasicBlock::iterator I) { MBBI = I; }
+  void skipTo(MachineBasicBlock::iterator I) {
+    if (I == MachineBasicBlock::iterator(NULL))
+      Tracking = false;
+    MBBI = I;
+  }
+
+  MachineBasicBlock::iterator getCurrentPosition() const {
+    return MBBI;
+  }
 
   /// getRegsUsed - return all registers currently in use in used.
   void getRegsUsed(BitVector &used, bool includeReserved);
@@ -107,10 +124,28 @@ class RegScavenger {
   /// Return 0 if none is found.
   unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const;
 
-  /// setScavengingFrameIndex / getScavengingFrameIndex - accessor and setter of
-  /// ScavengingFrameIndex.
-  void setScavengingFrameIndex(int FI) { ScavengingFrameIndex = FI; }
-  int getScavengingFrameIndex() const { return ScavengingFrameIndex; }
+  /// Add a scavenging frame index.
+  void addScavengingFrameIndex(int FI) {
+    Scavenged.push_back(ScavengedInfo(FI));
+  }
+
+  /// Query whether a frame index is a scavenging frame index.
+  bool isScavengingFrameIndex(int FI) const {
+    for (SmallVector<ScavengedInfo, 2>::const_iterator I = Scavenged.begin(),
+         IE = Scavenged.end(); I != IE; ++I)
+      if (I->FrameIndex == FI)
+        return true;
+
+    return false;
+  }
+
+  /// Get an array of scavenging frame indices.
+  void getScavengingFrameIndices(SmallVectorImpl<int> &A) const {
+    for (SmallVector<ScavengedInfo, 2>::const_iterator I = Scavenged.begin(),
+         IE = Scavenged.end(); I != IE; ++I)
+      if (I->FrameIndex >= 0)
+        A.push_back(I->FrameIndex);
+  }
 
   /// scavengeRegister - Make a register of the specific register class
   /// available and do the appropriate bookkeeping. SPAdj is the stack
@@ -149,6 +184,10 @@ class RegScavenger {
     RegsAvailable |= Regs;
   }
 
+  /// Processes the current instruction and fill the KillRegs and DefRegs bit
+  /// vectors.
+  void determineKillsAndDefs();
+
   /// Add Reg and all its sub-registers to BV.
   void addRegWithSubRegs(BitVector &BV, unsigned Reg);
 
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 8c959da696d8..e13636c25046 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -302,6 +302,7 @@ namespace llvm {
     bool isCallOp         : 1;          // Is a function call operand.
     bool isTwoAddress     : 1;          // Is a two-address instruction.
     bool isCommutable     : 1;          // Is a commutable instruction.
+    bool hasPhysRegUses   : 1;          // Has physreg uses.
     bool hasPhysRegDefs   : 1;          // Has physreg defs that are being used.
     bool hasPhysRegClobbers : 1;        // Has any physreg defs, used or not.
     bool isPending        : 1;          // True once pending.
@@ -331,10 +332,10 @@ namespace llvm {
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
         Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
-        isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
-        hasPhysRegClobbers(false), isPending(false), isAvailable(false),
-        isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
-        isCloned(false), SchedulingPref(Sched::None),
+        isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
+        hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
+        isAvailable(false), isScheduled(false), isScheduleHigh(false),
+        isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
 
@@ -345,10 +346,10 @@ namespace llvm {
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
         Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
-        isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
-        hasPhysRegClobbers(false), isPending(false), isAvailable(false),
-        isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
-        isCloned(false), SchedulingPref(Sched::None),
+        isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
+        hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
+        isAvailable(false), isScheduled(false), isScheduleHigh(false),
+        isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
 
@@ -358,10 +359,10 @@ namespace llvm {
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
         Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
-        isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
-        hasPhysRegClobbers(false), isPending(false), isAvailable(false),
-        isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
-        isCloned(false), SchedulingPref(Sched::None),
+        isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
+        hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
+        isAvailable(false), isScheduled(false), isScheduleHigh(false),
+        isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
 
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index e5adf6724931..8c064bf2d446 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -810,31 +810,32 @@ class SelectionDAG {
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
                                 SDValue Op1, SDValue Op2);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
-                         SDValue Op1, SDValue Op2, SDValue Op3);
+                                SDValue Op1, SDValue Op2, SDValue Op3);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
-                         const SDValue *Ops, unsigned NumOps);
+                                ArrayRef<SDValue> Ops);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         SDValue Op1);
-  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
-                         EVT VT2, SDValue Op1, SDValue Op2);
-  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
-                         EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
+                                SDValue Op1);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+                                SDValue Op1, SDValue Op2);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+                                SDValue Op1, SDValue Op2, SDValue Op3);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         const SDValue *Ops, unsigned NumOps);
+                                ArrayRef<SDValue> Ops);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         EVT VT3, SDValue Op1, SDValue Op2);
+                                EVT VT3, SDValue Op1, SDValue Op2);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3);
+                                EVT VT3, SDValue Op1, SDValue Op2,
+                                SDValue Op3);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         EVT VT3, const SDValue *Ops, unsigned NumOps);
+                                EVT VT3, ArrayRef<SDValue> Ops);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         EVT VT3, EVT VT4, const SDValue *Ops, unsigned NumOps);
+                                EVT VT3, EVT VT4, ArrayRef<SDValue> Ops);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl,
-                         ArrayRef<EVT> ResultTys, const SDValue *Ops,
-                         unsigned NumOps);
+                                ArrayRef<EVT> ResultTys,
+                                ArrayRef<SDValue> Ops);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, SDVTList VTs,
-                         const SDValue *Ops, unsigned NumOps);
+                                ArrayRef<SDValue> Ops);
 
   /// getTargetExtractSubreg - A convenience function for creating
   /// TargetInstrInfo::EXTRACT_SUBREG nodes.
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 5f503deff10e..a4721db6851f 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -259,9 +259,6 @@ class SelectionDAGISel : public MachineFunctionPass {
   void SelectBasicBlock(BasicBlock::const_iterator Begin,
                         BasicBlock::const_iterator End,
                         bool &HadTailCall);
-
-  bool TryToFoldFastISelLoad(const LoadInst *LI, const Instruction *FoldInst,
-                             FastISel *FastIS);
   void FinishBasicBlock();
 
   void CodeGenAndEmitDAG();
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 05f3b1494fa1..fef567f56bce 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -196,14 +196,14 @@ template <> struct isPodLike<SDValue> { static const bool value = true; };
 /// SDValues as if they were SDNode*'s.
 template<> struct simplify_type<SDValue> {
   typedef SDNode* SimpleType;
-  static SimpleType getSimplifiedValue(const SDValue &Val) {
-    return static_cast<SimpleType>(Val.getNode());
+  static SimpleType getSimplifiedValue(SDValue &Val) {
+    return Val.getNode();
   }
 };
 template<> struct simplify_type<const SDValue> {
-  typedef SDNode* SimpleType;
+  typedef /*const*/ SDNode* SimpleType;
   static SimpleType getSimplifiedValue(const SDValue &Val) {
-    return static_cast<SimpleType>(Val.getNode());
+    return Val.getNode();
   }
 };
 
@@ -295,14 +295,8 @@ class SDUse {
 /// SDValues as if they were SDNode*'s.
 template<> struct simplify_type<SDUse> {
   typedef SDNode* SimpleType;
-  static SimpleType getSimplifiedValue(const SDUse &Val) {
-    return static_cast<SimpleType>(Val.getNode());
-  }
-};
-template<> struct simplify_type<const SDUse> {
-  typedef SDNode* SimpleType;
-  static SimpleType getSimplifiedValue(const SDUse &Val) {
-    return static_cast<SimpleType>(Val.getNode());
+  static SimpleType getSimplifiedValue(SDUse &Val) {
+    return Val.getNode();
   }
 };
 
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index a27708046686..26d0433f3e87 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -53,6 +53,20 @@ namespace llvm {
       this->index = index;
     }
 
+#ifdef EXPENSIVE_CHECKS
+    // When EXPENSIVE_CHECKS is defined, "erased" index list entries will
+    // actually be moved to a "graveyard" list, and have their pointers
+    // poisoned, so that dangling SlotIndex access can be reliably detected.
+    void setPoison() {
+      intptr_t tmp = reinterpret_cast<intptr_t>(mi);
+      assert(((tmp & 0x1) == 0x0) && "Pointer already poisoned?");  
+      tmp |= 0x1;
+      mi = reinterpret_cast<MachineInstr*>(tmp);
+    }
+
+    bool isPoisoned() const { return (reinterpret_cast<intptr_t>(mi) & 0x1) == 0x1; }
+#endif // EXPENSIVE_CHECKS
+
   };
 
   template <>
@@ -109,6 +123,10 @@ namespace llvm {
 
     IndexListEntry* listEntry() const {
       assert(isValid() && "Attempt to compare reserved index.");
+#ifdef EXPENSIVE_CHECKS
+      assert(!lie.getPointer()->isPoisoned() &&
+             "Attempt to access deleted list-entry.");
+#endif // EXPENSIVE_CHECKS
       return lie.getPointer();
     }
 
@@ -282,7 +300,6 @@ namespace llvm {
 
   template <> struct isPodLike<SlotIndex> { static const bool value = true; };
 
-
   inline raw_ostream& operator<<(raw_ostream &os, SlotIndex li) {
     li.print(os);
     return os;
@@ -313,6 +330,10 @@ namespace llvm {
     typedef ilist<IndexListEntry> IndexList;
     IndexList indexList;
 
+#ifdef EXPENSIVE_CHECKS
+    IndexList graveyardList;
+#endif // EXPENSIVE_CHECKS
+
     MachineFunction *mf;
 
     typedef DenseMap<const MachineInstr*, SlotIndex> Mi2IndexMap;
@@ -643,6 +664,32 @@ namespace llvm {
       std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
     }
 
+    /// \brief Free the resources that were required to maintain a SlotIndex.
+    ///
+    /// Once an index is no longer needed (for instance because the instruction
+    /// at that index has been moved), the resources required to maintain the
+    /// index can be relinquished to reduce memory use and improve renumbering
+    /// performance. Any remaining SlotIndex objects that point to the same
+    /// index are left 'dangling' (much the same as a dangling pointer to a
+    /// freed object) and should not be accessed, except to destruct them.
+    /// 
+    /// Like dangling pointers, access to dangling SlotIndexes can cause
+    /// painful-to-track-down bugs, especially if the memory for the index
+    /// previously pointed to has been re-used. To detect dangling SlotIndex
+    /// bugs, build with EXPENSIVE_CHECKS=1. This will cause "erased" indexes to
+    /// be retained in a graveyard instead of being freed. Operations on indexes
+    /// in the graveyard will trigger an assertion.
+    void eraseIndex(SlotIndex index) {
+      IndexListEntry *entry = index.listEntry();
+#ifdef EXPENSIVE_CHECKS
+      indexList.remove(entry);
+      graveyardList.push_back(entry);
+      entry->setPoison();
+#else
+      indexList.erase(entry);
+#endif
+    }
+
   };
 
 
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 76df6ac8e65b..da269859491b 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -44,13 +44,13 @@ def v4i8   : ValueType<32 , 20>;   //  4 x i8  vector value
 def v8i8   : ValueType<64 , 21>;   //  8 x i8  vector value
 def v16i8  : ValueType<128, 22>;   // 16 x i8  vector value
 def v32i8  : ValueType<256, 23>;   // 32 x i8 vector value
-def v64i8  : ValueType<256, 24>;   // 64 x i8 vector value
+def v64i8  : ValueType<512, 24>;   // 64 x i8 vector value
 def v1i16  : ValueType<16 , 25>;   //  1 x i16 vector value
 def v2i16  : ValueType<32 , 26>;   //  2 x i16 vector value
 def v4i16  : ValueType<64 , 27>;   //  4 x i16 vector value
 def v8i16  : ValueType<128, 28>;   //  8 x i16 vector value
 def v16i16 : ValueType<256, 29>;   // 16 x i16 vector value
-def v32i16 : ValueType<256, 30>;   // 32 x i16 vector value
+def v32i16 : ValueType<512, 30>;   // 32 x i16 vector value
 def v1i32  : ValueType<32 , 31>;   //  1 x i32 vector value
 def v2i32  : ValueType<64 , 32>;   //  2 x i32 vector value
 def v4i32  : ValueType<128, 33>;   //  4 x i32 vector value
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index 2f9e6ffe2351..5a3d02c553ee 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -78,6 +78,14 @@
 /* Define to 1 if you have the <ctype.h> header file. */
 #undef HAVE_CTYPE_H
 
+/* Define to 1 if you have the declaration of `FE_ALL_EXCEPT', and to 0 if you
+   don't. */
+#undef HAVE_DECL_FE_ALL_EXCEPT
+
+/* Define to 1 if you have the declaration of `FE_INEXACT', and to 0 if you
+   don't. */
+#undef HAVE_DECL_FE_INEXACT
+
 /* Define to 1 if you have the declaration of `strerror_s', and to 0 if you
    don't. */
 #undef HAVE_DECL_STRERROR_S
diff --git a/include/llvm/DIBuilder.h b/include/llvm/DIBuilder.h
index d59847558a13..bcbcd8475958 100644
--- a/include/llvm/DIBuilder.h
+++ b/include/llvm/DIBuilder.h
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines a DIBuilder that is useful for creating debugging 
+// This file defines a DIBuilder that is useful for creating debugging
 // information entries in LLVM IR form.
 //
 //===----------------------------------------------------------------------===//
@@ -82,18 +82,18 @@ namespace llvm {
     /// @param Lang     Source programming language, eg. dwarf::DW_LANG_C99
     /// @param File     File name
     /// @param Dir      Directory
-    /// @param Producer String identify producer of debugging information. 
+    /// @param Producer String identify producer of debugging information.
     ///                 Usuall this is a compiler version string.
     /// @param isOptimized A boolean flag which indicates whether optimization
     ///                    is ON or not.
-    /// @param Flags    This string lists command line options. This string is 
+    /// @param Flags    This string lists command line options. This string is
     ///                 directly embedded in debug info output which may be used
     ///                 by a tool analyzing generated debugging information.
-    /// @param RV       This indicates runtime version for languages like 
+    /// @param RV       This indicates runtime version for languages like
     ///                 Objective-C.
     /// @param SplitName The name of the file that we'll split debug info out
     ///                  into.
-    void createCompileUnit(unsigned Lang, StringRef File, StringRef Dir, 
+    void createCompileUnit(unsigned Lang, StringRef File, StringRef Dir,
                            StringRef Producer, bool isOptimized,
                            StringRef Flags, unsigned RV,
                            StringRef SplitName = StringRef());
@@ -101,14 +101,14 @@ namespace llvm {
     /// createFile - Create a file descriptor to hold debugging information
     /// for a file.
     DIFile createFile(StringRef Filename, StringRef Directory);
-                           
+
     /// createEnumerator - Create a single enumerator value.
     DIEnumerator createEnumerator(StringRef Name, uint64_t Val);
 
     /// createNullPtrType - Create C++0x nullptr type.
     DIType createNullPtrType(StringRef Name);
 
-    /// createBasicType - Create debugging information entry for a basic 
+    /// createBasicType - Create debugging information entry for a basic
     /// type.
     /// @param Name        Type name.
     /// @param SizeInBits  Size of the type.
@@ -158,7 +158,7 @@ namespace llvm {
     /// @param Ty           Original type.
     /// @param BaseTy       Base type. Ty is inherits from base.
     /// @param BaseOffset   Base offset.
-    /// @param Flags        Flags to describe inheritance attribute, 
+    /// @param Flags        Flags to describe inheritance attribute,
     ///                     e.g. private
     DIDerivedType createInheritance(DIType Ty, DIType BaseTy,
                                     uint64_t BaseOffset, unsigned Flags);
@@ -209,8 +209,8 @@ namespace llvm {
     ///                           selector.
     /// @param PropertyAttributes Objective C property attributes.
     DIType createObjCIVar(StringRef Name, DIFile File,
-                          unsigned LineNo, uint64_t SizeInBits, 
-                          uint64_t AlignInBits, uint64_t OffsetInBits, 
+                          unsigned LineNo, uint64_t SizeInBits,
+                          uint64_t AlignInBits, uint64_t OffsetInBits,
                           unsigned Flags, DIType Ty,
                           StringRef PropertyName = StringRef(),
                           StringRef PropertyGetterName = StringRef(),
@@ -229,8 +229,8 @@ namespace llvm {
     /// @param Ty           Parent type.
     /// @param PropertyNode Property associated with this ivar.
     DIType createObjCIVar(StringRef Name, DIFile File,
-                          unsigned LineNo, uint64_t SizeInBits, 
-                          uint64_t AlignInBits, uint64_t OffsetInBits, 
+                          unsigned LineNo, uint64_t SizeInBits,
+                          uint64_t AlignInBits, uint64_t OffsetInBits,
                           unsigned Flags, DIType Ty,
                           MDNode *PropertyNode);
 
@@ -249,7 +249,7 @@ namespace llvm {
                                       StringRef SetterName,
                                       unsigned PropertyAttributes,
                                       DIType Ty);
-      
+
     /// createClassType - Create debugging information entry for a class.
     /// @param Scope        Scope in which this class is defined.
     /// @param Name         class name.
@@ -261,16 +261,17 @@ namespace llvm {
     /// @param Flags        Flags to encode member attribute, e.g. private
     /// @param Elements     class members.
     /// @param VTableHolder Debug info of the base class that contains vtable
-    ///                     for this type. This is used in 
+    ///                     for this type. This is used in
     ///                     DW_AT_containing_type. See DWARF documentation
     ///                     for more info.
     /// @param TemplateParms Template type parameters.
-    DIType createClassType(DIDescriptor Scope, StringRef Name, DIFile File,
-                           unsigned LineNumber, uint64_t SizeInBits,
-                           uint64_t AlignInBits, uint64_t OffsetInBits,
-                           unsigned Flags, DIType DerivedFrom, 
-                           DIArray Elements, MDNode *VTableHolder = 0,
-                           MDNode *TemplateParms = 0);
+    DICompositeType createClassType(DIDescriptor Scope, StringRef Name,
+                                    DIFile File, unsigned LineNumber,
+                                    uint64_t SizeInBits, uint64_t AlignInBits,
+                                    uint64_t OffsetInBits, unsigned Flags,
+                                    DIType DerivedFrom, DIArray Elements,
+                                    MDNode *VTableHolder = 0,
+                                    MDNode *TemplateParms = 0);
 
     /// createStructType - Create debugging information entry for a struct.
     /// @param Scope        Scope in which this struct is defined.
@@ -345,22 +346,25 @@ namespace llvm {
     /// @param AlignInBits  Alignment.
     /// @param Ty           Element type.
     /// @param Subscripts   Subscripts.
-    DIType createVectorType(uint64_t Size, uint64_t AlignInBits, 
+    DIType createVectorType(uint64_t Size, uint64_t AlignInBits,
                             DIType Ty, DIArray Subscripts);
 
-    /// createEnumerationType - Create debugging information entry for an 
+    /// createEnumerationType - Create debugging information entry for an
     /// enumeration.
-    /// @param Scope        Scope in which this enumeration is defined.
-    /// @param Name         Union name.
-    /// @param File         File where this member is defined.
-    /// @param LineNumber   Line number.
-    /// @param SizeInBits   Member size.
-    /// @param AlignInBits  Member alignment.
-    /// @param Elements     Enumeration elements.
-    DICompositeType createEnumerationType(
-        DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
-        uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
-        DIType ClassType);
+    /// @param Scope          Scope in which this enumeration is defined.
+    /// @param Name           Union name.
+    /// @param File           File where this member is defined.
+    /// @param LineNumber     Line number.
+    /// @param SizeInBits     Member size.
+    /// @param AlignInBits    Member alignment.
+    /// @param Elements       Enumeration elements.
+    /// @param UnderlyingType Underlying type of a C++11/ObjC fixed enum.
+    DICompositeType createEnumerationType(DIDescriptor Scope, StringRef Name,
+                                          DIFile File, unsigned LineNumber,
+                                          uint64_t SizeInBits,
+                                          uint64_t AlignInBits,
+                                          DIArray Elements,
+                                          DIType UnderlyingType);
 
     /// createSubroutineType - Create subroutine type.
     /// @param File           File in which this subroutine is defined.
@@ -375,16 +379,12 @@ namespace llvm {
     /// flag set.
     DIType createObjectPointerType(DIType Ty);
 
-    /// createTemporaryType - Create a temporary forward-declared type.
-    DIType createTemporaryType();
-    DIType createTemporaryType(DIFile F);
-
     /// createForwardDecl - Create a temporary forward-declared type.
     DIType createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
                              DIFile F, unsigned Line, unsigned RuntimeLang = 0,
                              uint64_t SizeInBits = 0, uint64_t AlignInBits = 0);
 
-    /// retainType - Retain DIType in a module even if it is not referenced 
+    /// retainType - Retain DIType in a module even if it is not referenced
     /// through debug info anchors.
     void retainType(DIType T);
 
@@ -411,8 +411,21 @@ namespace llvm {
     createGlobalVariable(StringRef Name, DIFile File, unsigned LineNo,
                          DIType Ty, bool isLocalToUnit, llvm::Value *Val);
 
+    /// \brief Create a new descriptor for the specified global.
+    /// @param Name        Name of the variable.
+    /// @param LinkageName Mangled variable name.
+    /// @param File        File where this variable is defined.
+    /// @param LineNo      Line number.
+    /// @param Ty          Variable Type.
+    /// @param isLocalToUnit Boolean flag indicate whether this variable is
+    ///                      externally visible or not.
+    /// @param Val         llvm::Value of the variable.
+    DIGlobalVariable
+    createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile File,
+                         unsigned LineNo, DIType Ty, bool isLocalToUnit,
+                         llvm::Value *Val);
 
-    /// createStaticVariable - Create a new descriptor for the specified 
+    /// createStaticVariable - Create a new descriptor for the specified
     /// variable.
     /// @param Context     Variable scope.
     /// @param Name        Name of the variable.
@@ -425,13 +438,13 @@ namespace llvm {
     /// @param Val         llvm::Value of the variable.
     /// @param Decl        Reference to the corresponding declaration.
     DIGlobalVariable
-    createStaticVariable(DIDescriptor Context, StringRef Name, 
-                         StringRef LinkageName, DIFile File, unsigned LineNo, 
+    createStaticVariable(DIDescriptor Context, StringRef Name,
+                         StringRef LinkageName, DIFile File, unsigned LineNo,
                          DIType Ty, bool isLocalToUnit, llvm::Value *Val,
                          MDNode *Decl = NULL);
 
 
-    /// createLocalVariable - Create a new descriptor for the specified 
+    /// createLocalVariable - Create a new descriptor for the specified
     /// local variable.
     /// @param Tag         Dwarf TAG. Usually DW_TAG_auto_variable or
     ///                    DW_TAG_arg_variable.
@@ -508,7 +521,7 @@ namespace llvm {
     /// @param Ty            Function type.
     /// @param isLocalToUnit True if this function is not externally visible..
     /// @param isDefinition  True if this is a function definition.
-    /// @param Virtuality    Attributes describing virtualness. e.g. pure 
+    /// @param Virtuality    Attributes describing virtualness. e.g. pure
     ///                      virtual function.
     /// @param VTableIndex   Index no of this method in virtual table.
     /// @param VTableHolder  Type that holds vtable.
@@ -546,7 +559,7 @@ namespace llvm {
     /// @param File        Source file.
     DILexicalBlockFile createLexicalBlockFile(DIDescriptor Scope,
                                               DIFile File);
-    
+
     /// createLexicalBlock - This creates a descriptor for a lexical block
     /// with the specified parent context.
     /// @param Scope       Parent lexical scope.
@@ -577,16 +590,16 @@ namespace llvm {
     /// @param VarInfo      Variable's debug info descriptor.
     /// @param InsertAtEnd Location for the new intrinsic.
     Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset,
-                                         DIVariable VarInfo, 
+                                         DIVariable VarInfo,
                                          BasicBlock *InsertAtEnd);
-    
+
     /// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
     /// @param Val          llvm::Value of the variable
     /// @param Offset       Offset
     /// @param VarInfo      Variable's debug info descriptor.
     /// @param InsertBefore Location for the new intrinsic.
     Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset,
-                                         DIVariable VarInfo, 
+                                         DIVariable VarInfo,
                                          Instruction *InsertBefore);
 
   };
diff --git a/include/llvm/DebugInfo.h b/include/llvm/DebugInfo.h
index a9d5a4b7b662..9e193b70cf2d 100644
--- a/include/llvm/DebugInfo.h
+++ b/include/llvm/DebugInfo.h
@@ -177,6 +177,7 @@ namespace llvm {
       if (DbgNode && !isFile())
         DbgNode = 0;
     }
+    MDNode *getFileNode() const;
     bool Verify() const;
   };
 
@@ -188,24 +189,18 @@ namespace llvm {
     explicit DICompileUnit(const MDNode *N = 0) : DIScope(N) {}
 
     unsigned getLanguage() const { return getUnsignedField(2); }
-    StringRef getFilename() const {
-      return getFieldAs<DIFile>(3).getFilename();
-    }
-    StringRef getDirectory() const {
-      return getFieldAs<DIFile>(3).getDirectory();
-    }
-    StringRef getProducer() const { return getStringField(4); }
+    StringRef getProducer() const { return getStringField(3); }
 
-    bool isOptimized() const { return getUnsignedField(5) != 0; }
-    StringRef getFlags() const { return getStringField(6); }
-    unsigned getRunTimeVersion() const { return getUnsignedField(7); }
+    bool isOptimized() const { return getUnsignedField(4) != 0; }
+    StringRef getFlags() const { return getStringField(5); }
+    unsigned getRunTimeVersion() const { return getUnsignedField(6); }
 
     DIArray getEnumTypes() const;
     DIArray getRetainedTypes() const;
     DIArray getSubprograms() const;
     DIArray getGlobalVariables() const;
 
-    StringRef getSplitDebugFilename() const { return getStringField(12); }
+    StringRef getSplitDebugFilename() const { return getStringField(11); }
 
     /// Verify - Verify that a compile unit is well formed.
     bool Verify() const;
@@ -241,9 +236,8 @@ namespace llvm {
     explicit DIType(const MDNode *N);
     explicit DIType() {}
 
-    DIScope getContext() const          { return getFieldAs<DIScope>(1); }
-    StringRef getName() const           { return getStringField(2);     }
-    DIFile getFile() const              { return getFieldAs<DIFile>(3); }
+    DIScope getContext() const          { return getFieldAs<DIScope>(2); }
+    StringRef getName() const           { return getStringField(3);     }
     unsigned getLineNumber() const      { return getUnsignedField(4); }
     uint64_t getSizeInBits() const      { return getUInt64Field(5); }
     uint64_t getAlignInBits() const     { return getUInt64Field(6); }
@@ -288,12 +282,6 @@ namespace llvm {
     bool isValid() const {
       return DbgNode && (isBasicType() || isDerivedType() || isCompositeType());
     }
-    StringRef getDirectory() const  {
-      return getFieldAs<DIFile>(3).getDirectory();
-    }
-    StringRef getFilename() const  {
-      return getFieldAs<DIFile>(3).getFilename();
-    }
 
     /// isUnsignedDIType - Return true if type encoding is unsigned.
     bool isUnsignedDIType();
@@ -354,7 +342,10 @@ namespace llvm {
 
   /// DICompositeType - This descriptor holds a type that can refer to multiple
   /// other types, like a function or struct.
-  /// FIXME: Why is this a DIDerivedType??
+  /// DICompositeType is derived from DIDerivedType because some
+  /// composite types (such as enums) can be derived from basic types
+  // FIXME: Make this derive from DIType directly & just store the
+  // base type in a single DIType field.
   class DICompositeType : public DIDerivedType {
     friend class DIDescriptor;
     void printInternal(raw_ostream &OS) const;
@@ -366,10 +357,12 @@ namespace llvm {
     }
 
     DIArray getTypeArray() const { return getFieldAs<DIArray>(10); }
+    void setTypeArray(DIArray Elements, DIArray TParams = DIArray());
     unsigned getRunTimeLang() const { return getUnsignedField(11); }
     DICompositeType getContainingType() const {
       return getFieldAs<DICompositeType>(12);
     }
+    void setContainingType(DICompositeType ContainingType);
     DIArray getTemplateParams() const { return getFieldAs<DIArray>(13); }
 
     /// Verify - Verify that a composite type descriptor is well formed.
@@ -426,78 +419,66 @@ namespace llvm {
     StringRef getName() const         { return getStringField(3); }
     StringRef getDisplayName() const  { return getStringField(4); }
     StringRef getLinkageName() const  { return getStringField(5); }
-    unsigned getLineNumber() const      { return getUnsignedField(7); }
-    DICompositeType getType() const { return getFieldAs<DICompositeType>(8); }
+    unsigned getLineNumber() const      { return getUnsignedField(6); }
+    DICompositeType getType() const { return getFieldAs<DICompositeType>(7); }
 
     /// getReturnTypeName - Subprogram return types are encoded either as
     /// DIType or as DICompositeType.
     StringRef getReturnTypeName() const {
-      DICompositeType DCT(getFieldAs<DICompositeType>(8));
+      DICompositeType DCT(getFieldAs<DICompositeType>(7));
       if (DCT.Verify()) {
         DIArray A = DCT.getTypeArray();
         DIType T(A.getElement(0));
         return T.getName();
       }
-      DIType T(getFieldAs<DIType>(8));
+      DIType T(getFieldAs<DIType>(7));
       return T.getName();
     }
 
     /// isLocalToUnit - Return true if this subprogram is local to the current
     /// compile unit, like 'static' in C.
-    unsigned isLocalToUnit() const     { return getUnsignedField(9); }
-    unsigned isDefinition() const      { return getUnsignedField(10); }
+    unsigned isLocalToUnit() const     { return getUnsignedField(8); }
+    unsigned isDefinition() const      { return getUnsignedField(9); }
 
-    unsigned getVirtuality() const { return getUnsignedField(11); }
-    unsigned getVirtualIndex() const { return getUnsignedField(12); }
+    unsigned getVirtuality() const { return getUnsignedField(10); }
+    unsigned getVirtualIndex() const { return getUnsignedField(11); }
 
     DICompositeType getContainingType() const {
-      return getFieldAs<DICompositeType>(13);
+      return getFieldAs<DICompositeType>(12);
     }
 
     unsigned getFlags() const {
-      return getUnsignedField(14);
+      return getUnsignedField(13);
     }
 
     unsigned isArtificial() const    {
-      return (getUnsignedField(14) & FlagArtificial) != 0;
+      return (getUnsignedField(13) & FlagArtificial) != 0;
     }
     /// isPrivate - Return true if this subprogram has "private"
     /// access specifier.
     bool isPrivate() const    {
-      return (getUnsignedField(14) & FlagPrivate) != 0;
+      return (getUnsignedField(13) & FlagPrivate) != 0;
     }
     /// isProtected - Return true if this subprogram has "protected"
     /// access specifier.
     bool isProtected() const    {
-      return (getUnsignedField(14) & FlagProtected) != 0;
+      return (getUnsignedField(13) & FlagProtected) != 0;
     }
     /// isExplicit - Return true if this subprogram is marked as explicit.
     bool isExplicit() const    {
-      return (getUnsignedField(14) & FlagExplicit) != 0;
+      return (getUnsignedField(13) & FlagExplicit) != 0;
     }
     /// isPrototyped - Return true if this subprogram is prototyped.
     bool isPrototyped() const    {
-      return (getUnsignedField(14) & FlagPrototyped) != 0;
+      return (getUnsignedField(13) & FlagPrototyped) != 0;
     }
 
     unsigned isOptimized() const;
 
-    StringRef getFilename() const    {
-      return getFieldAs<DIFile>(6).getFilename();
-    }
-
-    StringRef getDirectory() const   {
-      return getFieldAs<DIFile>(6).getDirectory();
-    }
-
-    DIFile getFile() const {
-      return getFieldAs<DIFile>(6);
-    }
-
     /// getScopeLineNumber - Get the beginning of the scope of the
     /// function, not necessarily where the name of the program
     /// starts.
-    unsigned getScopeLineNumber() const { return getUnsignedField(20); }
+    unsigned getScopeLineNumber() const { return getUnsignedField(19); }
 
     /// Verify - Verify that a subprogram descriptor is well formed.
     bool Verify() const;
@@ -506,11 +487,11 @@ namespace llvm {
     /// information for the function F.
     bool describes(const Function *F);
 
-    Function *getFunction() const { return getFunctionField(16); }
-    void replaceFunction(Function *F) { replaceFunctionField(16, F); }
-    DIArray getTemplateParams() const { return getFieldAs<DIArray>(17); }
+    Function *getFunction() const { return getFunctionField(15); }
+    void replaceFunction(Function *F) { replaceFunctionField(15, F); }
+    DIArray getTemplateParams() const { return getFieldAs<DIArray>(16); }
     DISubprogram getFunctionDeclaration() const {
-      return getFieldAs<DISubprogram>(18);
+      return getFieldAs<DISubprogram>(17);
     }
     MDNode *getVariablesNodes() const;
     DIArray getVariables() const;
@@ -614,15 +595,9 @@ namespace llvm {
   class DILexicalBlock : public DIScope {
   public:
     explicit DILexicalBlock(const MDNode *N = 0) : DIScope(N) {}
-    DIScope getContext() const       { return getFieldAs<DIScope>(1);      }
-    unsigned getLineNumber() const   { return getUnsignedField(2);         }
-    unsigned getColumnNumber() const { return getUnsignedField(3);         }
-    StringRef getDirectory() const {
-      return getFieldAs<DIFile>(4).getDirectory();
-    }
-    StringRef getFilename() const {
-      return getFieldAs<DIFile>(4).getFilename();
-    }
+    DIScope getContext() const       { return getFieldAs<DIScope>(2);      }
+    unsigned getLineNumber() const   { return getUnsignedField(3);         }
+    unsigned getColumnNumber() const { return getUnsignedField(4);         }
     bool Verify() const;
   };
 
@@ -634,28 +609,18 @@ namespace llvm {
     DIScope getContext() const { if (getScope().isSubprogram()) return getScope(); return getScope().getContext(); }
     unsigned getLineNumber() const { return getScope().getLineNumber(); }
     unsigned getColumnNumber() const { return getScope().getColumnNumber(); }
-    StringRef getDirectory() const {
-      return getFieldAs<DIFile>(2).getDirectory();
-    }
-    StringRef getFilename() const {
-      return getFieldAs<DIFile>(2).getFilename();
-    }
-    DILexicalBlock getScope() const { return getFieldAs<DILexicalBlock>(1); }
+    DILexicalBlock getScope() const { return getFieldAs<DILexicalBlock>(2); }
     bool Verify() const;
   };
 
   /// DINameSpace - A wrapper for a C++ style name space.
   class DINameSpace : public DIScope {
+    friend class DIDescriptor;
+    void printInternal(raw_ostream &OS) const;
   public:
     explicit DINameSpace(const MDNode *N = 0) : DIScope(N) {}
-    DIScope getContext() const     { return getFieldAs<DIScope>(1);      }
-    StringRef getName() const      { return getStringField(2);           }
-    StringRef getDirectory() const  {
-      return getFieldAs<DIFile>(3).getDirectory();
-    }
-    StringRef getFilename() const  {
-      return getFieldAs<DIFile>(3).getFilename();
-    }
+    DIScope getContext() const     { return getFieldAs<DIScope>(2);      }
+    StringRef getName() const      { return getStringField(3);           }
     unsigned getLineNumber() const { return getUnsignedField(4);         }
     bool Verify() const;
   };
diff --git a/lib/DebugInfo/DWARFFormValue.h b/include/llvm/DebugInfo/DWARFFormValue.h
similarity index 96%
rename from lib/DebugInfo/DWARFFormValue.h
rename to include/llvm/DebugInfo/DWARFFormValue.h
index b863001e4af8..eaaccfb4f33b 100644
--- a/lib/DebugInfo/DWARFFormValue.h
+++ b/include/llvm/DebugInfo/DWARFFormValue.h
@@ -74,7 +74,7 @@ class DWARFFormValue {
                         uint32_t *offset_ptr, const DWARFCompileUnit *cu);
   static bool isBlockForm(uint16_t form);
   static bool isDataForm(uint16_t form);
-  static const uint8_t *getFixedFormSizesForAddressSize(uint8_t addr_size);
+  static const uint8_t *getFixedFormSizes(uint8_t AddrSize, uint16_t Version);
 };
 
 }
diff --git a/include/llvm/ExecutionEngine/GenericValue.h b/include/llvm/ExecutionEngine/GenericValue.h
index 21b99421b32d..0e92f79eba8f 100644
--- a/include/llvm/ExecutionEngine/GenericValue.h
+++ b/include/llvm/ExecutionEngine/GenericValue.h
@@ -35,14 +35,19 @@ struct GenericValue {
     struct IntPair  UIntPairVal;
     unsigned char   Untyped[8];
   };
-  APInt IntVal;   // also used for long doubles
-
-  GenericValue() : DoubleVal(0.0), IntVal(1,0) {}
+  APInt IntVal;   // also used for long doubles.
+  // For aggregate data types.
+  std::vector<GenericValue> AggregateVal;
+
+  // to make code faster, set GenericValue to zero could be omitted, but it is
+  // potentially can cause problems, since GenericValue to store garbage
+  // instead of zero.
+  GenericValue() : IntVal(1,0) {UIntPairVal.first = 0; UIntPairVal.second = 0;}
   explicit GenericValue(void *V) : PointerVal(V), IntVal(1,0) { }
 };
 
 inline GenericValue PTOGV(void *P) { return GenericValue(P); }
 inline void* GVTOP(const GenericValue &GV) { return GV.PointerVal; }
 
-} // End llvm namespace
+} // End llvm namespace.
 #endif
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
index 074b38779ae8..bca77c680945 100644
--- a/include/llvm/IR/Attributes.h
+++ b/include/llvm/IR/Attributes.h
@@ -68,6 +68,7 @@ class Attribute {
                            ///< 0 means unaligned (different from align(1))
     AlwaysInline,          ///< inline=always
     ByVal,                 ///< Pass structure by value
+    FixedStackSegment,     ///< Fixed-size stack segment
     InlineHint,            ///< Source said inlining was desirable
     InReg,                 ///< Force argument to be passed in register
     MinSize,               ///< Function must be optimized for size first
@@ -209,7 +210,7 @@ class AttributeSet {
   AttributeSetImpl *pImpl;
 
   /// \brief The attributes for the specified index are returned.
-  AttributeSetNode *getAttributes(unsigned Idx) const;
+  AttributeSetNode *getAttributes(unsigned Index) const;
 
   /// \brief Create an AttributeSet with the specified parameters in it.
   static AttributeSet get(LLVMContext &C,
@@ -233,35 +234,35 @@ class AttributeSet {
 
   /// \brief Return an AttributeSet with the specified parameters in it.
   static AttributeSet get(LLVMContext &C, ArrayRef<AttributeSet> Attrs);
-  static AttributeSet get(LLVMContext &C, unsigned Idx,
+  static AttributeSet get(LLVMContext &C, unsigned Index,
                           ArrayRef<Attribute::AttrKind> Kind);
-  static AttributeSet get(LLVMContext &C, unsigned Idx, AttrBuilder &B);
+  static AttributeSet get(LLVMContext &C, unsigned Index, AttrBuilder &B);
 
   /// \brief Add an attribute to the attribute set at the given index. Since
   /// attribute sets are immutable, this returns a new set.
-  AttributeSet addAttribute(LLVMContext &C, unsigned Idx,
+  AttributeSet addAttribute(LLVMContext &C, unsigned Index,
                             Attribute::AttrKind Attr) const;
 
   /// \brief Add an attribute to the attribute set at the given index. Since
   /// attribute sets are immutable, this returns a new set.
-  AttributeSet addAttribute(LLVMContext &C, unsigned Idx,
+  AttributeSet addAttribute(LLVMContext &C, unsigned Index,
                             StringRef Kind) const;
 
   /// \brief Add attributes to the attribute set at the given index. Since
   /// attribute sets are immutable, this returns a new set.
-  AttributeSet addAttributes(LLVMContext &C, unsigned Idx,
+  AttributeSet addAttributes(LLVMContext &C, unsigned Index,
                              AttributeSet Attrs) const;
 
   /// \brief Remove the specified attribute at the specified index from this
   /// attribute list. Since attribute lists are immutable, this returns the new
   /// list.
-  AttributeSet removeAttribute(LLVMContext &C, unsigned Idx, 
+  AttributeSet removeAttribute(LLVMContext &C, unsigned Index, 
                                Attribute::AttrKind Attr) const;
 
   /// \brief Remove the specified attributes at the specified index from this
   /// attribute list. Since attribute lists are immutable, this returns the new
   /// list.
-  AttributeSet removeAttributes(LLVMContext &C, unsigned Idx, 
+  AttributeSet removeAttributes(LLVMContext &C, unsigned Index, 
                                 AttributeSet Attrs) const;
 
   //===--------------------------------------------------------------------===//
@@ -272,7 +273,7 @@ class AttributeSet {
   LLVMContext &getContext() const;
 
   /// \brief The attributes for the specified index are returned.
-  AttributeSet getParamAttributes(unsigned Idx) const;
+  AttributeSet getParamAttributes(unsigned Index) const;
 
   /// \brief The attributes for the ret value are returned.
   AttributeSet getRetAttributes() const;
@@ -300,18 +301,19 @@ class AttributeSet {
   Attribute getAttribute(unsigned Index, StringRef Kind) const;
 
   /// \brief Return the alignment for the specified function parameter.
-  unsigned getParamAlignment(unsigned Idx) const;
+  unsigned getParamAlignment(unsigned Index) const;
 
   /// \brief Get the stack alignment.
   unsigned getStackAlignment(unsigned Index) const;
 
   /// \brief Return the attributes at the index as a string.
-  std::string getAsString(unsigned Index, bool InAttrGrp = false) const;
+  std::string getAsString(unsigned Index, bool TargetIndependent = true,
+                          bool InAttrGrp = false) const;
 
   typedef ArrayRef<Attribute>::iterator iterator;
 
-  iterator begin(unsigned Idx) const;
-  iterator end(unsigned Idx) const;
+  iterator begin(unsigned Slot) const;
+  iterator end(unsigned Slot) const;
 
   /// operator==/!= - Provide equality predicates.
   bool operator==(const AttributeSet &RHS) const {
@@ -473,9 +475,6 @@ class AttrBuilder {
 
   bool td_empty() const              { return TargetDepAttrs.empty(); }
 
-  /// \brief Remove attributes that are used on functions only.
-  void removeFunctionOnlyAttrs();
-
   bool operator==(const AttrBuilder &B);
   bool operator!=(const AttrBuilder &B) {
     return !(*this == B);
diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h
index 4d3016c72f22..b0def6be3f4d 100644
--- a/include/llvm/IR/DataLayout.h
+++ b/include/llvm/IR/DataLayout.h
@@ -22,6 +22,8 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/DataTypes.h"
 
@@ -169,13 +171,13 @@ class DataLayout : public ImmutablePass {
   /// Initialize target data from properties stored in the module.
   explicit DataLayout(const Module *M);
 
-  DataLayout(const DataLayout &TD) :
+  DataLayout(const DataLayout &DL) :
     ImmutablePass(ID),
-    LittleEndian(TD.isLittleEndian()),
-    StackNaturalAlign(TD.StackNaturalAlign),
-    LegalIntWidths(TD.LegalIntWidths),
-    Alignments(TD.Alignments),
-    Pointers(TD.Pointers),
+    LittleEndian(DL.isLittleEndian()),
+    StackNaturalAlign(DL.StackNaturalAlign),
+    LegalIntWidths(DL.LegalIntWidths),
+    Alignments(DL.Alignments),
+    Pointers(DL.Pointers),
     LayoutMap(0)
   { }
 
@@ -350,6 +352,10 @@ class DataLayout : public ImmutablePass {
   /// type.
   Type *getIntPtrType(Type *) const;
 
+  /// getSmallestLegalIntType - Return the smallest integer type with size at
+  /// least as big as Width bits.
+  Type *getSmallestLegalIntType(LLVMContext &C, unsigned Width = 0) const;
+
   /// getIndexedOffset - return the offset from the beginning of the type for
   /// the specified indices.  This is used to implement getelementptr.
   uint64_t getIndexedOffset(Type *Ty, ArrayRef<Value *> Indices) const;
@@ -420,9 +426,52 @@ class StructLayout {
 
 private:
   friend class DataLayout;   // Only DataLayout can create this class
-  StructLayout(StructType *ST, const DataLayout &TD);
+  StructLayout(StructType *ST, const DataLayout &DL);
 };
 
+
+// The implementation of this method is provided inline as it is particularly
+// well suited to constant folding when called on a specific Type subclass.
+inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
+  assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+  switch (Ty->getTypeID()) {
+  case Type::LabelTyID:
+    return getPointerSizeInBits(0);
+  case Type::PointerTyID:
+    return getPointerSizeInBits(cast<PointerType>(Ty)->getAddressSpace());
+  case Type::ArrayTyID: {
+    ArrayType *ATy = cast<ArrayType>(Ty);
+    return ATy->getNumElements() *
+           getTypeAllocSizeInBits(ATy->getElementType());
+  }
+  case Type::StructTyID:
+    // Get the layout annotation... which is lazily created on demand.
+    return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
+  case Type::IntegerTyID:
+    return cast<IntegerType>(Ty)->getBitWidth();
+  case Type::HalfTyID:
+    return 16;
+  case Type::FloatTyID:
+    return 32;
+  case Type::DoubleTyID:
+  case Type::X86_MMXTyID:
+    return 64;
+  case Type::PPC_FP128TyID:
+  case Type::FP128TyID:
+    return 128;
+    // In memory objects this is always aligned to a higher boundary, but
+  // only 80 bits contain information.
+  case Type::X86_FP80TyID:
+    return 80;
+  case Type::VectorTyID: {
+    VectorType *VTy = cast<VectorType>(Ty);
+    return VTy->getNumElements() * getTypeSizeInBits(VTy->getElementType());
+  }
+  default:
+    llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type");
+  }
+}
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index cde39ccd3c52..5664f7925027 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -117,28 +117,33 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
   // Loads.  These don't map directly to GCC builtins because they represent the
   // source address with a single pointer.
   def int_ppc_altivec_lvx :
-              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
   def int_ppc_altivec_lvxl :
-              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
   def int_ppc_altivec_lvebx :
-              Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
   def int_ppc_altivec_lvehx :
-              Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
   def int_ppc_altivec_lvewx :
-              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
 
   // Stores.  These don't map directly to GCC builtins because they represent the
   // source address with a single pointer.
   def int_ppc_altivec_stvx :
-              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
+                        [IntrReadWriteArgMem]>;
   def int_ppc_altivec_stvxl :
-              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
+                        [IntrReadWriteArgMem]>;
   def int_ppc_altivec_stvebx :
-              Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty],
+                        [IntrReadWriteArgMem]>;
   def int_ppc_altivec_stvehx :
-              Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty],
+                        [IntrReadWriteArgMem]>;
   def int_ppc_altivec_stvewx :
-              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
+                        [IntrReadWriteArgMem]>;
 
   // Comparisons setting a vector.
   def int_ppc_altivec_vcmpbfp : GCCBuiltin<"__builtin_altivec_vcmpbfp">,
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index d2463c0efa14..69e0ab4fa2ed 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -2550,7 +2550,9 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 }
 
 //===----------------------------------------------------------------------===//
-// RDRAND intrinsics. Return a random value and whether it is valid.
+// RDRAND intrinsics - Return a random value and whether it is valid.
+// RDSEED intrinsics - Return a NIST SP800-90B & C compliant random value and
+// whether it is valid.
 
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   // These are declared side-effecting so they don't get eliminated by CSE or
@@ -2558,6 +2560,9 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_rdrand_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>;
   def int_x86_rdrand_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>;
   def int_x86_rdrand_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
+  def int_x86_rdseed_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>;
+  def int_x86_rdseed_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>;
+  def int_x86_rdseed_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2570,4 +2575,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[], [], []>;
   def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">,
               Intrinsic<[], [llvm_i8_ty], [IntrNoReturn]>;
+  def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">,
+              Intrinsic<[llvm_i32_ty], [], []>;
 }
diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h
index 604051b90a5f..074db78a767a 100644
--- a/include/llvm/IR/MDBuilder.h
+++ b/include/llvm/IR/MDBuilder.h
@@ -156,6 +156,41 @@ class MDBuilder {
     return MDNode::get(Context, Vals);
   }
 
+  /// \brief Return metadata for a TBAA struct node in the type DAG
+  /// with the given name, a list of pairs (offset, field type in the type DAG).
+  MDNode *createTBAAStructTypeNode(StringRef Name,
+             ArrayRef<std::pair<uint64_t, MDNode*> > Fields) {
+    SmallVector<Value *, 4> Ops(Fields.size() * 2 + 1);
+    Type *Int64 = IntegerType::get(Context, 64);
+    Ops[0] = createString(Name);
+    for (unsigned i = 0, e = Fields.size(); i != e; ++i) {
+      Ops[i * 2 + 1] = ConstantInt::get(Int64, Fields[i].first);
+      Ops[i * 2 + 2] = Fields[i].second;
+    }
+    return MDNode::get(Context, Ops);
+  }
+
+  /// \brief Return metadata for a TBAA scalar type node with the
+  /// given name, an offset and a parent in the TBAA type DAG.
+  MDNode *createTBAAScalarTypeNode(StringRef Name, uint64_t Offset,
+                                   MDNode *Parent) {
+    SmallVector<Value *, 4> Ops(3);
+    Type *Int64 = IntegerType::get(Context, 64);
+    Ops[0] = createString(Name);
+    Ops[1] = ConstantInt::get(Int64, Offset);
+    Ops[2] = Parent;
+    return MDNode::get(Context, Ops);
+  }
+
+  /// \brief Return metadata for a TBAA tag node with the given
+  /// base type, access type and offset relative to the base type.
+  MDNode *createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType,
+                                  uint64_t Offset) {
+    Type *Int64 = IntegerType::get(Context, 64);
+    Value *Ops[3] = { BaseType, AccessType, ConstantInt::get(Int64, Offset) };
+    return MDNode::get(Context, Ops);
+  }
+
 };
 
 } // end namespace llvm
diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h
index 33a69c468665..4bc7ce500058 100644
--- a/include/llvm/IR/Use.h
+++ b/include/llvm/IR/Use.h
@@ -149,14 +149,14 @@ class Use {
 // casting operators.
 template<> struct simplify_type<Use> {
   typedef Value* SimpleType;
-  static SimpleType getSimplifiedValue(const Use &Val) {
-    return static_cast<SimpleType>(Val.get());
+  static SimpleType getSimplifiedValue(Use &Val) {
+    return Val.get();
   }
 };
 template<> struct simplify_type<const Use> {
-  typedef Value* SimpleType;
+  typedef /*const*/ Value* SimpleType;
   static SimpleType getSimplifiedValue(const Use &Val) {
-    return static_cast<SimpleType>(Val.get());
+    return Val.get();
   }
 };
 
diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h
index a4d0a5d7df03..505bdeb178e9 100644
--- a/include/llvm/IR/User.h
+++ b/include/llvm/IR/User.h
@@ -183,27 +183,17 @@ class User : public Value {
 
 template<> struct simplify_type<User::op_iterator> {
   typedef Value* SimpleType;
-
-  static SimpleType getSimplifiedValue(const User::op_iterator &Val) {
-    return static_cast<SimpleType>(Val->get());
+  static SimpleType getSimplifiedValue(User::op_iterator &Val) {
+    return Val->get();
   }
 };
-
-template<> struct simplify_type<const User::op_iterator>
-  : public simplify_type<User::op_iterator> {};
-
 template<> struct simplify_type<User::const_op_iterator> {
-  typedef Value* SimpleType;
-
-  static SimpleType getSimplifiedValue(const User::const_op_iterator &Val) {
-    return static_cast<SimpleType>(Val->get());
+  typedef /*const*/ Value* SimpleType;
+  static SimpleType getSimplifiedValue(User::const_op_iterator &Val) {
+    return Val->get();
   }
 };
 
-template<> struct simplify_type<const User::const_op_iterator>
-  : public simplify_type<User::const_op_iterator> {};
-
-
 // value_use_iterator::getOperandNo - Requires the definition of the User class.
 template<typename UserTy>
 unsigned value_use_iterator<UserTy>::getOperandNo() const {
diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h
new file mode 100644
index 000000000000..e2ae5f7164b2
--- /dev/null
+++ b/include/llvm/IRReader/IRReader.h
@@ -0,0 +1,55 @@
+//===---- llvm/IRReader/IRReader.h - Reader for LLVM IR files ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions for reading LLVM IR. They support both
+// Bitcode and Assembly, automatically detecting the input format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IRREADER_IRREADER_H
+#define LLVM_IRREADER_IRREADER_H
+
+#include <string>
+
+namespace llvm {
+
+class Module;
+class MemoryBuffer;
+class SMDiagnostic;
+class LLVMContext;
+
+/// If the given MemoryBuffer holds a bitcode image, return a Module for it
+/// which does lazy deserialization of function bodies.  Otherwise, attempt to
+/// parse it as LLVM Assembly and return a fully populated Module. This
+/// function *always* takes ownership of the given MemoryBuffer.
+Module *getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err,
+                        LLVMContext &Context);
+
+/// If the given file holds a bitcode image, return a Module
+/// for it which does lazy deserialization of function bodies.  Otherwise,
+/// attempt to parse it as LLVM Assembly and return a fully populated
+/// Module.
+Module *getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err,
+                            LLVMContext &Context);
+
+/// If the given MemoryBuffer holds a bitcode image, return a Module
+/// for it.  Otherwise, attempt to parse it as LLVM Assembly and return
+/// a Module for it. This function *always* takes ownership of the given
+/// MemoryBuffer.
+Module *ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, LLVMContext &Context);
+
+/// If the given file holds a bitcode image, return a Module for it.
+/// Otherwise, attempt to parse it as LLVM Assembly and return a Module
+/// for it.
+Module *ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
+                    LLVMContext &Context);
+
+}
+
+#endif
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index a6dbb0c7d2be..5b2cd603c3f9 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -91,7 +91,6 @@ void initializeCFGViewerPass(PassRegistry&);
 void initializeCalculateSpillWeightsPass(PassRegistry&);
 void initializeCallGraphAnalysisGroup(PassRegistry&);
 void initializeCodeGenPreparePass(PassRegistry&);
-void initializeCodePlacementOptPass(PassRegistry&);
 void initializeConstantMergePass(PassRegistry&);
 void initializeConstantPropagationPass(PassRegistry&);
 void initializeMachineCopyPropagationPass(PassRegistry&);
@@ -272,6 +271,7 @@ void initializeInstSimplifierPass(PassRegistry&);
 void initializeUnpackMachineBundlesPass(PassRegistry&);
 void initializeFinalizeMachineBundlesPass(PassRegistry&);
 void initializeLoopVectorizePass(PassRegistry&);
+void initializeSLPVectorizerPass(PassRegistry&);
 void initializeBBVectorizePass(PassRegistry&);
 void initializeMachineFunctionPrinterPassPass(PassRegistry&);
 }
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 1f017e471de5..ca1c13924e96 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -161,6 +161,7 @@ namespace {
       (void) llvm::createMemDepPrinter();
       (void) llvm::createInstructionSimplifierPass();
       (void) llvm::createLoopVectorizePass();
+      (void) llvm::createSLPVectorizerPass();
       (void) llvm::createBBVectorizePass();
 
       (void)new llvm::IntervalPartition();
diff --git a/include/llvm/Linker.h b/include/llvm/Linker.h
index 8bf9efa8e2c7..679638427d67 100644
--- a/include/llvm/Linker.h
+++ b/include/llvm/Linker.h
@@ -6,10 +6,6 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the interface to the module/file/archive linker.
-//
-//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LINKER_H
 #define LLVM_LINKER_H
@@ -19,7 +15,6 @@
 #include <vector>
 
 namespace llvm {
-  namespace sys { class Path; }
 
 class Module;
 class LLVMContext;
@@ -31,8 +26,7 @@ class StringRef;
 /// In this case the Linker still retains ownership of the Module. If the
 /// releaseModule() method is used, the ownership of the Module is transferred
 /// to the caller and the Linker object is only suitable for destruction.
-/// The Linker can link Modules from memory.  It retains a set of search paths
-/// in which to find any libraries presented to it. By default, the linker
+/// The Linker can link Modules from memory. By default, the linker
 /// will generate error and warning messages to stderr but this capability can
 /// be turned off with the QuietWarnings and QuietErrors flags. It can also be
 /// instructed to verbosely print out the linking actions it is taking with
@@ -96,16 +90,10 @@ class Linker {
     /// must arrange for its destruct. After this method is called, the Linker
     /// terminates the linking session for the returned Module. It will no
     /// longer utilize the returned Module but instead resets itself for
-    /// subsequent linking as if the constructor had been called. The Linker's
-    /// LibPaths and flags to be reset, and memory will be released.
+    /// subsequent linking as if the constructor had been called.
     /// @brief Release the linked/composite module.
     Module* releaseModule();
 
-    /// This method gets the list of libraries that form the path that the
-    /// Linker will search when it is presented with a library name.
-    /// @brief Get the Linkers library path
-    const std::vector<sys::Path>& getLibPaths() const { return LibPaths; }
-
     /// This method returns an error string suitable for printing to the user.
     /// The return value will be empty unless an error occurred in one of the
     /// LinkIn* methods. In those cases, the LinkIn* methods will have returned
@@ -120,31 +108,6 @@ class Linker {
   /// @name Mutators
   /// @{
   public:
-    /// Add a path to the list of paths that the Linker will search. The Linker
-    /// accumulates the set of libraries added
-    /// library paths for the target platform. The standard libraries will
-    /// always be searched last. The added libraries will be searched in the
-    /// order added.
-    /// @brief Add a path.
-    void addPath(const sys::Path& path);
-
-    /// Add a set of paths to the list of paths that the linker will search. The
-    /// Linker accumulates the set of libraries added. The \p paths will be
-    /// added to the end of the Linker's list. Order will be retained.
-    /// @brief Add a set of paths.
-    void addPaths(const std::vector<std::string>& paths);
-
-    /// This method augments the Linker's list of library paths with the system
-    /// paths of the host operating system, include LLVM_LIB_SEARCH_PATH.
-    /// @brief Add the system paths.
-    void addSystemPaths();
-
-    /// Control optional linker behavior by setting a group of flags. The flags
-    /// are defined in the ControlFlags enumeration.
-    /// @see ControlFlags
-    /// @brief Set control flags.
-    void setFlags(unsigned flags) { Flags = flags; }
-
     /// This method links the \p Src module into the Linker's Composite module
     /// by calling LinkModules.
     /// @see LinkModules
@@ -185,7 +148,6 @@ class Linker {
   private:
     LLVMContext& Context; ///< The context for global information
     Module* Composite; ///< The composite module linked together
-    std::vector<sys::Path> LibPaths; ///< The library search paths
     unsigned Flags;    ///< Flags to control optional behavior.
     std::string Error; ///< Text of error that occurred.
     std::string ProgramName; ///< Name of the program being linked
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 43fbdc9301ac..38a70f0adf01 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -451,7 +451,7 @@ class MCLEBFragment : public MCFragment {
 
   SmallString<8> Contents;
 public:
-  MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSectionData *SD)
+  MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSectionData *SD = 0)
     : MCFragment(FT_LEB, SD),
       Value(&Value_), IsSigned(IsSigned_) { Contents.push_back(0); }
 
@@ -487,7 +487,7 @@ class MCDwarfLineAddrFragment : public MCFragment {
 
 public:
   MCDwarfLineAddrFragment(int64_t _LineDelta, const MCExpr &_AddrDelta,
-                      MCSectionData *SD)
+                      MCSectionData *SD = 0)
     : MCFragment(FT_Dwarf, SD),
       LineDelta(_LineDelta), AddrDelta(&_AddrDelta) { Contents.push_back(0); }
 
@@ -518,7 +518,7 @@ class MCDwarfCallFrameFragment : public MCFragment {
   SmallString<8> Contents;
 
 public:
-  MCDwarfCallFrameFragment(const MCExpr &_AddrDelta,  MCSectionData *SD)
+  MCDwarfCallFrameFragment(const MCExpr &_AddrDelta,  MCSectionData *SD = 0)
     : MCFragment(FT_DwarfFrame, SD),
       AddrDelta(&_AddrDelta) { Contents.push_back(0); }
 
@@ -590,6 +590,10 @@ class MCSectionData : public ilist_node<MCSectionData> {
   /// it.
   unsigned HasInstructions : 1;
 
+  /// Mapping from subsection number to insertion point for subsection numbers
+  /// below that number.
+  SmallVector<std::pair<unsigned, MCFragment *>, 1> SubsectionFragmentMap;
+
   /// @}
 
 public:
@@ -633,6 +637,8 @@ class MCSectionData : public ilist_node<MCSectionData> {
 
   bool empty() const { return Fragments.empty(); }
 
+  iterator getSubsectionInsertionPoint(unsigned Subsection);
+
   bool isBundleLocked() const {
     return BundleLockState != NotBundleLocked;
   }
diff --git a/include/llvm/MC/MCAtom.h b/include/llvm/MC/MCAtom.h
index 682cf7cd76c6..ae5bf0bc2069 100644
--- a/include/llvm/MC/MCAtom.h
+++ b/include/llvm/MC/MCAtom.h
@@ -46,8 +46,8 @@ class MCAtom {
     : Type(T), Parent(P), Begin(B), End(E) { }
 
 public:
-  bool isTextAtom() { return Type == TextAtom; }
-  bool isDataAtom() { return Type == DataAtom; }
+  bool isTextAtom() const { return Type == TextAtom; }
+  bool isDataAtom() const { return Type == DataAtom; }
 
   void addInst(const MCInst &I, uint64_t Address, unsigned Size);
   void addData(const MCData &D);
diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h
index 6fb2d22be2e7..55c05b0363dd 100644
--- a/include/llvm/MC/MCELFStreamer.h
+++ b/include/llvm/MC/MCELFStreamer.h
@@ -50,7 +50,8 @@ class MCELFStreamer : public MCObjectStreamer {
 
   virtual void InitSections();
   virtual void InitToTextSection();
-  virtual void ChangeSection(const MCSection *Section);
+  virtual void ChangeSection(const MCSection *Section,
+                             const MCExpr *Subsection);
   virtual void EmitLabel(MCSymbol *Symbol);
   virtual void EmitDebugLabel(MCSymbol *Symbol);
   virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index b5bfed18eca4..a2c5bd3f7661 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -216,7 +216,9 @@ class MCSymbolRefExpr : public MCExpr {
     VK_Mips_GOT_HI16,
     VK_Mips_GOT_LO16,
     VK_Mips_CALL_HI16,
-    VK_Mips_CALL_LO16
+    VK_Mips_CALL_LO16,
+
+    VK_COFF_IMGREL32 // symbol@imgrel (image-relative)
   };
 
 private:
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index e91c6a2e8ee7..4766815da556 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -171,7 +171,7 @@ class MCInst {
   void clear() { Operands.clear(); }
   size_t size() { return Operands.size(); }
 
-  typedef SmallVector<MCOperand, 8>::iterator iterator;
+  typedef SmallVectorImpl<MCOperand>::iterator iterator;
   iterator begin() { return Operands.begin(); }
   iterator end()   { return Operands.end();   }
   iterator insert(iterator I, const MCOperand &Op) {
diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h
index c8d748420e31..a5853b602c87 100644
--- a/include/llvm/MC/MCObjectFileInfo.h
+++ b/include/llvm/MC/MCObjectFileInfo.h
@@ -46,10 +46,15 @@ class MCObjectFileInfo {
   unsigned FDEEncoding;
   unsigned FDECFIEncoding;
   unsigned TTypeEncoding;
-  // Section flags for eh_frame
+
+  /// Section flags for eh_frame
   unsigned EHSectionType;
   unsigned EHSectionFlags;
 
+  /// CompactUnwindDwarfEHFrameOnly - Compact unwind encoding indicating that we
+  /// should emit only an EH frame.
+  unsigned CompactUnwindDwarfEHFrameOnly;
+
   /// TextSection - Section directive for standard text.
   ///
   const MCSection *TextSection;
@@ -201,6 +206,10 @@ class MCObjectFileInfo {
   }
   unsigned getTTypeEncoding() const { return TTypeEncoding; }
 
+  unsigned getCompactUnwindDwarfEHFrameOnly() const {
+    return CompactUnwindDwarfEHFrameOnly;
+  }
+
   const MCSection *getTextSection() const { return TextSection; }
   const MCSection *getDataSection() const { return DataSection; }
   const MCSection *getBSSSection() const { return BSSSection; }
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index f06c49ff082a..22a283986975 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_MC_MCOBJECTSTREAMER_H
 #define LLVM_MC_MCOBJECTSTREAMER_H
 
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCStreamer.h"
 
 namespace llvm {
@@ -32,6 +33,7 @@ class raw_ostream;
 class MCObjectStreamer : public MCStreamer {
   MCAssembler *Assembler;
   MCSectionData *CurSectionData;
+  MCSectionData::iterator CurInsertionPoint;
 
   virtual void EmitInstToData(const MCInst &Inst) = 0;
   virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
@@ -56,6 +58,11 @@ class MCObjectStreamer : public MCStreamer {
 
   MCFragment *getCurrentFragment() const;
 
+  void insert(MCFragment *F) const {
+    CurSectionData->getFragmentList().insert(CurInsertionPoint, F);
+    F->setParent(CurSectionData);
+  }
+
   /// Get a data fragment to write into, creating a new one if the current
   /// fragment is not a data fragment.
   MCDataFragment *getOrCreateDataFragment() const;
@@ -76,7 +83,8 @@ class MCObjectStreamer : public MCStreamer {
   virtual void EmitULEB128Value(const MCExpr *Value);
   virtual void EmitSLEB128Value(const MCExpr *Value);
   virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
-  virtual void ChangeSection(const MCSection *Section);
+  virtual void ChangeSection(const MCSection *Section,
+                             const MCExpr *Subsection);
   virtual void EmitInstruction(const MCInst &Inst);
 
   /// \brief Emit an instruction to a special fragment, because this instruction
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index d7e3902ac478..601f8f7734b4 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -151,6 +151,13 @@ class MCAsmParser {
   virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0;
   bool parseExpression(const MCExpr *&Res);
 
+  /// parsePrimaryExpr - Parse a primary expression.
+  ///
+  /// @param Res - The value of the expression. The result is undefined
+  /// on error.
+  /// @result - False on success.
+  virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) = 0;
+
   /// parseParenExpression - Parse an arbitrary expression, assuming that an
   /// initial '(' has already been consumed.
   ///
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index c78cd976f2ad..88d13e348dc2 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -37,15 +37,7 @@ class MCParsedAsmOperand {
   void setMCOperandNum (unsigned OpNum) { MCOperandNum = OpNum; }
   unsigned getMCOperandNum() { return MCOperandNum; }
 
-  unsigned getNameLen() {
-    assert (getStartLoc().isValid() && "Invalid StartLoc!");
-    assert (getEndLoc().isValid() && "Invalid EndLoc!");
-    return getEndLoc().getPointer() - getStartLoc().getPointer();
-  }
-
-  StringRef getName() {
-    return StringRef(getStartLoc().getPointer(), getNameLen());
-  }
+  virtual StringRef getSymName() { return StringRef(); }
 
   /// isToken - Is this a token operand?
   virtual bool isToken() const = 0;
@@ -57,19 +49,12 @@ class MCParsedAsmOperand {
 
   /// isMem - Is this a memory operand?
   virtual bool isMem() const = 0;
-  virtual unsigned getMemSize() const { return 0; }
 
   /// getStartLoc - Get the location of the first token of this operand.
   virtual SMLoc getStartLoc() const = 0;
   /// getEndLoc - Get the location of the last token of this operand.
   virtual SMLoc getEndLoc() const = 0;
 
-  /// needAsmRewrite - AsmRewrites happen in both the target-independent and
-  /// target-dependent parsers.  The target-independent parser calls this
-  /// function to determine if the target-dependent parser has already taken
-  /// care of the rewrites.  Only valid when parsing MS-style inline assembly.
-  virtual bool needAsmRewrite() const { return true; }
-
   /// needAddressOf - Do we need to emit code to get the address of the
   /// variable/label?   Only valid when parsing MS-style inline assembly.
   virtual bool needAddressOf() const { return false; }
@@ -82,10 +67,6 @@ class MCParsedAsmOperand {
   /// getOffsetOfLoc - Get the location of the offset operator.
   virtual SMLoc getOffsetOfLoc() const { return SMLoc(); }
 
-  /// needSizeDirective - Do we need to emit a sizing directive for this
-  /// operand?  Only valid when parsing MS-style inline assembly.
-  virtual bool needSizeDirective() const { return false; }
-
   /// print - Print a debug representation of the operand to the given stream.
   virtual void print(raw_ostream &OS) const = 0;
   /// dump - Print to the debug stream.
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index e5754249e91b..de2678adad61 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -20,6 +20,7 @@
 
 namespace llvm {
   class MCAsmInfo;
+  class MCExpr;
   class raw_ostream;
 
   /// MCSection - Instances of this class represent a uniqued identifier for a
@@ -48,7 +49,8 @@ namespace llvm {
     SectionVariant getVariant() const { return Variant; }
 
     virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
-                                      raw_ostream &OS) const = 0;
+                                      raw_ostream &OS,
+                                      const MCExpr *Subsection) const = 0;
 
     // Convenience routines to get label names for the beginning/end of a
     // section.
diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h
index 07c47144cbdc..50e33a5b040a 100644
--- a/include/llvm/MC/MCSectionCOFF.h
+++ b/include/llvm/MC/MCSectionCOFF.h
@@ -60,7 +60,8 @@ namespace llvm {
     int getSelection () const { return Selection; }
 
     virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
-                                      raw_ostream &OS) const;
+                                      raw_ostream &OS,
+                                      const MCExpr *Subsection) const;
     virtual bool UseCodeAlign() const;
     virtual bool isVirtualSection() const;
 
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 4b8b849c79ed..59799158ad3c 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -70,7 +70,8 @@ class MCSectionELF : public MCSection {
   const MCSymbol *getGroup() const { return Group; }
 
   void PrintSwitchToSection(const MCAsmInfo &MAI,
-                            raw_ostream &OS) const;
+                            raw_ostream &OS,
+                            const MCExpr *Subsection) const;
   virtual bool UseCodeAlign() const;
   virtual bool isVirtualSection() const;
 
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 898f5714907f..b68bd8596801 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -175,7 +175,8 @@ class MCSectionMachO : public MCSection {
                                            unsigned  &StubSize); // Out.
 
   virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
-                                    raw_ostream &OS) const;
+                                    raw_ostream &OS,
+                                    const MCExpr *Subsection) const;
   virtual bool UseCodeAlign() const;
   virtual bool isVirtualSection() const;
 
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index a069a2b0cafa..2cab481c3d8d 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -37,6 +37,8 @@ namespace llvm {
   class raw_ostream;
   class formatted_raw_ostream;
 
+  typedef std::pair<const MCSection *, const MCExpr *> MCSectionSubPair;
+
   /// MCStreamer - Streaming machine code generation interface.  This interface
   /// is intended to provide a programatic interface that is very similar to the
   /// level that an assembler .s file provides.  It has callbacks to emit bytes,
@@ -86,8 +88,7 @@ namespace llvm {
 
     /// SectionStack - This is stack of current and previous section
     /// values saved by PushSection.
-    SmallVector<std::pair<const MCSection *,
-                const MCSection *>, 4> SectionStack;
+    SmallVector<std::pair<MCSectionSubPair, MCSectionSubPair>, 4> SectionStack;
 
     bool AutoInitSections;
 
@@ -174,25 +175,25 @@ namespace llvm {
 
     /// getCurrentSection - Return the current section that the streamer is
     /// emitting code to.
-    const MCSection *getCurrentSection() const {
+    MCSectionSubPair getCurrentSection() const {
       if (!SectionStack.empty())
         return SectionStack.back().first;
-      return NULL;
+      return MCSectionSubPair();
     }
 
     /// getPreviousSection - Return the previous section that the streamer is
     /// emitting code to.
-    const MCSection *getPreviousSection() const {
+    MCSectionSubPair getPreviousSection() const {
       if (!SectionStack.empty())
         return SectionStack.back().second;
-      return NULL;
+      return MCSectionSubPair();
     }
 
     /// ChangeSection - Update streamer for a new active section.
     ///
     /// This is called by PopSection and SwitchSection, if the current
     /// section changes.
-    virtual void ChangeSection(const MCSection *) = 0;
+    virtual void ChangeSection(const MCSection *, const MCExpr *) = 0;
 
     /// pushSection - Save the current and previous section on the
     /// section stack.
@@ -208,11 +209,19 @@ namespace llvm {
     bool PopSection() {
       if (SectionStack.size() <= 1)
         return false;
-      const MCSection *oldSection = SectionStack.pop_back_val().first;
-      const MCSection *curSection = SectionStack.back().first;
+      MCSectionSubPair oldSection = SectionStack.pop_back_val().first;
+      MCSectionSubPair curSection = SectionStack.back().first;
 
       if (oldSection != curSection)
-        ChangeSection(curSection);
+        ChangeSection(curSection.first, curSection.second);
+      return true;
+    }
+
+    bool SubSection(const MCExpr *Subsection) {
+      if (SectionStack.empty())
+        return false;
+
+      SwitchSection(SectionStack.back().first.first, Subsection);
       return true;
     }
 
@@ -220,25 +229,26 @@ namespace llvm {
     /// @p Section.  This is required to update CurSection.
     ///
     /// This corresponds to assembler directives like .section, .text, etc.
-    void SwitchSection(const MCSection *Section) {
+    void SwitchSection(const MCSection *Section, const MCExpr *Subsection = 0) {
       assert(Section && "Cannot switch to a null section!");
-      const MCSection *curSection = SectionStack.back().first;
+      MCSectionSubPair curSection = SectionStack.back().first;
       SectionStack.back().second = curSection;
-      if (Section != curSection) {
-        SectionStack.back().first = Section;
-        ChangeSection(Section);
+      if (MCSectionSubPair(Section, Subsection) != curSection) {
+        SectionStack.back().first = MCSectionSubPair(Section, Subsection);
+        ChangeSection(Section, Subsection);
       }
     }
 
     /// SwitchSectionNoChange - Set the current section where code is being
     /// emitted to @p Section.  This is required to update CurSection. This
     /// version does not call ChangeSection.
-    void SwitchSectionNoChange(const MCSection *Section) {
+    void SwitchSectionNoChange(const MCSection *Section,
+                               const MCExpr *Subsection = 0) {
       assert(Section && "Cannot switch to a null section!");
-      const MCSection *curSection = SectionStack.back().first;
+      MCSectionSubPair curSection = SectionStack.back().first;
       SectionStack.back().second = curSection;
-      if (Section != curSection)
-        SectionStack.back().first = Section;
+      if (MCSectionSubPair(Section, Subsection) != curSection)
+        SectionStack.back().first = MCSectionSubPair(Section, Subsection);
     }
 
     /// Initialize the streamer.
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index 4c5b17612569..6e878df3cb1e 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -22,6 +22,7 @@ class MCInst;
 template <typename T> class SmallVectorImpl;
 
 enum AsmRewriteKind {
+  AOK_Delete = 0,     // Rewrite should be ignored.
   AOK_Align,          // Rewrite align as .align.
   AOK_DotOperator,    // Rewrite a dot operator expression as an immediate.
                       // E.g., [eax].foo.bar -> [eax].8
@@ -34,6 +35,19 @@ enum AsmRewriteKind {
   AOK_Skip            // Skip emission (e.g., offset/type operators).
 };
 
+const char AsmRewritePrecedence [] = {
+  0, // AOK_Delete
+  1, // AOK_Align
+  1, // AOK_DotOperator
+  1, // AOK_Emit
+  3, // AOK_Imm
+  3, // AOK_ImmPrefix
+  2, // AOK_Input
+  2, // AOK_Output
+  4, // AOK_SizeDirective
+  1  // AOK_Skip
+};
+
 struct AsmRewrite {
   AsmRewriteKind Kind;
   SMLoc Loc;
diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h
index 11df5749d450..f13e7d548026 100644
--- a/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -11,7 +11,9 @@
 #define LLVM_MC_MCWINCOFFOBJECTWRITER_H
 
 namespace llvm {
+  class MCFixup;
   class MCObjectWriter;
+  class MCValue;
   class raw_ostream;
 
   class MCWinCOFFObjectTargetWriter {
@@ -24,7 +26,9 @@ namespace llvm {
     virtual ~MCWinCOFFObjectTargetWriter() {}
 
     unsigned getMachine() const { return Machine; }
-    virtual unsigned getRelocType(unsigned FixupKind) const = 0;
+    virtual unsigned getRelocType(const MCValue &Target,
+                                  const MCFixup &Fixup,
+                                  bool IsCrossSection) const = 0;
   };
 
   /// \brief Construct a new Win COFF writer instance.
diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h
index 37ae03b45ca2..8862c8b76296 100644
--- a/include/llvm/MC/SubtargetFeature.h
+++ b/include/llvm/MC/SubtargetFeature.h
@@ -62,10 +62,8 @@ struct SubtargetInfoKV {
 ///
 /// SubtargetFeatures - Manages the enabling and disabling of subtarget
 /// specific features.  Features are encoded as a string of the form
-///   "cpu,+attr1,+attr2,-attr3,...,+attrN"
+///   "+attr1,+attr2,-attr3,...,+attrN"
 /// A comma separates each feature from the next (all lowercase.)
-/// The first feature is always the CPU subtype (eg. pentiumm).  If the CPU
-/// value is "generic" then the CPU subtype should be generic for the target.
 /// Each of the remaining features is prefixed with + or - indicating whether
 /// that feature should be enabled or disabled contrary to the cpu
 /// specification.
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
index 8bbcd8b4d4c6..78fcf6feb851 100644
--- a/include/llvm/Object/Binary.h
+++ b/include/llvm/Object/Binary.h
@@ -41,11 +41,17 @@ class Binary {
     // Object and children.
     ID_StartObjects,
     ID_COFF,
+
     ID_ELF32L, // ELF 32-bit, little endian
     ID_ELF32B, // ELF 32-bit, big endian
     ID_ELF64L, // ELF 64-bit, little endian
     ID_ELF64B, // ELF 64-bit, big endian
-    ID_MachO,
+
+    ID_MachO32L, // MachO 32-bit, little endian
+    ID_MachO32B, // MachO 32-bit, big endian
+    ID_MachO64L, // MachO 64-bit, little endian
+    ID_MachO64B, // MachO 64-bit, big endian
+
     ID_EndObjects
   };
 
@@ -56,6 +62,13 @@ class Binary {
       return is64Bits ? ID_ELF64B : ID_ELF32B;
   }
 
+  static unsigned int getMachOType(bool isLE, bool is64Bits) {
+    if (isLE)
+      return is64Bits ? ID_MachO64L : ID_MachO32L;
+    else
+      return is64Bits ? ID_MachO64B : ID_MachO32B;
+  }
+
 public:
   virtual ~Binary();
 
@@ -79,7 +92,7 @@ class Binary {
   }
 
   bool isMachO() const {
-    return TypeID == ID_MachO;
+    return TypeID >= ID_MachO32L && TypeID <= ID_MachO64B;
   }
 
   bool isCOFF() const {
@@ -87,7 +100,8 @@ class Binary {
   }
 
   bool isLittleEndian() const {
-    return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B);
+    return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B ||
+             TypeID == ID_MachO32B || TypeID == ID_MachO64B);
   }
 };
 
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index 36e35f574816..02840230f5c0 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -81,9 +81,8 @@ template<class ELFT>
 struct ELFDataTypeTypedefHelper;
 
 /// ELF 32bit types.
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct ELFDataTypeTypedefHelper<ELFT<TargetEndianness, MaxAlign, false> >
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct ELFDataTypeTypedefHelper<ELFType<TargetEndianness, MaxAlign, false> >
   : ELFDataTypeTypedefHelperCommon<TargetEndianness, MaxAlign> {
   typedef uint32_t value_type;
   typedef support::detail::packed_endian_specific_integral
@@ -95,9 +94,8 @@ struct ELFDataTypeTypedefHelper<ELFT<TargetEndianness, MaxAlign, false> >
 };
 
 /// ELF 64bit types.
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct ELFDataTypeTypedefHelper<ELFT<TargetEndianness, MaxAlign, true> >
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct ELFDataTypeTypedefHelper<ELFType<TargetEndianness, MaxAlign, true> >
   : ELFDataTypeTypedefHelperCommon<TargetEndianness, MaxAlign> {
   typedef uint64_t value_type;
   typedef support::detail::packed_endian_specific_integral
@@ -109,27 +107,29 @@ struct ELFDataTypeTypedefHelper<ELFT<TargetEndianness, MaxAlign, true> >
 };
 
 // I really don't like doing this, but the alternative is copypasta.
-#define LLVM_ELF_IMPORT_TYPES(ELFT) \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Addr Elf_Addr; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Off Elf_Off; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Half Elf_Half; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Word Elf_Word; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Sword Elf_Sword; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Xword Elf_Xword; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Sxword Elf_Sxword;
-
-// This is required to get template types into a macro :(
-#define LLVM_ELF_COMMA ,
-
-  // Section header.
+#define LLVM_ELF_IMPORT_TYPES(E, M, W)                                         \
+typedef typename ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Addr Elf_Addr; \
+typedef typename ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Off Elf_Off;   \
+typedef typename ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Half Elf_Half; \
+typedef typename ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Word Elf_Word; \
+typedef typename                                                               \
+  ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Sword Elf_Sword;              \
+typedef typename                                                               \
+  ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Xword Elf_Xword;              \
+typedef typename                                                               \
+  ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Sxword Elf_Sxword;
+
+#define LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)                                       \
+  LLVM_ELF_IMPORT_TYPES(ELFT::TargetEndianness, ELFT::MaxAlignment,            \
+  ELFT::Is64Bits)
+
+// Section header.
 template<class ELFT>
 struct Elf_Shdr_Base;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Shdr_Base<ELFT<TargetEndianness, MaxAlign, false> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Shdr_Base<ELFType<TargetEndianness, MaxAlign, false> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Word sh_name;     // Section name (index into string table)
   Elf_Word sh_type;     // Section type (SHT_*)
   Elf_Word sh_flags;    // Section flags (SHF_*)
@@ -142,11 +142,9 @@ struct Elf_Shdr_Base<ELFT<TargetEndianness, MaxAlign, false> > {
   Elf_Word sh_entsize;  // Size of records contained within the section
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Shdr_Base<ELFT<TargetEndianness, MaxAlign, true> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Shdr_Base<ELFType<TargetEndianness, MaxAlign, true> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Word  sh_name;     // Section name (index into string table)
   Elf_Word  sh_type;     // Section type (SHT_*)
   Elf_Xword sh_flags;    // Section flags (SHF_*)
@@ -175,11 +173,9 @@ struct Elf_Shdr_Impl : Elf_Shdr_Base<ELFT> {
 template<class ELFT>
 struct Elf_Sym_Base;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Sym_Base<ELFT<TargetEndianness, MaxAlign, false> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Sym_Base<ELFType<TargetEndianness, MaxAlign, false> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Word      st_name;  // Symbol name (index into string table)
   Elf_Addr      st_value; // Value or address associated with the symbol
   Elf_Word      st_size;  // Size of the symbol
@@ -188,11 +184,9 @@ struct Elf_Sym_Base<ELFT<TargetEndianness, MaxAlign, false> > {
   Elf_Half      st_shndx; // Which section (header table index) it's defined in
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Sym_Base<ELFT<TargetEndianness, MaxAlign, true> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Sym_Base<ELFType<TargetEndianness, MaxAlign, true> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Word      st_name;  // Symbol name (index into string table)
   unsigned char st_info;  // Symbol's type and binding attributes
   unsigned char st_other; // Must be zero; reserved
@@ -220,7 +214,7 @@ struct Elf_Sym_Impl : Elf_Sym_Base<ELFT> {
 /// (.gnu.version). This structure is identical for ELF32 and ELF64.
 template<class ELFT>
 struct Elf_Versym_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   Elf_Half vs_index;   // Version index with flags (e.g. VERSYM_HIDDEN)
 };
 
@@ -231,7 +225,7 @@ struct Elf_Verdaux_Impl;
 /// (.gnu.version_d). This structure is identical for ELF32 and ELF64.
 template<class ELFT>
 struct Elf_Verdef_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   typedef Elf_Verdaux_Impl<ELFT> Elf_Verdaux;
   Elf_Half vd_version; // Version of this structure (e.g. VER_DEF_CURRENT)
   Elf_Half vd_flags;   // Bitwise flags (VER_DEF_*)
@@ -251,7 +245,7 @@ struct Elf_Verdef_Impl {
 /// section (.gnu.version_d). This structure is identical for ELF32 and ELF64.
 template<class ELFT>
 struct Elf_Verdaux_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   Elf_Word vda_name; // Version name (offset in string table)
   Elf_Word vda_next; // Offset to next Verdaux entry (in bytes)
 };
@@ -260,7 +254,7 @@ struct Elf_Verdaux_Impl {
 /// section (.gnu.version_r). This structure is identical for ELF32 and ELF64.
 template<class ELFT>
 struct Elf_Verneed_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   Elf_Half vn_version; // Version of this structure (e.g. VER_NEED_CURRENT)
   Elf_Half vn_cnt;     // Number of associated Vernaux entries
   Elf_Word vn_file;    // Library name (string table offset)
@@ -272,7 +266,7 @@ struct Elf_Verneed_Impl {
 /// section (.gnu.version_r). This structure is identical for ELF32 and ELF64.
 template<class ELFT>
 struct Elf_Vernaux_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   Elf_Word vna_hash;  // Hash of dependency name
   Elf_Half vna_flags; // Bitwise Flags (VER_FLAG_*)
   Elf_Half vna_other; // Version index, used in .gnu.version entries
@@ -285,11 +279,9 @@ struct Elf_Vernaux_Impl {
 template<class ELFT>
 struct Elf_Dyn_Base;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Dyn_Base<ELFT<TargetEndianness, MaxAlign, false> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Dyn_Base<ELFType<TargetEndianness, MaxAlign, false> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Sword d_tag;
   union {
     Elf_Word d_val;
@@ -297,11 +289,9 @@ struct Elf_Dyn_Base<ELFT<TargetEndianness, MaxAlign, false> > {
   } d_un;
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Dyn_Base<ELFT<TargetEndianness, MaxAlign, true> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Dyn_Base<ELFType<TargetEndianness, MaxAlign, true> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Sxword d_tag;
   union {
     Elf_Xword d_val;
@@ -323,90 +313,129 @@ struct Elf_Dyn_Impl : Elf_Dyn_Base<ELFT> {
 template<class ELFT, bool isRela>
 struct Elf_Rel_Base;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, false> {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, false>, false> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Addr      r_offset; // Location (file byte offset, or program virtual addr)
   Elf_Word      r_info;  // Symbol table index and type of relocation to apply
+
+  uint32_t getRInfo(bool isMips64EL) const {
+    assert(!isMips64EL);
+    return r_info;
+  }
+  void setRInfo(uint32_t R) {
+    r_info = R;
+  }
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, false> {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, true>, false> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Addr      r_offset; // Location (file byte offset, or program virtual addr)
   Elf_Xword     r_info;   // Symbol table index and type of relocation to apply
+
+  uint64_t getRInfo(bool isMips64EL) const {
+    uint64_t t = r_info;
+    if (!isMips64EL)
+      return t;
+    // Mip64 little endian has a "special" encoding of r_info. Instead of one
+    // 64 bit little endian number, it is a little ending 32 bit number followed
+    // by a 32 bit big endian number.
+    return (t << 32) | ((t >> 8) & 0xff000000) | ((t >> 24) & 0x00ff0000) |
+      ((t >> 40) & 0x0000ff00) | ((t >> 56) & 0x000000ff);
+    return r_info;
+  }
+  void setRInfo(uint64_t R) {
+    // FIXME: Add mips64el support.
+    r_info = R;
+  }
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, true> {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, false>, true> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Addr      r_offset; // Location (file byte offset, or program virtual addr)
   Elf_Word      r_info;   // Symbol table index and type of relocation to apply
   Elf_Sword     r_addend; // Compute value for relocatable field by adding this
+
+  uint32_t getRInfo(bool isMips64EL) const {
+    assert(!isMips64EL);
+    return r_info;
+  }
+  void setRInfo(uint32_t R) {
+    r_info = R;
+  }
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, true> {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, true>, true> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Addr      r_offset; // Location (file byte offset, or program virtual addr)
   Elf_Xword     r_info;   // Symbol table index and type of relocation to apply
   Elf_Sxword    r_addend; // Compute value for relocatable field by adding this.
+
+  uint64_t getRInfo(bool isMips64EL) const {
+    // Mip64 little endian has a "special" encoding of r_info. Instead of one
+    // 64 bit little endian number, it is a little ending 32 bit number followed
+    // by a 32 bit big endian number.
+    uint64_t t = r_info;
+    if (!isMips64EL)
+      return t;
+    return (t << 32) | ((t >> 8) & 0xff000000) | ((t >> 24) & 0x00ff0000) |
+      ((t >> 40) & 0x0000ff00) | ((t >> 56) & 0x000000ff);
+  }
+  void setRInfo(uint64_t R) {
+    // FIXME: Add mips64el support.
+    r_info = R;
+  }
 };
 
 template<class ELFT, bool isRela>
 struct Elf_Rel_Impl;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
-struct Elf_Rel_Impl<ELFT<TargetEndianness, MaxAlign, true>, isRela>
-       : Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, isRela> {
-  using Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, isRela>::r_info;
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
+struct Elf_Rel_Impl<ELFType<TargetEndianness, MaxAlign, true>, isRela>
+       : Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, true>, isRela> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
 
   // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
   // and ELF64_R_INFO macros defined in the ELF specification:
-  uint32_t getSymbol() const { return (uint32_t) (r_info >> 32); }
-  uint32_t getType() const {
-    return (uint32_t) (r_info & 0xffffffffL);
+  uint32_t getSymbol(bool isMips64EL) const {
+    return (uint32_t) (this->getRInfo(isMips64EL) >> 32);
+  }
+  uint32_t getType(bool isMips64EL) const {
+    return (uint32_t) (this->getRInfo(isMips64EL) & 0xffffffffL);
   }
   void setSymbol(uint32_t s) { setSymbolAndType(s, getType()); }
   void setType(uint32_t t) { setSymbolAndType(getSymbol(), t); }
   void setSymbolAndType(uint32_t s, uint32_t t) {
-    r_info = ((uint64_t)s << 32) + (t&0xffffffffL);
+    this->setRInfo(((uint64_t)s << 32) + (t&0xffffffffL));
   }
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
-struct Elf_Rel_Impl<ELFT<TargetEndianness, MaxAlign, false>, isRela>
-       : Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, isRela> {
-  using Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, isRela>::r_info;
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
+struct Elf_Rel_Impl<ELFType<TargetEndianness, MaxAlign, false>, isRela>
+       : Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, false>, isRela> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
 
   // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
   // and ELF32_R_INFO macros defined in the ELF specification:
-  uint32_t getSymbol() const { return (r_info >> 8); }
-  unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
+  uint32_t getSymbol(bool isMips64EL) const {
+    return this->getRInfo(isMips64EL) >> 8;
+  }
+  unsigned char getType(bool isMips64EL) const {
+    return (unsigned char) (this->getRInfo(isMips64EL) & 0x0ff);
+  }
   void setSymbol(uint32_t s) { setSymbolAndType(s, getType()); }
   void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
   void setSymbolAndType(uint32_t s, unsigned char t) {
-    r_info = (s << 8) + t;
+    this->setRInfo((s << 8) + t);
   }
 };
 
 template<class ELFT>
 struct Elf_Ehdr_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes
   Elf_Half e_type;     // Type of file (see ET_*)
   Elf_Half e_machine;  // Required architecture for this file (see EM_*)
@@ -432,11 +461,9 @@ struct Elf_Ehdr_Impl {
 template<class ELFT>
 struct Elf_Phdr_Impl;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Phdr_Impl<ELFT<TargetEndianness, MaxAlign, false> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Phdr_Impl<ELFType<TargetEndianness, MaxAlign, false> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Word p_type;   // Type of segment
   Elf_Off  p_offset; // FileOffset where segment is located, in bytes
   Elf_Addr p_vaddr;  // Virtual Address of beginning of segment
@@ -447,11 +474,9 @@ struct Elf_Phdr_Impl<ELFT<TargetEndianness, MaxAlign, false> > {
   Elf_Word p_align;  // Segment alignment constraint
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Phdr_Impl<ELFT<TargetEndianness, MaxAlign, true> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Phdr_Impl<ELFType<TargetEndianness, MaxAlign, true> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Word p_type;   // Type of segment
   Elf_Word p_flags;  // Segment flags
   Elf_Off  p_offset; // FileOffset where segment is located, in bytes
@@ -464,7 +489,7 @@ struct Elf_Phdr_Impl<ELFT<TargetEndianness, MaxAlign, true> > {
 
 template<class ELFT>
 class ELFObjectFile : public ObjectFile {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
 
 public:
   /// \brief Iterate over constant sized entities.
@@ -638,6 +663,7 @@ class ELFObjectFile : public ObjectFile {
 protected:
   const Elf_Sym  *getSymbol(DataRefImpl Symb) const; // FIXME: Should be private?
   void            validateSymbol(DataRefImpl Symb) const;
+  StringRef       getRelocationTypeName(uint32_t Type) const;
 
 public:
   error_code      getSymbolName(const Elf_Shdr *section,
@@ -703,6 +729,13 @@ class ELFObjectFile : public ObjectFile {
 
 public:
   ELFObjectFile(MemoryBuffer *Object, error_code &ec);
+
+  bool isMips64EL() const {
+    return Header->e_machine == ELF::EM_MIPS &&
+      Header->getFileClass() == ELF::ELFCLASS64 &&
+      Header->getDataEncoding() == ELF::ELFDATA2LSB;
+  }
+
   virtual symbol_iterator begin_symbols() const;
   virtual symbol_iterator end_symbols() const;
 
@@ -790,6 +823,7 @@ class ELFObjectFile : public ObjectFile {
   uint64_t getNumSections() const;
   uint64_t getStringTableIndex() const;
   ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const;
+  const Elf_Ehdr *getElfHeader() const;
   const Elf_Shdr *getSection(const Elf_Sym *symb) const;
   const Elf_Shdr *getElfSection(section_iterator &It) const;
   const Elf_Sym *getElfSymbol(symbol_iterator &It) const;
@@ -968,6 +1002,12 @@ ELFObjectFile<ELFT>::getSection(const Elf_Sym *symb) const {
   return getSection(symb->st_shndx);
 }
 
+template<class ELFT>
+const typename ELFObjectFile<ELFT>::Elf_Ehdr *
+ELFObjectFile<ELFT>::getElfHeader() const {
+  return Header;
+}
+
 template<class ELFT>
 const typename ELFObjectFile<ELFT>::Elf_Shdr *
 ELFObjectFile<ELFT>::getElfSection(section_iterator &It) const {
@@ -1058,6 +1098,11 @@ error_code ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb,
       IsRelocatable = true;
     }
     Result = symb->st_value;
+
+    // Clear the ARM/Thumb indicator flag.
+    if (Header->e_machine == ELF::EM_ARM)
+      Result &= ~1;
+
     if (IsRelocatable && Section != 0)
       Result += Section->sh_addr;
     return object_error::success;
@@ -1373,8 +1418,16 @@ template<class ELFT>
 error_code ELFObjectFile<ELFT>::sectionContainsSymbol(DataRefImpl Sec,
                                                       DataRefImpl Symb,
                                                       bool &Result) const {
-  // FIXME: Unimplemented.
-  Result = false;
+  validateSymbol(Symb);
+
+  const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+  const Elf_Sym  *symb = getSymbol(Symb);
+
+  unsigned shndx = symb->st_shndx;
+  bool Reserved = shndx >= ELF::SHN_LORESERVE
+               && shndx <= ELF::SHN_HIRESERVE;
+
+  Result = !Reserved && (sec == getSection(symb->st_shndx));
   return object_error::success;
 }
 
@@ -1447,11 +1500,11 @@ error_code ELFObjectFile<ELFT>::getRelocationSymbol(DataRefImpl Rel,
     default :
       report_fatal_error("Invalid section type in Rel!");
     case ELF::SHT_REL : {
-      symbolIdx = getRel(Rel)->getSymbol();
+      symbolIdx = getRel(Rel)->getSymbol(isMips64EL());
       break;
     }
     case ELF::SHT_RELA : {
-      symbolIdx = getRela(Rel)->getSymbol();
+      symbolIdx = getRela(Rel)->getSymbol(isMips64EL());
       break;
     }
   }
@@ -1517,11 +1570,11 @@ error_code ELFObjectFile<ELFT>::getRelocationType(DataRefImpl Rel,
     default :
       report_fatal_error("Invalid section type in Rel!");
     case ELF::SHT_REL : {
-      Result = getRel(Rel)->getType();
+      Result = getRel(Rel)->getType(isMips64EL());
       break;
     }
     case ELF::SHT_RELA : {
-      Result = getRela(Rel)->getType();
+      Result = getRela(Rel)->getType(isMips64EL());
       break;
     }
   }
@@ -1529,29 +1582,14 @@ error_code ELFObjectFile<ELFT>::getRelocationType(DataRefImpl Rel,
 }
 
 #define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \
-  case ELF::enum: res = #enum; break;
+  case ELF::enum: Res = #enum; break;
 
 template<class ELFT>
-error_code ELFObjectFile<ELFT>::getRelocationTypeName(
-    DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
-  const Elf_Shdr *sec = getSection(Rel.w.b);
-  uint32_t type;
-  StringRef res;
-  switch (sec->sh_type) {
-    default :
-      return object_error::parse_failed;
-    case ELF::SHT_REL : {
-      type = getRel(Rel)->getType();
-      break;
-    }
-    case ELF::SHT_RELA : {
-      type = getRela(Rel)->getType();
-      break;
-    }
-  }
+StringRef ELFObjectFile<ELFT>::getRelocationTypeName(uint32_t Type) const {
+  StringRef Res = "Unknown";
   switch (Header->e_machine) {
   case ELF::EM_X86_64:
-    switch (type) {
+    switch (Type) {
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_NONE);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC32);
@@ -1579,17 +1617,22 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTOFF64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPLT64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLTOFF64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE32);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32_TLSDESC);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC_CALL);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC);
-    default:
-      res = "Unknown";
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_IRELATIVE);
+    default: break;
     }
     break;
   case ELF::EM_386:
-    switch (type) {
+    switch (Type) {
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_NONE);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC32);
@@ -1630,12 +1673,68 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC_CALL);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_IRELATIVE);
-    default:
-      res = "Unknown";
+    default: break;
+    }
+    break;
+  case ELF::EM_MIPS:
+    switch (Type) {
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NONE);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_26);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HI16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LO16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LITERAL);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT5);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT6);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_DISP);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_PAGE);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_OFST);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_HI16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_LO16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SUB);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_A);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_B);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_DELETE);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHER);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHEST);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_HI16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_LO16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SCN_DISP);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_ADD_IMMEDIATE);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PJUMP);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_RELGOT);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JALR);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GD);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_LDM);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_HI16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_LO16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GOTTPREL);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_HI16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_LO16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GLOB_DAT);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_COPY);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JUMP_SLOT);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NUM);
+    default: break;
     }
     break;
   case ELF::EM_AARCH64:
-    switch (type) {
+    switch (Type) {
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_NONE);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS32);
@@ -1709,13 +1808,11 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_LD64_LO12_NC);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADD_LO12_NC);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_CALL);
-
-    default:
-      res = "Unknown";
+    default: break;
     }
     break;
   case ELF::EM_ARM:
-    switch (type) {
+    switch (Type) {
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_NONE);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PC24);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS32);
@@ -1847,12 +1944,11 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ME_TOO);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_DESCSEQ16);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_DESCSEQ32);
-    default:
-      res = "Unknown";
+    default: break;
     }
     break;
   case ELF::EM_HEXAGON:
-    switch (type) {
+    switch (Type) {
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_NONE);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B22_PCREL);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B15_PCREL);
@@ -1939,19 +2035,117 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_32_6_X);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_16_X);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_11_X);
-    default:
-      res = "Unknown";
+    default: break;
     }
     break;
-  default:
-    res = "Unknown";
+  case ELF::EM_PPC:
+    switch (Type) {
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_NONE);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR24);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_HI);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14_BRTAKEN);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14_BRNTAKEN);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL24);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14_BRTAKEN);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14_BRNTAKEN);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16_HA);
+    default: break;
+    }
+    break;
+  case ELF::EM_PPC64:
+    switch (Type) {
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_NONE);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HI);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR14);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL24);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHER);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHEST);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_DS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_LO_DS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_DS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_LO_DS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_LO_DS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLSGD);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLSLD);
+    default: break;
+    }
+    break;
+  default: break;
   }
-  Result.append(res.begin(), res.end());
-  return object_error::success;
+  return Res;
 }
 
 #undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME
 
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getRelocationTypeName(
+    DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
+  const Elf_Shdr *sec = getSection(Rel.w.b);
+  uint32_t type;
+  switch (sec->sh_type) {
+    default :
+      return object_error::parse_failed;
+    case ELF::SHT_REL : {
+      type = getRel(Rel)->getType(isMips64EL());
+      break;
+    }
+    case ELF::SHT_RELA : {
+      type = getRela(Rel)->getType(isMips64EL());
+      break;
+    }
+  }
+
+  if (!isMips64EL()) {
+    StringRef Name = getRelocationTypeName(type);
+    Result.append(Name.begin(), Name.end());
+  } else {
+    uint8_t Type1 = (type >>  0) & 0xFF;
+    uint8_t Type2 = (type >>  8) & 0xFF;
+    uint8_t Type3 = (type >> 16) & 0xFF;
+
+    // Concat all three relocation type names.
+    StringRef Name = getRelocationTypeName(Type1);
+    Result.append(Name.begin(), Name.end());
+
+    Name = getRelocationTypeName(Type2);
+    Result.append(1, '/');
+    Result.append(Name.begin(), Name.end());
+
+    Name = getRelocationTypeName(Type3);
+    Result.append(1, '/');
+    Result.append(Name.begin(), Name.end());
+  }
+
+  return object_error::success;
+}
+
 template<class ELFT>
 error_code ELFObjectFile<ELFT>::getRelocationAdditionalInfo(
     DataRefImpl Rel, int64_t &Result) const {
@@ -1982,14 +2176,14 @@ error_code ELFObjectFile<ELFT>::getRelocationValueString(
     default:
       return object_error::parse_failed;
     case ELF::SHT_REL: {
-      type = getRel(Rel)->getType();
-      symbol_index = getRel(Rel)->getSymbol();
+      type = getRel(Rel)->getType(isMips64EL());
+      symbol_index = getRel(Rel)->getSymbol(isMips64EL());
       // TODO: Read implicit addend from section data.
       break;
     }
     case ELF::SHT_RELA: {
-      type = getRela(Rel)->getType();
-      symbol_index = getRela(Rel)->getSymbol();
+      type = getRela(Rel)->getType(isMips64EL());
+      symbol_index = getRela(Rel)->getSymbol(isMips64EL());
       addend = getRela(Rel)->r_addend;
       break;
     }
@@ -2054,8 +2248,7 @@ ELFObjectFile<ELFT>::ELFObjectFile(MemoryBuffer *Object, error_code &ec)
   : ObjectFile(getELFType(
       static_cast<endianness>(ELFT::TargetEndianness) == support::little,
       ELFT::Is64Bits),
-      Object,
-      ec)
+      Object)
   , isDyldELFObject(false)
   , SectionHeaderTable(0)
   , dot_shstrtab_sec(0)
@@ -2303,8 +2496,9 @@ ELFObjectFile<ELFT>::end_dynamic_table(bool NULLEnd) const {
 
     if (NULLEnd) {
       Elf_Dyn_iterator Start = begin_dynamic_table();
-      for (; Start != Ret && Start->getTag() != ELF::DT_NULL; ++Start)
-        ;
+      while (Start != Ret && Start->getTag() != ELF::DT_NULL)
+        ++Start;
+
       // Include the DT_NULL.
       if (Start != Ret)
         ++Start;
@@ -2321,10 +2515,9 @@ StringRef ELFObjectFile<ELFT>::getLoadName() const {
     // Find the DT_SONAME entry
     Elf_Dyn_iterator it = begin_dynamic_table();
     Elf_Dyn_iterator ie = end_dynamic_table();
-    for (; it != ie; ++it) {
-      if (it->getTag() == ELF::DT_SONAME)
-        break;
-    }
+    while (it != ie && it->getTag() != ELF::DT_SONAME)
+      ++it;
+
     if (it != ie) {
       if (dot_dynstr_sec == NULL)
         report_fatal_error("Dynamic string table is missing");
@@ -2341,10 +2534,8 @@ library_iterator ELFObjectFile<ELFT>::begin_libraries_needed() const {
   // Find the first DT_NEEDED entry
   Elf_Dyn_iterator i = begin_dynamic_table();
   Elf_Dyn_iterator e = end_dynamic_table();
-  for (; i != e; ++i) {
-    if (i->getTag() == ELF::DT_NEEDED)
-      break;
-  }
+  while (i != e && i->getTag() != ELF::DT_NEEDED)
+    ++i;
 
   DataRefImpl DRI;
   DRI.p = reinterpret_cast<uintptr_t>(i.get());
@@ -2359,11 +2550,10 @@ error_code ELFObjectFile<ELFT>::getLibraryNext(DataRefImpl Data,
                                         reinterpret_cast<const char *>(Data.p));
   Elf_Dyn_iterator e = end_dynamic_table();
 
-  // Skip the current dynamic table entry.
-  ++i;
-
-  // Find the next DT_NEEDED entry.
-  for (; i != e && i->getTag() != ELF::DT_NEEDED; ++i);
+  // Skip the current dynamic table entry and find the next DT_NEEDED entry.
+  do
+    ++i;
+  while (i != e && i->getTag() != ELF::DT_NEEDED);
 
   DataRefImpl DRI;
   DRI.p = reinterpret_cast<uintptr_t>(i.get());
@@ -2704,6 +2894,21 @@ static inline error_code GetELFSymbolVersion(const ObjectFile *Obj,
   llvm_unreachable("Object passed to GetELFSymbolVersion() is not ELF");
 }
 
+/// This function returns the hash value for a symbol in the .dynsym section
+/// Name of the API remains consistent as specified in the libelf
+/// REF : http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash
+static inline unsigned elf_hash(StringRef &symbolName) {
+  unsigned h = 0, g;
+  for (unsigned i = 0, j = symbolName.size(); i < j; i++) {
+    h = (h << 4) + symbolName[i];
+    g = h & 0xf0000000L;
+    if (g != 0)
+      h ^= g >> 24;
+    h &= ~g;
+  }
+  return h;
+}
+
 }
 }
 
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index ed7aabd2c868..cec649e4b178 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -7,16 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the MachOObjectFile class, which binds the MachOObject
-// class to the generic ObjectFile wrapper.
+// This file declares the MachOObjectFile class, which implement the ObjectFile
+// interface for MachO files.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_OBJECT_MACHO_H
 #define LLVM_OBJECT_MACHO_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/MachOFormat.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/MachO.h"
 #include "llvm/Support/raw_ostream.h"
@@ -24,46 +25,25 @@
 namespace llvm {
 namespace object {
 
-typedef MachOObject::LoadCommandInfo LoadCommandInfo;
-
 class MachOObjectFile : public ObjectFile {
 public:
-  MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, error_code &ec);
-
-  virtual symbol_iterator begin_symbols() const;
-  virtual symbol_iterator end_symbols() const;
-  virtual symbol_iterator begin_dynamic_symbols() const;
-  virtual symbol_iterator end_dynamic_symbols() const;
-  virtual library_iterator begin_libraries_needed() const;
-  virtual library_iterator end_libraries_needed() const;
-  virtual section_iterator begin_sections() const;
-  virtual section_iterator end_sections() const;
-
-  virtual uint8_t getBytesInAddress() const;
-  virtual StringRef getFileFormatName() const;
-  virtual unsigned getArch() const;
-  virtual StringRef getLoadName() const;
+  struct LoadCommandInfo {
+    const char *Ptr;      // Where in memory the load command is.
+    macho::LoadCommand C; // The command itself.
+  };
 
-  // In a MachO file, sections have a segment name. This is used in the .o
-  // files. They have a single segment, but this field specifies which segment
-  // a section should be put in in the final object.
-  error_code getSectionFinalSegmentName(DataRefImpl Sec, StringRef &Res) const;
-
-  MachOObject *getObject() { return MachOObj.get(); }
+  MachOObjectFile(MemoryBuffer *Object, bool IsLittleEndian, bool Is64Bits,
+                  error_code &ec);
 
-  static inline bool classof(const Binary *v) {
-    return v->isMachO();
-  }
-
-protected:
   virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
   virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
-  virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
   virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+  virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
   virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+  virtual error_code getSymbolType(DataRefImpl Symb,
+                                   SymbolRef::Type &Res) const;
   virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
   virtual error_code getSymbolFlags(DataRefImpl Symb, uint32_t &Res) const;
-  virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::Type &Res) const;
   virtual error_code getSymbolSection(DataRefImpl Symb,
                                       section_iterator &Res) const;
   virtual error_code getSymbolValue(DataRefImpl Symb, uint64_t &Val) const;
@@ -82,21 +62,17 @@ class MachOObjectFile : public ObjectFile {
   virtual error_code isSectionVirtual(DataRefImpl Sec, bool &Res) const;
   virtual error_code isSectionZeroInit(DataRefImpl Sec, bool &Res) const;
   virtual error_code isSectionReadOnlyData(DataRefImpl Sec, bool &Res) const;
-  virtual error_code sectionContainsSymbol(DataRefImpl DRI, DataRefImpl S,
+  virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
                                            bool &Result) const;
   virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const;
   virtual relocation_iterator getSectionRelEnd(DataRefImpl Sec) const;
 
   virtual error_code getRelocationNext(DataRefImpl Rel,
                                        RelocationRef &Res) const;
-  virtual error_code getRelocationAddress(DataRefImpl Rel,
-                                          uint64_t &Res) const;
-  virtual error_code getRelocationOffset(DataRefImpl Rel,
-                                         uint64_t &Res) const;
-  virtual error_code getRelocationSymbol(DataRefImpl Rel,
-                                         SymbolRef &Res) const;
-  virtual error_code getRelocationType(DataRefImpl Rel,
-                                       uint64_t &Res) const;
+  virtual error_code getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const;
+  virtual error_code getRelocationOffset(DataRefImpl Rel, uint64_t &Res) const;
+  virtual error_code getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const;
+  virtual error_code getRelocationType(DataRefImpl Rel, uint64_t &Res) const;
   virtual error_code getRelocationTypeName(DataRefImpl Rel,
                                            SmallVectorImpl<char> &Result) const;
   virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel,
@@ -108,28 +84,72 @@ class MachOObjectFile : public ObjectFile {
   virtual error_code getLibraryNext(DataRefImpl LibData, LibraryRef &Res) const;
   virtual error_code getLibraryPath(DataRefImpl LibData, StringRef &Res) const;
 
-private:
-  OwningPtr<MachOObject> MachOObj;
-  mutable uint32_t RegisteredStringTable;
-  typedef SmallVector<DataRefImpl, 1> SectionList;
-  SectionList Sections;
+  virtual symbol_iterator begin_symbols() const;
+  virtual symbol_iterator end_symbols() const;
 
+  virtual symbol_iterator begin_dynamic_symbols() const;
+  virtual symbol_iterator end_dynamic_symbols() const;
+
+  virtual section_iterator begin_sections() const;
+  virtual section_iterator end_sections() const;
 
-  void moveToNextSection(DataRefImpl &DRI) const;
-  void getSymbolTableEntry(DataRefImpl DRI,
-                           InMemoryStruct<macho::SymbolTableEntry> &Res) const;
-  void getSymbol64TableEntry(DataRefImpl DRI,
-                          InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
-  void moveToNextSymbol(DataRefImpl &DRI) const;
-  void getSection(DataRefImpl DRI, InMemoryStruct<macho::Section> &Res) const;
-  void getSection64(DataRefImpl DRI,
-                    InMemoryStruct<macho::Section64> &Res) const;
-  void getRelocation(DataRefImpl Rel,
-                     InMemoryStruct<macho::RelocationEntry> &Res) const;
-  std::size_t getSectionIndex(DataRefImpl Sec) const;
-
-  void printRelocationTargetName(InMemoryStruct<macho::RelocationEntry>& RE,
-                                 raw_string_ostream &fmt) const;
+  virtual library_iterator begin_libraries_needed() const;
+  virtual library_iterator end_libraries_needed() const;
+
+  virtual uint8_t getBytesInAddress() const;
+
+  virtual StringRef getFileFormatName() const;
+  virtual unsigned getArch() const;
+
+  virtual StringRef getLoadName() const;
+
+  // In a MachO file, sections have a segment name. This is used in the .o
+  // files. They have a single segment, but this field specifies which segment
+  // a section should be put in in the final object.
+  StringRef getSectionFinalSegmentName(DataRefImpl Sec) const;
+
+  // Names are stored as 16 bytes. These returns the raw 16 bytes without
+  // interpreting them as a C string.
+  ArrayRef<char> getSectionRawName(DataRefImpl Sec) const;
+  ArrayRef<char> getSectionRawFinalSegmentName(DataRefImpl Sec) const;
+
+  // MachO specific Info about relocations.
+  bool isRelocationScattered(const macho::RelocationEntry &RE) const;
+  unsigned getPlainRelocationSymbolNum(const macho::RelocationEntry &RE) const;
+  bool getPlainRelocationExternal(const macho::RelocationEntry &RE) const;
+  bool getScatteredRelocationScattered(const macho::RelocationEntry &RE) const;
+  uint32_t getScatteredRelocationValue(const macho::RelocationEntry &RE) const;
+  unsigned getAnyRelocationAddress(const macho::RelocationEntry &RE) const;
+  unsigned getAnyRelocationPCRel(const macho::RelocationEntry &RE) const;
+  unsigned getAnyRelocationLength(const macho::RelocationEntry &RE) const;
+  unsigned getAnyRelocationType(const macho::RelocationEntry &RE) const;
+
+  // Walk load commands.
+  LoadCommandInfo getFirstLoadCommandInfo() const;
+  LoadCommandInfo getNextLoadCommandInfo(const LoadCommandInfo &L) const;
+
+  // MachO specific structures.
+  macho::Section getSection(DataRefImpl DRI) const;
+  macho::Section64 getSection64(DataRefImpl DRI) const;
+  macho::SymbolTableEntry getSymbolTableEntry(DataRefImpl DRI) const;
+  macho::Symbol64TableEntry getSymbol64TableEntry(DataRefImpl DRI) const;
+  macho::LinkeditDataLoadCommand
+  getLinkeditDataLoadCommand(const LoadCommandInfo &L) const;
+  macho::RelocationEntry getRelocation(DataRefImpl Rel) const;
+  macho::Header getHeader() const;
+  macho::SymtabLoadCommand getSymtabLoadCommand() const;
+
+  bool is64Bit() const;
+  void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const;
+
+  static bool classof(const Binary *v) {
+    return v->isMachO();
+  }
+
+private:
+  typedef SmallVector<const char*, 1> SectionList;
+  SectionList Sections;
+  const char *SymtabLoadCmd;
 };
 
 }
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 6a66653fe223..074a75236547 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -276,7 +276,7 @@ class ObjectFile : public Binary {
   ObjectFile(const ObjectFile &other) LLVM_DELETED_FUNCTION;
 
 protected:
-  ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec);
+  ObjectFile(unsigned int Type, MemoryBuffer *source);
 
   const uint8_t *base() const {
     return reinterpret_cast<const uint8_t *>(Data->getBufferStart());
diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h
index a9fb65cba905..265b886daff7 100644
--- a/include/llvm/Support/CFG.h
+++ b/include/llvm/Support/CFG.h
@@ -27,8 +27,9 @@ namespace llvm {
 
 template <class Ptr, class USE_iterator> // Predecessor Iterator
 class PredIterator : public std::iterator<std::forward_iterator_tag,
-                                          Ptr, ptrdiff_t> {
-  typedef std::iterator<std::forward_iterator_tag, Ptr, ptrdiff_t> super;
+                                          Ptr, ptrdiff_t, Ptr*, Ptr*> {
+  typedef std::iterator<std::forward_iterator_tag, Ptr, ptrdiff_t, Ptr*,
+                                                                    Ptr*> super;
   typedef PredIterator<Ptr, USE_iterator> Self;
   USE_iterator It;
 
@@ -40,6 +41,7 @@ class PredIterator : public std::iterator<std::forward_iterator_tag,
 
 public:
   typedef typename super::pointer pointer;
+  typedef typename super::reference reference;
 
   PredIterator() {}
   explicit inline PredIterator(Ptr *bb) : It(bb->use_begin()) {
@@ -50,7 +52,7 @@ class PredIterator : public std::iterator<std::forward_iterator_tag,
   inline bool operator==(const Self& x) const { return It == x.It; }
   inline bool operator!=(const Self& x) const { return !operator==(x); }
 
-  inline pointer operator*() const {
+  inline reference operator*() const {
     assert(!It.atEnd() && "pred_iterator out of range!");
     return cast<TerminatorInst>(*It)->getParent();
   }
@@ -100,10 +102,11 @@ inline const_pred_iterator pred_end(const BasicBlock *BB) {
 
 template <class Term_, class BB_>           // Successor Iterator
 class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
-                                          BB_, ptrdiff_t> {
+                                          BB_, ptrdiff_t, BB_*, BB_*> {
   const Term_ Term;
   unsigned idx;
-  typedef std::iterator<std::bidirectional_iterator_tag, BB_, ptrdiff_t> super;
+  typedef std::iterator<std::bidirectional_iterator_tag, BB_, ptrdiff_t, BB_*,
+                                                                    BB_*> super;
   typedef SuccIterator<Term_, BB_> Self;
 
   inline bool index_is_valid(int idx) {
@@ -112,6 +115,7 @@ class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
 
 public:
   typedef typename super::pointer pointer;
+  typedef typename super::reference reference;
   // TODO: This can be random access iterator, only operator[] missing.
 
   explicit inline SuccIterator(Term_ T) : Term(T), idx(0) {// begin iterator
@@ -142,7 +146,7 @@ class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
   inline bool operator==(const Self& x) const { return idx == x.idx; }
   inline bool operator!=(const Self& x) const { return !operator==(x); }
 
-  inline pointer operator*() const { return Term->getSuccessor(idx); }
+  inline reference operator*() const { return Term->getSuccessor(idx); }
   inline pointer operator->() const { return operator*(); }
 
   inline Self& operator++() { ++idx; return *this; } // Preincrement
diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h
index 02c44cdfad26..823b43ad938a 100644
--- a/include/llvm/Support/COFF.h
+++ b/include/llvm/Support/COFF.h
@@ -321,7 +321,8 @@ namespace COFF {
     IMAGE_COMDAT_SELECT_SAME_SIZE,
     IMAGE_COMDAT_SELECT_EXACT_MATCH,
     IMAGE_COMDAT_SELECT_ASSOCIATIVE,
-    IMAGE_COMDAT_SELECT_LARGEST
+    IMAGE_COMDAT_SELECT_LARGEST,
+    IMAGE_COMDAT_SELECT_NEWEST
   };
 
   // Auxiliary Symbol Formats
diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h
index 3ce0fef4a045..92107ac02526 100644
--- a/include/llvm/Support/CallSite.h
+++ b/include/llvm/Support/CallSite.h
@@ -28,7 +28,6 @@
 
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Instructions.h"
 
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h
index 80f09db4f7ac..0d2d6c92fdb0 100644
--- a/include/llvm/Support/Casting.h
+++ b/include/llvm/Support/Casting.h
@@ -36,9 +36,13 @@ template<typename From> struct simplify_type {
 };
 
 template<typename From> struct simplify_type<const From> {
-  typedef const From SimpleType;
-  static SimpleType &getSimplifiedValue(const From &Val) {
-    return simplify_type<From>::getSimplifiedValue(static_cast<From&>(Val));
+  typedef typename simplify_type<From>::SimpleType NonConstSimpleType;
+  typedef typename add_const_past_pointer<NonConstSimpleType>::type
+    SimpleType;
+  typedef typename add_lvalue_reference_if_not_pointer<SimpleType>::type
+    RetType;
+  static RetType getSimplifiedValue(const From& Val) {
+    return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val));
   }
 };
 
@@ -81,6 +85,13 @@ template <typename To, typename From> struct isa_impl_cl<To, From*> {
   }
 };
 
+template <typename To, typename From> struct isa_impl_cl<To, From*const> {
+  static inline bool doit(const From *Val) {
+    assert(Val && "isa<> used on a null pointer");
+    return isa_impl<To, From>::doit(*Val);
+  }
+};
+
 template <typename To, typename From> struct isa_impl_cl<To, const From*> {
   static inline bool doit(const From *Val) {
     assert(Val && "isa<> used on a null pointer");
@@ -102,7 +113,7 @@ struct isa_impl_wrap {
   static bool doit(const From &Val) {
     return isa_impl_wrap<To, SimpleFrom,
       typename simplify_type<SimpleFrom>::SimpleType>::doit(
-                          simplify_type<From>::getSimplifiedValue(Val));
+                          simplify_type<const From>::getSimplifiedValue(Val));
   }
 };
 
@@ -121,7 +132,8 @@ struct isa_impl_wrap<To, FromTy, FromTy> {
 //
 template <class X, class Y>
 inline bool isa(const Y &Val) {
-  return isa_impl_wrap<X, Y, typename simplify_type<Y>::SimpleType>::doit(Val);
+  return isa_impl_wrap<X, const Y,
+                       typename simplify_type<const Y>::SimpleType>::doit(Val);
 }
 
 //===----------------------------------------------------------------------===//
@@ -178,7 +190,7 @@ struct cast_retty {
 //
 template<class To, class From, class SimpleFrom> struct cast_convert_val {
   // This is not a simple type, use the template to simplify it...
-  static typename cast_retty<To, From>::ret_type doit(const From &Val) {
+  static typename cast_retty<To, From>::ret_type doit(From &Val) {
     return cast_convert_val<To, SimpleFrom,
       typename simplify_type<SimpleFrom>::SimpleType>::doit(
                           simplify_type<From>::getSimplifiedValue(Val));
@@ -204,20 +216,14 @@ template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> {
 //  cast<Instruction>(myVal)->getParent()
 //
 template <class X, class Y>
-inline typename enable_if_c<
-  !is_same<Y, typename simplify_type<Y>::SimpleType>::value,
-  typename cast_retty<X, Y>::ret_type
->::type cast(const Y &Val) {
+inline typename cast_retty<X, const Y>::ret_type cast(const Y &Val) {
   assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
-  return cast_convert_val<X, Y,
-                          typename simplify_type<Y>::SimpleType>::doit(Val);
+  return cast_convert_val<X, const Y,
+                        typename simplify_type<const Y>::SimpleType>::doit(Val);
 }
 
 template <class X, class Y>
-inline typename enable_if<
-  is_same<Y, typename simplify_type<Y>::SimpleType>,
-  typename cast_retty<X, Y>::ret_type
->::type cast(Y &Val) {
+inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
   assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
   return cast_convert_val<X, Y,
                           typename simplify_type<Y>::SimpleType>::doit(Val);
@@ -253,18 +259,12 @@ inline typename cast_retty<X, Y*>::ret_type cast_or_null(Y *Val) {
 //
 
 template <class X, class Y>
-inline typename enable_if_c<
-  !is_same<Y, typename simplify_type<Y>::SimpleType>::value,
-  typename cast_retty<X, Y>::ret_type
->::type dyn_cast(const Y &Val) {
+inline typename cast_retty<X, const Y>::ret_type dyn_cast(const Y &Val) {
   return isa<X>(Val) ? cast<X>(Val) : 0;
 }
 
 template <class X, class Y>
-inline typename enable_if<
-  is_same<Y, typename simplify_type<Y>::SimpleType>,
-  typename cast_retty<X, Y>::ret_type
->::type dyn_cast(Y &Val) {
+inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) {
   return isa<X>(Val) ? cast<X>(Val) : 0;
 }
 
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 25f42a98e7de..13d057be049f 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -351,4 +351,14 @@
 #define LLVM_EXPLICIT
 #endif
 
+/// \macro LLVM_STATIC_ASSERT
+/// \brief Expands to C/C++'s static_assert on compilers which support it.
+#if __has_feature(cxx_static_assert)
+# define LLVM_STATIC_ASSERT(expr, msg) static_assert(expr, msg)
+#elif __has_feature(c_static_assert)
+# define LLVM_STATIC_ASSERT(expr, msg) _Static_assert(expr, msg)
+#else
+# define LLVM_STATIC_ASSERT(expr, msg)
+#endif
+
 #endif
diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h
index 3596be87e3c9..f35d40729263 100644
--- a/include/llvm/Support/DebugLoc.h
+++ b/include/llvm/Support/DebugLoc.h
@@ -9,7 +9,7 @@
 //
 // This file defines a number of light weight data structures used
 // to describe and track debug location information.
-// 
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_SUPPORT_DEBUGLOC_H
@@ -19,7 +19,7 @@ namespace llvm {
   template <typename T> struct DenseMapInfo;
   class MDNode;
   class LLVMContext;
-  
+
   /// DebugLoc - Debug location id.  This is carried by Instruction, SDNode,
   /// and MachineInstr to compactly encode file/line/scope information for an
   /// operation.
@@ -46,18 +46,18 @@ namespace llvm {
     /// location, encoded as 24-bits for line and 8 bits for col.  A value of 0
     /// for either means unknown.
     unsigned LineCol;
-    
+
     /// ScopeIdx - This is an opaque ID# for Scope/InlinedAt information,
     /// decoded by LLVMContext.  0 is unknown.
     int ScopeIdx;
   public:
     DebugLoc() : LineCol(0), ScopeIdx(0) {}  // Defaults to unknown.
-    
+
     /// get - Get a new DebugLoc that corresponds to the specified line/col
     /// scope/inline location.
     static DebugLoc get(unsigned Line, unsigned Col,
                         MDNode *Scope, MDNode *InlinedAt = 0);
-    
+
     /// getFromDILocation - Translate the DILocation quad into a DebugLoc.
     static DebugLoc getFromDILocation(MDNode *N);
 
@@ -66,32 +66,32 @@ namespace llvm {
 
     /// isUnknown - Return true if this is an unknown location.
     bool isUnknown() const { return ScopeIdx == 0; }
-    
+
     unsigned getLine() const {
       return (LineCol << 8) >> 8;  // Mask out column.
     }
-    
+
     unsigned getCol() const {
       return LineCol >> 24;
     }
-    
+
     /// getScope - This returns the scope pointer for this DebugLoc, or null if
     /// invalid.
     MDNode *getScope(const LLVMContext &Ctx) const;
-    
+
     /// getInlinedAt - This returns the InlinedAt pointer for this DebugLoc, or
     /// null if invalid or not present.
     MDNode *getInlinedAt(const LLVMContext &Ctx) const;
-    
+
     /// getScopeAndInlinedAt - Return both the Scope and the InlinedAt values.
     void getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
                               const LLVMContext &Ctx) const;
-    
-    
+
+
     /// getAsMDNode - This method converts the compressed DebugLoc node into a
     /// DILocation compatible MDNode.
     MDNode *getAsMDNode(const LLVMContext &Ctx) const;
-    
+
     bool operator==(const DebugLoc &DL) const {
       return LineCol == DL.LineCol && ScopeIdx == DL.ScopeIdx;
     }
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index cc9151ef50b6..232aadc3e0cd 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -466,6 +466,7 @@ enum {
 
 // ELF Relocation types for PPC64
 enum {
+  R_PPC64_NONE                = 0,
   R_PPC64_ADDR32              = 1,
   R_PPC64_ADDR16_LO           = 4,
   R_PPC64_ADDR16_HI           = 5,
@@ -480,10 +481,13 @@ enum {
   R_PPC64_TOC16_LO            = 48,
   R_PPC64_TOC16_HA            = 50,
   R_PPC64_TOC                 = 51,
+  R_PPC64_ADDR16_DS           = 56,
+  R_PPC64_ADDR16_LO_DS        = 57,
   R_PPC64_TOC16_DS            = 63,
   R_PPC64_TOC16_LO_DS         = 64,
   R_PPC64_TLS                 = 67,
   R_PPC64_TPREL16_LO          = 70,
+  R_PPC64_TPREL16_HA          = 72,
   R_PPC64_DTPREL16_LO         = 75,
   R_PPC64_DTPREL16_HA         = 77,
   R_PPC64_GOT_TLSGD16_LO      = 80,
diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h
index d438facfa4e1..0d358498839d 100644
--- a/include/llvm/Support/Endian.h
+++ b/include/llvm/Support/Endian.h
@@ -37,7 +37,7 @@ namespace detail {
 namespace endian {
 template<typename value_type, endianness endian>
 inline value_type byte_swap(value_type value) {
-  if (endian != native && sys::isBigEndianHost() != (endian == big))
+  if (endian != native && sys::IsBigEndianHost != (endian == big))
     return sys::SwapByteOrder(value);
   return value;
 }
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index ca5dec0173c5..b948d97bff92 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -24,7 +24,8 @@ namespace llvm {
 
   /// An error handler callback.
   typedef void (*fatal_error_handler_t)(void *user_data,
-                                        const std::string& reason);
+                                        const std::string& reason,
+                                        bool gen_crash_diag);
 
   /// install_fatal_error_handler - Installs a new error handler to be used
   /// whenever a serious (non-recoverable) error is encountered by LLVM.
@@ -73,10 +74,14 @@ namespace llvm {
   /// standard error, followed by a newline.
   /// After the error handler is called this function will call exit(1), it 
   /// does not return.
-  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason);
-  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason);
-  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason);
-  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason);
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason,
+                                                  bool gen_crash_diag = true);
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason,
+                                                  bool gen_crash_diag = true);
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason,
+                                                  bool gen_crash_diag = true);
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason,
+                                                  bool gen_crash_diag = true);
 
   /// This function calls abort(), and prints the optional message to stderr.
   /// Use the llvm_unreachable macro (that adds location info), instead of
diff --git a/include/llvm/Support/FEnv.h b/include/llvm/Support/FEnv.h
index d9cf7247a3a0..8560ee0a8afe 100644
--- a/include/llvm/Support/FEnv.h
+++ b/include/llvm/Support/FEnv.h
@@ -32,7 +32,7 @@ namespace sys {
 
 /// llvm_fenv_clearexcept - Clear the floating-point exception state.
 static inline void llvm_fenv_clearexcept() {
-#ifdef HAVE_FENV_H
+#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
   feclearexcept(FE_ALL_EXCEPT);
 #endif
   errno = 0;
@@ -43,7 +43,7 @@ static inline bool llvm_fenv_testexcept() {
   int errno_val = errno;
   if (errno_val == ERANGE || errno_val == EDOM)
     return true;
-#ifdef HAVE_FENV_H
+#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
   if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
     return true;
 #endif
diff --git a/include/llvm/Support/FormattedStream.h b/include/llvm/Support/FormattedStream.h
index 21635dcfb688..2e4bd5aeca2d 100644
--- a/include/llvm/Support/FormattedStream.h
+++ b/include/llvm/Support/FormattedStream.h
@@ -17,125 +17,125 @@
 
 #include "llvm/Support/raw_ostream.h"
 
-namespace llvm 
-{
-  /// formatted_raw_ostream - Formatted raw_fd_ostream to handle
-  /// asm-specific constructs.
+namespace llvm {
+
+/// formatted_raw_ostream - A raw_ostream that wraps another one and keeps track
+/// of column position, allowing padding out to specific column boundaries.
+///
+class formatted_raw_ostream : public raw_ostream {
+public:
+  /// DELETE_STREAM - Tell the destructor to delete the held stream.
   ///
-  class formatted_raw_ostream : public raw_ostream {
-  public:
-    /// DELETE_STREAM - Tell the destructor to delete the held stream.
-    ///
-    static const bool DELETE_STREAM = true;
-
-    /// PRESERVE_STREAM - Tell the destructor to not delete the held
-    /// stream.
-    ///
-    static const bool PRESERVE_STREAM = false;
-
-  private:
-    /// TheStream - The real stream we output to. We set it to be
-    /// unbuffered, since we're already doing our own buffering.
-    ///
-    raw_ostream *TheStream;
-
-    /// DeleteStream - Do we need to delete TheStream in the
-    /// destructor?
-    ///
-    bool DeleteStream;
-
-    /// ColumnScanned - The current output column of the data that's
-    /// been flushed and the portion of the buffer that's been
-    /// scanned.  The column scheme is zero-based.
-    ///
-    unsigned ColumnScanned;
-
-    /// Scanned - This points to one past the last character in the
-    /// buffer we've scanned.
-    ///
-    const char *Scanned;
-
-    virtual void write_impl(const char *Ptr, size_t Size) LLVM_OVERRIDE;
-
-    /// current_pos - Return the current position within the stream,
-    /// not counting the bytes currently in the buffer.
-    virtual uint64_t current_pos() const LLVM_OVERRIDE {
-      // Our current position in the stream is all the contents which have been
-      // written to the underlying stream (*not* the current position of the
-      // underlying stream).
-      return TheStream->tell();
-    }
-
-    /// ComputeColumn - Examine the given output buffer and figure out which
-    /// column we end up in after output.
-    ///
-    void ComputeColumn(const char *Ptr, size_t size);
-
-  public:
-    /// formatted_raw_ostream - Open the specified file for
-    /// writing. If an error occurs, information about the error is
-    /// put into ErrorInfo, and the stream should be immediately
-    /// destroyed; the string will be empty if no error occurred.
-    ///
-    /// As a side effect, the given Stream is set to be Unbuffered.
-    /// This is because formatted_raw_ostream does its own buffering,
-    /// so it doesn't want another layer of buffering to be happening
-    /// underneath it.
-    ///
-    formatted_raw_ostream(raw_ostream &Stream, bool Delete = false) 
-      : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) {
-      setStream(Stream, Delete);
-    }
-    explicit formatted_raw_ostream()
-      : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) {
-      Scanned = 0;
-    }
-
-    ~formatted_raw_ostream() {
-      flush();
-      releaseStream();
-    }
-
-    void setStream(raw_ostream &Stream, bool Delete = false) {
-      releaseStream();
-
-      TheStream = &Stream;
-      DeleteStream = Delete;
-
-      // This formatted_raw_ostream inherits from raw_ostream, so it'll do its
-      // own buffering, and it doesn't need or want TheStream to do another
-      // layer of buffering underneath. Resize the buffer to what TheStream
-      // had been using, and tell TheStream not to do its own buffering.
-      if (size_t BufferSize = TheStream->GetBufferSize())
-        SetBufferSize(BufferSize);
-      else
-        SetUnbuffered();
-      TheStream->SetUnbuffered();
+  static const bool DELETE_STREAM = true;
+
+  /// PRESERVE_STREAM - Tell the destructor to not delete the held
+  /// stream.
+  ///
+  static const bool PRESERVE_STREAM = false;
+
+private:
+  /// TheStream - The real stream we output to. We set it to be
+  /// unbuffered, since we're already doing our own buffering.
+  ///
+  raw_ostream *TheStream;
 
-      Scanned = 0;
-    }
-
-    /// PadToColumn - Align the output to some column number.  If the current
-    /// column is already equal to or more than NewCol, PadToColumn inserts one
-    /// space.
-    ///
-    /// \param NewCol - The column to move to.
-    formatted_raw_ostream &PadToColumn(unsigned NewCol);
-
-  private:
-    void releaseStream() {
-      // Delete the stream if needed. Otherwise, transfer the buffer
-      // settings from this raw_ostream back to the underlying stream.
-      if (!TheStream)
-        return;
-      if (DeleteStream)
-        delete TheStream;
-      else if (size_t BufferSize = GetBufferSize())
-        TheStream->SetBufferSize(BufferSize);
-      else
-        TheStream->SetUnbuffered();
-    }
-  };
+  /// DeleteStream - Do we need to delete TheStream in the
+  /// destructor?
+  ///
+  bool DeleteStream;
+
+  /// ColumnScanned - The current output column of the data that's
+  /// been flushed and the portion of the buffer that's been
+  /// scanned.  The column scheme is zero-based.
+  ///
+  unsigned ColumnScanned;
+
+  /// Scanned - This points to one past the last character in the
+  /// buffer we've scanned.
+  ///
+  const char *Scanned;
+
+  virtual void write_impl(const char *Ptr, size_t Size) LLVM_OVERRIDE;
+
+  /// current_pos - Return the current position within the stream,
+  /// not counting the bytes currently in the buffer.
+  virtual uint64_t current_pos() const LLVM_OVERRIDE {
+    // Our current position in the stream is all the contents which have been
+    // written to the underlying stream (*not* the current position of the
+    // underlying stream).
+    return TheStream->tell();
+  }
+
+  /// ComputeColumn - Examine the given output buffer and figure out which
+  /// column we end up in after output.
+  ///
+  void ComputeColumn(const char *Ptr, size_t size);
+
+public:
+  /// formatted_raw_ostream - Open the specified file for
+  /// writing. If an error occurs, information about the error is
+  /// put into ErrorInfo, and the stream should be immediately
+  /// destroyed; the string will be empty if no error occurred.
+  ///
+  /// As a side effect, the given Stream is set to be Unbuffered.
+  /// This is because formatted_raw_ostream does its own buffering,
+  /// so it doesn't want another layer of buffering to be happening
+  /// underneath it.
+  ///
+  formatted_raw_ostream(raw_ostream &Stream, bool Delete = false) 
+    : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) {
+    setStream(Stream, Delete);
+  }
+  explicit formatted_raw_ostream()
+    : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) {
+    Scanned = 0;
+  }
+
+  ~formatted_raw_ostream() {
+    flush();
+    releaseStream();
+  }
+
+  void setStream(raw_ostream &Stream, bool Delete = false) {
+    releaseStream();
+
+    TheStream = &Stream;
+    DeleteStream = Delete;
+
+    // This formatted_raw_ostream inherits from raw_ostream, so it'll do its
+    // own buffering, and it doesn't need or want TheStream to do another
+    // layer of buffering underneath. Resize the buffer to what TheStream
+    // had been using, and tell TheStream not to do its own buffering.
+    if (size_t BufferSize = TheStream->GetBufferSize())
+      SetBufferSize(BufferSize);
+    else
+      SetUnbuffered();
+    TheStream->SetUnbuffered();
+
+    Scanned = 0;
+  }
+
+  /// PadToColumn - Align the output to some column number.  If the current
+  /// column is already equal to or more than NewCol, PadToColumn inserts one
+  /// space.
+  ///
+  /// \param NewCol - The column to move to.
+  formatted_raw_ostream &PadToColumn(unsigned NewCol);
+
+private:
+  void releaseStream() {
+    // Delete the stream if needed. Otherwise, transfer the buffer
+    // settings from this raw_ostream back to the underlying stream.
+    if (!TheStream)
+      return;
+    if (DeleteStream)
+      delete TheStream;
+    else if (size_t BufferSize = GetBufferSize())
+      TheStream->SetBufferSize(BufferSize);
+    else
+      TheStream->SetUnbuffered();
+  }
+};
 
 /// fouts() - This returns a reference to a formatted_raw_ostream for
 /// standard output.  Use it like: fouts() << "foo" << "bar";
diff --git a/include/llvm/Support/Host.h b/include/llvm/Support/Host.h
index 3a4440573910..9a4036a8affb 100644
--- a/include/llvm/Support/Host.h
+++ b/include/llvm/Support/Host.h
@@ -15,23 +15,27 @@
 #define LLVM_SUPPORT_HOST_H
 
 #include "llvm/ADT/StringMap.h"
+
+#if defined(__linux__)
+#include <endian.h>
+#else
+#ifndef LLVM_ON_WIN32
+#include <machine/endian.h>
+#endif
+#endif
+
 #include <string>
 
 namespace llvm {
 namespace sys {
 
-  inline bool isLittleEndianHost() {
-    union {
-      int i;
-      char c;
-    };
-    i = 1;
-    return c;
-  }
+#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
+  static const bool IsBigEndianHost = true;
+#else
+  static const bool IsBigEndianHost = false;
+#endif
 
-  inline bool isBigEndianHost() {
-    return !isLittleEndianHost();
-  }
+  static const bool IsLittleEndianHost = !IsBigEndianHost;
 
   /// getDefaultTargetTriple() - Return the default target triple the compiler
   /// has been configured to produce code for.
diff --git a/include/llvm/Support/IRReader.h b/include/llvm/Support/IRReader.h
deleted file mode 100644
index 6d8a9b30ae1f..000000000000
--- a/include/llvm/Support/IRReader.h
+++ /dev/null
@@ -1,112 +0,0 @@
-//===---- llvm/Support/IRReader.h - Reader for LLVM IR files ----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines functions for reading LLVM IR. They support both
-// Bitcode and Assembly, automatically detecting the input format.
-//
-// These functions must be defined in a header file in order to avoid
-// library dependencies, since they reference both Bitcode and Assembly
-// functions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_IRREADER_H
-#define LLVM_SUPPORT_IRREADER_H
-
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Assembly/Parser.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/system_error.h"
-
-namespace llvm {
-
-  /// If the given MemoryBuffer holds a bitcode image, return a Module for it
-  /// which does lazy deserialization of function bodies.  Otherwise, attempt to
-  /// parse it as LLVM Assembly and return a fully populated Module. This
-  /// function *always* takes ownership of the given MemoryBuffer.
-  inline Module *getLazyIRModule(MemoryBuffer *Buffer,
-                                 SMDiagnostic &Err,
-                                 LLVMContext &Context) {
-    if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
-                  (const unsigned char *)Buffer->getBufferEnd())) {
-      std::string ErrMsg;
-      Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg);
-      if (M == 0) {
-        Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
-                           ErrMsg);
-        // ParseBitcodeFile does not take ownership of the Buffer in the
-        // case of an error.
-        delete Buffer;
-      }
-      return M;
-    }
-
-    return ParseAssembly(Buffer, 0, Err, Context);
-  }
-
-  /// If the given file holds a bitcode image, return a Module
-  /// for it which does lazy deserialization of function bodies.  Otherwise,
-  /// attempt to parse it as LLVM Assembly and return a fully populated
-  /// Module.
-  inline Module *getLazyIRFileModule(const std::string &Filename,
-                                     SMDiagnostic &Err,
-                                     LLVMContext &Context) {
-    OwningPtr<MemoryBuffer> File;
-    if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
-      Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
-                         "Could not open input file: " + ec.message());
-      return 0;
-    }
-
-    return getLazyIRModule(File.take(), Err, Context);
-  }
-
-  /// If the given MemoryBuffer holds a bitcode image, return a Module
-  /// for it.  Otherwise, attempt to parse it as LLVM Assembly and return
-  /// a Module for it. This function *always* takes ownership of the given
-  /// MemoryBuffer.
-  inline Module *ParseIR(MemoryBuffer *Buffer,
-                         SMDiagnostic &Err,
-                         LLVMContext &Context) {
-    if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
-                  (const unsigned char *)Buffer->getBufferEnd())) {
-      std::string ErrMsg;
-      Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg);
-      if (M == 0)
-        Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
-                           ErrMsg);
-      // ParseBitcodeFile does not take ownership of the Buffer.
-      delete Buffer;
-      return M;
-    }
-
-    return ParseAssembly(Buffer, 0, Err, Context);
-  }
-
-  /// If the given file holds a bitcode image, return a Module for it.
-  /// Otherwise, attempt to parse it as LLVM Assembly and return a Module
-  /// for it.
-  inline Module *ParseIRFile(const std::string &Filename,
-                             SMDiagnostic &Err,
-                             LLVMContext &Context) {
-    OwningPtr<MemoryBuffer> File;
-    if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
-      Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
-                         "Could not open input file: " + ec.message());
-      return 0;
-    }
-
-    return ParseIR(File.take(), Err, Context);
-  }
-
-}
-
-#endif
diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h
index a0cc27c0241c..fb177de97b40 100644
--- a/include/llvm/Support/Program.h
+++ b/include/llvm/Support/Program.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_SUPPORT_PROGRAM_H
 #define LLVM_SUPPORT_PROGRAM_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/Path.h"
 
 namespace llvm {
@@ -125,7 +126,8 @@ namespace sys {
                               const sys::Path** redirects = 0,
                               unsigned secondsToWait = 0,
                               unsigned memoryLimit = 0,
-                              std::string* ErrMsg = 0);
+                              std::string* ErrMsg = 0,
+                              bool *ExecutionFailed = 0);
 
     /// A convenience function equivalent to Program prg; prg.Execute(..);
     /// @see Execute
@@ -139,6 +141,10 @@ namespace sys {
     /// @}
 
   };
+
+  // Return true if the given arguments fit within system-specific
+  // argument length limits.
+  bool argumentsFitWithinSystemLimits(ArrayRef<const char*> Args);
 }
 }
 
diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h
index db44995d95b4..b49341c3ffb6 100644
--- a/include/llvm/Support/ValueHandle.h
+++ b/include/llvm/Support/ValueHandle.h
@@ -20,6 +20,7 @@
 
 namespace llvm {
 class ValueHandleBase;
+template<typename From> struct simplify_type;
 
 // ValueHandleBase** is only 4-byte aligned.
 template<>
@@ -162,14 +163,12 @@ class WeakVH : public ValueHandleBase {
 
 // Specialize simplify_type to allow WeakVH to participate in
 // dyn_cast, isa, etc.
-template<typename From> struct simplify_type;
-template<> struct simplify_type<const WeakVH> {
+template<> struct simplify_type<WeakVH> {
   typedef Value* SimpleType;
-  static SimpleType getSimplifiedValue(const WeakVH &WVH) {
-    return static_cast<Value *>(WVH);
+  static SimpleType getSimplifiedValue(WeakVH &WVH) {
+    return WVH;
   }
 };
-template<> struct simplify_type<WeakVH> : public simplify_type<const WeakVH> {};
 
 /// AssertingVH - This is a Value Handle that points to a value and asserts out
 /// if the value is destroyed while the handle is still live.  This is very
@@ -236,18 +235,6 @@ class AssertingVH
   ValueTy &operator*() const { return *getValPtr(); }
 };
 
-// Specialize simplify_type to allow AssertingVH to participate in
-// dyn_cast, isa, etc.
-template<typename From> struct simplify_type;
-template<> struct simplify_type<const AssertingVH<Value> > {
-  typedef Value* SimpleType;
-  static SimpleType getSimplifiedValue(const AssertingVH<Value> &AVH) {
-    return static_cast<Value *>(AVH);
-  }
-};
-template<> struct simplify_type<AssertingVH<Value> >
-  : public simplify_type<const AssertingVH<Value> > {};
-
 // Specialize DenseMapInfo to allow AssertingVH to participate in DenseMap.
 template<typename T>
 struct DenseMapInfo<AssertingVH<T> > {
@@ -345,18 +332,6 @@ class TrackingVH : public ValueHandleBase {
   ValueTy &operator*() const { return *getValPtr(); }
 };
 
-// Specialize simplify_type to allow TrackingVH to participate in
-// dyn_cast, isa, etc.
-template<typename From> struct simplify_type;
-template<> struct simplify_type<const TrackingVH<Value> > {
-  typedef Value* SimpleType;
-  static SimpleType getSimplifiedValue(const TrackingVH<Value> &AVH) {
-    return static_cast<Value *>(AVH);
-  }
-};
-template<> struct simplify_type<TrackingVH<Value> >
-  : public simplify_type<const TrackingVH<Value> > {};
-
 /// CallbackVH - This is a value handle that allows subclasses to define
 /// callbacks that run when the underlying Value has RAUW called on it or is
 /// destroyed.  This class can be used as the key of a map, as long as the user
@@ -399,18 +374,6 @@ class CallbackVH : public ValueHandleBase {
   virtual void allUsesReplacedWith(Value *);
 };
 
-// Specialize simplify_type to allow CallbackVH to participate in
-// dyn_cast, isa, etc.
-template<typename From> struct simplify_type;
-template<> struct simplify_type<const CallbackVH> {
-  typedef Value* SimpleType;
-  static SimpleType getSimplifiedValue(const CallbackVH &CVH) {
-    return static_cast<Value *>(CVH);
-  }
-};
-template<> struct simplify_type<CallbackVH>
-  : public simplify_type<const CallbackVH> {};
-
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/Watchdog.h b/include/llvm/Support/Watchdog.h
new file mode 100644
index 000000000000..b58496b2fb8e
--- /dev/null
+++ b/include/llvm/Support/Watchdog.h
@@ -0,0 +1,38 @@
+//===--- Watchdog.h - Watchdog timer ----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file declares the llvm::sys::Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_WATCHDOG_H
+#define LLVM_SUPPORT_WATCHDOG_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+  namespace sys {
+
+    /// This class provides an abstraction for a timeout around an operation
+    /// that must complete in a given amount of time. Failure to complete before
+    /// the timeout is an unrecoverable situation and no mechanisms to attempt
+    /// to handle it are provided.
+    class Watchdog {
+    public:
+      Watchdog(unsigned int seconds);
+      ~Watchdog();
+    private:
+      // Noncopyable.
+      Watchdog(const Watchdog &other) LLVM_DELETED_FUNCTION;
+      Watchdog &operator=(const Watchdog &other) LLVM_DELETED_FUNCTION;
+    };
+  }
+}
+
+#endif
diff --git a/include/llvm/Support/Win64EH.h b/include/llvm/Support/Win64EH.h
index 164aca16bf3a..ecce71368041 100644
--- a/include/llvm/Support/Win64EH.h
+++ b/include/llvm/Support/Win64EH.h
@@ -106,12 +106,17 @@ struct UnwindInfo {
     return reinterpret_cast<void *>(&UnwindCodes[(NumCodes+1) & ~1]);
   }
 
-  /// \brief Return image-relativ offset of language-specific exception handler.
-  uint32_t getLanguageSpecificHandlerOffset() {
-    return *reinterpret_cast<uint32_t *>(getLanguageSpecificData());
+  /// \brief Return pointer to language specific data part of UnwindInfo.
+  const void *getLanguageSpecificData() const {
+    return reinterpret_cast<const void *>(&UnwindCodes[(NumCodes+1) & ~1]);
+  }
+
+  /// \brief Return image-relative offset of language-specific exception handler.
+  uint32_t getLanguageSpecificHandlerOffset() const {
+    return *reinterpret_cast<const uint32_t *>(getLanguageSpecificData());
   }
 
-  /// \brief Set image-relativ offset of language-specific exception handler.
+  /// \brief Set image-relative offset of language-specific exception handler.
   void setLanguageSpecificHandlerOffset(uint32_t offset) {
     *reinterpret_cast<uint32_t *>(getLanguageSpecificData()) = offset;
   }
@@ -126,6 +131,11 @@ struct UnwindInfo {
   RuntimeFunction *getChainedFunctionEntry() {
     return reinterpret_cast<RuntimeFunction *>(getLanguageSpecificData());
   }
+
+  /// \brief Return pointer to chained unwind info.
+  const RuntimeFunction *getChainedFunctionEntry() const {
+    return reinterpret_cast<const RuntimeFunction *>(getLanguageSpecificData());
+  }
 };
 
 
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index db43ccfece14..906e97c91fb6 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -209,6 +209,26 @@ template <typename T> struct remove_pointer<T*volatile> { typedef T type; };
 template <typename T> struct remove_pointer<T*const volatile> {
     typedef T type; };
 
+// If T is a pointer, just return it. If it is not, return T&.
+template<typename T, typename Enable = void>
+struct add_lvalue_reference_if_not_pointer { typedef T &type; };
+
+template<typename T>
+struct add_lvalue_reference_if_not_pointer<T,
+                                     typename enable_if<is_pointer<T> >::type> {
+  typedef T type;
+};
+
+// If T is a pointer to X, return a pointer to const X. If it is not, return
+// const T.
+template<typename T, typename Enable = void>
+struct add_const_past_pointer { typedef const T type; };
+
+template<typename T>
+struct add_const_past_pointer<T, typename enable_if<is_pointer<T> >::type> {
+  typedef const typename remove_pointer<T>::type *type;
+};
+
 template <bool, typename T, typename F>
 struct conditional { typedef T type; };
 
diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h
index 2f6b7e625c3d..2d0a2b45a96a 100644
--- a/include/llvm/TableGen/Error.h
+++ b/include/llvm/TableGen/Error.h
@@ -32,6 +32,7 @@ LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc,
                                              const std::string &Msg);
 
 extern SourceMgr SrcMgr;
+extern unsigned ErrorsPrinted;
 
 
 } // end namespace "llvm"
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index deee2eb6debf..7de8b384c353 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -790,8 +790,8 @@ class AsmParser {
   // This can be used to perform target specific instruction post-processing.
   string AsmParserInstCleanup  = "";
 
-  //ShouldEmitMatchRegisterName - Set to false if the target needs a hand
-  //written register name matcher
+  // ShouldEmitMatchRegisterName - Set to false if the target needs a hand
+  // written register name matcher
   bit ShouldEmitMatchRegisterName = 1;
 }
 def DefaultAsmParser : AsmParser;
@@ -807,6 +807,9 @@ class AsmParserVariant {
   // assembly language.
   int Variant = 0;
 
+  // Name - The AsmParser variant name (e.g., AT&T vs Intel).
+  string Name = "";
+
   // CommentDelimiter - If given, the delimiter string used to recognize
   // comments which are hard coded in the .td assembler strings for individual
   // instructions.
@@ -860,9 +863,16 @@ class TokenAlias<string From, string To> {
 ///  def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
 ///  def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
 ///
-class MnemonicAlias<string From, string To> {
+/// Mnemonic aliases can also be constrained to specific variants, e.g.:
+///
+///  def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>;
+///
+/// If no variant (e.g., "att" or "intel") is specified then the alias is
+/// applied unconditionally.
+class MnemonicAlias<string From, string To, string VariantName = ""> {
   string FromMnemonic = From;
   string ToMnemonic = To;
+  string AsmVariantName = VariantName;
 
   // Predicates - Predicates that must be true for this remapping to happen.
   list<Predicate> Predicates = [];
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index fb311d850856..d49ce1ce7f46 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -364,11 +364,10 @@ class TargetInstrInfo : public MCInstrInfo {
   /// condition code in Cond.
   ///
   /// When successful, also return the latency in cycles from TrueReg,
-  /// FalseReg, and Cond to the destination register. The Cond latency should
-  /// compensate for a conditional branch being removed. For example, if a
-  /// conditional branch has a 3 cycle latency from the condition code read,
-  /// and a cmov instruction has a 2 cycle latency from the condition code
-  /// read, CondCycles should be returned as -1.
+  /// FalseReg, and Cond to the destination register. In most cases, a select
+  /// instruction will be 1 cycle, so CondCycles = TrueCycles = FalseCycles = 1
+  ///
+  /// Some x86 implementations have 2-cycle cmov instructions.
   ///
   /// @param MBB         Block where select instruction would be inserted.
   /// @param Cond        Condition returned by AnalyzeBranch.
@@ -775,6 +774,10 @@ class TargetInstrInfo : public MCInstrInfo {
 
   /// FoldImmediate - 'Reg' is known to be defined by a move immediate
   /// instruction, try to fold the immediate into the use instruction.
+  /// If MRI->hasOneNonDBGUse(Reg) is true, and this function returns true,
+  /// then the caller may assume that DefMI has been erased from its parent
+  /// block. The caller may assume that it will not be erased by this
+  /// function otherwise.
   virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
                              unsigned Reg, MachineRegisterInfo *MRI) const {
     return false;
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 5d74848b71c7..e169bcf9e420 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -135,6 +135,11 @@ class TargetLoweringBase {
                               const TargetLoweringObjectFile *TLOF);
   virtual ~TargetLoweringBase();
 
+protected:
+  /// \brief Initialize all of the actions to default values.
+  void initActions();
+
+public:
   const TargetMachine &getTargetMachine() const { return TM; }
   const DataLayout *getDataLayout() const { return TD; }
   const TargetLoweringObjectFile &getObjFileLowering() const { return TLOF; }
@@ -694,13 +699,6 @@ class TargetLoweringBase {
     return false;
   }
 
-  /// This function returns true if the target would benefit from code placement
-  /// optimization.
-  /// @brief Determine if the target should perform code placement optimization.
-  bool shouldOptimizeCodePlacement() const {
-    return BenefitFromCodePlacementOpt;
-  }
-
   /// getOptimalMemOpType - Returns the target specific optimal type for load
   /// and store operations as a result of memset, memcpy, and memmove
   /// lowering. If DstAlign is zero that means it's safe to destination
@@ -858,6 +856,9 @@ class TargetLoweringBase {
   // the derived class constructor to configure this object for the target.
   //
 
+  /// \brief Reset the operation actions based on target options.
+  virtual void resetOperationActions() {}
+
 protected:
   /// setBooleanContents - Specify how the target extends the result of a
   /// boolean value from i1 to a wider type.  See getBooleanContents.
@@ -958,13 +959,17 @@ class TargetLoweringBase {
     RegClassForVT[VT.SimpleTy] = RC;
   }
 
-  /// clearRegisterClasses - remove all register classes
+  /// clearRegisterClasses - Remove all register classes.
   void clearRegisterClasses() {
-    for (unsigned i = 0 ; i<array_lengthof(RegClassForVT); i++)
-      RegClassForVT[i] = 0;
+    memset(RegClassForVT, 0,MVT::LAST_VALUETYPE * sizeof(TargetRegisterClass*));
+
     AvailableRegClasses.clear();
   }
 
+  /// \brief Remove all operation actions.
+  void clearOperationActions() {
+  }
+
   /// findRepresentativeClass - Return the largest legal super-reg register class
   /// of the register class for the specified type and its associated "cost".
   virtual std::pair<const TargetRegisterClass*, uint8_t>
@@ -1518,6 +1523,7 @@ class TargetLoweringBase {
       // or until the element integer type is too big. If a legal type was not
       // found, fallback to the usual mechanism of widening/splitting the
       // vector.
+      EVT OldEltVT = EltVT;
       while (1) {
         // Increase the bitwidth of the element to the next pow-of-two
         // (which is greater than 8 bits).
@@ -1536,6 +1542,10 @@ class TargetLoweringBase {
           return LegalizeKind(TypePromoteInteger,
                               EVT::getVectorVT(Context, EltVT, NumElts));
       }
+
+      // Reset the type to the unexpanded type if we did not find a legal vector
+      // type with a promoted vector element type.
+      EltVT = OldEltVT;
     }
 
     // Try to widen the vector until a legal type is found.
@@ -1644,10 +1654,6 @@ class TargetLoweringBase {
   /// to memmove, used for functions with OpSize attribute.
   unsigned MaxStoresPerMemmoveOptSize;
 
-  /// This field specifies whether the target can benefit from code placement
-  /// optimization.
-  bool BenefitFromCodePlacementOpt;
-
   /// PredictableSelectIsExpensive - Tells the code generator that select is
   /// more expensive than a branch if the branch is usually predicted right.
   bool PredictableSelectIsExpensive;
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index c31db244900e..5041af84efb3 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -208,6 +208,11 @@ namespace llvm {
     /// the value of this option.
     FPOpFusion::FPOpFusionMode AllowFPOpFusion;
 
+    /// The size of the stack segment to use for functions with fixed-size
+    /// stack segments, in bytes.
+    unsigned FixedStackSegmentSize;
+
+    bool operator==(const TargetOptions &);
   };
 } // End llvm namespace
 
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 209f68db6fd9..563721e12825 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -103,7 +103,8 @@ class PassManagerBuilder {
   bool DisableSimplifyLibCalls;
   bool DisableUnitAtATime;
   bool DisableUnrollLoops;
-  bool Vectorize;
+  bool BBVectorize;
+  bool SLPVectorize;
   bool LoopVectorize;
 
 private:
diff --git a/include/llvm/Transforms/Utils/BlackList.h b/include/llvm/Transforms/Utils/BlackList.h
index f19470e19d8a..316b364845ca 100644
--- a/include/llvm/Transforms/Utils/BlackList.h
+++ b/include/llvm/Transforms/Utils/BlackList.h
@@ -20,6 +20,7 @@
 // global-init:*global_with_initialization_issues*
 // global-init-type:*Namespace::ClassName*
 // src:file_with_tricky_code.cc
+// global-init-src:ignore-global-initializers-issues.cc
 // ---
 // Note that the wild card is in fact an llvm::Regex, but * is automatically
 // replaced with .*
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index d205dbdede2e..8d0db1611609 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -116,6 +116,12 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
 //
 Pass *createLoopVectorizePass();
 
+//===----------------------------------------------------------------------===//
+//
+// SLPVectorizer - Create a bottom-up SLP vectorizer pass.
+//
+Pass *createSLPVectorizerPass();
+
 //===----------------------------------------------------------------------===//
 /// @brief Vectorize the BasicBlock.
 ///
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index e58dde3d93bf..a571463dfe12 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -44,6 +44,8 @@ static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
 static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
 static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
 
+static cl::opt<bool> EvalTBAA("evaluate-tbaa", cl::ReallyHidden);
+
 namespace {
   class AAEval : public FunctionPass {
     unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
@@ -123,6 +125,15 @@ PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
   }
 }
 
+static inline void
+PrintLoadStoreResults(const char *Msg, bool P, const Value *V1,
+                      const Value *V2, const Module *M) {
+  if (P) {
+    errs() << "  " << Msg << ": " << *V1
+           << " <-> " << *V2 << '\n';
+  }
+}
+
 static inline bool isInterestingPointer(Value *V) {
   return V->getType()->isPointerTy()
       && !isa<ConstantPointerNull>(V);
@@ -133,6 +144,8 @@ bool AAEval::runOnFunction(Function &F) {
 
   SetVector<Value *> Pointers;
   SetVector<CallSite> CallSites;
+  SetVector<Value *> Loads;
+  SetVector<Value *> Stores;
 
   for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
     if (I->getType()->isPointerTy())    // Add all pointer arguments.
@@ -141,6 +154,10 @@ bool AAEval::runOnFunction(Function &F) {
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
     if (I->getType()->isPointerTy()) // Add all pointer instructions.
       Pointers.insert(&*I);
+    if (EvalTBAA && isa<LoadInst>(&*I))
+      Loads.insert(&*I);
+    if (EvalTBAA && isa<StoreInst>(&*I))
+      Stores.insert(&*I);
     Instruction &Inst = *I;
     if (CallSite CS = cast<Value>(&Inst)) {
       Value *Callee = CS.getCalledValue();
@@ -197,6 +214,61 @@ bool AAEval::runOnFunction(Function &F) {
     }
   }
 
+  if (EvalTBAA) {
+    // iterate over all pairs of load, store
+    for (SetVector<Value *>::iterator I1 = Loads.begin(), E = Loads.end();
+         I1 != E; ++I1) {
+      for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end();
+           I2 != E2; ++I2) {
+        switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)),
+                         AA.getLocation(cast<StoreInst>(*I2)))) {
+        case AliasAnalysis::NoAlias:
+          PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
+                                F.getParent());
+          ++NoAlias; break;
+        case AliasAnalysis::MayAlias:
+          PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
+                                F.getParent());
+          ++MayAlias; break;
+        case AliasAnalysis::PartialAlias:
+          PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+                                F.getParent());
+          ++PartialAlias; break;
+        case AliasAnalysis::MustAlias:
+          PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
+                                F.getParent());
+          ++MustAlias; break;
+        }
+      }
+    }
+
+    // iterate over all pairs of store, store
+    for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end();
+         I1 != E; ++I1) {
+      for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) {
+        switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)),
+                         AA.getLocation(cast<StoreInst>(*I2)))) {
+        case AliasAnalysis::NoAlias:
+          PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
+                                F.getParent());
+          ++NoAlias; break;
+        case AliasAnalysis::MayAlias:
+          PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
+                                F.getParent());
+          ++MayAlias; break;
+        case AliasAnalysis::PartialAlias:
+          PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+                                F.getParent());
+          ++PartialAlias; break;
+        case AliasAnalysis::MustAlias:
+          PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
+                                F.getParent());
+          ++MustAlias; break;
+        }
+      }
+    }
+  }
+
   // Mod/ref alias analysis: compare all pairs of calls and values
   for (SetVector<CallSite>::iterator C = CallSites.begin(),
          Ce = CallSites.end(); C != Ce; ++C) {
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 4139336f268b..f8509dd070ff 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
                               const TargetLibraryInfo &TLI,
                               bool RoundToAlign = false) {
   uint64_t Size;
-  if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign))
+  if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign))
     return Size;
   return AliasAnalysis::UnknownSize;
 }
@@ -98,6 +98,35 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
 static bool isObjectSmallerThan(const Value *V, uint64_t Size,
                                 const DataLayout &TD,
                                 const TargetLibraryInfo &TLI) {
+  // Note that the meanings of the "object" are slightly different in the
+  // following contexts:
+  //    c1: llvm::getObjectSize()
+  //    c2: llvm.objectsize() intrinsic
+  //    c3: isObjectSmallerThan()
+  // c1 and c2 share the same meaning; however, the meaning of "object" in c3
+  // refers to the "entire object".
+  //
+  //  Consider this example:
+  //     char *p = (char*)malloc(100)
+  //     char *q = p+80;
+  //
+  //  In the context of c1 and c2, the "object" pointed by q refers to the
+  // stretch of memory of q[0:19]. So, getObjectSize(q) should return 20.
+  //
+  //  However, in the context of c3, the "object" refers to the chunk of memory
+  // being allocated. So, the "object" has 100 bytes, and q points to the middle
+  // the "object". In case q is passed to isObjectSmallerThan() as the 1st
+  // parameter, before the llvm::getObjectSize() is called to get the size of
+  // entire object, we should:
+  //    - either rewind the pointer q to the base-address of the object in
+  //      question (in this case rewind to p), or
+  //    - just give up. It is up to caller to make sure the pointer is pointing
+  //      to the base address the object.
+  // 
+  // We go for 2nd option for simplicity.
+  if (!isIdentifiedObject(V))
+    return false;
+
   // This function needs to use the aligned object size because we allow
   // reads a bit past the end given sufficient alignment.
   uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
@@ -851,9 +880,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
   // pointers, figure out if the indexes to the GEP tell us anything about the
   // derived pointer.
   if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+    // Do the base pointers alias?
+    AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
+                                       UnderlyingV2, UnknownSize, 0);
+
     // Check for geps of non-aliasing underlying pointers where the offsets are
     // identical.
-    if (V1Size == V2Size) {
+    if ((BaseAlias == MayAlias) && V1Size == V2Size) {
       // Do the base pointers alias assuming type and size.
       AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
                                                 V1TBAAInfo, UnderlyingV2,
@@ -881,10 +914,6 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
         GEP1VariableIndices.clear();
       }
     }
-
-    // Do the base pointers alias?
-    AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
-                                       UnderlyingV2, UnknownSize, 0);
     
     // If we get a No or May, then return it immediately, no amount of analysis
     // will improve this situation.
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 09d7608c51da..bc0dffc47362 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -17,6 +17,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -550,7 +551,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
 
 
   if (Opc == Instruction::And && DL) {
-    unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType());
+    unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType());
     APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
     APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
     ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL);
@@ -880,19 +881,20 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
   return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
 }
 
-/// ConstantFoldConstantExpression - Attempt to fold the constant expression
-/// using the specified DataLayout.  If successful, the constant result is
-/// result is returned, if not, null is returned.
-Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
-                                               const DataLayout *TD,
-                                               const TargetLibraryInfo *TLI) {
-  SmallVector<Constant*, 8> Ops;
-  for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
-       i != e; ++i) {
+static Constant *
+ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
+                                   const TargetLibraryInfo *TLI,
+                                   SmallPtrSet<ConstantExpr *, 4> &FoldedOps) {
+  SmallVector<Constant *, 8> Ops;
+  for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e;
+       ++i) {
     Constant *NewC = cast<Constant>(*i);
-    // Recursively fold the ConstantExpr's operands.
-    if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
-      NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
+    // Recursively fold the ConstantExpr's operands. If we have already folded
+    // a ConstantExpr, we don't have to process it again.
+    if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
+      if (FoldedOps.insert(NewCE))
+        NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
+    }
     Ops.push_back(NewC);
   }
 
@@ -902,6 +904,16 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
   return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
 }
 
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified DataLayout.  If successful, the constant result is
+/// result is returned, if not, null is returned.
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
+                                               const DataLayout *TD,
+                                               const TargetLibraryInfo *TLI) {
+  SmallPtrSet<ConstantExpr *, 4> FoldedOps;
+  return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
+}
+
 /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
 /// specified opcode and operands.  If successful, the constant result is
 /// returned, if not, null is returned.  Note that this function can fail when
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index df70d157b41d..98a7780ad9a6 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -88,6 +88,23 @@ static bool isReverseVectorMask(SmallVector<int, 16> &Mask) {
   return true;
 }
 
+static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
+  TargetTransformInfo::OperandValueKind OpInfo =
+    TargetTransformInfo::OK_AnyValue;
+
+  // Check for a splat of a constant.
+  ConstantDataVector *CDV = 0;
+  if ((CDV = dyn_cast<ConstantDataVector>(V)))
+    if (CDV->getSplatValue() != NULL)
+      OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+  ConstantVector *CV = 0;
+  if ((CV = dyn_cast<ConstantVector>(V)))
+    if (CV->getSplatValue() != NULL)
+      OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+
+  return OpInfo;
+}
+
 unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
   if (!TTI)
     return -1;
@@ -121,7 +138,12 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor: {
-    return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType());
+    TargetTransformInfo::OperandValueKind Op1VK =
+      getOperandInfo(I->getOperand(0));
+    TargetTransformInfo::OperandValueKind Op2VK =
+      getOperandInfo(I->getOperand(1));
+    return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
+                                       Op2VK);
   }
   case Instruction::Select: {
     const SelectInst *SI = cast<SelectInst>(I);
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 4a3c74e9db35..bf7745143da7 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1711,7 +1711,7 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
 //    subobject at its beginning) or function, both are pointers to one past the
 //    last element of the same array object, or one is a pointer to one past the
 //    end of one array object and the other is a pointer to the start of a
-//    different array object that happens to immediately follow the ﬁrst array
+//    different array object that happens to immediately follow the first array
 //    object in the address space.)
 //
 // C11's version is more restrictive, however there's no reason why an argument
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index d490d5419f75..9c0d8ac6a3e2 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -364,26 +364,6 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
   return true;
 }
 
-/// \brief Compute the size of the underlying object pointed by Ptr. Returns
-/// true and the object size in Size if successful, and false otherwise.
-/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
-/// byval arguments, and global variables.
-bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size,
-                                   const DataLayout *TD,
-                                   const TargetLibraryInfo *TLI,
-                                   bool RoundToAlign) {
-  if (!TD)
-    return false;
-
-  ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
-  SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
-  if (!Visitor.knownSize(Data))
-    return false;
-
-  Size = Data.first.getZExtValue();
-  return true;
-}
-
 
 STATISTIC(ObjectVisitorArgument,
           "Number of arguments with unsolved size and offset");
@@ -409,23 +389,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
 
 SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
   V = V->stripPointerCasts();
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    // If we have already seen this instruction, bail out. Cycles can happen in
+    // unreachable code after constant propagation.
+    if (!SeenInsts.insert(I))
+      return unknown();
 
-  if (isa<Instruction>(V) || isa<GEPOperator>(V)) {
-    // Return cached value or insert unknown in cache if size of V was not
-    // computed yet in order to avoid recursions in PHis.
-    std::pair<CacheMapTy::iterator, bool> CacheVal =
-      CacheMap.insert(std::make_pair(V, unknown()));
-    if (!CacheVal.second)
-      return CacheVal.first->second;
-
-    SizeOffsetType Result;
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
-      Result = visitGEPOperator(*GEP);
-    else
-      Result = visit(cast<Instruction>(*V));
-    return CacheMap[V] = Result;
+      return visitGEPOperator(*GEP);
+    return visit(*I);
   }
-
   if (Argument *A = dyn_cast<Argument>(V))
     return visitArgument(*A);
   if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V))
@@ -439,6 +412,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     if (CE->getOpcode() == Instruction::IntToPtr)
       return unknown(); // clueless
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      return visitGEPOperator(cast<GEPOperator>(*CE));
   }
 
   DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V
@@ -572,21 +547,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
   return unknown();
 }
 
-SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) {
-  if (PHI.getNumIncomingValues() == 0)
-    return unknown();
-
-  SizeOffsetType Ret = compute(PHI.getIncomingValue(0));
-  if (!bothKnown(Ret))
-    return unknown();
-
-  // Verify that all PHI incoming pointers have the same size and offset.
-  for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) {
-    SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i));
-    if (!bothKnown(EdgeData) || EdgeData != Ret)
-      return unknown();
-  }
-  return Ret;
+SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) {
+  // too complex to analyze statically.
+  return unknown();
 }
 
 SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 1faa04623ef7..0e8529a4b070 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file implements an analysis that determines, for a given memory
-// operation, what preceding memory operations it depends on.  It builds on 
+// operation, what preceding memory operations it depends on.  It builds on
 // alias analysis information, and tries to provide a lazy, caching interface to
 // a common kind of alias information query.
 //
@@ -47,12 +47,10 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
           "Number of block queries that were completely cached");
 
 // Limit for the number of instructions to scan in a block.
-// FIXME: Figure out what a sane value is for this.
-//        (500 is relatively insane.)
-static const int BlockScanLimit = 500;
+static const int BlockScanLimit = 100;
 
 char MemoryDependenceAnalysis::ID = 0;
-  
+
 // Register this pass...
 INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
                 "Memory Dependence Analysis", false, true)
@@ -99,7 +97,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) {
 /// RemoveFromReverseMap - This is a helper function that removes Val from
 /// 'Inst's set in ReverseMap.  If the set becomes empty, remove Inst's entry.
 template <typename KeyTy>
-static void RemoveFromReverseMap(DenseMap<Instruction*, 
+static void RemoveFromReverseMap(DenseMap<Instruction*,
                                  SmallPtrSet<KeyTy, 4> > &ReverseMap,
                                  Instruction *Inst, KeyTy Val) {
   typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
@@ -123,7 +121,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
     if (LI->isUnordered()) {
       Loc = AA->getLocation(LI);
       return AliasAnalysis::Ref;
-    } else if (LI->getOrdering() == Monotonic) {
+    }
+    if (LI->getOrdering() == Monotonic) {
       Loc = AA->getLocation(LI);
       return AliasAnalysis::ModRef;
     }
@@ -135,7 +134,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
     if (SI->isUnordered()) {
       Loc = AA->getLocation(SI);
       return AliasAnalysis::Mod;
-    } else if (SI->getOrdering() == Monotonic) {
+    }
+    if (SI->getOrdering() == Monotonic) {
       Loc = AA->getLocation(SI);
       return AliasAnalysis::ModRef;
     }
@@ -196,13 +196,13 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
   // Walk backwards through the block, looking for dependencies
   while (ScanIt != BB->begin()) {
     // Limit the amount of scanning we do so we don't end up with quadratic
-    // running time on extreme testcases. 
+    // running time on extreme testcases.
     --Limit;
     if (!Limit)
       return MemDepResult::getUnknown();
 
     Instruction *Inst = --ScanIt;
-    
+
     // If this inst is a memory op, get the pointer it accessed
     AliasAnalysis::Location Loc;
     AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
@@ -251,7 +251,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
 ///
 /// MemLocBase, MemLocOffset are lazily computed here the first time the
 /// base/offs of memloc is needed.
-static bool 
+static bool
 isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
                                        const Value *&MemLocBase,
                                        int64_t &MemLocOffs,
@@ -289,25 +289,25 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
   if (LI->getParent()->getParent()->getAttributes().
       hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
     return 0;
-  
+
   // Get the base of this load.
   int64_t LIOffs = 0;
-  const Value *LIBase = 
+  const Value *LIBase =
     GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD);
-  
+
   // If the two pointers are not based on the same pointer, we can't tell that
   // they are related.
   if (LIBase != MemLocBase) return 0;
-  
+
   // Okay, the two values are based on the same pointer, but returned as
   // no-alias.  This happens when we have things like two byte loads at "P+1"
   // and "P+3".  Check to see if increasing the size of the "LI" load up to its
   // alignment (or the largest native integer type) will allow us to load all
   // the bits required by MemLoc.
-  
+
   // If MemLoc is before LI, then no widening of LI will help us out.
   if (MemLocOffs < LIOffs) return 0;
-  
+
   // Get the alignment of the load in bytes.  We assume that it is safe to load
   // any legal integer up to this size without a problem.  For example, if we're
   // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
@@ -316,15 +316,15 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
   unsigned LoadAlign = LI->getAlignment();
 
   int64_t MemLocEnd = MemLocOffs+MemLocSize;
-  
+
   // If no amount of rounding up will let MemLoc fit into LI, then bail out.
   if (LIOffs+LoadAlign < MemLocEnd) return 0;
-  
+
   // This is the size of the load to try.  Start with the next larger power of
   // two.
   unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
   NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
-  
+
   while (1) {
     // If this load size is bigger than our known alignment or would not fit
     // into a native integer register, then we fail.
@@ -343,7 +343,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
     // If a load of this width would include all of MemLoc, then we succeed.
     if (LIOffs+NewLoadByteSize >= MemLocEnd)
       return NewLoadByteSize;
-    
+
     NewLoadByteSize <<= 1;
   }
 }
@@ -355,7 +355,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
 /// instruction as well; this function may take advantage of the metadata
 /// annotated to the query instruction to refine the result.
 MemDepResult MemoryDependenceAnalysis::
-getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, 
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
                          BasicBlock::iterator ScanIt, BasicBlock *BB,
                          Instruction *QueryInst) {
 
@@ -382,7 +382,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
       // Debug intrinsics don't (and can't) cause dependences.
       if (isa<DbgInfoIntrinsic>(II)) continue;
-      
+
       // If we reach a lifetime begin or end marker, then the query ends here
       // because the value is undefined.
       if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
@@ -406,10 +406,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
         return MemDepResult::getClobber(LI);
 
       AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
-      
+
       // If we found a pointer, check if it could be the same as our pointer.
       AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
-      
+
       if (isLoad) {
         if (R == AliasAnalysis::NoAlias) {
           // If this is an over-aligned integer load (for example,
@@ -423,10 +423,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
                 isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
                                                        MemLocOffset, LI, TD))
               return MemDepResult::getClobber(Inst);
-          
+
           continue;
         }
-        
+
         // Must aliased loads are defs of each other.
         if (R == AliasAnalysis::MustAlias)
           return MemDepResult::getDef(Inst);
@@ -441,7 +441,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
         if (R == AliasAnalysis::PartialAlias)
           return MemDepResult::getClobber(Inst);
 #endif
-        
+
         // Random may-alias loads don't depend on each other without a
         // dependence.
         continue;
@@ -458,7 +458,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
       // Stores depend on may/must aliased loads.
       return MemDepResult::getDef(Inst);
     }
-    
+
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       // Atomic stores have complications involved.
       // FIXME: This is overly conservative.
@@ -474,10 +474,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
       // Ok, this store might clobber the query pointer.  Check to see if it is
       // a must alias: in this case, we want to return this as a def.
       AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
-      
+
       // If we found a pointer, check if it could be the same as our pointer.
       AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
-      
+
       if (R == AliasAnalysis::NoAlias)
         continue;
       if (R == AliasAnalysis::MustAlias)
@@ -498,7 +498,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
     const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
     if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
       const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
-      
+
       if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
         return MemDepResult::getDef(Inst);
       // Be conservative if the accessed pointer may alias the allocation.
@@ -532,7 +532,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
       return MemDepResult::getClobber(Inst);
     }
   }
-  
+
   // No dependence found.  If this is the entry block of the function, it is
   // unknown, otherwise it is non-local.
   if (BB != &BB->getParent()->getEntryBlock())
@@ -544,25 +544,25 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
 /// depends.
 MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
   Instruction *ScanPos = QueryInst;
-  
+
   // Check for a cached result
   MemDepResult &LocalCache = LocalDeps[QueryInst];
-  
+
   // If the cached entry is non-dirty, just return it.  Note that this depends
   // on MemDepResult's default constructing to 'dirty'.
   if (!LocalCache.isDirty())
     return LocalCache;
-    
+
   // Otherwise, if we have a dirty entry, we know we can start the scan at that
   // instruction, which may save us some work.
   if (Instruction *Inst = LocalCache.getInst()) {
     ScanPos = Inst;
-   
+
     RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
   }
-  
+
   BasicBlock *QueryParent = QueryInst->getParent();
-  
+
   // Do the scan.
   if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
     // No dependence found.  If this is the entry block of the function, it is
@@ -591,11 +591,11 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
       // Non-memory instruction.
       LocalCache = MemDepResult::getUnknown();
   }
-  
+
   // Remember the result!
   if (Instruction *I = LocalCache.getInst())
     ReverseLocalDeps[I].insert(QueryInst);
-  
+
   return LocalCache;
 }
 
@@ -636,7 +636,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
   /// the uncached case, this starts out as the set of predecessors we care
   /// about.
   SmallVector<BasicBlock*, 32> DirtyBlocks;
-  
+
   if (!Cache.empty()) {
     // Okay, we have a cache entry.  If we know it is not dirty, just return it
     // with no computation.
@@ -644,17 +644,17 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
       ++NumCacheNonLocal;
       return Cache;
     }
-    
+
     // If we already have a partially computed set of results, scan them to
     // determine what is dirty, seeding our initial DirtyBlocks worklist.
     for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
        I != E; ++I)
       if (I->getResult().isDirty())
         DirtyBlocks.push_back(I->getBB());
-    
+
     // Sort the cache so that we can do fast binary search lookups below.
     std::sort(Cache.begin(), Cache.end());
-    
+
     ++NumCacheDirtyNonLocal;
     //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
     //     << Cache.size() << " cached: " << *QueryInst;
@@ -665,45 +665,45 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
       DirtyBlocks.push_back(*PI);
     ++NumUncacheNonLocal;
   }
-  
+
   // isReadonlyCall - If this is a read-only call, we can be more aggressive.
   bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
 
   SmallPtrSet<BasicBlock*, 64> Visited;
-  
+
   unsigned NumSortedEntries = Cache.size();
   DEBUG(AssertSorted(Cache));
-  
+
   // Iterate while we still have blocks to update.
   while (!DirtyBlocks.empty()) {
     BasicBlock *DirtyBB = DirtyBlocks.back();
     DirtyBlocks.pop_back();
-    
+
     // Already processed this block?
     if (!Visited.insert(DirtyBB))
       continue;
-    
+
     // Do a binary search to see if we already have an entry for this block in
     // the cache set.  If so, find it.
     DEBUG(AssertSorted(Cache, NumSortedEntries));
-    NonLocalDepInfo::iterator Entry = 
+    NonLocalDepInfo::iterator Entry =
       std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
                        NonLocalDepEntry(DirtyBB));
     if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
       --Entry;
-    
+
     NonLocalDepEntry *ExistingResult = 0;
-    if (Entry != Cache.begin()+NumSortedEntries && 
+    if (Entry != Cache.begin()+NumSortedEntries &&
         Entry->getBB() == DirtyBB) {
       // If we already have an entry, and if it isn't already dirty, the block
       // is done.
       if (!Entry->getResult().isDirty())
         continue;
-      
+
       // Otherwise, remember this slot so we can update the value.
       ExistingResult = &*Entry;
     }
-    
+
     // If the dirty entry has a pointer, start scanning from it so we don't have
     // to rescan the entire block.
     BasicBlock::iterator ScanPos = DirtyBB->end();
@@ -715,10 +715,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
                              QueryCS.getInstruction());
       }
     }
-    
+
     // Find out if this block has a local dependency for QueryInst.
     MemDepResult Dep;
-    
+
     if (ScanPos != DirtyBB->begin()) {
       Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
     } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
@@ -728,14 +728,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
     } else {
       Dep = MemDepResult::getNonFuncLocal();
     }
-    
+
     // If we had a dirty entry for the block, update it.  Otherwise, just add
     // a new entry.
     if (ExistingResult)
       ExistingResult->setResult(Dep);
     else
       Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
-    
+
     // If the block has a dependency (i.e. it isn't completely transparent to
     // the value), remember the association!
     if (!Dep.isNonLocal()) {
@@ -744,14 +744,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
       if (Instruction *Inst = Dep.getInst())
         ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
     } else {
-    
+
       // If the block *is* completely transparent to the load, we need to check
       // the predecessors of this block.  Add them to our worklist.
       for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
         DirtyBlocks.push_back(*PI);
     }
   }
-  
+
   return Cache;
 }
 
@@ -769,9 +769,9 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
   assert(Loc.Ptr->getType()->isPointerTy() &&
          "Can't get pointer deps of a non-pointer!");
   Result.clear();
-  
+
   PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
-  
+
   // This is the set of blocks we've inspected, and the pointer we consider in
   // each block.  Because of critical edges, we currently bail out if querying
   // a block with multiple different pointers.  This can happen during PHI
@@ -794,7 +794,7 @@ MemDepResult MemoryDependenceAnalysis::
 GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
                         bool isLoad, BasicBlock *BB,
                         NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
-  
+
   // Do a binary search to see if we already have an entry for this block in
   // the cache set.  If so, find it.
   NonLocalDepInfo::iterator Entry =
@@ -802,18 +802,18 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
                      NonLocalDepEntry(BB));
   if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
     --Entry;
-  
+
   NonLocalDepEntry *ExistingResult = 0;
   if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
     ExistingResult = &*Entry;
-  
+
   // If we have a cached entry, and it is non-dirty, use it as the value for
   // this dependency.
   if (ExistingResult && !ExistingResult->getResult().isDirty()) {
     ++NumCacheNonLocalPtr;
     return ExistingResult->getResult();
-  }    
-  
+  }
+
   // Otherwise, we have to scan for the value.  If we have a dirty cache
   // entry, start scanning from its position, otherwise we scan from the end
   // of the block.
@@ -823,30 +823,30 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
            "Instruction invalidated?");
     ++NumCacheDirtyNonLocalPtr;
     ScanPos = ExistingResult->getResult().getInst();
-    
+
     // Eliminating the dirty entry from 'Cache', so update the reverse info.
     ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
     RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
   } else {
     ++NumUncacheNonLocalPtr;
   }
-  
+
   // Scan the block for the dependency.
   MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
-  
+
   // If we had a dirty entry for the block, update it.  Otherwise, just add
   // a new entry.
   if (ExistingResult)
     ExistingResult->setResult(Dep);
   else
     Cache->push_back(NonLocalDepEntry(BB, Dep));
-  
+
   // If the block has a dependency (i.e. it isn't completely transparent to
   // the value), remember the reverse association because we just added it
   // to Cache!
   if (!Dep.isDef() && !Dep.isClobber())
     return Dep;
-  
+
   // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
   // update MemDep when we remove instructions.
   Instruction *Inst = Dep.getInst();
@@ -859,7 +859,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
 /// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain
 /// number of elements in the array that are already properly ordered.  This is
 /// optimized for the case when only a few entries are added.
-static void 
+static void
 SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
                          unsigned NumSortedEntries) {
   switch (Cache.size() - NumSortedEntries) {
@@ -911,7 +911,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
                             SmallVectorImpl<NonLocalDepResult> &Result,
                             DenseMap<BasicBlock*, Value*> &Visited,
                             bool SkipFirstBlock) {
-  
+
   // Look up the cached info for Pointer.
   ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
 
@@ -925,7 +925,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
 
   // Get the NLPI for CacheKey, inserting one into the map if it doesn't
   // already have one.
-  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = 
+  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
     NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
   NonLocalPointerInfo *CacheInfo = &Pair.first->second;
 
@@ -987,14 +987,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
         DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
         if (VI == Visited.end() || VI->second == Pointer.getAddr())
           continue;
-        
+
         // We have a pointer mismatch in a block.  Just return clobber, saying
         // that something was clobbered in this result.  We could also do a
         // non-fully cached query, but there is little point in doing this.
         return true;
       }
     }
-    
+
     Value *Addr = Pointer.getAddr();
     for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
          I != E; ++I) {
@@ -1005,7 +1005,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     ++NumCacheCompleteNonLocalPtr;
     return false;
   }
-  
+
   // Otherwise, either this is a new block, a block with an invalid cache
   // pointer or one that we're about to invalidate by putting more info into it
   // than its valid cache info.  If empty, the result will be valid cache info,
@@ -1014,10 +1014,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
   else
     CacheInfo->Pair = BBSkipFirstBlockPair();
-  
+
   SmallVector<BasicBlock*, 32> Worklist;
   Worklist.push_back(StartBB);
-  
+
   // PredList used inside loop.
   SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
 
@@ -1028,10 +1028,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
   // revisit blocks after we insert info for them.
   unsigned NumSortedEntries = Cache->size();
   DEBUG(AssertSorted(*Cache));
-  
+
   while (!Worklist.empty()) {
     BasicBlock *BB = Worklist.pop_back_val();
-    
+
     // Skip the first block if we have it.
     if (!SkipFirstBlock) {
       // Analyze the dependency of *Pointer in FromBB.  See if we already have
@@ -1043,14 +1043,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
       DEBUG(AssertSorted(*Cache, NumSortedEntries));
       MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
                                                  NumSortedEntries);
-      
+
       // If we got a Def or Clobber, add this to the list of results.
       if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) {
         Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
         continue;
       }
     }
-    
+
     // If 'Pointer' is an instruction defined in this block, then we need to do
     // phi translation to change it into a value live in the predecessor block.
     // If not, we just add the predecessors to the worklist and scan them with
@@ -1067,7 +1067,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
           NewBlocks.push_back(*PI);
           continue;
         }
-        
+
         // If we have seen this block before, but it was with a different
         // pointer then we have a phi translation failure and we have to treat
         // this as a clobber.
@@ -1082,12 +1082,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
       Worklist.append(NewBlocks.begin(), NewBlocks.end());
       continue;
     }
-    
+
     // We do need to do phi translation, if we know ahead of time we can't phi
     // translate this value, don't even try.
     if (!Pointer.IsPotentiallyPHITranslatable())
       goto PredTranslationFailure;
-    
+
     // We may have added values to the cache list before this PHI translation.
     // If so, we haven't done anything to ensure that the cache remains sorted.
     // Sort it now (if needed) so that recursive invocations of
@@ -1110,7 +1110,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
       PredPointer.PHITranslateValue(BB, Pred, 0);
 
       Value *PredPtrVal = PredPointer.getAddr();
-      
+
       // Check to see if we have already visited this pred block with another
       // pointer.  If so, we can't do this lookup.  This failure can occur
       // with PHI translation when a critical edge exists and the PHI node in
@@ -1127,14 +1127,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
         // the analysis and can ignore it.
         if (InsertRes.first->second == PredPtrVal)
           continue;
-        
+
         // Otherwise, the block was previously analyzed with a different
         // pointer.  We can't represent the result of this case, so we just
         // treat this as a phi translation failure.
 
         // Make sure to clean up the Visited map before continuing on to
         // PredTranslationFailure.
-        for (unsigned i = 0; i < PredList.size(); i++)
+        for (unsigned i = 0, n = PredList.size(); i < n; ++i)
           Visited.erase(PredList[i].first);
 
         goto PredTranslationFailure;
@@ -1143,10 +1143,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
 
     // Actually process results here; this need to be a separate loop to avoid
     // calling getNonLocalPointerDepFromBB for blocks we don't want to return
-    // any results for.  (getNonLocalPointerDepFromBB will modify our 
+    // any results for.  (getNonLocalPointerDepFromBB will modify our
     // datastructures in ways the code after the PredTranslationFailure label
     // doesn't expect.)
-    for (unsigned i = 0; i < PredList.size(); i++) {
+    for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
       BasicBlock *Pred = PredList[i].first;
       PHITransAddr &PredPointer = PredList[i].second;
       Value *PredPtrVal = PredPointer.getAddr();
@@ -1186,12 +1186,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
         continue;
       }
     }
-    
+
     // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
     CacheInfo = &NonLocalPointerDeps[CacheKey];
     Cache = &CacheInfo->NonLocalDeps;
     NumSortedEntries = Cache->size();
-    
+
     // Since we did phi translation, the "Cache" set won't contain all of the
     // results for the query.  This is ok (we can still use it to accelerate
     // specific block queries) but we can't do the fastpath "return all
@@ -1204,20 +1204,20 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     // The following code is "failure"; we can't produce a sane translation
     // for the given block.  It assumes that we haven't modified any of
     // our datastructures while processing the current block.
-    
+
     if (Cache == 0) {
       // Refresh the CacheInfo/Cache pointer if it got invalidated.
       CacheInfo = &NonLocalPointerDeps[CacheKey];
       Cache = &CacheInfo->NonLocalDeps;
       NumSortedEntries = Cache->size();
     }
-    
+
     // Since we failed phi translation, the "Cache" set won't contain all of the
     // results for the query.  This is ok (we can still use it to accelerate
     // specific block queries) but we can't do the fastpath "return all
     // results from the set".  Clear out the indicator for this.
     CacheInfo->Pair = BBSkipFirstBlockPair();
-    
+
     // If *nothing* works, mark the pointer as unknown.
     //
     // If this is the magic first block, return this as a clobber of the whole
@@ -1225,12 +1225,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     // we have to bail out.
     if (SkipFirstBlock)
       return true;
-    
+
     for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
       assert(I != Cache->rend() && "Didn't find current block??");
       if (I->getBB() != BB)
         continue;
-      
+
       assert(I->getResult().isNonLocal() &&
              "Should only be here with transparent block");
       I->setResult(MemDepResult::getUnknown());
@@ -1250,23 +1250,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
 /// CachedNonLocalPointerInfo, remove it.
 void MemoryDependenceAnalysis::
 RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
-  CachedNonLocalPointerInfo::iterator It = 
+  CachedNonLocalPointerInfo::iterator It =
     NonLocalPointerDeps.find(P);
   if (It == NonLocalPointerDeps.end()) return;
-  
+
   // Remove all of the entries in the BB->val map.  This involves removing
   // instructions from the reverse map.
   NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
-  
+
   for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
     Instruction *Target = PInfo[i].getResult().getInst();
     if (Target == 0) continue;  // Ignore non-local dep results.
     assert(Target->getParent() == PInfo[i].getBB());
-    
+
     // Eliminating the dirty entry from 'Cache', so update the reverse info.
     RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
   }
-  
+
   // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
   NonLocalPointerDeps.erase(It);
 }
@@ -1321,20 +1321,20 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
     // Remove this local dependency info.
     LocalDeps.erase(LocalDepEntry);
   }
-  
+
   // If we have any cached pointer dependencies on this instruction, remove
   // them.  If the instruction has non-pointer type, then it can't be a pointer
   // base.
-  
+
   // Remove it from both the load info and the store info.  The instruction
   // can't be in either of these maps if it is non-pointer.
   if (RemInst->getType()->isPointerTy()) {
     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
   }
-  
+
   // Loop over all of the things that depend on the instruction we're removing.
-  // 
+  //
   SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
 
   // If we find RemInst as a clobber or Def in any of the maps for other values,
@@ -1346,29 +1346,29 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
   MemDepResult NewDirtyVal;
   if (!RemInst->isTerminator())
     NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
-  
+
   ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
   if (ReverseDepIt != ReverseLocalDeps.end()) {
     SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
     // RemInst can't be the terminator if it has local stuff depending on it.
     assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
            "Nothing can locally depend on a terminator");
-    
+
     for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
          E = ReverseDeps.end(); I != E; ++I) {
       Instruction *InstDependingOnRemInst = *I;
       assert(InstDependingOnRemInst != RemInst &&
              "Already removed our local dep info");
-                        
+
       LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
-      
+
       // Make sure to remember that new things depend on NewDepInst.
       assert(NewDirtyVal.getInst() && "There is no way something else can have "
              "a local dep on this if it is a terminator!");
-      ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), 
+      ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
                                                 InstDependingOnRemInst));
     }
-    
+
     ReverseLocalDeps.erase(ReverseDepIt);
 
     // Add new reverse deps after scanning the set, to avoid invalidating the
@@ -1379,25 +1379,25 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
       ReverseDepsToAdd.pop_back();
     }
   }
-  
+
   ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
   if (ReverseDepIt != ReverseNonLocalDeps.end()) {
     SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
     for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
          I != E; ++I) {
       assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
-      
+
       PerInstNLInfo &INLD = NonLocalDeps[*I];
       // The information is now dirty!
       INLD.second = true;
-      
-      for (NonLocalDepInfo::iterator DI = INLD.first.begin(), 
+
+      for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
            DE = INLD.first.end(); DI != DE; ++DI) {
         if (DI->getResult().getInst() != RemInst) continue;
-        
+
         // Convert to a dirty entry for the subsequent instruction.
         DI->setResult(NewDirtyVal);
-        
+
         if (Instruction *NextI = NewDirtyVal.getInst())
           ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
       }
@@ -1412,7 +1412,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
       ReverseDepsToAdd.pop_back();
     }
   }
-  
+
   // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
   // value in the NonLocalPointerDeps info.
   ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
@@ -1420,45 +1420,45 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
   if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
     SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
     SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
-    
+
     for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
          E = Set.end(); I != E; ++I) {
       ValueIsLoadPair P = *I;
       assert(P.getPointer() != RemInst &&
              "Already removed NonLocalPointerDeps info for RemInst");
-      
+
       NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
-      
+
       // The cache is not valid for any specific block anymore.
       NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
-      
+
       // Update any entries for RemInst to use the instruction after it.
       for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
            DI != DE; ++DI) {
         if (DI->getResult().getInst() != RemInst) continue;
-        
+
         // Convert to a dirty entry for the subsequent instruction.
         DI->setResult(NewDirtyVal);
-        
+
         if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
           ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
       }
-      
+
       // Re-sort the NonLocalDepInfo.  Changing the dirty entry to its
       // subsequent value may invalidate the sortedness.
       std::sort(NLPDI.begin(), NLPDI.end());
     }
-    
+
     ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
-    
+
     while (!ReversePtrDepsToAdd.empty()) {
       ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
         .insert(ReversePtrDepsToAdd.back().second);
       ReversePtrDepsToAdd.pop_back();
     }
   }
-  
-  
+
+
   assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
   AA->deleteValue(RemInst);
   DEBUG(verifyRemoved(RemInst));
@@ -1472,7 +1472,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
     assert(I->second.getInst() != D &&
            "Inst occurs in data structures");
   }
-  
+
   for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
        E = NonLocalPointerDeps.end(); I != E; ++I) {
     assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
@@ -1481,7 +1481,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
          II != E; ++II)
       assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
   }
-  
+
   for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
        E = NonLocalDeps.end(); I != E; ++I) {
     assert(I->first != D && "Inst occurs in data structures");
@@ -1490,7 +1490,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
          EE = INLD.first.end(); II  != EE; ++II)
       assert(II->getResult().getInst() != D && "Inst occurs in data structures");
   }
-  
+
   for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
        E = ReverseLocalDeps.end(); I != E; ++I) {
     assert(I->first != D && "Inst occurs in data structures");
@@ -1498,7 +1498,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
          EE = I->second.end(); II != EE; ++II)
       assert(*II != D && "Inst occurs in data structures");
   }
-  
+
   for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
        E = ReverseNonLocalDeps.end();
        I != E; ++I) {
@@ -1507,17 +1507,17 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
          EE = I->second.end(); II != EE; ++II)
       assert(*II != D && "Inst occurs in data structures");
   }
-  
+
   for (ReverseNonLocalPtrDepTy::const_iterator
        I = ReverseNonLocalPtrDeps.begin(),
        E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
     assert(I->first != D && "Inst occurs in rev NLPD map");
-    
+
     for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
          E = I->second.end(); II != E; ++II)
       assert(*II != ValueIsLoadPair(D, false) &&
              *II != ValueIsLoadPair(D, true) &&
              "Inst occurs in ReverseNonLocalPtrDeps map");
   }
-  
+
 }
diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp
index 745d8c60bb5a..48d7d05d788f 100644
--- a/lib/Analysis/PathProfileVerifier.cpp
+++ b/lib/Analysis/PathProfileVerifier.cpp
@@ -84,7 +84,7 @@ bool PathProfileVerifier::runOnModule (Module &M) {
   for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
     if (F->isDeclaration()) continue;
 
-    arrayMap[0][F->begin()][0] = i++;
+    arrayMap[(BasicBlock*)0][F->begin()][0] = i++;
 
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
       TerminatorInst *TI = BB->getTerminator();
@@ -125,7 +125,7 @@ bool PathProfileVerifier::runOnModule (Module &M) {
             << currentPath->getCount() << "\n");
       // setup the entry edge (normally path profiling doesn't care about this)
       if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
-        edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]]
+        edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]]
           += currentPath->getCount();
 
       for( ProfilePathEdgeIterator nextEdge = pev->begin(),
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 2daa7d4f6b57..9626a48b9d0d 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -249,7 +249,7 @@ const BasicBlock *ProfileInfoT<Function,BasicBlock>::
 
     succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
     if (Succ == End) {
-      P[0] = BB;
+      P[(const BasicBlock*)0] = BB;
       if (Mode & GetPathToExit) {
         hasFoundPath = true;
         BB = 0;
@@ -752,10 +752,10 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
            Succ != End; ++Succ) {
         Path P;
         GetPath(*Succ, 0, P, GetPathToExit);
-        if (Dest && Dest != P[0]) {
+        if (Dest && Dest != P[(const BasicBlock*)0]) {
           AllEdgesHaveSameReturn = false;
         }
-        Dest = P[0];
+        Dest = P[(const BasicBlock*)0];
       }
       if (AllEdgesHaveSameReturn) {
         if(EstimateMissingEdges(BB)) {
@@ -927,7 +927,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
 
       Path P;
       const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
-      Dest = P[0];
+      Dest = P[(const BasicBlock*)0];
       if (!Dest) continue;
 
       if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index fad5074086ce..81ef6505e611 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -79,6 +79,38 @@ void Region::replaceExit(BasicBlock *BB) {
   exit = BB;
 }
 
+void Region::replaceEntryRecursive(BasicBlock *NewEntry) {
+  std::vector<Region *> RegionQueue;
+  BasicBlock *OldEntry = getEntry();
+
+  RegionQueue.push_back(this);
+  while (!RegionQueue.empty()) {
+    Region *R = RegionQueue.back();
+    RegionQueue.pop_back();
+
+    R->replaceEntry(NewEntry);
+    for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+      if ((*RI)->getEntry() == OldEntry)
+        RegionQueue.push_back(*RI);
+  }
+}
+
+void Region::replaceExitRecursive(BasicBlock *NewExit) {
+  std::vector<Region *> RegionQueue;
+  BasicBlock *OldExit = getExit();
+
+  RegionQueue.push_back(this);
+  while (!RegionQueue.empty()) {
+    Region *R = RegionQueue.back();
+    RegionQueue.pop_back();
+
+    R->replaceExit(NewExit);
+    for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+      if ((*RI)->getExit() == OldExit)
+        RegionQueue.push_back(*RI);
+  }
+}
+
 bool Region::contains(const BasicBlock *B) const {
   BasicBlock *BB = const_cast<BasicBlock*>(B);
 
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 07d83296bc5e..6ea915fdb0b7 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -4230,6 +4230,25 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
   return Max ? Max : SE->getCouldNotCompute();
 }
 
+bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
+                                                    ScalarEvolution *SE) const {
+  if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
+    return true;
+
+  if (!ExitNotTaken.ExitingBlock)
+    return false;
+
+  for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+       ENT != 0; ENT = ENT->getNextExit()) {
+
+    if (ENT->ExactNotTaken != SE->getCouldNotCompute()
+        && SE->hasOperand(ENT->ExactNotTaken, S)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
 /// computable exit into a persistent ExitNotTakenInfo array.
 ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
@@ -6940,6 +6959,17 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
   BlockDispositions.erase(S);
   UnsignedRanges.erase(S);
   SignedRanges.erase(S);
+
+  for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
+         BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
+    BackedgeTakenInfo &BEInfo = I->second;
+    if (BEInfo.hasOperand(S, this)) {
+      BEInfo.clear();
+      BackedgeTakenCounts.erase(I++);
+    }
+    else
+      ++I;
+  }
 }
 
 typedef DenseMap<const Loop *, std::string> VerifyMap;
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 976cd873213f..64f8e96884c7 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -150,8 +150,10 @@ unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
 }
 
 unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
-                                                     Type *Ty) const {
-  return PrevTTI->getArithmeticInstrCost(Opcode, Ty);
+                                                Type *Ty,
+                                                OperandValueKind Op1Info,
+                                                OperandValueKind Op2Info) const {
+  return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
 }
 
 unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp,
@@ -495,7 +497,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
     return 1;
   }
 
-  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
+                                  OperandValueKind) const {
     return 1;
   }
 
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 68e43b2cdb63..c14c593bf72e 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -71,6 +71,7 @@ using namespace llvm;
 // achieved by stripping the !tbaa tags from IR, but this option is sometimes
 // more convenient.
 static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false));
 
 namespace {
   /// TBAANode - This is a simple wrapper around an MDNode which provides a
@@ -109,6 +110,76 @@ namespace {
       return CI->getValue()[0];
     }
   };
+
+  /// This is a simple wrapper around an MDNode which provides a
+  /// higher-level interface by hiding the details of how alias analysis
+  /// information is encoded in its operands.
+  class TBAAStructTagNode {
+    /// This node should be created with createTBAAStructTagNode.
+    const MDNode *Node;
+
+  public:
+    TBAAStructTagNode() : Node(0) {}
+    explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
+
+    /// Get the MDNode for this TBAAStructTagNode.
+    const MDNode *getNode() const { return Node; }
+
+    const MDNode *getBaseType() const {
+      return dyn_cast_or_null<MDNode>(Node->getOperand(0));
+    }
+    const MDNode *getAccessType() const {
+      return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+    }
+    uint64_t getOffset() const {
+      return cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+    }
+  };
+
+  /// This is a simple wrapper around an MDNode which provides a
+  /// higher-level interface by hiding the details of how alias analysis
+  /// information is encoded in its operands.
+  class TBAAStructTypeNode {
+    /// This node should be created with createTBAAStructTypeNode.
+    const MDNode *Node;
+
+  public:
+    TBAAStructTypeNode() : Node(0) {}
+    explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
+
+    /// Get the MDNode for this TBAAStructTypeNode.
+    const MDNode *getNode() const { return Node; }
+
+    /// Get this TBAAStructTypeNode's field in the type DAG with
+    /// given offset. Update the offset to be relative to the field type.
+    TBAAStructTypeNode getParent(uint64_t &Offset) const {
+      if (Node->getNumOperands() < 2)
+        return TBAAStructTypeNode();
+
+      // Assume the offsets are in order. We return the previous field if
+      // the current offset is bigger than the given offset.
+      unsigned TheIdx = 0;
+      for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
+        uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx))->getZExtValue();
+        if (Cur > Offset) {
+          assert(Idx >= 3 &&
+                 "TBAAStructTypeNode::getParent should have an offset match!");
+          TheIdx = Idx - 2;
+          break;
+        }
+      }
+      // Move along the last field.
+      if (TheIdx == 0)
+        TheIdx = Node->getNumOperands() - 2;
+      uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx))->
+                       getZExtValue();
+      Offset -= Cur;
+      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx + 1));
+      if (!P)
+        return TBAAStructTypeNode();
+      return TBAAStructTypeNode(P);
+    }
+  };
 }
 
 namespace {
@@ -137,6 +208,7 @@ namespace {
     }
 
     bool Aliases(const MDNode *A, const MDNode *B) const;
+    bool PathAliases(const MDNode *A, const MDNode *B) const;
 
   private:
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -171,6 +243,9 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 bool
 TypeBasedAliasAnalysis::Aliases(const MDNode *A,
                                 const MDNode *B) const {
+  if (EnableStructPathTBAA)
+    return PathAliases(A, B);
+
   // Keep track of the root node for A and B.
   TBAANode RootA, RootB;
 
@@ -209,6 +284,67 @@ TypeBasedAliasAnalysis::Aliases(const MDNode *A,
   return false;
 }
 
+/// Test whether the struct-path tag represented by A may alias the
+/// struct-path tag represented by B.
+bool
+TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
+                                    const MDNode *B) const {
+  // Keep track of the root node for A and B.
+  TBAAStructTypeNode RootA, RootB;
+  TBAAStructTagNode TagA(A), TagB(B);
+
+  // TODO: We need to check if AccessType of TagA encloses AccessType of
+  // TagB to support aggregate AccessType. If yes, return true.
+
+  // Start from the base type of A, follow the edge with the correct offset in
+  // the type DAG and adjust the offset until we reach the base type of B or
+  // until we reach the Root node.
+  // Compare the adjusted offset once we have the same base.
+
+  // Climb the type DAG from base type of A to see if we reach base type of B.
+  const MDNode *BaseA = TagA.getBaseType();
+  const MDNode *BaseB = TagB.getBaseType();
+  uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
+  for (TBAAStructTypeNode T(BaseA); ; ) {
+    if (T.getNode() == BaseB)
+      // Base type of A encloses base type of B, check if the offsets match.
+      return OffsetA == OffsetB;
+
+    RootA = T;
+    // Follow the edge with the correct offset, OffsetA will be adjusted to
+    // be relative to the field type.
+    T = T.getParent(OffsetA);
+    if (!T.getNode())
+      break;
+  }
+
+  // Reset OffsetA and climb the type DAG from base type of B to see if we reach
+  // base type of A.
+  OffsetA = TagA.getOffset();
+  for (TBAAStructTypeNode T(BaseB); ; ) {
+    if (T.getNode() == BaseA)
+      // Base type of B encloses base type of A, check if the offsets match.
+      return OffsetA == OffsetB;
+
+    RootB = T;
+    // Follow the edge with the correct offset, OffsetB will be adjusted to
+    // be relative to the field type.
+    T = T.getParent(OffsetB);
+    if (!T.getNode())
+      break;
+  }
+
+  // Neither node is an ancestor of the other.
+
+  // If they have different roots, they're part of different potentially
+  // unrelated type systems, so we must be conservative.
+  if (RootA.getNode() != RootB.getNode())
+    return true;
+
+  // If they have the same root, then we've proved there's no alias.
+  return false;
+}
+
 AliasAnalysis::AliasResult
 TypeBasedAliasAnalysis::alias(const Location &LocA,
                               const Location &LocB) {
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 45b75df50875..45dcc5e37ecf 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -953,6 +953,8 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
 
   // Check for pointer simplifications.
   if (V->getType()->isPointerTy()) {
+    if (isKnownNonNull(V))
+      return true; 
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
       if (isGEPKnownNonNull(GEP, TD, Depth))
         return true;
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index f46383be7e46..451375e66660 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -593,6 +593,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(sanitize_memory);
   KEYWORD(uwtable);
   KEYWORD(zeroext);
+  KEYWORD(fixedstacksegment);
 
   KEYWORD(type);
   KEYWORD(opaque);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index c8da1f8bc661..8c62cf0d4e5b 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -878,8 +878,9 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
 
     // Target-independent attributes:
     case lltok::kw_align: {
-      // As a hack, we allow "align 2" on functions as a synonym for "alignstack
-      // 2".
+      // As a hack, we allow function alignment to be initially parsed as an
+      // attribute on a function declaration/definition or added to an attribute
+      // group and later moved to the alignment field.
       unsigned Alignment;
       if (inAttrGrp) {
         Lex.Lex();
@@ -930,6 +931,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
     case lltok::kw_sanitize_thread:   B.addAttribute(Attribute::SanitizeThread); break;
     case lltok::kw_sanitize_memory:   B.addAttribute(Attribute::SanitizeMemory); break;
     case lltok::kw_uwtable:           B.addAttribute(Attribute::UWTable); break;
+    case lltok::kw_fixedstacksegment: B.addAttribute(Attribute::FixedStackSegment); break;
 
     // Error handling.
     case lltok::kw_inreg:
@@ -1159,17 +1161,18 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
     case lltok::kw_sret:            B.addAttribute(Attribute::StructRet); break;
     case lltok::kw_zeroext:         B.addAttribute(Attribute::ZExt); break;
 
-    case lltok::kw_alignstack:      case lltok::kw_nounwind:
-    case lltok::kw_alwaysinline:    case lltok::kw_optsize:
-    case lltok::kw_inlinehint:      case lltok::kw_readnone:
-    case lltok::kw_minsize:         case lltok::kw_readonly:
-    case lltok::kw_naked:           case lltok::kw_returns_twice:
-    case lltok::kw_nobuiltin:       case lltok::kw_sanitize_address:
-    case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_memory:
-    case lltok::kw_noinline:        case lltok::kw_sanitize_thread:
-    case lltok::kw_nonlazybind:     case lltok::kw_ssp:
-    case lltok::kw_noredzone:       case lltok::kw_sspreq:
-    case lltok::kw_noreturn:        case lltok::kw_uwtable:
+    case lltok::kw_alignstack:       case lltok::kw_nounwind:
+    case lltok::kw_alwaysinline:     case lltok::kw_fixedstacksegment:
+    case lltok::kw_optsize:          case lltok::kw_inlinehint:
+    case lltok::kw_readnone:         case lltok::kw_minsize:
+    case lltok::kw_readonly:         case lltok::kw_naked:
+    case lltok::kw_returns_twice:    case lltok::kw_nobuiltin:
+    case lltok::kw_sanitize_address: case lltok::kw_noimplicitfloat:
+    case lltok::kw_sanitize_memory:  case lltok::kw_noinline:
+    case lltok::kw_sanitize_thread:  case lltok::kw_nonlazybind:
+    case lltok::kw_ssp:              case lltok::kw_noredzone:
+    case lltok::kw_sspreq:           case lltok::kw_noreturn:
+    case lltok::kw_uwtable:
       HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
       break;
     }
@@ -1195,24 +1198,38 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
     case lltok::kw_zeroext:         B.addAttribute(Attribute::ZExt); break;
 
     // Error handling.
-    case lltok::kw_sret:  case lltok::kw_nocapture:
-    case lltok::kw_byval: case lltok::kw_nest:
+    case lltok::kw_byval:
+    case lltok::kw_nest:
+    case lltok::kw_nocapture:
+    case lltok::kw_sret:
       HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
       break;
 
-    case lltok::kw_align:                 case lltok::kw_noreturn:
-    case lltok::kw_alignstack:            case lltok::kw_nounwind:
-    case lltok::kw_alwaysinline:          case lltok::kw_optsize:
-    case lltok::kw_inlinehint:            case lltok::kw_readnone:
-    case lltok::kw_minsize:               case lltok::kw_readonly:
-    case lltok::kw_naked:                 case lltok::kw_returns_twice:
-    case lltok::kw_nobuiltin:             case lltok::kw_sanitize_address:
-    case lltok::kw_noduplicate:           case lltok::kw_sanitize_memory:
-    case lltok::kw_noimplicitfloat:       case lltok::kw_sanitize_thread:
-    case lltok::kw_noinline:              case lltok::kw_ssp:
-    case lltok::kw_nonlazybind:           case lltok::kw_sspreq:
-    case lltok::kw_noredzone:             case lltok::kw_sspstrong:
-                                          case lltok::kw_uwtable:
+    case lltok::kw_align:
+    case lltok::kw_alignstack:
+    case lltok::kw_alwaysinline:
+    case lltok::kw_inlinehint:
+    case lltok::kw_minsize:
+    case lltok::kw_naked:
+    case lltok::kw_nobuiltin:
+    case lltok::kw_noduplicate:
+    case lltok::kw_noimplicitfloat:
+    case lltok::kw_noinline:
+    case lltok::kw_nonlazybind:
+    case lltok::kw_noredzone:
+    case lltok::kw_noreturn:
+    case lltok::kw_nounwind:
+    case lltok::kw_optsize:
+    case lltok::kw_readnone:
+    case lltok::kw_readonly:
+    case lltok::kw_returns_twice:
+    case lltok::kw_sanitize_address:
+    case lltok::kw_sanitize_memory:
+    case lltok::kw_sanitize_thread:
+    case lltok::kw_ssp:
+    case lltok::kw_sspreq:
+    case lltok::kw_sspstrong:
+    case lltok::kw_uwtable:
       HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
       break;
     }
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index cd25ba30008f..632a6d680a37 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -96,6 +96,7 @@ namespace lltok {
     kw_alwaysinline,
     kw_sanitize_address,
     kw_byval,
+    kw_fixedstacksegment,
     kw_inlinehint,
     kw_inreg,
     kw_minsize,
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
index 942346b44e32..9dafe2a03670 100644
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -292,7 +292,7 @@ void BitstreamCursor::ReadAbbrevRecord() {
         Abbv->Add(BitCodeAbbrevOp(0));
         continue;
       }
-      
+
       Abbv->Add(BitCodeAbbrevOp(E, Data));
     } else
       Abbv->Add(BitCodeAbbrevOp(E));
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 4f19dd00e639..8bac6da89285 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -60,7 +60,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
        I != E; ++I)
     EnumerateValue(I->getAliasee());
 
-  // Insert constants and metadata that are named at module level into the slot 
+  // Insert constants and metadata that are named at module level into the slot
   // pool so that the module symbol table can refer to them...
   EnumerateValueSymbolTable(M->getValueSymbolTable());
   EnumerateNamedMetadata(M);
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index d1ea02733836..76ebe9aca9a3 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -1,6 +1,7 @@
 # `Support' and `TableGen' libraries are added on the top-level CMakeLists.txt
 
 add_subdirectory(IR)
+add_subdirectory(IRReader)
 add_subdirectory(CodeGen)
 add_subdirectory(Bitcode)
 add_subdirectory(Transforms)
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d4a745d985e8..2d13db2c9c90 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -135,7 +135,7 @@ const DataLayout &AsmPrinter::getDataLayout() const {
 
 /// getCurrentSection() - Return the current section we are emitting to.
 const MCSection *AsmPrinter::getCurrentSection() const {
-  return OutStreamer.getCurrentSection();
+  return OutStreamer.getCurrentSection().first;
 }
 
 
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 58fe2ed9d357..8d15c069c6f8 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMAsmPrinter
   DwarfCompileUnit.cpp
   DwarfDebug.cpp
   DwarfException.cpp
+  ErlangGCPrinter.cpp
   OcamlGCPrinter.cpp
   Win64Exception.cpp
   )
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index bbb043258190..57e0acda890f 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -144,7 +144,7 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) {
     O << "Size: " << Size << "\n";
   }
 
-  const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+  const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData();
 
   IndentCount += 2;
   for (unsigned i = 0, N = Data.size(); i < N; ++i) {
@@ -324,7 +324,7 @@ void DIEEntry::print(raw_ostream &O) {
 ///
 unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
   if (!Size) {
-    const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+    const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
     for (unsigned i = 0, N = Values.size(); i < N; ++i)
       Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
   }
@@ -343,7 +343,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
   case dwarf::DW_FORM_block:  Asm->EmitULEB128(Size); break;
   }
 
-  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
   for (unsigned i = 0, N = Values.size(); i < N; ++i)
     Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
 }
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 9907b01ceb6a..c332aa2a7db6 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -66,7 +66,7 @@ namespace llvm {
 
     /// Data - Raw data bytes for abbreviation.
     ///
-    SmallVector<DIEAbbrevData, 8> Data;
+    SmallVector<DIEAbbrevData, 12> Data;
 
   public:
     DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
@@ -75,7 +75,7 @@ namespace llvm {
     uint16_t getTag() const { return Tag; }
     unsigned getNumber() const { return Number; }
     uint16_t getChildrenFlag() const { return ChildrenFlag; }
-    const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+    const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
     void setTag(uint16_t T) { Tag = T; }
     void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
     void setNumber(unsigned N) { Number = N; }
@@ -108,7 +108,7 @@ namespace llvm {
 
   //===--------------------------------------------------------------------===//
   /// DIE - A structured debug information entry.  Has an abbreviation which
-  /// describes it's organization.
+  /// describes its organization.
   class DIEValue;
 
   class DIE {
@@ -133,7 +133,7 @@ namespace llvm {
 
     /// Attribute values.
     ///
-    SmallVector<DIEValue*, 32> Values;
+    SmallVector<DIEValue*, 12> Values;
 
     // Private data for print()
     mutable unsigned IndentCount;
@@ -150,7 +150,7 @@ namespace llvm {
     unsigned getOffset() const { return Offset; }
     unsigned getSize() const { return Size; }
     const std::vector<DIE *> &getChildren() const { return Children; }
-    const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
+    const SmallVectorImpl<DIEValue*> &getValues() const { return Values; }
     DIE *getParent() const { return Parent; }
     /// Climb up the parent chain to get the compile unit DIE this DIE belongs
     /// to.
@@ -235,9 +235,10 @@ namespace llvm {
     ///
     static unsigned BestForm(bool IsSigned, uint64_t Int) {
       if (IsSigned) {
-        if ((char)Int == (signed)Int)   return dwarf::DW_FORM_data1;
-        if ((short)Int == (signed)Int)  return dwarf::DW_FORM_data2;
-        if ((int)Int == (signed)Int)    return dwarf::DW_FORM_data4;
+        const int64_t SignedInt = Int;
+        if ((char)Int == SignedInt)     return dwarf::DW_FORM_data1;
+        if ((short)Int == SignedInt)    return dwarf::DW_FORM_data2;
+        if ((int)Int == SignedInt)      return dwarf::DW_FORM_data4;
       } else {
         if ((unsigned char)Int == Int)  return dwarf::DW_FORM_data1;
         if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 1c743c2414e9..f9b6f9472141 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1366,7 +1366,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
     }
   } else if (const ConstantInt *CI =
              dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
-    // AT_const_value was added when the static memeber was created. To avoid
+    // AT_const_value was added when the static member was created. To avoid
     // emitting AT_const_value multiple times, we only add AT_const_value when
     // it is not a static member.
     if (!IsStaticMember)
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index b169602b0ecb..762c475c8e54 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -55,7 +55,7 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
      cl::init(false));
 
 static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames",
-     cl::Hidden, cl::ZeroOrMore, cl::init(false),
+     cl::Hidden, cl::init(false),
      cl::desc("Generate DWARF pubnames section"));
 
 namespace {
@@ -170,6 +170,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
   DwarfStrSectionSym = TextSectionSym = 0;
   DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
+  DwarfAddrSectionSym = 0;
   DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
   FunctionBeginSym = FunctionEndSym = 0;
 
@@ -643,7 +644,7 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
 
   // We look up the CUID/file/dir by concatenating them with a zero byte.
   SmallString<128> NamePair;
-  NamePair += CUID;
+  NamePair += utostr(CUID);
   NamePair += '\0';
   NamePair += DirName;
   NamePair += '\0'; // Zero bytes are not allowed in paths.
@@ -681,9 +682,12 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
   NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
                  DIUnit.getLanguage());
   NewCU->addString(Die, dwarf::DW_AT_name, FN);
+
   // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
-  // into an entity. We're using 0 (or a NULL label) for this.
-  NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
+  // into an entity. We're using 0 (or a NULL label) for this. For
+  // split dwarf it's in the skeleton CU so omit it here.
+  if (!useSplitDwarf())
+    NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
 
   // Define start line table label for each Compile Unit.
   MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
@@ -692,20 +696,25 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
                                                      NewCU->getUniqueID());
 
   // DW_AT_stmt_list is a offset of line number information for this
-  // compile unit in debug_line section.
+  // compile unit in debug_line section. For split dwarf this is
+  // left in the skeleton CU and so not included.
   // The line table entries are not always emitted in assembly, so it
   // is not okay to use line_table_start here.
-  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-    NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
-                    NewCU->getUniqueID() == 0 ?
-                    Asm->GetTempSymbol("section_line") : LineTableStartSym);
-  else if (NewCU->getUniqueID() == 0)
-    NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
-  else
-    NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
-                    LineTableStartSym, DwarfLineSectionSym);
+  if (!useSplitDwarf()) {
+    if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+      NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                      NewCU->getUniqueID() == 0 ?
+                      Asm->GetTempSymbol("section_line") : LineTableStartSym);
+    else if (NewCU->getUniqueID() == 0)
+      NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+    else
+      NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                      LineTableStartSym, DwarfLineSectionSym);
+  }
 
-  if (!CompilationDir.empty())
+  // If we're using split dwarf the compilation dir is going to be in the
+  // skeleton CU and so we don't need to duplicate it here.
+  if (!useSplitDwarf() && !CompilationDir.empty())
     NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
   if (DIUnit.isOptimized())
     NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
@@ -721,13 +730,6 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
   if (!FirstCU)
     FirstCU = NewCU;
 
-  if (useSplitDwarf()) {
-    // This should be a unique identifier when we want to build .dwp files.
-    NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
-    // Now construct the skeleton CU associated.
-    constructSkeletonCU(N);
-  }
-
   InfoHolder.addUnit(NewCU);
 
   CUMap.insert(std::make_pair(N, NewCU));
@@ -794,6 +796,14 @@ void DwarfDebug::beginModule() {
     DIArray RetainedTypes = CUNode.getRetainedTypes();
     for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
       CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+    // If we're splitting the dwarf out now that we've got the entire
+    // CU then construct a skeleton CU based upon it.
+    if (useSplitDwarf()) {
+    // This should be a unique identifier when we want to build .dwp files.
+      CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+      // Now construct the skeleton CU associated.
+      constructSkeletonCU(CUNode);
+    }
   }
 
   // Tell MMI that we have debug info.
@@ -1671,8 +1681,8 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
   // Start the size with the size of abbreviation code.
   Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
 
-  const SmallVector<DIEValue*, 32> &Values = Die->getValues();
-  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+  const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
 
   // Size the DIE attribute values.
   for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -1699,7 +1709,7 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
 void DwarfUnits::computeSizeAndOffsets() {
   // Offset from the beginning of debug info section.
   unsigned AccuOffset = 0;
-  for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+  for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
          E = CUs.end(); I != E; ++I) {
     (*I)->setDebugInfoOffset(AccuOffset);
     unsigned Offset =
@@ -1739,9 +1749,12 @@ void DwarfDebug::emitSectionLabels() {
   emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
   DwarfStrSectionSym =
     emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
-  if (useSplitDwarf())
+  if (useSplitDwarf()) {
     DwarfStrDWOSectionSym =
       emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
+    DwarfAddrSectionSym =
+      emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
+  }
   DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
                                              "debug_range");
 
@@ -1766,8 +1779,8 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
                                 dwarf::TagString(Abbrev->getTag()));
   Asm->EmitULEB128(AbbrevNumber);
 
-  const SmallVector<DIEValue*, 32> &Values = Die->getValues();
-  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+  const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
 
   // Emit the DIE attribute values.
   for (unsigned i = 0, N = Values.size(); i < N; ++i) {
@@ -1855,7 +1868,7 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
                            const MCSection *ASection,
                            const MCSymbol *ASectionSym) {
   Asm->OutStreamer.SwitchSection(USection);
-  for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+  for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
          E = CUs.end(); I != E; ++I) {
     CompileUnit *TheCU = *I;
     DIE *Die = TheCU->getCUDie();
@@ -1891,7 +1904,7 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
 unsigned DwarfUnits::getCUOffset(DIE *Die) {
   assert(Die->getTag() == dwarf::DW_TAG_compile_unit  &&
          "Input DIE should be compile unit in getCUOffset.");
-  for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+  for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
        E = CUs.end(); I != E; ++I) {
     CompileUnit *TheCU = *I;
     if (TheCU->getCUDie() == Die)
@@ -2284,7 +2297,7 @@ void DwarfDebug::emitDebugLoc() {
   if (DotDebugLocEntries.empty())
     return;
 
-  for (SmallVector<DotDebugLocEntry, 4>::iterator
+  for (SmallVectorImpl<DotDebugLocEntry>::iterator
          I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
        I != E; ++I) {
     DotDebugLocEntry &Entry = *I;
@@ -2298,7 +2311,7 @@ void DwarfDebug::emitDebugLoc() {
   unsigned char Size = Asm->getDataLayout().getPointerSize();
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
   unsigned index = 1;
-  for (SmallVector<DotDebugLocEntry, 4>::iterator
+  for (SmallVectorImpl<DotDebugLocEntry>::iterator
          I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
        I != E; ++I, ++index) {
     DotDebugLocEntry &Entry = *I;
@@ -2391,7 +2404,7 @@ void DwarfDebug::emitDebugRanges() {
   Asm->OutStreamer.SwitchSection(
     Asm->getObjFileLowering().getDwarfRangesSection());
   unsigned char Size = Asm->getDataLayout().getPointerSize();
-  for (SmallVector<const MCSymbol *, 8>::iterator
+  for (SmallVectorImpl<const MCSymbol *>::iterator
          I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
        I != E; ++I) {
     if (*I)
@@ -2449,13 +2462,13 @@ void DwarfDebug::emitDebugInlineInfo() {
   Asm->OutStreamer.AddComment("Address Size (in bytes)");
   Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
 
-  for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+  for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(),
          E = InlinedSPNodes.end(); I != E; ++I) {
 
     const MDNode *Node = *I;
     DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
       = InlineInfo.find(Node);
-    SmallVector<InlineInfoLabels, 4> &Labels = II->second;
+    SmallVectorImpl<InlineInfoLabels> &Labels = II->second;
     DISubprogram SP(Node);
     StringRef LName = SP.getLinkageName();
     StringRef Name = SP.getName();
@@ -2474,7 +2487,7 @@ void DwarfDebug::emitDebugInlineInfo() {
                            DwarfStrSectionSym);
     Asm->EmitULEB128(Labels.size(), "Inline count");
 
-    for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
+    for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(),
            LE = Labels.end(); LI != LE; ++LI) {
       if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
       Asm->EmitInt32(LI->second->getOffset());
@@ -2509,9 +2522,13 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
   // This should be a unique identifier when we want to build .dwp files.
   NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
 
-  // FIXME: The addr base should be relative for each compile unit, however,
-  // this one is going to be 0 anyhow.
-  NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
+  // Relocate to the beginning of the addr_base section, else 0 for the beginning
+  // of the one for this compile unit.
+  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+    NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset,
+                    DwarfAddrSectionSym);
+  else
+    NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
 
   // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
   // into an entity. We're using 0, or a NULL label for this.
@@ -2519,6 +2536,7 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
 
   // DW_AT_stmt_list is a offset of line number information for this
   // compile unit in debug_line section.
+  // FIXME: Should handle multiple compile units.
   if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
     NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
                     DwarfLineSectionSym);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 81e345e6281d..9a38256d8e14 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -392,7 +392,7 @@ class DwarfDebug {
   // section offsets and are created by EmitSectionLabels.
   MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
   MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
-  MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym;
+  MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
   MCSymbol *FunctionBeginSym, *FunctionEndSym;
   MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
 
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
new file mode 100644
index 000000000000..a8fb66dcf17b
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -0,0 +1,120 @@
+//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the compiler plugin that is used in order to emit
+// garbage collection information in a convenient layout for parsing and
+// loading in the Erlang/OTP runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+  class ErlangGCPrinter : public GCMetadataPrinter {
+  public:
+    void beginAssembly(AsmPrinter &AP);
+    void finishAssembly(AsmPrinter &AP);
+  };
+
+}
+
+static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGCPrinter() { }
+
+void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { }
+
+void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
+  MCStreamer &OS = AP.OutStreamer;
+  unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+
+  // Put this in a custom .note section.
+  AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext()
+    .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0,
+                   SectionKind::getDataRel()));
+
+  // For each function...
+  for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+    GCFunctionInfo &MD = **FI;
+
+    /** A compact GC layout. Emit this data structure:
+     *
+     * struct {
+     *   int16_t PointCount;
+     *   void *SafePointAddress[PointCount];
+     *   int16_t StackFrameSize; (in words)
+     *   int16_t StackArity;
+     *   int16_t LiveCount;
+     *   int16_t LiveOffsets[LiveCount];
+     * } __gcmap_<FUNCTIONNAME>;
+     **/
+
+    // Align to address width.
+    AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+    // Emit PointCount.
+    OS.AddComment("safe point count");
+    AP.EmitInt16(MD.size());
+
+    // And each safe point...
+    for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE;
+         ++PI) {
+      // Emit the address of the safe point.
+      OS.AddComment("safe point address");
+      MCSymbol *Label = PI->Label;
+      AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/);
+    }
+
+    // Stack information never change in safe points! Only print info from the
+    // first call-site.
+    GCFunctionInfo::iterator PI = MD.begin();
+
+    // Emit the stack frame size.
+    OS.AddComment("stack frame size (in words)");
+    AP.EmitInt16(MD.getFrameSize() / IntPtrSize);
+
+    // Emit stack arity, i.e. the number of stacked arguments.
+    unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
+    unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ?
+                          MD.getFunction().arg_size() - RegisteredArgs : 0;
+    OS.AddComment("stack arity");
+    AP.EmitInt16(StackArity);
+
+    // Emit the number of live roots in the function.
+    OS.AddComment("live root count");
+    AP.EmitInt16(MD.live_size(PI));
+
+    // And for each live root...
+    for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+                                       LE = MD.live_end(PI);
+                                       LI != LE; ++LI) {
+      // Emit live root's offset within the stack frame.
+      OS.AddComment("stack index (offset / wordsize)");
+      AP.EmitInt16(LI->StackOffset / IntPtrSize);
+    }
+  }
+}
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 4cd1b80dc0b8..4a99184f5eec 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -85,7 +85,9 @@ class BasicTTI : public ImmutablePass, public TargetTransformInfo {
   virtual unsigned getNumberOfRegisters(bool Vector) const;
   virtual unsigned getMaximumUnrollFactor() const;
   virtual unsigned getRegisterBitWidth(bool Vector) const;
-  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                          OperandValueKind,
+                                          OperandValueKind) const;
   virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                                   int Index, Type *SubTp) const;
   virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -193,27 +195,34 @@ unsigned BasicTTI::getMaximumUnrollFactor() const {
   return 1;
 }
 
-unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                          OperandValueKind,
+                                          OperandValueKind) const {
   // Check if any of the operands are vector operands.
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
 
+  bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
+  // Assume that floating point arithmetic operations cost twice as much as
+  // integer operations.
+  unsigned OpCost = (IsFloat ? 2 : 1);
+
   if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
     // The operation is legal. Assume it costs 1.
-    // If the type is split to multiple registers, assume that thre is some
+    // If the type is split to multiple registers, assume that there is some
     // overhead to this.
     // TODO: Once we have extract/insert subvector cost we need to use them.
     if (LT.first > 1)
-      return LT.first * 2;
-    return LT.first * 1;
+      return LT.first * 2 * OpCost;
+    return LT.first * 1 * OpCost;
   }
 
   if (!TLI->isOperationExpand(ISD, LT.second)) {
     // If the operation is custom lowered then assume
     // thare the code is twice as expensive.
-    return LT.first * 2;
+    return LT.first * 2 * OpCost;
   }
 
   // Else, assume that we need to scalarize this op.
@@ -226,7 +235,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
   }
 
   // We don't know anything about this scalar instruction.
-  return 1;
+  return OpCost;
 }
 
 unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index ddc7adab4983..56aa3309d3dd 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -7,13 +7,13 @@ add_llvm_library(LLVMCodeGen
   CalcSpillWeights.cpp
   CallingConvLower.cpp
   CodeGen.cpp
-  CodePlacementOpt.cpp
   CriticalAntiDepBreaker.cpp
   DFAPacketizer.cpp
   DeadMachineInstructionElim.cpp
   DwarfEHPrepare.cpp
   EarlyIfConversion.cpp
   EdgeBundles.cpp
+  ErlangGC.cpp
   ExecutionDepsFix.cpp
   ExpandISelPseudos.cpp
   ExpandPostRAPseudos.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index dee339a45863..38ae17d23156 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -117,7 +117,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
   float totalWeight = 0;
   SmallPtrSet<MachineInstr*, 8> visited;
 
-  // Find the best physreg hist and the best virtreg hint.
+  // Find the best physreg hint and the best virtreg hint.
   float bestPhys = 0, bestVirt = 0;
   unsigned hintPhys = 0, hintVirt = 0;
 
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index a33b67204499..35ec68d00cec 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -22,7 +22,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeBasicTTIPass(Registry);
   initializeBranchFolderPassPass(Registry);
   initializeCalculateSpillWeightsPass(Registry);
-  initializeCodePlacementOptPass(Registry);
   initializeDeadMachineInstructionElimPass(Registry);
   initializeEarlyIfConverterPass(Registry);
   initializeExpandPostRAPass(Registry);
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
deleted file mode 100644
index 24518443a768..000000000000
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ /dev/null
@@ -1,423 +0,0 @@
-//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the pass that optimizes code placement and aligns loop
-// headers to target-specific alignment boundaries.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "code-placement"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-STATISTIC(NumLoopsAligned,  "Number of loops aligned");
-STATISTIC(NumIntraElim,     "Number of intra loop branches eliminated");
-STATISTIC(NumIntraMoved,    "Number of intra loop branches moved");
-
-namespace {
-  class CodePlacementOpt : public MachineFunctionPass {
-    const MachineLoopInfo *MLI;
-    const TargetInstrInfo *TII;
-    const TargetLowering  *TLI;
-
-  public:
-    static char ID;
-    CodePlacementOpt() : MachineFunctionPass(ID) {}
-
-    virtual bool runOnMachineFunction(MachineFunction &MF);
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<MachineLoopInfo>();
-      AU.addPreservedID(MachineDominatorsID);
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-
-  private:
-    bool HasFallthrough(MachineBasicBlock *MBB);
-    bool HasAnalyzableTerminator(MachineBasicBlock *MBB);
-    void Splice(MachineFunction &MF,
-                MachineFunction::iterator InsertPt,
-                MachineFunction::iterator Begin,
-                MachineFunction::iterator End);
-    bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
-                                          MachineLoop *L);
-    bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
-                                     MachineLoop *L);
-    bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L);
-    bool OptimizeIntraLoopEdges(MachineFunction &MF);
-    bool AlignLoops(MachineFunction &MF);
-    bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
-  };
-
-  char CodePlacementOpt::ID = 0;
-} // end anonymous namespace
-
-char &llvm::CodePlacementOptID = CodePlacementOpt::ID;
-INITIALIZE_PASS(CodePlacementOpt, "code-placement",
-                "Code Placement Optimizer", false, false)
-
-/// HasFallthrough - Test whether the given branch has a fallthrough, either as
-/// a plain fallthrough or as a fallthrough case of a conditional branch.
-///
-bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) {
-  MachineBasicBlock *TBB = 0, *FBB = 0;
-  SmallVector<MachineOperand, 4> Cond;
-  if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
-    return false;
-  // This conditional branch has no fallthrough.
-  if (FBB)
-    return false;
-  // An unconditional branch has no fallthrough.
-  if (Cond.empty() && TBB)
-    return false;
-  // It has a fallthrough.
-  return true;
-}
-
-/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB.
-/// This is called before major changes are begun to test whether it will be
-/// possible to complete the changes.
-///
-/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed
-/// whenever possible.
-///
-bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
-  // Conservatively ignore EH landing pads.
-  if (MBB->isLandingPad()) return false;
-
-  // Aggressively handle return blocks and similar constructs.
-  if (MBB->succ_empty()) return true;
-
-  // Ask the target's AnalyzeBranch if it can handle this block.
-  MachineBasicBlock *TBB = 0, *FBB = 0;
-  SmallVector<MachineOperand, 4> Cond;
-  // Make sure the terminator is understood.
-  if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
-    return false;
-   // Ignore blocks which look like they might have EH-related control flow.
-   // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't
-   // recognize the possibility of a control transfer through an unwind.
-   // Such blocks contain EH_LABEL instructions, however they may be in the
-   // middle of the block. Instead of searching for them, just check to see
-   // if the CFG disagrees with AnalyzeBranch.
-  if (1u + !Cond.empty() != MBB->succ_size())
-    return false;
-  // Make sure we have the option of reversing the condition.
-  if (!Cond.empty() && TII->ReverseBranchCondition(Cond))
-    return false;
-  return true;
-}
-
-/// Splice - Move the sequence of instructions [Begin,End) to just before
-/// InsertPt. Update branch instructions as needed to account for broken
-/// fallthrough edges and to take advantage of newly exposed fallthrough
-/// opportunities.
-///
-void CodePlacementOpt::Splice(MachineFunction &MF,
-                              MachineFunction::iterator InsertPt,
-                              MachineFunction::iterator Begin,
-                              MachineFunction::iterator End) {
-  assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() &&
-         "Splice can't change the entry block!");
-  MachineFunction::iterator OldBeginPrior = prior(Begin);
-  MachineFunction::iterator OldEndPrior = prior(End);
-
-  MF.splice(InsertPt, Begin, End);
-
-  prior(Begin)->updateTerminator();
-  OldBeginPrior->updateTerminator();
-  OldEndPrior->updateTerminator();
-}
-
-/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
-/// to the loop top to the top of the loop so that they have a fall through.
-/// This can introduce a branch on entry to the loop, but it can eliminate a
-/// branch within the loop. See the @simple case in
-/// test/CodeGen/X86/loop_blocks.ll for an example of this.
-bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
-                                                        MachineLoop *L) {
-  bool Changed = false;
-  MachineBasicBlock *TopMBB = L->getTopBlock();
-
-  bool BotHasFallthrough = HasFallthrough(L->getBottomBlock());
-
-  if (TopMBB == MF.begin() ||
-      HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) {
-  new_top:
-    for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(),
-         PE = TopMBB->pred_end(); PI != PE; ++PI) {
-      MachineBasicBlock *Pred = *PI;
-      if (Pred == TopMBB) continue;
-      if (HasFallthrough(Pred)) continue;
-      if (!L->contains(Pred)) continue;
-
-      // Verify that we can analyze all the loop entry edges before beginning
-      // any changes which will require us to be able to analyze them.
-      if (Pred == MF.begin())
-        continue;
-      if (!HasAnalyzableTerminator(Pred))
-        continue;
-      if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred))))
-        continue;
-
-      // Move the block.
-      DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber()
-                   << " to top of loop.\n");
-      Changed = true;
-
-      // Move it and all the blocks that can reach it via fallthrough edges
-      // exclusively, to keep existing fallthrough edges intact.
-      MachineFunction::iterator Begin = Pred;
-      MachineFunction::iterator End = llvm::next(Begin);
-      while (Begin != MF.begin()) {
-        MachineFunction::iterator Prior = prior(Begin);
-        if (Prior == MF.begin())
-          break;
-        // Stop when a non-fallthrough edge is found.
-        if (!HasFallthrough(Prior))
-          break;
-        // Stop if a block which could fall-through out of the loop is found.
-        if (Prior->isSuccessor(End))
-          break;
-        // If we've reached the top, stop scanning.
-        if (Prior == MachineFunction::iterator(TopMBB)) {
-          // We know top currently has a fall through (because we just checked
-          // it) which would be lost if we do the transformation, so it isn't
-          // worthwhile to do the transformation unless it would expose a new
-          // fallthrough edge.
-          if (!Prior->isSuccessor(End))
-            goto next_pred;
-          // Otherwise we can stop scanning and proceed to move the blocks.
-          break;
-        }
-        // If we hit a switch or something complicated, don't move anything
-        // for this predecessor.
-        if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
-          break;
-        // Ok, the block prior to Begin will be moved along with the rest.
-        // Extend the range to include it.
-        Begin = Prior;
-        ++NumIntraMoved;
-      }
-
-      // Move the blocks.
-      Splice(MF, TopMBB, Begin, End);
-
-      // Update TopMBB.
-      TopMBB = L->getTopBlock();
-
-      // We have a new loop top. Iterate on it. We shouldn't have to do this
-      // too many times if BranchFolding has done a reasonable job.
-      goto new_top;
-    next_pred:;
-    }
-  }
-
-  // If the loop previously didn't exit with a fall-through and it now does,
-  // we eliminated a branch.
-  if (Changed &&
-      !BotHasFallthrough &&
-      HasFallthrough(L->getBottomBlock())) {
-    ++NumIntraElim;
-  }
-
-  return Changed;
-}
-
-/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
-/// portion of the loop contiguous with the header. This usually makes the loop
-/// contiguous, provided that AnalyzeBranch can handle all the relevant
-/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
-/// for an example of this.
-bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
-                                                   MachineLoop *L) {
-  bool Changed = false;
-  MachineBasicBlock *TopMBB = L->getTopBlock();
-  MachineBasicBlock *BotMBB = L->getBottomBlock();
-
-  // Determine a position to move orphaned loop blocks to. If TopMBB is not
-  // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
-  // to the top of the loop to avoid losing that fallthrough. Otherwise append
-  // them to the bottom, even if it previously had a fallthrough, on the theory
-  // that it's worth an extra branch to keep the loop contiguous.
-  MachineFunction::iterator InsertPt =
-    llvm::next(MachineFunction::iterator(BotMBB));
-  bool InsertAtTop = false;
-  if (TopMBB != MF.begin() &&
-      !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
-      HasFallthrough(BotMBB)) {
-    InsertPt = TopMBB;
-    InsertAtTop = true;
-  }
-
-  // Keep a record of which blocks are in the portion of the loop contiguous
-  // with the loop header.
-  SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
-  for (MachineFunction::iterator I = TopMBB,
-       E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
-    ContiguousBlocks.insert(I);
-
-  // Find non-contigous blocks and fix them.
-  if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
-    for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
-         BI != BE; ++BI) {
-      MachineBasicBlock *BB = *BI;
-
-      // Verify that we can analyze all the loop entry edges before beginning
-      // any changes which will require us to be able to analyze them.
-      if (!HasAnalyzableTerminator(BB))
-        continue;
-      if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
-        continue;
-
-      // If the layout predecessor is part of the loop, this block will be
-      // processed along with it. This keeps them in their relative order.
-      if (BB != MF.begin() &&
-          L->contains(prior(MachineFunction::iterator(BB))))
-        continue;
-
-      // Check to see if this block is already contiguous with the main
-      // portion of the loop.
-      if (!ContiguousBlocks.insert(BB))
-        continue;
-
-      // Move the block.
-      DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
-                   << " to be contiguous with loop.\n");
-      Changed = true;
-
-      // Process this block and all loop blocks contiguous with it, to keep
-      // them in their relative order.
-      MachineFunction::iterator Begin = BB;
-      MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
-      for (; End != MF.end(); ++End) {
-        if (!L->contains(End)) break;
-        if (!HasAnalyzableTerminator(End)) break;
-        ContiguousBlocks.insert(End);
-        ++NumIntraMoved;
-      }
-
-      // If we're inserting at the bottom of the loop, and the code we're
-      // moving originally had fall-through successors, bring the sucessors
-      // up with the loop blocks to preserve the fall-through edges.
-      if (!InsertAtTop)
-        for (; End != MF.end(); ++End) {
-          if (L->contains(End)) break;
-          if (!HasAnalyzableTerminator(End)) break;
-          if (!HasFallthrough(prior(End))) break;
-        }
-
-      // Move the blocks. This may invalidate TopMBB and/or BotMBB, but
-      // we don't need them anymore at this point.
-      Splice(MF, InsertPt, Begin, End);
-    }
-
-  return Changed;
-}
-
-/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize
-/// intra-loop branching and to form contiguous loops.
-///
-/// This code takes the approach of making minor changes to the existing
-/// layout to fix specific loop-oriented problems. Also, it depends on
-/// AnalyzeBranch, which can't understand complex control instructions.
-///
-bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF,
-                                                        MachineLoop *L) {
-  bool Changed = false;
-
-  // Do optimization for nested loops.
-  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
-    Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
-
-  // Do optimization for this loop.
-  Changed |= EliminateUnconditionalJumpsToTop(MF, L);
-  Changed |= MoveDiscontiguousLoopBlocks(MF, L);
-
-  return Changed;
-}
-
-/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize
-/// intra-loop branching and to form contiguous loops.
-///
-bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
-  bool Changed = false;
-
-  if (!TLI->shouldOptimizeCodePlacement())
-    return Changed;
-
-  // Do optimization for each loop in the function.
-  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
-       I != E; ++I)
-    if (!(*I)->getParentLoop())
-      Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
-
-  return Changed;
-}
-
-/// AlignLoops - Align loop headers to target preferred alignments.
-///
-bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
-  const Function *F = MF.getFunction();
-  if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                      Attribute::OptimizeForSize))
-    return false;
-
-  unsigned Align = TLI->getPrefLoopAlignment();
-  if (!Align)
-    return false;  // Don't care about loop alignment.
-
-  bool Changed = false;
-
-  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
-       I != E; ++I)
-    Changed |= AlignLoop(MF, *I, Align);
-
-  return Changed;
-}
-
-/// AlignLoop - Align loop headers to target preferred alignments.
-///
-bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
-                                 unsigned Align) {
-  bool Changed = false;
-
-  // Do alignment for nested loops.
-  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
-    Changed |= AlignLoop(MF, *I, Align);
-
-  L->getTopBlock()->setAlignment(Align);
-  Changed = true;
-  ++NumLoopsAligned;
-
-  return Changed;
-}
-
-bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
-  MLI = &getAnalysis<MachineLoopInfo>();
-  if (MLI->empty())
-    return false;  // No loops.
-
-  TLI = MF.getTarget().getTargetLowering();
-  TII = MF.getTarget().getInstrInfo();
-
-  bool Changed = OptimizeIntraLoopEdges(MF);
-
-  Changed |= AlignLoops(MF);
-
-  return Changed;
-}
diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp
new file mode 100644
index 000000000000..8a1e2d9c99a8
--- /dev/null
+++ b/lib/CodeGen/ErlangGC.cpp
@@ -0,0 +1,81 @@
+//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Erlang/OTP runtime-compatible garbage collector
+// (e.g. defines safe points, root initialization etc.)
+//
+// The frametable emitter is in ErlangGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+  class ErlangGC : public GCStrategy {
+    MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MI,
+                          DebugLoc DL) const;
+  public:
+    ErlangGC();
+    bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF);
+  };
+
+}
+
+static GCRegistry::Add<ErlangGC>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGC() { }
+
+ErlangGC::ErlangGC() {
+  InitRoots = false;
+  NeededSafePoints = 1 << GC::PostCall;
+  UsesMetadata = true;
+  CustomRoots = false;
+  CustomSafePoints = true;
+}
+
+MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI,
+                                DebugLoc DL) const {
+  const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+  MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+  BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+  return Label;
+}
+
+bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) {
+  for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
+       ++BBI)
+    for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+         MI != ME; ++MI)
+
+      if (MI->getDesc().isCall()) {
+
+        // Do not treat tail call sites as safe points.
+        if (MI->getDesc().isTerminator())
+          continue;
+
+        /* Code copied from VisitCallPoint(...) */
+        MachineBasicBlock::iterator RAI = MI; ++RAI;
+        MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc());
+        FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc());
+      }
+
+  return false;
+}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 9958d7daada8..eaff93fd7fc5 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1054,6 +1054,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
     // Copy instructions in the true block, predicate them, and add them to
     // the entry block.
     CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+
+    // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
+    // explicitly remove CvtBBI as a successor.
+    BBI.BB->removeSuccessor(CvtBBI->BB);
   } else {
     PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
 
@@ -1146,6 +1150,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
     // Copy instructions in the true block, predicate them, and add them to
     // the entry block.
     CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
+
+    // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
+    // explicitly remove CvtBBI as a successor.
+    BBI.BB->removeSuccessor(CvtBBI->BB);
   } else {
     // Predicate the 'true' block after removing its branch.
     CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 3b28e6afb670..7793e96c3540 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -77,7 +77,7 @@ bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
 /// OrigIdx are also available with the same value at UseIdx.
 bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
                                        SlotIndex OrigIdx,
-                                       SlotIndex UseIdx) {
+                                       SlotIndex UseIdx) const {
   OrigIdx = OrigIdx.getRegSlot(true);
   UseIdx = UseIdx.getRegSlot(true);
   for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 3b09c6b779e8..bfba503b351e 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -39,6 +39,7 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
@@ -52,6 +53,11 @@ STATISTIC(CondBranchTakenFreq,
 STATISTIC(UncondBranchTakenFreq,
           "Potential frequency of taking unconditional branches");
 
+static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
+                                       cl::desc("Force the alignment of all "
+                                                "blocks in the function."),
+                                       cl::init(0), cl::Hidden);
+
 namespace {
 class BlockChain;
 /// \brief Type for our function-wide basic block -> block chain mapping.
@@ -1061,7 +1067,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
     }
 
     // Align this block if the layout predecessor's edge into this block is
-    // cold relative to the block. When this is true, othe predecessors make up
+    // cold relative to the block. When this is true, other predecessors make up
     // all of the hot entries into the block and thus alignment is likely to be
     // important.
     BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
@@ -1088,6 +1094,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
   BlockToChain.clear();
   ChainAllocator.DestroyAll();
 
+  if (AlignAllBlock)
+    // Align all of the blocks in the function to a specific alignment.
+    for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+         FI != FE; ++FI)
+      FI->setAlignment(AlignAllBlock);
+
   // We always return true as we have no way to track whether the final order
   // differs from the original order.
   return true;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 1af00e84a6ed..68372f6c9065 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -15,6 +15,8 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/raw_os_ostream.h"
+
 using namespace llvm;
 
 MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
@@ -106,13 +108,59 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
 /// clearVirtRegs - Remove all virtual registers (after physreg assignment).
 void MachineRegisterInfo::clearVirtRegs() {
 #ifndef NDEBUG
-  for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
-    assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
-           "Vreg use list non-empty still?");
+  for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (!VRegInfo[Reg].second)
+      continue;
+    verifyUseList(Reg);
+    llvm_unreachable("Remaining virtual register operands");
+  }
 #endif
   VRegInfo.clear();
 }
 
+void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
+#ifndef NDEBUG
+  bool Valid = true;
+  for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) {
+    MachineOperand *MO = &I.getOperand();
+    MachineInstr *MI = MO->getParent();
+    if (!MI) {
+      errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO
+             << " has no parent instruction.\n";
+      Valid = false;
+    }
+    MachineOperand *MO0 = &MI->getOperand(0);
+    unsigned NumOps = MI->getNumOperands();
+    if (!(MO >= MO0 && MO < MO0+NumOps)) {
+      errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO
+             << " doesn't belong to parent MI: " << *MI;
+      Valid = false;
+    }
+    if (!MO->isReg()) {
+      errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO
+             << " is not a register\n";
+      Valid = false;
+    }
+    if (MO->getReg() != Reg) {
+      errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": "
+             << *MO << " is the wrong register\n";
+      Valid = false;
+    }
+  }
+  assert(Valid && "Invalid use list");
+#endif
+}
+
+void MachineRegisterInfo::verifyUseLists() const {
+#ifndef NDEBUG
+  for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+    verifyUseList(TargetRegisterInfo::index2VirtReg(i));
+  for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i)
+    verifyUseList(i);
+#endif
+}
+
 /// Add MO to the linked list of operands for its register.
 void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
   assert(!MO->isOnRegUseList() && "Already on list");
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 103b058c13a9..0acd9801141e 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -305,7 +305,7 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const {
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void ReadyQueue::dump() {
-  dbgs() << Name << ": ";
+  dbgs() << "  " << Name << ": ";
   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
     dbgs() << Queue[i]->NodeNum << " ";
   dbgs() << "\n";
@@ -404,6 +404,8 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
   }
 }
 
+/// This is normally called from the main scheduler loop but may also be invoked
+/// by the scheduling strategy to perform additional code motion.
 void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
                                     MachineBasicBlock::iterator InsertPos) {
   // Advance RegionBegin if the first instruction moves down.
@@ -916,7 +918,7 @@ class ConvergingScheduler : public MachineSchedStrategy {
   /// Represent the type of SchedCandidate found within a single queue.
   /// pickNodeBidirectional depends on these listed by decreasing priority.
   enum CandReason {
-    NoCand, SingleExcess, SingleCritical, Cluster,
+    NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster,
     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
     TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
     NodeOrder};
@@ -1191,8 +1193,10 @@ class ConvergingScheduler : public MachineSchedStrategy {
                          const RegPressureTracker &RPTracker,
                          SchedCandidate &Candidate);
 
+  void reschedulePhysRegCopies(SUnit *SU, bool isTop);
+
 #ifndef NDEBUG
-  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
+  void traceCandidate(const SchedCandidate &Cand);
 #endif
 };
 } // namespace
@@ -1243,8 +1247,6 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
   Top.init(DAG, SchedModel, &Rem);
   Bot.init(DAG, SchedModel, &Rem);
 
-  DAG->computeDFSResult();
-
   // Initialize resource counts.
 
   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
@@ -1341,6 +1343,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
   for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
        I != E; ++I) {
     unsigned L = getUnscheduledLatency(*I);
+    DEBUG(dbgs() << "  " << Available.getName()
+          << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
     if (L > RemLatency)
       RemLatency = L;
   }
@@ -1351,10 +1355,13 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
       RemLatency = L;
   }
   unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+  DEBUG(dbgs() << "  " << Available.getName()
+        << " ExpectedLatency " << ExpectedLatency
+        << " CP Limit " << CriticalPathLimit << '\n');
   if (RemLatency + ExpectedLatency >= CriticalPathLimit
       && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
     Policy.ReduceLatency = true;
-    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
+    DEBUG(dbgs() << "  Increase ILP: " << Available.getName() << '\n');
   }
 }
 
@@ -1403,8 +1410,8 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
   CheckPending = true;
   IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
 
-  DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
-        << CurrCycle << '\n');
+  DEBUG(dbgs() << "  " << Available.getName()
+        << " Cycle: " << CurrCycle << '\n');
 }
 
 /// Add the given processor resource to this scheduled zone.
@@ -1571,7 +1578,8 @@ void ConvergingScheduler::balanceZones(
   if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
       > (int)SchedModel->getLatencyFactor()) {
     CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
-    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+    DEBUG(dbgs() << "  Balance " << CriticalZone.Available.getName()
+          << " reduce "
           << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
           << '\n');
   }
@@ -1582,7 +1590,8 @@ void ConvergingScheduler::balanceZones(
   if ((int)(OppositeZone.ExpectedCount - OppositeCount)
       > (int)SchedModel->getLatencyFactor()) {
     OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
-    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+    DEBUG(dbgs() << "  Balance " << OppositeZone.Available.getName()
+          << " demand "
           << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
           << '\n');
   }
@@ -1606,7 +1615,7 @@ void ConvergingScheduler::checkResourceLimits(
     if (Top.CritResIdx != Rem.CritResIdx) {
       TopCand.Policy.ReduceResIdx = Top.CritResIdx;
       BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
-      DEBUG(dbgs() << "Reduce scheduled "
+      DEBUG(dbgs() << "  Reduce scheduled "
             << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
     }
     return;
@@ -1623,7 +1632,7 @@ void ConvergingScheduler::checkResourceLimits(
         && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
       TopCand.Policy.ReduceLatency = true;
       BotCand.Policy.ReduceLatency = true;
-      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+      DEBUG(dbgs() << "  Reduce scheduled latency " << Top.ExpectedLatency
             << " + " << Bot.ExpectedLatency << '\n');
     }
     return;
@@ -1662,7 +1671,7 @@ initResourceDelta(const ScheduleDAGMI *DAG,
 }
 
 /// Return true if this heuristic determines order.
-static bool tryLess(unsigned TryVal, unsigned CandVal,
+static bool tryLess(int TryVal, int CandVal,
                     ConvergingScheduler::SchedCandidate &TryCand,
                     ConvergingScheduler::SchedCandidate &Cand,
                     ConvergingScheduler::CandReason Reason) {
@@ -1678,7 +1687,7 @@ static bool tryLess(unsigned TryVal, unsigned CandVal,
   return false;
 }
 
-static bool tryGreater(unsigned TryVal, unsigned CandVal,
+static bool tryGreater(int TryVal, int CandVal,
                        ConvergingScheduler::SchedCandidate &TryCand,
                        ConvergingScheduler::SchedCandidate &Cand,
                        ConvergingScheduler::CandReason Reason) {
@@ -1698,6 +1707,34 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
 }
 
+/// Minimize physical register live ranges. Regalloc wants them adjacent to
+/// their physreg def/use.
+///
+/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
+/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
+/// with the operation that produces or consumes the physreg. We'll do this when
+/// regalloc has support for parallel copies.
+static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+  const MachineInstr *MI = SU->getInstr();
+  if (!MI->isCopy())
+    return 0;
+
+  unsigned ScheduledOper = isTop ? 1 : 0;
+  unsigned UnscheduledOper = isTop ? 0 : 1;
+  // If we have already scheduled the physreg produce/consumer, immediately
+  // schedule the copy.
+  if (TargetRegisterInfo::isPhysicalRegister(
+        MI->getOperand(ScheduledOper).getReg()))
+    return 1;
+  // If the physreg is at the boundary, defer it. Otherwise schedule it
+  // immediately to free the dependent. We can hoist the copy later.
+  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+  if (TargetRegisterInfo::isPhysicalRegister(
+        MI->getOperand(UnscheduledOper).getReg()))
+    return AtBoundary ? -1 : 1;
+  return 0;
+}
+
 /// Apply a set of heursitics to a new candidate. Heuristics are currently
 /// hierarchical. This may be more efficient than a graduated cost model because
 /// we don't need to evaluate all aspects of the model for each node in the
@@ -1725,6 +1762,12 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
     TryCand.Reason = NodeOrder;
     return;
   }
+
+  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
+                 biasPhysRegCopy(Cand.SU, Zone.isTop()),
+                 TryCand, Cand, PhysRegCopy))
+    return;
+
   // Avoid exceeding the target's limit.
   if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
               Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
@@ -1827,20 +1870,20 @@ static bool compareRPDelta(const RegPressureDelta &LHS,
 
   // Avoid increasing the max critical pressure in the scheduled region.
   if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
-    DEBUG(dbgs() << "RP excess top - bot: "
+    DEBUG(dbgs() << "  RP excess top - bot: "
           << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
     return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
   }
   // Avoid increasing the max critical pressure in the scheduled region.
   if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
-    DEBUG(dbgs() << "RP critical top - bot: "
+    DEBUG(dbgs() << "  RP critical top - bot: "
           << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
           << '\n');
     return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
   }
   // Avoid increasing the max pressure of the entire region.
   if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
-    DEBUG(dbgs() << "RP current top - bot: "
+    DEBUG(dbgs() << "  RP current top - bot: "
           << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
           << '\n');
     return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
@@ -1853,6 +1896,7 @@ const char *ConvergingScheduler::getReasonStr(
   ConvergingScheduler::CandReason Reason) {
   switch (Reason) {
   case NoCand:         return "NOCAND    ";
+  case PhysRegCopy:    return "PREG-COPY";
   case SingleExcess:   return "REG-EXCESS";
   case SingleCritical: return "REG-CRIT  ";
   case Cluster:        return "CLUSTER   ";
@@ -1870,9 +1914,7 @@ const char *ConvergingScheduler::getReasonStr(
   llvm_unreachable("Unknown reason!");
 }
 
-void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
-                                         const SchedBoundary &Zone) {
-  const char *Label = getReasonStr(Cand.Reason);
+void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
   PressureElement P;
   unsigned ResIdx = 0;
   unsigned Latency = 0;
@@ -1907,21 +1949,21 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
     Latency = Cand.SU->getDepth();
     break;
   }
-  dbgs() << Label << " " << Zone.Available.getName() << " ";
+  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
   if (P.isValid())
-    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
-           << " ";
+    dbgs() << " " << TRI->getRegPressureSetName(P.PSetID)
+           << ":" << P.UnitIncrease << " ";
   else
-    dbgs() << "     ";
+    dbgs() << "      ";
   if (ResIdx)
-    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
+    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
   else
-    dbgs() << "        ";
+    dbgs() << "         ";
   if (Latency)
-    dbgs() << Latency << " cycles ";
+    dbgs() << " " << Latency << " cycles ";
   else
-    dbgs() << "         ";
-  Cand.SU->dump(DAG);
+    dbgs() << "          ";
+  dbgs() << '\n';
 }
 #endif
 
@@ -1950,15 +1992,14 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
       if (TryCand.ResDelta == SchedResourceDelta())
         TryCand.initResourceDelta(DAG, SchedModel);
       Cand.setBest(TryCand);
-      DEBUG(traceCandidate(Cand, Zone));
+      DEBUG(traceCandidate(Cand));
     }
   }
 }
 
 static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                       bool IsTop) {
-  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
-        << " SU(" << Cand.SU->NodeNum << ") "
+  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
         << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
 }
 
@@ -1968,10 +2009,12 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
   // efficient, but also provides the best heuristics for CriticalPSets.
   if (SUnit *SU = Bot.pickOnlyChoice()) {
     IsTopNode = false;
+    DEBUG(dbgs() << "Pick Top NOCAND\n");
     return SU;
   }
   if (SUnit *SU = Top.pickOnlyChoice()) {
     IsTopNode = true;
+    DEBUG(dbgs() << "Pick Bot NOCAND\n");
     return SU;
   }
   CandPolicy NoPolicy;
@@ -2069,24 +2112,53 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
   if (SU->isBottomReady())
     Bot.removeReady(SU);
 
-  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
-        << " Scheduling Instruction in cycle "
-        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
-        SU->dump(DAG));
+  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
   return SU;
 }
 
+void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+
+  MachineBasicBlock::iterator InsertPos = SU->getInstr();
+  if (!isTop)
+    ++InsertPos;
+  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
+
+  // Find already scheduled copies with a single physreg dependence and move
+  // them just above the scheduled instruction.
+  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
+       I != E; ++I) {
+    if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+      continue;
+    SUnit *DepSU = I->getSUnit();
+    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
+      continue;
+    MachineInstr *Copy = DepSU->getInstr();
+    if (!Copy->isCopy())
+      continue;
+    DEBUG(dbgs() << "  Rescheduling physreg copy ";
+          I->getSUnit()->dump(DAG));
+    DAG->moveInstruction(Copy, InsertPos);
+  }
+}
+
 /// Update the scheduler's state after scheduling a node. This is the same node
 /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
 /// it's state based on the current cycle before MachineSchedStrategy does.
+///
+/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
+/// them here. See comments in biasPhysRegCopy.
 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
     SU->TopReadyCycle = Top.CurrCycle;
     Top.bumpNode(SU);
+    if (SU->hasPhysRegUses)
+      reschedulePhysRegCopies(SU, true);
   }
   else {
     SU->BotReadyCycle = Bot.CurrCycle;
     Bot.bumpNode(SU);
+    if (SU->hasPhysRegDefs)
+      reschedulePhysRegCopies(SU, false);
   }
 }
 
@@ -2182,16 +2254,16 @@ class ILPScheduler : public MachineSchedStrategy {
   /// Callback to select the highest priority node from the ready Q.
   virtual SUnit *pickNode(bool &IsTopNode) {
     if (ReadyQ.empty()) return NULL;
-    pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
     SUnit *SU = ReadyQ.back();
     ReadyQ.pop_back();
     IsTopNode = false;
-    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
-          << *SU->getInstr()
+    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
           << " ILP: " << DAG->getDFSResult()->getILP(SU)
           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
           << DAG->getDFSResult()->getSubtreeLevel(
-            DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
+            DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
+          << "Scheduling " << *SU->getInstr());
     return SU;
   }
 
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 5bf0176eadc9..49d8c4e9470d 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -57,6 +58,8 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
     MF->getTarget().getSubtarget<TargetSubtargetInfo>();
   SchedModel.init(*ST.getSchedModel(), &ST, TII);
   BlockInfo.resize(MF->getNumBlockIDs());
+  ProcResourceCycles.resize(MF->getNumBlockIDs() *
+                            SchedModel.getNumProcResourceKinds());
   return false;
 }
 
@@ -85,9 +88,13 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
     return FBI;
 
   // Compute resource usage in the block.
-  // FIXME: Compute per-functional unit counts.
   FBI->HasCalls = false;
   unsigned InstrCount = 0;
+
+  // Add up per-processor resource cycles as well.
+  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+  SmallVector<unsigned, 32> PRCycles(PRKinds);
+
   for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
        I != E; ++I) {
     const MachineInstr *MI = I;
@@ -96,11 +103,43 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
     ++InstrCount;
     if (MI->isCall())
       FBI->HasCalls = true;
+
+    // Count processor resources used.
+    if (!SchedModel.hasInstrSchedModel())
+      continue;
+    const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI);
+    if (!SC->isValid())
+      continue;
+
+    for (TargetSchedModel::ProcResIter
+         PI = SchedModel.getWriteProcResBegin(SC),
+         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+      assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
+      PRCycles[PI->ProcResourceIdx] += PI->Cycles;
+    }
   }
   FBI->InstrCount = InstrCount;
+
+  // Scale the resource cycles so they are comparable.
+  unsigned PROffset = MBB->getNumber() * PRKinds;
+  for (unsigned K = 0; K != PRKinds; ++K)
+    ProcResourceCycles[PROffset + K] =
+      PRCycles[K] * SchedModel.getResourceFactor(K);
+
   return FBI;
 }
 
+ArrayRef<unsigned>
+MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
+  assert(BlockInfo[MBBNum].hasResources() &&
+         "getResources() must be called before getProcResourceCycles()");
+  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+  assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
+  return ArrayRef<unsigned>(ProcResourceCycles.data() + MBBNum * PRKinds,
+                            PRKinds);
+}
+
+
 //===----------------------------------------------------------------------===//
 //                         Ensemble utility functions
 //===----------------------------------------------------------------------===//
@@ -108,6 +147,9 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
 MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
   : MTM(*ct) {
   BlockInfo.resize(MTM.BlockInfo.size());
+  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
+  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
 }
 
 // Virtual destructor serves as an anchor.
@@ -123,21 +165,32 @@ MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
 void MachineTraceMetrics::Ensemble::
 computeDepthResources(const MachineBasicBlock *MBB) {
   TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+  unsigned PROffset = MBB->getNumber() * PRKinds;
 
   // Compute resources from trace above. The top block is simple.
   if (!TBI->Pred) {
     TBI->InstrDepth = 0;
     TBI->Head = MBB->getNumber();
+    std::fill(ProcResourceDepths.begin() + PROffset,
+              ProcResourceDepths.begin() + PROffset + PRKinds, 0);
     return;
   }
 
   // Compute from the block above. A post-order traversal ensures the
   // predecessor is always computed first.
-  TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
+  unsigned PredNum = TBI->Pred->getNumber();
+  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
   assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
   const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
   TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
   TBI->Head = PredTBI->Head;
+
+  // Compute per-resource depths.
+  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
+  ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
+  for (unsigned K = 0; K != PRKinds; ++K)
+    ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
 }
 
 // Update resource-related information in the TraceBlockInfo for MBB.
@@ -145,22 +198,33 @@ computeDepthResources(const MachineBasicBlock *MBB) {
 void MachineTraceMetrics::Ensemble::
 computeHeightResources(const MachineBasicBlock *MBB) {
   TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+  unsigned PROffset = MBB->getNumber() * PRKinds;
 
   // Compute resources for the current block.
   TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
 
   // The trace tail is done.
   if (!TBI->Succ) {
     TBI->Tail = MBB->getNumber();
+    std::copy(PRCycles.begin(), PRCycles.end(),
+              ProcResourceHeights.begin() + PROffset);
     return;
   }
 
   // Compute from the block below. A post-order traversal ensures the
   // predecessor is always computed first.
-  TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
+  unsigned SuccNum = TBI->Succ->getNumber();
+  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
   assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
   TBI->InstrHeight += SuccTBI->InstrHeight;
   TBI->Tail = SuccTBI->Tail;
+
+  // Compute per-resource heights.
+  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
+  for (unsigned K = 0; K != PRKinds; ++K)
+    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
 }
 
 // Check if depth resources for MBB are valid and return the TBI.
@@ -181,6 +245,35 @@ getHeightResources(const MachineBasicBlock *MBB) const {
   return TBI->hasValidHeight() ? TBI : 0;
 }
 
+/// Get an array of processor resource depths for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by all blocks preceding MBB in its trace. It does not include instructions
+/// in MBB.
+///
+/// Compare TraceBlockInfo::InstrDepth.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceDepths(unsigned MBBNum) const {
+  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+  assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
+  return ArrayRef<unsigned>(ProcResourceDepths.data() + MBBNum * PRKinds,
+                            PRKinds);
+}
+
+/// Get an array of processor resource heights for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by this block and all blocks following it in its trace.
+///
+/// Compare TraceBlockInfo::InstrHeight.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceHeights(unsigned MBBNum) const {
+  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+  assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
+  return ArrayRef<unsigned>(ProcResourceHeights.data() + MBBNum * PRKinds,
+                            PRKinds);
+}
+
 //===----------------------------------------------------------------------===//
 //                         Trace Selection Strategies
 //===----------------------------------------------------------------------===//
@@ -713,11 +806,24 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
   SmallVector<DataDep, 8> Deps;
   while (!Stack.empty()) {
     MBB = Stack.pop_back_val();
-    DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
+    DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
     TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
     TBI.HasValidInstrDepths = true;
     TBI.CriticalPath = 0;
 
+    // Print out resource depths here as well.
+    DEBUG({
+      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
+      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
+      for (unsigned K = 0; K != PRDepths.size(); ++K)
+        if (PRDepths[K]) {
+          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
+                 << MTM.SchedModel.getProcResource(K)->Name << " ("
+                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
+        }
+    });
+
     // Also compute the critical path length through MBB when possible.
     if (TBI.HasValidInstrHeights)
       TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
@@ -928,6 +1034,18 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
     TBI.HasValidInstrHeights = true;
     TBI.CriticalPath = 0;
 
+    DEBUG({
+      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
+      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
+      for (unsigned K = 0; K != PRHeights.size(); ++K)
+        if (PRHeights[K]) {
+          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
+                 << MTM.SchedModel.getProcResource(K)->Name << " ("
+                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
+        }
+    });
+
     // Get dependencies from PHIs in the trace successor.
     const MachineBasicBlock *Succ = TBI.Succ;
     // If MBB is the last block in the trace, and it has a back-edge to the
@@ -1058,27 +1176,52 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
 }
 
 unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
-  // For now, we compute the resource depth from instruction count / issue
-  // width. Eventually, we should compute resource depth per functional unit
-  // and return the max.
+  // Find the limiting processor resource.
+  // Numbers have been pre-scaled to be comparable.
+  unsigned PRMax = 0;
+  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+  if (Bottom) {
+    ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
+    for (unsigned K = 0; K != PRDepths.size(); ++K)
+      PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
+  } else {
+    for (unsigned K = 0; K != PRDepths.size(); ++K)
+      PRMax = std::max(PRMax, PRDepths[K]);
+  }
+  // Convert to cycle count.
+  PRMax = TE.MTM.getCycles(PRMax);
+
   unsigned Instrs = TBI.InstrDepth;
   if (Bottom)
     Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
   if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
     Instrs /= IW;
   // Assume issue width 1 without a schedule model.
-  return Instrs;
+  return std::max(Instrs, PRMax);
 }
 
 unsigned MachineTraceMetrics::Trace::
 getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+  // Add up resources above and below the center block.
+  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
+  unsigned PRMax = 0;
+  for (unsigned K = 0; K != PRDepths.size(); ++K) {
+    unsigned PRCycles = PRDepths[K] + PRHeights[K];
+    for (unsigned I = 0; I != Extrablocks.size(); ++I)
+      PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
+    PRMax = std::max(PRMax, PRCycles);
+  }
+  // Convert to cycle count.
+  PRMax = TE.MTM.getCycles(PRMax);
+
   unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
   for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
     Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
   if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
     Instrs /= IW;
   // Assume issue width 1 without a schedule model.
-  return Instrs;
+  return std::max(Instrs, PRMax);
 }
 
 void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 4b1230029a74..037043f6419c 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -472,6 +472,9 @@ void MachineVerifier::visitMachineFunctionBefore() {
     if (MInfo.Succs.size() != I->succ_size())
       report("MBB has duplicate entries in its successor list.", I);
   }
+
+  // Check that the register use lists are sane.
+  MRI->verifyUseLists();
 }
 
 // Does iterator point to a and b as the first two elements?
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 6e1cad32523b..bfbc0623f9cc 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -39,12 +39,9 @@ static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
 static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
     cl::desc("Disable pre-register allocation tail duplication"));
 static cl::opt<bool> DisableBlockPlacement("disable-block-placement",
-    cl::Hidden, cl::desc("Disable the probability-driven block placement, and "
-                         "re-enable the old code placement pass"));
+    cl::Hidden, cl::desc("Disable probability-driven block placement"));
 static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
     cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
-static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
-    cl::desc("Disable code placement"));
 static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
     cl::desc("Disable Stack Slot Coloring"));
 static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
@@ -96,9 +93,10 @@ static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
 /// simple binary flags that either suppress the pass or do nothing.
 /// i.e. -disable-mypass=false has no effect.
 /// These should be converted to boolOrDefault in order to use applyOverride.
-static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
+static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID,
+                                       bool Override) {
   if (Override)
-    return 0;
+    return IdentifyingPassPtr();
   return PassID;
 }
 
@@ -106,19 +104,20 @@ static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
 /// flags with ternary conditions. TargetID is passed through by default. The
 /// pass is suppressed when the option is false. When the option is true, the
 /// StandardID is selected if the target provides no default.
-static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
-                                AnalysisID StandardID) {
+static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID,
+                                        cl::boolOrDefault Override,
+                                        AnalysisID StandardID) {
   switch (Override) {
   case cl::BOU_UNSET:
     return TargetID;
   case cl::BOU_TRUE:
-    if (TargetID)
+    if (TargetID.isValid())
       return TargetID;
     if (StandardID == 0)
       report_fatal_error("Target cannot enable pass");
     return StandardID;
   case cl::BOU_FALSE:
-    return 0;
+    return IdentifyingPassPtr();
   }
   llvm_unreachable("Invalid command line option state");
 }
@@ -135,7 +134,8 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
 /// StandardID may be a pseudo ID. In that case TargetID is the name of the real
 /// pass to run. This allows multiple options to control a single pass depending
 /// on where in the pipeline that pass is added.
-static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
+static IdentifyingPassPtr overridePass(AnalysisID StandardID,
+                                       IdentifyingPassPtr TargetID) {
   if (StandardID == &PostRASchedulerID)
     return applyDisable(TargetID, DisablePostRA);
 
@@ -149,10 +149,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
     return applyDisable(TargetID, DisableEarlyTailDup);
 
   if (StandardID == &MachineBlockPlacementID)
-    return applyDisable(TargetID, DisableCodePlace);
-
-  if (StandardID == &CodePlacementOptID)
-    return applyDisable(TargetID, DisableCodePlace);
+    return applyDisable(TargetID, DisableBlockPlacement);
 
   if (StandardID == &StackSlotColoringID)
     return applyDisable(TargetID, DisableSSC);
@@ -206,11 +203,11 @@ class PassConfigImpl {
   // user interface. For example, a target may disable a standard pass by
   // default by substituting a pass ID of zero, and the user may still enable
   // that standard pass with an explicit command line option.
-  DenseMap<AnalysisID,AnalysisID> TargetPasses;
+  DenseMap<AnalysisID,IdentifyingPassPtr> TargetPasses;
 
   /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
   /// is inserted after each instance of the first one.
-  SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses;
+  SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses;
 };
 } // namespace llvm
 
@@ -245,9 +242,13 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
 
 /// Insert InsertedPassID pass after TargetPassID.
 void TargetPassConfig::insertPass(AnalysisID TargetPassID,
-                                  AnalysisID InsertedPassID) {
-  assert(TargetPassID != InsertedPassID && "Insert a pass after itself!");
-  std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID);
+                                  IdentifyingPassPtr InsertedPassID) {
+  assert(((!InsertedPassID.isInstance() &&
+           TargetPassID != InsertedPassID.getID()) ||
+          (InsertedPassID.isInstance() &&
+           TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
+         "Insert a pass after itself!");
+  std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID);
   Impl->InsertedPasses.push_back(P);
 }
 
@@ -271,12 +272,12 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) {
 }
 
 void TargetPassConfig::substitutePass(AnalysisID StandardID,
-                                      AnalysisID TargetID) {
+                                      IdentifyingPassPtr TargetID) {
   Impl->TargetPasses[StandardID] = TargetID;
 }
 
-AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
-  DenseMap<AnalysisID, AnalysisID>::const_iterator
+IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
+  DenseMap<AnalysisID, IdentifyingPassPtr>::const_iterator
     I = Impl->TargetPasses.find(ID);
   if (I == Impl->TargetPasses.end())
     return ID;
@@ -309,24 +310,39 @@ void TargetPassConfig::addPass(Pass *P) {
 
 /// Add a CodeGen pass at this point in the pipeline after checking for target
 /// and command line overrides.
+///
+/// addPass cannot return a pointer to the pass instance because is internal the
+/// PassManager and the instance we create here may already be freed.
 AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
-  AnalysisID TargetID = getPassSubstitution(PassID);
-  AnalysisID FinalID = overridePass(PassID, TargetID);
-  if (FinalID == 0)
-    return FinalID;
-
-  Pass *P = Pass::createPass(FinalID);
-  if (!P)
-    llvm_unreachable("Pass ID not registered");
-  addPass(P);
+  IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
+  IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
+  if (!FinalPtr.isValid())
+    return 0;
+
+  Pass *P;
+  if (FinalPtr.isInstance())
+    P = FinalPtr.getInstance();
+  else {
+    P = Pass::createPass(FinalPtr.getID());
+    if (!P)
+      llvm_unreachable("Pass ID not registered");
+  }
+  AnalysisID FinalID = P->getPassID();
+  addPass(P); // Ends the lifetime of P.
+
   // Add the passes after the pass P if there is any.
-  for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator
+  for (SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4>::iterator
          I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
        I != E; ++I) {
     if ((*I).first == PassID) {
-      assert((*I).second && "Illegal Pass ID!");
-      Pass *NP = Pass::createPass((*I).second);
-      assert(NP && "Pass ID not registered");
+      assert((*I).second.isValid() && "Illegal Pass ID!");
+      Pass *NP;
+      if ((*I).second.isInstance())
+        NP = (*I).second.getInstance();
+      else {
+        NP = Pass::createPass((*I).second.getID());
+        assert(NP && "Pass ID not registered");
+      }
       addPass(NP);
     }
   }
@@ -693,14 +709,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
   addPass(&VirtRegRewriterID);
   printAndVerify("After Virtual Register Rewriter");
 
-  // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
-  // but eventually, all users of it should probably be moved to addPostRA and
-  // it can go away.  Currently, it's the intended place for targets to run
-  // FinalizeMachineBundles, because passes other than MachineScheduling an
-  // RegAlloc itself may not be aware of bundles.
-  if (addFinalizeRegAlloc())
-    printAndVerify("After RegAlloc finalization");
-
   // Perform stack slot coloring and post-ra machine LICM.
   //
   // FIXME: Re-enable coloring with register when it's capable of adding
@@ -742,16 +750,7 @@ bool TargetPassConfig::addGCPasses() {
 
 /// Add standard basic block placement passes.
 void TargetPassConfig::addBlockPlacement() {
-  AnalysisID PassID = 0;
-  if (!DisableBlockPlacement) {
-    // MachineBlockPlacement is a new pass which subsumes the functionality of
-    // CodPlacementOpt. The old code placement pass can be restored by
-    // disabling block placement, but eventually it will be removed.
-    PassID = addPass(&MachineBlockPlacementID);
-  } else {
-    PassID = addPass(&CodePlacementOptID);
-  }
-  if (PassID) {
+  if (addPass(&MachineBlockPlacementID)) {
     // Run a separate pass to collect block placement statistics.
     if (EnableBlockPlacementStats)
       addPass(&MachineBlockPlacementStatsID);
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 855a8c5f926e..959dd7df58ee 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -55,7 +55,6 @@ INITIALIZE_PASS_END(PEI, "prologepilog",
                     "Prologue/Epilogue Insertion & Frame Finalization",
                     false, false)
 
-STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
 STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
 STATISTIC(NumBytesStackSpace,
           "Number of bytes used for stack in all functions");
@@ -548,9 +547,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
   if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
       !RegInfo->needsStackRealignment(Fn)) {
-    int SFI = RS->getScavengingFrameIndex();
-    if (SFI >= 0)
-      AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+    SmallVector<int, 2> SFIs;
+    RS->getScavengingFrameIndices(SFIs);
+    for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+         IE = SFIs.end(); I != IE; ++I)
+      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
   }
 
   // FIXME: Once this is working, then enable flag will change to a target
@@ -593,7 +594,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
         continue;
       if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
         continue;
-      if (RS && (int)i == RS->getScavengingFrameIndex())
+      if (RS && RS->isScavengingFrameIndex((int)i))
         continue;
       if (MFI->isDeadObjectIndex(i))
         continue;
@@ -615,7 +616,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
       continue;
     if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
       continue;
-    if (RS && (int)i == RS->getScavengingFrameIndex())
+    if (RS && RS->isScavengingFrameIndex((int)i))
       continue;
     if (MFI->isDeadObjectIndex(i))
       continue;
@@ -631,9 +632,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   // stack pointer.
   if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
              !RegInfo->useFPForScavengingIndex(Fn))) {
-    int SFI = RS->getScavengingFrameIndex();
-    if (SFI >= 0)
-      AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+    SmallVector<int, 2> SFIs;
+    RS->getScavengingFrameIndices(SFIs);
+    for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+         IE = SFIs.end(); I != IE; ++I)
+      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
   }
 
   if (!TFI.targetHandlesStackFrameRounding()) {
@@ -816,14 +819,28 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
        E = Fn.end(); BB != E; ++BB) {
     RS->enterBasicBlock(BB);
 
-    unsigned VirtReg = 0;
-    unsigned ScratchReg = 0;
     int SPAdj = 0;
 
     // The instruction stream may change in the loop, so check BB->end()
     // directly.
     for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+      // We might end up here again with a NULL iterator if we scavenged a
+      // register for which we inserted spill code for definition by what was
+      // originally the first instruction in BB.
+      if (I == MachineBasicBlock::iterator(NULL))
+        I = BB->begin();
+
       MachineInstr *MI = I;
+      MachineBasicBlock::iterator J = llvm::next(I);
+      MachineBasicBlock::iterator P = I == BB->begin() ?
+        MachineBasicBlock::iterator(NULL) : llvm::prior(I);
+
+      // RS should process this instruction before we might scavenge at this
+      // location. This is because we might be replacing a virtual register
+      // defined by this instruction, and if so, registers killed by this
+      // instruction are available, and defined registers are not.
+      RS->forward(I);
+
       for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
         if (MI->getOperand(i).isReg()) {
           MachineOperand &MO = MI->getOperand(i);
@@ -833,29 +850,47 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
           if (!TargetRegisterInfo::isVirtualRegister(Reg))
             continue;
 
-          ++NumVirtualFrameRegs;
-
-          // Have we already allocated a scratch register for this virtual?
-          if (Reg != VirtReg) {
-            // When we first encounter a new virtual register, it
-            // must be a definition.
-            assert(MI->getOperand(i).isDef() &&
-                   "frame index virtual missing def!");
-            // Scavenge a new scratch register
-            VirtReg = Reg;
-            const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
-            ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
-            ++NumScavengedRegs;
-          }
+          // When we first encounter a new virtual register, it
+          // must be a definition.
+          assert(MI->getOperand(i).isDef() &&
+                 "frame index virtual missing def!");
+          // Scavenge a new scratch register
+          const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+          unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+          ++NumScavengedRegs;
+
           // Replace this reference to the virtual register with the
           // scratch register.
           assert (ScratchReg && "Missing scratch register!");
-          MI->getOperand(i).setReg(ScratchReg);
+          Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
 
+          // Because this instruction was processed by the RS before this
+          // register was allocated, make sure that the RS now records the
+          // register as being used.
+          RS->setUsed(ScratchReg);
         }
       }
-      RS->forward(I);
-      ++I;
+
+      // If the scavenger needed to use one of its spill slots, the
+      // spill code will have been inserted in between I and J. This is a
+      // problem because we need the spill code before I: Move I to just
+      // prior to J.
+      if (I != llvm::prior(J)) {
+        BB->splice(J, BB, I);
+
+        // Before we move I, we need to prepare the RS to visit I again.
+        // Specifically, RS will assert if it sees uses of registers that
+        // it believes are undefined. Because we have already processed
+        // register kills in I, when it visits I again, it will believe that
+        // those registers are undefined. To avoid this situation, unprocess
+        // the instruction I.
+        assert(RS->getCurrentPosition() == I &&
+          "The register scavenger has an unexpected position");
+        I = P;
+        RS->unprocess(P);
+      } else
+        ++I;
     }
   }
 }
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 0b6dc68cdf09..7fcfe9e88bef 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -63,7 +63,7 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
   MachineFunction *MF;
 
   // state
-  std::auto_ptr<Spiller> SpillerInstance;
+  OwningPtr<Spiller> SpillerInstance;
   std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
                       CompSpillWeight> Queue;
 
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 6d84176af261..9eed1fc62acc 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -78,7 +78,7 @@ class RAGreedy : public MachineFunctionPass,
   LiveDebugVariables *DebugVars;
 
   // state
-  std::auto_ptr<Spiller> SpillerInstance;
+  OwningPtr<Spiller> SpillerInstance;
   std::priority_queue<std::pair<unsigned, unsigned> > Queue;
   unsigned NextCascade;
 
@@ -166,8 +166,8 @@ class RAGreedy : public MachineFunctionPass,
   };
 
   // splitting state.
-  std::auto_ptr<SplitAnalysis> SA;
-  std::auto_ptr<SplitEditor> SE;
+  OwningPtr<SplitAnalysis> SA;
+  OwningPtr<SplitEditor> SE;
 
   /// Cached per-block interference maps
   InterferenceCache IntfCache;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 607edac24bd2..15a88e224faa 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -34,6 +34,7 @@
 #include "llvm/CodeGen/RegAllocPBQP.h"
 #include "RegisterCoalescer.h"
 #include "Spiller.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -89,8 +90,8 @@ class RegAllocPBQP : public MachineFunctionPass {
   static char ID;
 
   /// Construct a PBQP register allocator.
-  RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
-      : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
+  RegAllocPBQP(OwningPtr<PBQPBuilder> &b, char *cPassID=0)
+      : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) {
     initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
     initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
     initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
@@ -121,7 +122,7 @@ class RegAllocPBQP : public MachineFunctionPass {
   typedef std::set<unsigned> RegSet;
 
 
-  std::auto_ptr<PBQPBuilder> builder;
+  OwningPtr<PBQPBuilder> builder;
 
   char *customPassID;
 
@@ -132,7 +133,7 @@ class RegAllocPBQP : public MachineFunctionPass {
   const MachineLoopInfo *loopInfo;
   MachineRegisterInfo *mri;
 
-  std::auto_ptr<Spiller> spiller;
+  OwningPtr<Spiller> spiller;
   LiveIntervals *lis;
   LiveStacks *lss;
   VirtRegMap *vrm;
@@ -186,16 +187,15 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
   return allowedSet[option - 1];
 }
 
-std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
-                                                const LiveIntervals *lis,
-                                                const MachineLoopInfo *loopInfo,
-                                                const RegSet &vregs) {
+PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
+                                  const MachineLoopInfo *loopInfo,
+                                  const RegSet &vregs) {
 
   LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
   MachineRegisterInfo *mri = &mf->getRegInfo();
   const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
 
-  std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+  OwningPtr<PBQPRAProblem> p(new PBQPRAProblem());
   PBQP::Graph &g = p->getGraph();
   RegSet pregs;
 
@@ -282,7 +282,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
     }
   }
 
-  return p;
+  return p.take();
 }
 
 void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
@@ -311,13 +311,12 @@ void PBQPBuilder::addInterferenceCosts(
   }
 }
 
-std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
-                                                MachineFunction *mf,
+PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
                                                 const LiveIntervals *lis,
                                                 const MachineLoopInfo *loopInfo,
                                                 const RegSet &vregs) {
 
-  std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+  OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, loopInfo, vregs));
   PBQP::Graph &g = p->getGraph();
 
   const TargetMachine &tm = mf->getTarget();
@@ -391,7 +390,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
     }
   }
 
-  return p;
+  return p.take();
 }
 
 void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
@@ -584,8 +583,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
     while (!pbqpAllocComplete) {
       DEBUG(dbgs() << "  PBQP Regalloc round " << round << ":\n");
 
-      std::auto_ptr<PBQPRAProblem> problem =
-        builder->build(mf, lis, loopInfo, vregsToAlloc);
+      OwningPtr<PBQPRAProblem> problem(
+        builder->build(mf, lis, loopInfo, vregsToAlloc));
 
 #ifndef NDEBUG
       if (pbqpDumpGraphs) {
@@ -621,18 +620,18 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
 }
 
 FunctionPass* llvm::createPBQPRegisterAllocator(
-                                           std::auto_ptr<PBQPBuilder> builder,
+                                           OwningPtr<PBQPBuilder> &builder,
                                            char *customPassID) {
   return new RegAllocPBQP(builder, customPassID);
 }
 
 FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
-  if (pbqpCoalescing) {
-    return createPBQPRegisterAllocator(
-             std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
-  } // else
-  return createPBQPRegisterAllocator(
-           std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+  OwningPtr<PBQPBuilder> Builder;
+  if (pbqpCoalescing)
+    Builder.reset(new PBQPBuilderWithCoalescing());
+  else
+    Builder.reset(new PBQPBuilder());
+  return createPBQPRegisterAllocator(Builder);
 }
 
 #undef DEBUG_TYPE
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 6da901f81d37..f82ccbe84da3 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -45,9 +45,11 @@ bool RegScavenger::isAliasUsed(unsigned Reg) const {
 }
 
 void RegScavenger::initRegState() {
-  ScavengedReg = 0;
-  ScavengedRC = NULL;
-  ScavengeRestore = NULL;
+  for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+       IE = Scavenged.end(); I != IE; ++I) {
+    I->Reg = 0;
+    I->Restore = NULL;
+  }
 
   // All registers started out unused.
   RegsAvailable.set();
@@ -108,27 +110,11 @@ void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
     BV.set(*SubRegs);
 }
 
-void RegScavenger::forward() {
-  // Move ptr forward.
-  if (!Tracking) {
-    MBBI = MBB->begin();
-    Tracking = true;
-  } else {
-    assert(MBBI != MBB->end() && "Already past the end of the basic block!");
-    MBBI = llvm::next(MBBI);
-  }
-  assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+void RegScavenger::determineKillsAndDefs() {
+  assert(Tracking && "Must be tracking to determine kills and defs");
 
   MachineInstr *MI = MBBI;
-
-  if (MI == ScavengeRestore) {
-    ScavengedReg = 0;
-    ScavengedRC = NULL;
-    ScavengeRestore = NULL;
-  }
-
-  if (MI->isDebugValue())
-    return;
+  assert(!MI->isDebugValue() && "Debug values have no kills or defs");
 
   // Find out which registers are early clobbered, killed, defined, and marked
   // def-dead in this instruction.
@@ -145,7 +131,7 @@ void RegScavenger::forward() {
     if (!MO.isReg())
       continue;
     unsigned Reg = MO.getReg();
-    if (!Reg || isReserved(Reg))
+    if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
       continue;
 
     if (MO.isUse()) {
@@ -162,6 +148,53 @@ void RegScavenger::forward() {
         addRegWithSubRegs(DefRegs, Reg);
     }
   }
+}
+
+void RegScavenger::unprocess() {
+  assert(Tracking && "Cannot unprocess because we're not tracking");
+
+  MachineInstr *MI = MBBI;
+  if (!MI->isDebugValue()) {
+    determineKillsAndDefs();
+
+    // Commit the changes.
+    setUsed(KillRegs);
+    setUnused(DefRegs);
+  }
+
+  if (MBBI == MBB->begin()) {
+    MBBI = MachineBasicBlock::iterator(NULL);
+    Tracking = false;
+  } else
+    --MBBI;
+}
+
+void RegScavenger::forward() {
+  // Move ptr forward.
+  if (!Tracking) {
+    MBBI = MBB->begin();
+    Tracking = true;
+  } else {
+    assert(MBBI != MBB->end() && "Already past the end of the basic block!");
+    MBBI = llvm::next(MBBI);
+  }
+  assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+
+  MachineInstr *MI = MBBI;
+
+  for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+       IE = Scavenged.end(); I != IE; ++I) {
+    if (I->Restore != MI)
+      continue;
+
+    I->Reg = 0;
+    I->Restore = NULL;
+  }
+
+  if (MI->isDebugValue())
+    return;
+
+  determineKillsAndDefs();
 
   // Verify uses and defs.
 #ifndef NDEBUG
@@ -170,7 +203,7 @@ void RegScavenger::forward() {
     if (!MO.isReg())
       continue;
     unsigned Reg = MO.getReg();
-    if (!Reg || isReserved(Reg))
+    if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
       continue;
     if (MO.isUse()) {
       if (MO.isUndef())
@@ -360,37 +393,47 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
     return SReg;
   }
 
-  assert(ScavengedReg == 0 &&
-         "Scavenger slot is live, unable to scavenge another register!");
+  // Find an available scavenging slot.
+  unsigned SI;
+  for (SI = 0; SI < Scavenged.size(); ++SI)
+    if (Scavenged[SI].Reg == 0)
+      break;
+
+  if (SI == Scavenged.size()) {
+    // We need to scavenge a register but have no spill slot, the target
+    // must know how to do it (if not, we'll assert below).
+    Scavenged.push_back(ScavengedInfo());
+  }
 
   // Avoid infinite regress
-  ScavengedReg = SReg;
+  Scavenged[SI].Reg = SReg;
 
   // If the target knows how to save/restore the register, let it do so;
   // otherwise, use the emergency stack spill slot.
   if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
     // Spill the scavenged register before I.
-    assert(ScavengingFrameIndex >= 0 &&
+    assert(Scavenged[SI].FrameIndex >= 0 &&
            "Cannot scavenge register without an emergency spill slot!");
-    TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
+    TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
+                             RC, TRI);
     MachineBasicBlock::iterator II = prior(I);
 
     unsigned FIOperandNum = getFrameIndexOperandNum(II);
     TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
 
     // Restore the scavenged register before its use (or first terminator).
-    TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
+    TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
+                              RC, TRI);
     II = prior(UseMI);
 
     FIOperandNum = getFrameIndexOperandNum(II);
     TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
   }
 
-  ScavengeRestore = prior(UseMI);
+  Scavenged[SI].Restore = prior(UseMI);
 
   // Doing this here leads to infinite regress.
-  // ScavengedReg = SReg;
-  ScavengedRC = RC;
+  // Scavenged[SI].Reg = SReg;
 
   DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
         "\n");
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 71e7a21ef2bc..e4da6a41eead 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -262,6 +262,9 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
       if (UseOp < 0)
         Dep = SDep(SU, SDep::Artificial);
       else {
+        // Set the hasPhysRegDefs only for physreg defs that have a use within
+        // the scheduling region.
+        SU->hasPhysRegDefs = true;
         Dep = SDep(SU, SDep::Data, *Alias);
         RegUse = UseSU->getInstr();
         Dep.setMinLatency(
@@ -318,6 +321,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
   }
 
   if (!MO.isDef()) {
+    SU->hasPhysRegUses = true;
     // Either insert a new Reg2SUnits entry with an empty SUnits list, or
     // retrieve the existing SUnits list for this register's uses.
     // Push this SUnit on the use list.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 61603e18f567..44e212aef005 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5835,14 +5835,25 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                        DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
                                    N0.getOperand(1), N1));
 
+  // No FP constant should be created after legalization as Instruction
+  // Selection pass has hard time in dealing with FP constant.
+  //
+  // We don't need test this condition for transformation like following, as
+  // the DAG being transformed implies it is legal to take FP constant as
+  // operand.
+  // 
+  //  (fadd (fmul c, x), x) -> (fmul c+1, x)
+  // 
+  bool AllowNewFpConst = (Level < AfterLegalizeDAG);
+
   // If allow, fold (fadd (fneg x), x) -> 0.0
-  if (DAG.getTarget().Options.UnsafeFPMath &&
+  if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
       N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
     return DAG.getConstantFP(0.0, VT);
   }
 
     // If allow, fold (fadd x, (fneg x)) -> 0.0
-  if (DAG.getTarget().Options.UnsafeFPMath &&
+  if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
       N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
     return DAG.getConstantFP(0.0, VT);
   }
@@ -5944,7 +5955,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
       }
     }
 
-    if (N0.getOpcode() == ISD::FADD) {
+    if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
       ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
       // (fadd (fadd x, x), x) -> (fmul 3.0, x)
       if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
@@ -5954,7 +5965,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
       }
     }
 
-    if (N1.getOpcode() == ISD::FADD) {
+    if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
       ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
       // (fadd x, (fadd x, x)) -> (fmul 3.0, x)
       if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
@@ -5965,7 +5976,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
     }
 
     // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x)
-    if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+    if (AllowNewFpConst &&
+        N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
         N0.getOperand(0) == N0.getOperand(1) &&
         N1.getOperand(0) == N1.getOperand(1) &&
         N0.getOperand(0) == N1.getOperand(0)) {
@@ -6811,9 +6823,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                              MVT::Other, Chain, Tmp, N2);
         }
 
-        // visitXOR has changed XOR's operands.
-        Op0 = TheXor->getOperand(0);
-        Op1 = TheXor->getOperand(1);
+        // visitXOR has changed XOR's operands or replaced the XOR completely,
+        // bail out.
+        return SDValue(N, 0);
       }
     }
 
@@ -7699,16 +7711,82 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
   return SDValue();
 }
 
-/// Returns the base pointer and an integer offset from that object.
-static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
-  if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
-    int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
-    SDValue Base = Ptr->getOperand(0);
-    return std::make_pair(Base, Offset);
+/// Helper struct to parse and store a memory address as base + index + offset.
+/// We ignore sign extensions when it is safe to do so.
+/// The following two expressions are not equivalent. To differentiate we need
+/// to store whether there was a sign extension involved in the index
+/// computation.
+///  (load (i64 add (i64 copyfromreg %c)
+///                 (i64 signextend (add (i8 load %index)
+///                                      (i8 1))))
+/// vs
+///
+/// (load (i64 add (i64 copyfromreg %c)
+///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
+///                                         (i32 1)))))
+struct BaseIndexOffset {
+  SDValue Base;
+  SDValue Index;
+  int64_t Offset;
+  bool IsIndexSignExt;
+
+  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
+
+  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
+                  bool IsIndexSignExt) :
+    Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
+
+  bool equalBaseIndex(const BaseIndexOffset &Other) {
+    return Other.Base == Base && Other.Index == Index &&
+      Other.IsIndexSignExt == IsIndexSignExt;
   }
 
-  return std::make_pair(Ptr, 0);
-}
+  /// Parses tree in Ptr for base, index, offset addresses.
+  static BaseIndexOffset match(SDValue Ptr) {
+    bool IsIndexSignExt = false;
+
+    // Just Base or possibly anything else.
+    if (Ptr->getOpcode() != ISD::ADD)
+      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+    // Base + offset.
+    if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
+      int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+      return  BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
+                              IsIndexSignExt);
+    }
+
+    // Look at Base + Index + Offset cases.
+    SDValue Base = Ptr->getOperand(0);
+    SDValue IndexOffset = Ptr->getOperand(1);
+
+    // Skip signextends.
+    if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
+      IndexOffset = IndexOffset->getOperand(0);
+      IsIndexSignExt = true;
+    }
+
+    // Either the case of Base + Index (no offset) or something else.
+    if (IndexOffset->getOpcode() != ISD::ADD)
+      return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
+
+    // Now we have the case of Base + Index + offset.
+    SDValue Index = IndexOffset->getOperand(0);
+    SDValue Offset = IndexOffset->getOperand(1);
+
+    if (!isa<ConstantSDNode>(Offset))
+      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+    // Ignore signextends.
+    if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+      Index = Index->getOperand(0);
+      IsIndexSignExt = true;
+    } else IsIndexSignExt = false;
+
+    int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
+    return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
+  }
+};
 
 /// Holds a pointer to an LSBaseSDNode as well as information on where it
 /// is located in a sequence of memory operations connected by a chain.
@@ -7755,16 +7833,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
   if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
     return false;
 
-  // This holds the base pointer and the offset in bytes from the base pointer.
-  std::pair<SDValue, int64_t> BasePtr =
-      GetPointerBaseAndOffset(St->getBasePtr());
+  // This holds the base pointer, index, and the offset in bytes from the base
+  // pointer.
+  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
 
   // We must have a base and an offset.
-  if (!BasePtr.first.getNode())
+  if (!BasePtr.Base.getNode())
     return false;
 
   // Do not handle stores to undef base pointers.
-  if (BasePtr.first.getOpcode() == ISD::UNDEF)
+  if (BasePtr.Base.getOpcode() == ISD::UNDEF)
     return false;
 
   // Save the LoadSDNodes that we find in the chain.
@@ -7786,11 +7864,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
       break;
 
     // Find the base pointer and offset for this memory node.
-    std::pair<SDValue, int64_t> Ptr =
-      GetPointerBaseAndOffset(Index->getBasePtr());
+    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
 
     // Check that the base pointer is the same as the original one.
-    if (Ptr.first.getNode() != BasePtr.first.getNode())
+    if (!Ptr.equalBaseIndex(BasePtr))
       break;
 
     // Check that the alignment is the same.
@@ -7816,7 +7893,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
       break;
 
     // We found a potential memory operand to merge.
-    StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++));
+    StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
 
     // Find the next memory operand in the chain. If the next operand in the
     // chain is a store then move up and continue the scan with the next
@@ -7903,6 +7980,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
       EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
       if (TLI.isTypeLegal(StoreTy))
         LastLegalType = i+1;
+      // Or check whether a truncstore is legal.
+      else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+               TargetLowering::TypePromoteInteger) {
+        EVT LegalizedStoredValueTy =
+          TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+        if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
+          LastLegalType = i+1;
+      }
 
       // Find a legal type for the vector store.
       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
@@ -8013,7 +8098,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
 
   // Find acceptable loads. Loads need to have the same chain (token factor),
   // must not be zext, volatile, indexed, and they must be consecutive.
-  SDValue LdBasePtr;
+  BaseIndexOffset LdBasePtr;
   for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
     StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
     LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
@@ -8039,21 +8124,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
     if (Ld->getMemoryVT() != MemVT)
       break;
 
-    std::pair<SDValue, int64_t> LdPtr =
-    GetPointerBaseAndOffset(Ld->getBasePtr());
-
+    BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
     // If this is not the first ptr that we check.
-    if (LdBasePtr.getNode()) {
+    if (LdBasePtr.Base.getNode()) {
       // The base ptr must be the same.
-      if (LdPtr.first != LdBasePtr)
+      if (!LdPtr.equalBaseIndex(LdBasePtr))
         break;
     } else {
       // Check that all other base pointers are the same as this one.
-      LdBasePtr = LdPtr.first;
+      LdBasePtr = LdPtr;
     }
 
     // We found a potential memory operand to merge.
-    LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0));
+    LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
   }
 
   if (LoadNodes.size() < 2)
@@ -8088,6 +8171,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
     StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
     if (TLI.isTypeLegal(StoreTy))
       LastLegalIntegerType = i + 1;
+    // Or check whether a truncstore and extload is legal.
+    else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+             TargetLowering::TypePromoteInteger) {
+      EVT LegalizedStoredValueTy =
+        TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
+      if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+          TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) &&
+          TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) &&
+          TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy))
+        LastLegalIntegerType = i+1;
+    }
   }
 
   // Only use vector types if the vector type is larger than the integer type.
@@ -8978,12 +9072,32 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
   EVT NVT = N->getValueType(0);
   SDValue V = N->getOperand(0);
 
+  if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+    // Combine:
+    //    (extract_subvec (concat V1, V2, ...), i)
+    // Into:
+    //    Vi if possible
+    // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
+    if (V->getOperand(0).getValueType() != NVT)
+      return SDValue();
+    unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+    unsigned NumElems = NVT.getVectorNumElements();
+    assert((Idx % NumElems) == 0 &&
+           "IDX in concat is not a multiple of the result vector length.");
+    return V->getOperand(Idx / NumElems);
+  }
+
+  // Skip bitcasting
+  if (V->getOpcode() == ISD::BITCAST)
+    V = V.getOperand(0);
+
   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+    DebugLoc dl = N->getDebugLoc();
     // Handle only simple case where vector being inserted and vector
     // being extracted are of same type, and are half size of larger vectors.
     EVT BigVT = V->getOperand(0).getValueType();
     EVT SmallVT = V->getOperand(1).getValueType();
-    if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+    if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
       return SDValue();
 
     // Only handle cases where both indexes are constants with the same type.
@@ -8996,31 +9110,57 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
       // Combine:
       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
       // Into:
-      //    indices are equal => V1
+      //    indices are equal or bit offsets are equal => V1
       //    otherwise => (extract_subvec V1, ExtIdx)
-      if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
-        return V->getOperand(1);
-      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
-                         V->getOperand(0), N->getOperand(1));
+      if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
+          ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
+        return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
+                         DAG.getNode(ISD::BITCAST, dl,
+                                     N->getOperand(0).getValueType(),
+                                     V->getOperand(0)), N->getOperand(1));
     }
   }
 
-  if (V->getOpcode() == ISD::CONCAT_VECTORS) {
-    // Combine:
-    //    (extract_subvec (concat V1, V2, ...), i)
-    // Into:
-    //    Vi if possible
-    // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
-    if (V->getOperand(0).getValueType() != NVT)
+  return SDValue();
+}
+
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
+static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+  SmallVector<SDValue, 4> Ops;
+  EVT ConcatVT = N0.getOperand(0).getValueType();
+  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
+  unsigned NumConcats = NumElts / NumElemsPerConcat;
+
+  // Look at every vector that's inserted. We're looking for exact
+  // subvector-sized copies from a concatenated vector
+  for (unsigned I = 0; I != NumConcats; ++I) {
+    // Make sure we're dealing with a copy.
+    unsigned Begin = I * NumElemsPerConcat;
+    if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
       return SDValue();
-    unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
-    unsigned NumElems = NVT.getVectorNumElements();
-    assert((Idx % NumElems) == 0 &&
-           "IDX in concat is not a multiple of the result vector length.");
-    return V->getOperand(Idx / NumElems);
+
+    for (unsigned J = 1; J != NumElemsPerConcat; ++J) {
+      if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
+        return SDValue();
+    }
+
+    unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
+    if (FirstElt < N0.getNumOperands())
+      Ops.push_back(N0.getOperand(FirstElt));
+    else
+      Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
   }
 
-  return SDValue();
+  return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(),
+                     Ops.size());
 }
 
 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
@@ -9124,6 +9264,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     }
   }
 
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+      Level < AfterLegalizeVectorOps &&
+      (N1.getOpcode() == ISD::UNDEF ||
+      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
+    SDValue V = partitionShuffleOfConcats(N, DAG);
+
+    if (V.getNode())
+      return V;
+  }
+
   // If this shuffle node is simply a swizzle of another shuffle node,
   // and it reverses the swizzle of the previous shuffle then we can
   // optimize shuffle(shuffle(x, undef), undef) -> x.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 10e2dc61499b..288499ac6f32 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1183,6 +1183,8 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
     IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
                                               VT.getSizeInBits());
     MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+    assert (MaterialReg != 0 && "Unable to materialize imm.");
+    if (MaterialReg == 0) return 0;
   }
   return FastEmit_rr(VT, VT, Opcode,
                      Op0, Op0IsKill,
@@ -1503,3 +1505,61 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
 
   return true;
 }
+
+bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
+  assert(LI->hasOneUse() &&
+      "tryToFoldLoad expected a LoadInst with a single use");
+  // We know that the load has a single use, but don't know what it is.  If it
+  // isn't one of the folded instructions, then we can't succeed here.  Handle
+  // this by scanning the single-use users of the load until we get to FoldInst.
+  unsigned MaxUsers = 6;  // Don't scan down huge single-use chains of instrs.
+
+  const Instruction *TheUser = LI->use_back();
+  while (TheUser != FoldInst &&   // Scan up until we find FoldInst.
+         // Stay in the right block.
+         TheUser->getParent() == FoldInst->getParent() &&
+         --MaxUsers) {  // Don't scan too far.
+    // If there are multiple or no uses of this instruction, then bail out.
+    if (!TheUser->hasOneUse())
+      return false;
+
+    TheUser = TheUser->use_back();
+  }
+
+  // If we didn't find the fold instruction, then we failed to collapse the
+  // sequence.
+  if (TheUser != FoldInst)
+    return false;
+
+  // Don't try to fold volatile loads.  Target has to deal with alignment
+  // constraints.
+  if (LI->isVolatile())
+    return false;
+
+  // Figure out which vreg this is going into.  If there is no assigned vreg yet
+  // then there actually was no reference to it.  Perhaps the load is referenced
+  // by a dead instruction.
+  unsigned LoadReg = getRegForValue(LI);
+  if (LoadReg == 0)
+    return false;
+
+  // We can't fold if this vreg has no uses or more than one use.  Multiple uses
+  // may mean that the instruction got lowered to multiple MIs, or the use of
+  // the loaded value ended up being multiple operands of the result.
+  if (!MRI.hasOneUse(LoadReg))
+    return false;
+
+  MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
+  MachineInstr *User = &*RI;
+
+  // Set the insertion point properly.  Folding the load can cause generation of
+  // other random instructions (like sign extends) for addressing modes; make
+  // sure they get inserted in a logical place before the new instruction.
+  FuncInfo.InsertPt = User;
+  FuncInfo.MBB = User->getParent();
+
+  // Ask the target to try folding the load.
+  return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI);
+}
+
+
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 6a05cf897577..de217d8571ff 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -824,6 +824,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   case ISD::LOAD:       ExpandFloatRes_LOAD(N, Lo, Hi); break;
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+  case ISD::FREM:       ExpandFloatRes_FREM(N, Lo, Hi); break;
   }
 
   // If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1051,6 +1052,16 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
   GetPairElements(Call, Lo, Hi);
 }
 
+void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::REM_F32, RTLIB::REM_F64,
+                                         RTLIB::REM_F80, RTLIB::REM_F128,
+                                         RTLIB::REM_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
                                             SDValue &Lo, SDValue &Hi) {
   SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index beeb6b3b33b7..d19c13b8ff13 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -515,7 +515,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
   // Only use the result of getSetCCResultType if it is legal,
   // otherwise just use the promoted result type (NVT).
   if (!TLI.isTypeLegal(SVT))
-      SVT = NVT;
+    SVT = NVT;
 
   DebugLoc dl = N->getDebugLoc();
   assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
@@ -550,7 +550,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
   SDValue LHS = GetPromotedInteger(N->getOperand(0));
   SDValue RHS = GetPromotedInteger(N->getOperand(1));
   return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
-                    LHS.getValueType(), LHS, RHS);
+                     LHS.getValueType(), LHS, RHS);
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index e26d1656e87c..b6436bf42741 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -735,6 +735,9 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
   SDValue &OpEntry = PromotedIntegers[Op];
   assert(OpEntry.getNode() == 0 && "Node is already promoted!");
   OpEntry = Result;
+
+  // Propagate node ordering
+  DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
 }
 
 void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
@@ -746,6 +749,9 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
   SDValue &OpEntry = SoftenedFloats[Op];
   assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
   OpEntry = Result;
+
+  // Propagate node ordering
+  DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
 }
 
 void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
@@ -760,6 +766,9 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
   SDValue &OpEntry = ScalarizedVectors[Op];
   assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
   OpEntry = Result;
+
+  // Propagate node ordering
+  DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
 }
 
 void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
@@ -787,6 +796,10 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
   assert(Entry.first.getNode() == 0 && "Node already expanded");
   Entry.first = Lo;
   Entry.second = Hi;
+
+  // Propagate ordering
+  DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+  DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
 }
 
 void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
@@ -814,6 +827,10 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
   assert(Entry.first.getNode() == 0 && "Node already expanded");
   Entry.first = Lo;
   Entry.second = Hi;
+
+  // Propagate ordering
+  DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+  DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
 }
 
 void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
@@ -843,6 +860,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
   assert(Entry.first.getNode() == 0 && "Node already split");
   Entry.first = Lo;
   Entry.second = Hi;
+
+  // Propagate ordering
+  DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+  DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
 }
 
 void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
@@ -854,6 +875,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
   SDValue &OpEntry = WidenedVectors[Op];
   assert(OpEntry.getNode() == 0 && "Node already widened!");
   OpEntry = Result;
+
+  // Propagate node ordering
+  DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
 }
 
 
@@ -919,8 +943,11 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
   // Make everything that once used N's values now use those in Results instead.
   assert(Results.size() == N->getNumValues() &&
          "Custom lowering returned the wrong number of results!");
-  for (unsigned i = 0, e = Results.size(); i != e; ++i)
+  for (unsigned i = 0, e = Results.size(); i != e; ++i) {
     ReplaceValueWith(SDValue(N, i), Results[i]);
+    // Propagate node ordering
+    DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N));
+  }
   return true;
 }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 27b3cf2abc92..54ea926241cf 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -465,6 +465,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FPOW      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FPOWI     (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FREM      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FRINT     (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FSIN      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FSQRT     (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
index d2269f8accf1..7e7b8974be48 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -33,8 +33,10 @@ class SDNodeOrdering {
 public:
   SDNodeOrdering() {}
 
-  void add(const SDNode *Node, unsigned O) {
-    OrderMap[Node] = O;
+  void add(const SDNode *Node, unsigned NewOrder) {
+    unsigned &OldOrder = OrderMap[Node];
+    if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder))
+      OldOrder = NewOrder;
   }
   void remove(const SDNode *Node) {
     DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 35707e86cebf..3082ee7caae4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1917,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
   }
   case ISD::LOAD: {
     LoadSDNode *LD = cast<LoadSDNode>(Op);
-    if (ISD::isZEXTLoad(Op.getNode())) {
+    // If this is a ZEXTLoad and we are looking at the loaded value.
+    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
       EVT VT = LD->getMemoryVT();
       unsigned MemBits = VT.getScalarType().getSizeInBits();
       KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
@@ -2287,17 +2288,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
     break;
   }
 
-  // Handle LOADX separately here. EXTLOAD case will fallthrough.
-  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
-    unsigned ExtType = LD->getExtensionType();
-    switch (ExtType) {
-    default: break;
-    case ISD::SEXTLOAD:    // '17' bits known
-      Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
-      return VTBits-Tmp+1;
-    case ISD::ZEXTLOAD:    // '16' bits known
-      Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
-      return VTBits-Tmp;
+  // If we are looking at the loaded value of the SDNode.
+  if (Op.getResNo() == 0) {
+    // Handle LOADX separately here. EXTLOAD case will fallthrough.
+    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+      unsigned ExtType = LD->getExtensionType();
+      switch (ExtType) {
+        default: break;
+        case ISD::SEXTLOAD:    // '17' bits known
+          Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+          return VTBits-Tmp+1;
+        case ISD::ZEXTLOAD:    // '16' bits known
+          Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+          return VTBits-Tmp;
+      }
     }
   }
 
@@ -5248,14 +5252,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) {
   SDVTList VTs = getVTList(VT);
-  return getMachineNode(Opcode, dl, VTs, 0, 0);
+  return getMachineNode(Opcode, dl, VTs, ArrayRef<SDValue>());
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) {
   SDVTList VTs = getVTList(VT);
   SDValue Ops[] = { Op1 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5263,7 +5267,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
                              SDValue Op1, SDValue Op2) {
   SDVTList VTs = getVTList(VT);
   SDValue Ops[] = { Op1, Op2 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5271,20 +5275,20 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
                              SDValue Op1, SDValue Op2, SDValue Op3) {
   SDVTList VTs = getVTList(VT);
   SDValue Ops[] = { Op1, Op2, Op3 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> Ops) {
   SDVTList VTs = getVTList(VT);
-  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) {
   SDVTList VTs = getVTList(VT1, VT2);
-  return getMachineNode(Opcode, dl, VTs, 0, 0);
+  return getMachineNode(Opcode, dl, VTs, ArrayRef<SDValue>());
 }
 
 MachineSDNode *
@@ -5292,7 +5296,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              EVT VT1, EVT VT2, SDValue Op1) {
   SDVTList VTs = getVTList(VT1, VT2);
   SDValue Ops[] = { Op1 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5300,7 +5304,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
   SDVTList VTs = getVTList(VT1, VT2);
   SDValue Ops[] = { Op1, Op2 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5309,15 +5313,15 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              SDValue Op2, SDValue Op3) {
   SDVTList VTs = getVTList(VT1, VT2);
   SDValue Ops[] = { Op1, Op2, Op3 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              EVT VT1, EVT VT2,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> Ops) {
   SDVTList VTs = getVTList(VT1, VT2);
-  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5326,7 +5330,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              SDValue Op1, SDValue Op2) {
   SDVTList VTs = getVTList(VT1, VT2, VT3);
   SDValue Ops[] = { Op1, Op2 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5335,39 +5339,41 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              SDValue Op1, SDValue Op2, SDValue Op3) {
   SDVTList VTs = getVTList(VT1, VT2, VT3);
   SDValue Ops[] = { Op1, Op2, Op3 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              EVT VT1, EVT VT2, EVT VT3,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> Ops) {
   SDVTList VTs = getVTList(VT1, VT2, VT3);
-  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
                              EVT VT2, EVT VT3, EVT VT4,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> Ops) {
   SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
-  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              ArrayRef<EVT> ResultTys,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> Ops) {
   SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
-  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> OpsArray) {
   bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
   MachineSDNode *N;
   void *IP = 0;
+  const SDValue *Ops = OpsArray.data();
+  unsigned NumOps = OpsArray.size();
 
   if (DoCSE) {
     FoldingSetNodeID ID;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 33d100eb3a06..ce40cd6a0c9c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4914,7 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::fmuladd: {
     EVT VT = TLI.getValueType(I.getType());
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
-        TLI.isOperationLegalOrCustom(ISD::FMA, VT) &&
         TLI.isFMAFasterThanMulAndAdd(VT)){
       setValue(&I, DAG.getNode(ISD::FMA, dl,
                                getValue(I.getArgOperand(0)).getValueType(),
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c3b6276a8dc7..e21f26e91ceb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -63,12 +63,16 @@ STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
 STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
 STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
 STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
+STATISTIC(NumFastIselFailLowerArguments,
+          "Number of entry blocks where fast isel failed to lower arguments");
 
 #ifndef NDEBUG
 static cl::opt<bool>
 EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
           cl::desc("Enable extra verbose messages in the \"fast\" "
                    "instruction selector"));
+
   // Terminators
 STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
 STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
@@ -742,7 +746,7 @@ class ISelUpdater : public SelectionDAG::DAGUpdateListener {
 } // end anonymous namespace
 
 void SelectionDAGISel::DoInstructionSelection() {
-  DEBUG(errs() << "===== Instruction selection begins: BB#"
+  DEBUG(dbgs() << "===== Instruction selection begins: BB#"
         << FuncInfo->MBB->getNumber()
         << " '" << FuncInfo->MBB->getName() << "'\n");
 
@@ -785,8 +789,12 @@ void SelectionDAGISel::DoInstructionSelection() {
       if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
         continue;
       // Replace node.
-      if (ResNode)
+      if (ResNode) {
+        // Propagate ordering
+        CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node));
+
         ReplaceUses(Node, ResNode);
+      }
 
       // If after the replacement this node is not used any more,
       // remove this dead node.
@@ -797,7 +805,7 @@ void SelectionDAGISel::DoInstructionSelection() {
     CurDAG->setRoot(Dummy.getValue());
   }
 
-  DEBUG(errs() << "===== Instruction selection ends:\n");
+  DEBUG(dbgs() << "===== Instruction selection ends:\n");
 
   PostprocessISelDAG();
 }
@@ -827,84 +835,6 @@ void SelectionDAGISel::PrepareEHLandingPad() {
   if (Reg) MBB->addLiveIn(Reg);
 }
 
-/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
-/// load into the specified FoldInst.  Note that we could have a sequence where
-/// multiple LLVM IR instructions are folded into the same machineinstr.  For
-/// example we could have:
-///   A: x = load i32 *P
-///   B: y = icmp A, 42
-///   C: br y, ...
-///
-/// In this scenario, LI is "A", and FoldInst is "C".  We know about "B" (and
-/// any other folded instructions) because it is between A and C.
-///
-/// If we succeed in folding the load into the operation, return true.
-///
-bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
-                                             const Instruction *FoldInst,
-                                             FastISel *FastIS) {
-  // We know that the load has a single use, but don't know what it is.  If it
-  // isn't one of the folded instructions, then we can't succeed here.  Handle
-  // this by scanning the single-use users of the load until we get to FoldInst.
-  unsigned MaxUsers = 6;  // Don't scan down huge single-use chains of instrs.
-
-  const Instruction *TheUser = LI->use_back();
-  while (TheUser != FoldInst &&   // Scan up until we find FoldInst.
-         // Stay in the right block.
-         TheUser->getParent() == FoldInst->getParent() &&
-         --MaxUsers) {  // Don't scan too far.
-    // If there are multiple or no uses of this instruction, then bail out.
-    if (!TheUser->hasOneUse())
-      return false;
-
-    TheUser = TheUser->use_back();
-  }
-
-  // If we didn't find the fold instruction, then we failed to collapse the
-  // sequence.
-  if (TheUser != FoldInst)
-    return false;
-
-  // Don't try to fold volatile loads.  Target has to deal with alignment
-  // constraints.
-  if (LI->isVolatile()) return false;
-
-  // Figure out which vreg this is going into.  If there is no assigned vreg yet
-  // then there actually was no reference to it.  Perhaps the load is referenced
-  // by a dead instruction.
-  unsigned LoadReg = FastIS->getRegForValue(LI);
-  if (LoadReg == 0)
-    return false;
-
-  // Check to see what the uses of this vreg are.  If it has no uses, or more
-  // than one use (at the machine instr level) then we can't fold it.
-  MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
-  if (RI == RegInfo->reg_end())
-    return false;
-
-  // See if there is exactly one use of the vreg.  If there are multiple uses,
-  // then the instruction got lowered to multiple machine instructions or the
-  // use of the loaded value ended up being multiple operands of the result, in
-  // either case, we can't fold this.
-  MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
-  if (PostRI != RegInfo->reg_end())
-    return false;
-
-  assert(RI.getOperand().isUse() &&
-         "The only use of the vreg must be a use, we haven't emitted the def!");
-
-  MachineInstr *User = &*RI;
-
-  // Set the insertion point properly.  Folding the load can cause generation of
-  // other random instructions (like sign extends) for addressing modes, make
-  // sure they get inserted in a logical place before the new instruction.
-  FuncInfo->InsertPt = User;
-  FuncInfo->MBB = User->getParent();
-
-  // Ask the target to try folding the load.
-  return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
-}
-
 /// isFoldedOrDeadInstruction - Return true if the specified instruction is
 /// side-effect free and is either dead or folded into a generated instruction.
 /// Return false if it needs to be emitted.
@@ -1050,9 +980,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
       // Emit code for any incoming arguments. This must happen before
       // beginning FastISel on the entry block.
       if (LLVMBB == &Fn.getEntryBlock()) {
+        ++NumEntryBlocks;
+
         // Lower any arguments needed in this block if this is the entry block.
         if (!FastIS->LowerArguments()) {
           // Fast isel failed to lower these arguments
+          ++NumFastIselFailLowerArguments;
           if (EnableFastISelAbortArgs)
             llvm_unreachable("FastISel didn't lower all arguments");
 
@@ -1102,7 +1035,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
           }
           if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
               BeforeInst->hasOneUse() &&
-              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) {
+              FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
             // If we succeeded, don't re-select the load.
             BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
             --NumFastIselRemaining;
@@ -1174,8 +1107,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
       FastIS->recomputeInsertPt();
     } else {
       // Lower any arguments needed in this block if this is the entry block.
-      if (LLVMBB == &Fn.getEntryBlock())
+      if (LLVMBB == &Fn.getEntryBlock()) {
+        ++NumEntryBlocks;
         LowerArguments(Fn);
+      }
     }
 
     if (Begin != BI)
@@ -1767,7 +1702,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
   if (!NowDeadNodes.empty())
     CurDAG->RemoveDeadNodes(NowDeadNodes);
 
-  DEBUG(errs() << "ISEL: Match complete!\n");
+  DEBUG(dbgs() << "ISEL: Match complete!\n");
 }
 
 enum ChainResult {
@@ -2272,9 +2207,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
   SmallVector<SDNode*, 3> ChainNodesMatched;
   SmallVector<SDNode*, 3> GlueResultNodesMatched;
 
-  DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+  DEBUG(dbgs() << "ISEL: Starting pattern match on root node: ";
         NodeToMatch->dump(CurDAG);
-        errs() << '\n');
+        dbgs() << '\n');
 
   // Determine where to start the interpreter.  Normally we start at opcode #0,
   // but if the state machine starts with an OPC_SwitchOpcode, then we
@@ -2286,7 +2221,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
     // Already computed the OpcodeOffset table, just index into it.
     if (N.getOpcode() < OpcodeOffset.size())
       MatcherIndex = OpcodeOffset[N.getOpcode()];
-    DEBUG(errs() << "  Initial Opcode index to " << MatcherIndex << "\n");
+    DEBUG(dbgs() << "  Initial Opcode index to " << MatcherIndex << "\n");
 
   } else if (MatcherTable[0] == OPC_SwitchOpcode) {
     // Otherwise, the table isn't computed, but the state machine does start
@@ -2353,7 +2288,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         if (!Result)
           break;
 
-        DEBUG(errs() << "  Skipped scope entry (due to false predicate) at "
+        DEBUG(dbgs() << "  Skipped scope entry (due to false predicate) at "
                      << "index " << MatcherIndexOfPredicate
                      << ", continuing at " << FailIndex << "\n");
         ++NumDAGIselRetries;
@@ -2483,7 +2418,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       if (CaseSize == 0) break;
 
       // Otherwise, execute the case we found.
-      DEBUG(errs() << "  OpcodeSwitch from " << SwitchStart
+      DEBUG(dbgs() << "  OpcodeSwitch from " << SwitchStart
                    << " to " << MatcherIndex << "\n");
       continue;
     }
@@ -2515,7 +2450,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       if (CaseSize == 0) break;
 
       // Otherwise, execute the case we found.
-      DEBUG(errs() << "  TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+      DEBUG(dbgs() << "  TypeSwitch[" << EVT(CurNodeVT).getEVTString()
                    << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
       continue;
     }
@@ -2783,7 +2718,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         // If this is a normal EmitNode command, just create the new node and
         // add the results to the RecordedNodes list.
         Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
-                                     VTList, Ops.data(), Ops.size());
+                                     VTList, Ops);
 
         // Add all the non-glue/non-chain results to the RecordedNodes list.
         for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
@@ -2859,9 +2794,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
           ->setMemRefs(MemRefs, MemRefs + NumMemRefs);
       }
 
-      DEBUG(errs() << "  "
+      DEBUG(dbgs() << "  "
                    << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
-                   << " node: "; Res->dump(CurDAG); errs() << "\n");
+                   << " node: "; Res->dump(CurDAG); dbgs() << "\n");
 
       // If this was a MorphNodeTo then we're completely done!
       if (Opcode == OPC_MorphNodeTo) {
@@ -2936,7 +2871,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
     // If the code reached this point, then the match failed.  See if there is
     // another child to try in the current 'Scope', otherwise pop it until we
     // find a case to check.
-    DEBUG(errs() << "  Match failed at index " << CurrentOpcodeIndex << "\n");
+    DEBUG(dbgs() << "  Match failed at index " << CurrentOpcodeIndex << "\n");
     ++NumDAGIselRetries;
     while (1) {
       if (MatchScopes.empty()) {
@@ -2956,7 +2891,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
       MatcherIndex = LastScope.FailIndex;
 
-      DEBUG(errs() << "  Continuing at " << MatcherIndex << "\n");
+      DEBUG(dbgs() << "  Continuing at " << MatcherIndex << "\n");
 
       InputChain = LastScope.InputChain;
       InputGlue = LastScope.InputGlue;
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 320128a999ea..c5bbba3ffccc 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -29,6 +29,7 @@
 
 #define DEBUG_TYPE "spillplacement"
 #include "SpillPlacement.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/EdgeBundles.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index ec44b8cb5959..a789a2596dbf 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -67,14 +67,14 @@ DisableColoring("no-stack-coloring",
 /// code. If this flag is enabled, we try to save the user.
 static cl::opt<bool>
 ProtectFromEscapedAllocas("protect-from-escaped-allocas",
-        cl::init(false), cl::Hidden,
-        cl::desc("Do not optimize lifetime zones that are broken"));
+                          cl::init(false), cl::Hidden,
+                          cl::desc("Do not optimize lifetime zones that "
+                                   "are broken"));
 
 STATISTIC(NumMarkerSeen,  "Number of lifetime markers found.");
 STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
-STATISTIC(EscapedAllocas,
-          "Number of allocas that escaped the lifetime region");
+STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
 
 //===----------------------------------------------------------------------===//
 //                           StackColoring Pass
@@ -577,7 +577,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
           SlotIndex Index = Indexes->getInstructionIndex(I);
           LiveInterval *Interval = Intervals[FromSlot];
           assert(Interval->find(Index) != Interval->end() &&
-               "Found instruction usage outside of live range.");
+                 "Found instruction usage outside of live range.");
         }
 #endif
 
@@ -741,9 +741,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
   std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
                    SlotSizeSorter(MFI));
 
-  bool Chanded = true;
-  while (Chanded) {
-    Chanded = false;
+  bool Changed = true;
+  while (Changed) {
+    Changed = false;
     for (unsigned I = 0; I < NumSlots; ++I) {
       if (SortedSlots[I] == -1)
         continue;
@@ -760,7 +760,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
 
         // Merge disjoint slots.
         if (!First->overlaps(*Second)) {
-          Chanded = true;
+          Changed = true;
           First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
           SlotRemap[SecondSlot] = FirstSlot;
           SortedSlots[J] = -1;
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 3c346766ad6d..82bb37ef9787 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -620,12 +620,56 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
 TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
                                        const TargetLoweringObjectFile *tlof)
   : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
+  initActions();
+
+  // Perform these initializations only once.
+  IsLittleEndian = TD->isLittleEndian();
+  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
+  MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
+  MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
+    = MaxStoresPerMemmoveOptSize = 4;
+  UseUnderscoreSetJmp = false;
+  UseUnderscoreLongJmp = false;
+  SelectIsExpensive = false;
+  IntDivIsCheap = false;
+  Pow2DivIsCheap = false;
+  JumpIsExpensive = false;
+  PredictableSelectIsExpensive = false;
+  StackPointerRegisterToSaveRestore = 0;
+  ExceptionPointerRegister = 0;
+  ExceptionSelectorRegister = 0;
+  BooleanContents = UndefinedBooleanContent;
+  BooleanVectorContents = UndefinedBooleanContent;
+  SchedPreferenceInfo = Sched::ILP;
+  JumpBufSize = 0;
+  JumpBufAlignment = 0;
+  MinFunctionAlignment = 0;
+  PrefFunctionAlignment = 0;
+  PrefLoopAlignment = 0;
+  MinStackArgumentAlignment = 1;
+  ShouldFoldAtomicFences = false;
+  InsertFencesForAtomic = false;
+  SupportJumpTables = true;
+  MinimumJumpTableEntries = 4;
+
+  InitLibcallNames(LibcallRoutineNames, TM);
+  InitCmpLibcallCCs(CmpLibcallCCs);
+  InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+TargetLoweringBase::~TargetLoweringBase() {
+  delete &TLOF;
+}
+
+void TargetLoweringBase::initActions() {
   // All operations default to being supported.
   memset(OpActions, 0, sizeof(OpActions));
   memset(LoadExtActions, 0, sizeof(LoadExtActions));
   memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
   memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
   memset(CondCodeActions, 0, sizeof(CondCodeActions));
+  memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+  memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
 
   // Set default actions for various operations.
   for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
@@ -702,46 +746,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
   // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
   //
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
-
-  IsLittleEndian = TD->isLittleEndian();
-  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
-  memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
-  memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
-  MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
-  MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
-    = MaxStoresPerMemmoveOptSize = 4;
-  BenefitFromCodePlacementOpt = false;
-  UseUnderscoreSetJmp = false;
-  UseUnderscoreLongJmp = false;
-  SelectIsExpensive = false;
-  IntDivIsCheap = false;
-  Pow2DivIsCheap = false;
-  JumpIsExpensive = false;
-  PredictableSelectIsExpensive = false;
-  StackPointerRegisterToSaveRestore = 0;
-  ExceptionPointerRegister = 0;
-  ExceptionSelectorRegister = 0;
-  BooleanContents = UndefinedBooleanContent;
-  BooleanVectorContents = UndefinedBooleanContent;
-  SchedPreferenceInfo = Sched::ILP;
-  JumpBufSize = 0;
-  JumpBufAlignment = 0;
-  MinFunctionAlignment = 0;
-  PrefFunctionAlignment = 0;
-  PrefLoopAlignment = 0;
-  MinStackArgumentAlignment = 1;
-  ShouldFoldAtomicFences = false;
-  InsertFencesForAtomic = false;
-  SupportJumpTables = true;
-  MinimumJumpTableEntries = 4;
-
-  InitLibcallNames(LibcallRoutineNames, TM);
-  InitCmpLibcallCCs(CmpLibcallCCs);
-  InitLibcallCallingConvs(LibcallCallingConvs);
-}
-
-TargetLoweringBase::~TargetLoweringBase() {
-  delete &TLOF;
 }
 
 MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index 0f59d0169e18..435a5e7e0bf9 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -50,3 +50,29 @@ StringRef TargetOptions::getTrapFunctionName() const {
   return TrapFuncName;
 }
 
+bool TargetOptions::operator==(const TargetOptions &TO) {
+#define ARE_EQUAL(X) X == TO.X
+  return
+    ARE_EQUAL(UnsafeFPMath) &&
+    ARE_EQUAL(NoInfsFPMath) &&
+    ARE_EQUAL(NoNaNsFPMath) &&
+    ARE_EQUAL(HonorSignDependentRoundingFPMathOption) &&
+    ARE_EQUAL(UseSoftFloat) &&
+    ARE_EQUAL(NoZerosInBSS) &&
+    ARE_EQUAL(JITExceptionHandling) &&
+    ARE_EQUAL(JITEmitDebugInfo) &&
+    ARE_EQUAL(JITEmitDebugInfoToDisk) &&
+    ARE_EQUAL(GuaranteedTailCallOpt) &&
+    ARE_EQUAL(DisableTailCalls) &&
+    ARE_EQUAL(StackAlignmentOverride) &&
+    ARE_EQUAL(RealignStack) &&
+    ARE_EQUAL(SSPBufferSize) &&
+    ARE_EQUAL(EnableFastISel) &&
+    ARE_EQUAL(PositionIndependentExecutable) &&
+    ARE_EQUAL(EnableSegmentedStacks) &&
+    ARE_EQUAL(UseInitArray) &&
+    ARE_EQUAL(TrapFuncName) &&
+    ARE_EQUAL(FloatABIType) &&
+    ARE_EQUAL(AllowFPOpFusion);
+#undef ARE_EQUAL
+}
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 783bfa1c1a18..1bf14dbcef2c 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -128,6 +128,8 @@ resolveSchedClass(const MachineInstr *MI) const {
   // Get the definition's scheduling class descriptor from this machine model.
   unsigned SchedClass = MI->getDesc().getSchedClass();
   const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+  if (!SCDesc->isValid())
+    return SCDesc;
 
 #ifndef NDEBUG
   unsigned NIter = 0;
diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp
index e3e4ccd7d9e1..4f0eed4940b4 100644
--- a/lib/DebugInfo/DWARFCompileUnit.cpp
+++ b/lib/DebugInfo/DWARFCompileUnit.cpp
@@ -9,7 +9,7 @@
 
 #include "DWARFCompileUnit.h"
 #include "DWARFContext.h"
-#include "DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARFFormValue.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
@@ -165,7 +165,7 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) {
   // we were told to parse
 
   const uint8_t *fixed_form_sizes =
-    DWARFFormValue::getFixedFormSizesForAddressSize(getAddressByteSize());
+    DWARFFormValue::getFixedFormSizes(getAddressByteSize(), getVersion());
 
   while (offset < next_cu_offset &&
          die.extractFast(this, fixed_form_sizes, &offset)) {
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index 9e19310a99c0..74a8c36d7aba 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -9,6 +9,7 @@
 
 #include "DWARFContext.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/Path.h"
@@ -495,49 +496,39 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
     i->getContents(data);
 
     name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
-    if (name == "debug_info")
-      InfoSection = data;
-    else if (name == "debug_abbrev")
-      AbbrevSection = data;
-    else if (name == "debug_line")
-      LineSection = data;
-    else if (name == "debug_aranges")
-      ARangeSection = data;
-    else if (name == "debug_frame")
-      DebugFrameSection = data;
-    else if (name == "debug_str")
-      StringSection = data;
-    else if (name == "debug_ranges") {
+
+    StringRef *Section = StringSwitch<StringRef*>(name)
+        .Case("debug_info", &InfoSection)
+        .Case("debug_abbrev", &AbbrevSection)
+        .Case("debug_line", &LineSection)
+        .Case("debug_aranges", &ARangeSection)
+        .Case("debug_frame", &DebugFrameSection)
+        .Case("debug_str", &StringSection)
+        .Case("debug_ranges", &RangeSection)
+        .Case("debug_pubnames", &PubNamesSection)
+        .Case("debug_info.dwo", &InfoDWOSection)
+        .Case("debug_abbrev.dwo", &AbbrevDWOSection)
+        .Case("debug_str.dwo", &StringDWOSection)
+        .Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
+        .Case("debug_addr", &AddrSection)
+        // Any more debug info sections go here.
+        .Default(0);
+    if (!Section)
+      continue;
+    *Section = data;
+    if (name == "debug_ranges") {
       // FIXME: Use the other dwo range section when we emit it.
       RangeDWOSection = data;
-      RangeSection = data;
     }
-    else if (name == "debug_pubnames")
-      PubNamesSection = data;
-    else if (name == "debug_info.dwo")
-      InfoDWOSection = data;
-    else if (name == "debug_abbrev.dwo")
-      AbbrevDWOSection = data;
-    else if (name == "debug_str.dwo")
-      StringDWOSection = data;
-    else if (name == "debug_str_offsets.dwo")
-      StringOffsetDWOSection = data;
-    else if (name == "debug_addr")
-      AddrSection = data;
-    // Any more debug info sections go here.
-    else
-      continue;
 
     // TODO: Add support for relocations in other sections as needed.
     // Record relocations for the debug_info and debug_line sections.
-    RelocAddrMap *Map;
-    if (name == "debug_info")
-      Map = &InfoRelocMap;
-    else if (name == "debug_info.dwo")
-      Map = &InfoDWORelocMap;
-    else if (name == "debug_line")
-      Map = &LineRelocMap;
-    else
+    RelocAddrMap *Map = StringSwitch<RelocAddrMap*>(name)
+        .Case("debug_info", &InfoRelocMap)
+        .Case("debug_info.dwo", &InfoDWORelocMap)
+        .Case("debug_line", &LineRelocMap)
+        .Default(0);
+    if (!Map)
       continue;
 
     if (i->begin_relocations() != i->end_relocations()) {
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp
index 2efbfd1f92fb..7dff9ff49a62 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.cpp
+++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp
@@ -16,7 +16,7 @@ using namespace llvm;
 
 void DWARFDebugArangeSet::clear() {
   Offset = -1U;
-  std::memset(&Header, 0, sizeof(Header));
+  std::memset(&HeaderData, 0, sizeof(Header));
   ArangeDescriptors.clear();
 }
 
@@ -66,15 +66,15 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
     // descriptor on the target system. This header is followed by a series
     // of tuples. Each tuple consists of an address and a length, each in
     // the size appropriate for an address on the target architecture.
-    Header.Length = data.getU32(offset_ptr);
-    Header.Version = data.getU16(offset_ptr);
-    Header.CuOffset = data.getU32(offset_ptr);
-    Header.AddrSize = data.getU8(offset_ptr);
-    Header.SegSize = data.getU8(offset_ptr);
+    HeaderData.Length = data.getU32(offset_ptr);
+    HeaderData.Version = data.getU16(offset_ptr);
+    HeaderData.CuOffset = data.getU32(offset_ptr);
+    HeaderData.AddrSize = data.getU8(offset_ptr);
+    HeaderData.SegSize = data.getU8(offset_ptr);
 
     // Perform basic validation of the header fields.
-    if (!data.isValidOffsetForDataOfSize(Offset, Header.Length) ||
-        (Header.AddrSize != 4 && Header.AddrSize != 8)) {
+    if (!data.isValidOffsetForDataOfSize(Offset, HeaderData.Length) ||
+        (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)) {
       clear();
       return false;
     }
@@ -84,7 +84,7 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
     // size of an address). The header is padded, if necessary, to the
     // appropriate boundary.
     const uint32_t header_size = *offset_ptr - Offset;
-    const uint32_t tuple_size = Header.AddrSize * 2;
+    const uint32_t tuple_size = HeaderData.AddrSize * 2;
     uint32_t first_tuple_offset = 0;
     while (first_tuple_offset < header_size)
       first_tuple_offset += tuple_size;
@@ -94,11 +94,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
     Descriptor arangeDescriptor;
 
     assert(sizeof(arangeDescriptor.Address) == sizeof(arangeDescriptor.Length));
-    assert(sizeof(arangeDescriptor.Address) >= Header.AddrSize);
+    assert(sizeof(arangeDescriptor.Address) >= HeaderData.AddrSize);
 
     while (data.isValidOffset(*offset_ptr)) {
-      arangeDescriptor.Address = data.getUnsigned(offset_ptr, Header.AddrSize);
-      arangeDescriptor.Length = data.getUnsigned(offset_ptr, Header.AddrSize);
+      arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
+      arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
 
       // Each set of tuples is terminated by a 0 for the address and 0
       // for the length.
@@ -115,11 +115,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
 
 void DWARFDebugArangeSet::dump(raw_ostream &OS) const {
   OS << format("Address Range Header: length = 0x%8.8x, version = 0x%4.4x, ",
-               Header.Length, Header.Version)
+               HeaderData.Length, HeaderData.Version)
      << format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n",
-               Header.CuOffset, Header.AddrSize, Header.SegSize);
+               HeaderData.CuOffset, HeaderData.AddrSize, HeaderData.SegSize);
 
-  const uint32_t hex_width = Header.AddrSize * 2;
+  const uint32_t hex_width = HeaderData.AddrSize * 2;
   for (DescriptorConstIter pos = ArangeDescriptors.begin(),
        end = ArangeDescriptors.end(); pos != end; ++pos)
     OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address)
@@ -145,7 +145,7 @@ uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const {
     std::find_if(ArangeDescriptors.begin(), end, // Range
                  DescriptorContainsAddress(address)); // Predicate
   if (pos != end)
-    return Header.CuOffset;
+    return HeaderData.CuOffset;
 
   return -1U;
 }
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h
index 9a2a6d0f0037..d76867615aa1 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.h
+++ b/lib/DebugInfo/DWARFDebugArangeSet.h
@@ -48,7 +48,7 @@ class DWARFDebugArangeSet {
   typedef DescriptorColl::const_iterator DescriptorConstIter;
 
   uint32_t Offset;
-  Header Header;
+  Header HeaderData;
   DescriptorColl ArangeDescriptors;
 
 public:
@@ -58,11 +58,11 @@ class DWARFDebugArangeSet {
   bool extract(DataExtractor data, uint32_t *offset_ptr);
   void dump(raw_ostream &OS) const;
 
-  uint32_t getCompileUnitDIEOffset() const { return Header.CuOffset; }
-  uint32_t getOffsetOfNextEntry() const { return Offset + Header.Length + 4; }
+  uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; }
+  uint32_t getOffsetOfNextEntry() const { return Offset + HeaderData.Length + 4; }
   uint32_t findAddress(uint64_t address) const;
   uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); }
-  const struct Header &getHeader() const { return Header; }
+  const struct Header &getHeader() const { return HeaderData; }
   const Descriptor *getDescriptor(uint32_t i) const {
     if (i < ArangeDescriptors.size())
       return &ArangeDescriptors[i];
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
index b077eb5e38d8..f79862d606f5 100644
--- a/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -186,7 +186,7 @@ uint32_t DWARFDebugAranges::findAddress(uint64_t address) const {
     Range range(address);
     RangeCollIterator begin = Aranges.begin();
     RangeCollIterator end = Aranges.end();
-    RangeCollIterator pos = lower_bound(begin, end, range, RangeLessThan);
+    RangeCollIterator pos = std::lower_bound(begin, end, range, RangeLessThan);
 
     if (pos != end && pos->LoPC <= address && address < pos->HiPC()) {
       return pos->Offset;
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
index 02b15d69043f..10be7b4cbb82 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -11,7 +11,7 @@
 #include "DWARFCompileUnit.h"
 #include "DWARFContext.h"
 #include "DWARFDebugAbbrev.h"
-#include "DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARFFormValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
@@ -94,279 +94,87 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
   OS << ")\n";
 }
 
-bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
-                                             const uint8_t *fixed_form_sizes,
-                                             uint32_t *offset_ptr) {
-  Offset = *offset_ptr;
-
-  DataExtractor debug_info_data = cu->getDebugInfoExtractor();
-  uint64_t abbrCode = debug_info_data.getULEB128(offset_ptr);
-
-  assert(fixed_form_sizes); // For best performance this should be specified!
-
-  if (abbrCode) {
-    uint32_t offset = *offset_ptr;
-
-    AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode);
-
-    // Skip all data in the .debug_info for the attributes
-    const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
-    uint32_t i;
-    uint16_t form;
-    for (i=0; i<numAttributes; ++i) {
-
-      form = AbbrevDecl->getFormByIndex(i);
-
-      // FIXME: Currently we're checking if this is less than the last
-      // entry in the fixed_form_sizes table, but this should be changed
-      // to use dynamic dispatch.
-      const uint8_t fixed_skip_size = (form < DW_FORM_ref_sig8) ?
-                                       fixed_form_sizes[form] : 0;
-      if (fixed_skip_size)
-        offset += fixed_skip_size;
-      else {
-        bool form_is_indirect = false;
-        do {
-          form_is_indirect = false;
-          uint32_t form_size = 0;
-          switch (form) {
-          // Blocks if inlined data that have a length field and the data bytes
-          // inlined in the .debug_info.
-          case DW_FORM_exprloc:
-          case DW_FORM_block:
-            form_size = debug_info_data.getULEB128(&offset);
-            break;
-          case DW_FORM_block1:
-            form_size = debug_info_data.getU8(&offset);
-            break;
-          case DW_FORM_block2:
-            form_size = debug_info_data.getU16(&offset);
-            break;
-          case DW_FORM_block4:
-            form_size = debug_info_data.getU32(&offset);
-            break;
-
-          // Inlined NULL terminated C-strings
-          case DW_FORM_string:
-            debug_info_data.getCStr(&offset);
-            break;
-
-          // Compile unit address sized values
-          case DW_FORM_addr:
-          case DW_FORM_ref_addr:
-            form_size = cu->getAddressByteSize();
-            break;
-
-          // 0 sized form.
-          case DW_FORM_flag_present:
-            form_size = 0;
-            break;
-
-          // 1 byte values
-          case DW_FORM_data1:
-          case DW_FORM_flag:
-          case DW_FORM_ref1:
-            form_size = 1;
-            break;
-
-          // 2 byte values
-          case DW_FORM_data2:
-          case DW_FORM_ref2:
-            form_size = 2;
-            break;
-
-          // 4 byte values
-          case DW_FORM_strp:
-          case DW_FORM_data4:
-          case DW_FORM_ref4:
-            form_size = 4;
-            break;
-
-          // 8 byte values
-          case DW_FORM_data8:
-          case DW_FORM_ref8:
-          case DW_FORM_ref_sig8:
-            form_size = 8;
-            break;
-
-          // signed or unsigned LEB 128 values
-          case DW_FORM_sdata:
-          case DW_FORM_udata:
-          case DW_FORM_ref_udata:
-          case DW_FORM_GNU_str_index:
-          case DW_FORM_GNU_addr_index:
-            debug_info_data.getULEB128(&offset);
-            break;
-
-          case DW_FORM_indirect:
-            form_is_indirect = true;
-            form = debug_info_data.getULEB128(&offset);
-            break;
-
-            // FIXME: 64-bit for DWARF64
-          case DW_FORM_sec_offset:
-            debug_info_data.getU32(offset_ptr);
-            break;
-
-          default:
-            *offset_ptr = Offset;
-            return false;
-          }
-          offset += form_size;
-        } while (form_is_indirect);
-      }
-    }
-    *offset_ptr = offset;
-    return true;
-  } else {
+bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *CU,
+                                             const uint8_t *FixedFormSizes,
+                                             uint32_t *OffsetPtr) {
+  Offset = *OffsetPtr;
+  DataExtractor DebugInfoData = CU->getDebugInfoExtractor();
+  uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
+  if (0 == AbbrCode) {
+    // NULL debug tag entry.
     AbbrevDecl = NULL;
-    return true; // NULL debug tag entry
+    return true;
+  }
+  AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
+  assert(AbbrevDecl);
+  assert(FixedFormSizes); // For best performance this should be specified!
+
+  // Skip all data in the .debug_info for the attributes
+  for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) {
+    uint16_t Form = AbbrevDecl->getFormByIndex(i);
+
+    // FIXME: Currently we're checking if this is less than the last
+    // entry in the fixed_form_sizes table, but this should be changed
+    // to use dynamic dispatch.
+    uint8_t FixedFormSize =
+        (Form < DW_FORM_ref_sig8) ? FixedFormSizes[Form] : 0;
+    if (FixedFormSize)
+      *OffsetPtr += FixedFormSize;
+    else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr,
+                                        CU)) {
+      // Restore the original offset.
+      *OffsetPtr = Offset;
+      return false;
+    }
   }
+  return true;
 }
 
 bool
-DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu,
-                                    uint32_t *offset_ptr) {
-  DataExtractor debug_info_data = cu->getDebugInfoExtractor();
-  const uint32_t cu_end_offset = cu->getNextCompileUnitOffset();
-  const uint8_t cu_addr_size = cu->getAddressByteSize();
-  uint32_t offset = *offset_ptr;
-  if ((offset < cu_end_offset) && debug_info_data.isValidOffset(offset)) {
-    Offset = offset;
-
-    uint64_t abbrCode = debug_info_data.getULEB128(&offset);
-
-    if (abbrCode) {
-      AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode);
-
-      if (AbbrevDecl) {
-        uint16_t tag = AbbrevDecl->getTag();
-
-        bool isCompileUnitTag = tag == DW_TAG_compile_unit;
-        if(cu && isCompileUnitTag)
-          const_cast<DWARFCompileUnit*>(cu)->setBaseAddress(0);
-
-        // Skip all data in the .debug_info for the attributes
-        const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
-        for (uint32_t i = 0; i != numAttributes; ++i) {
-          uint16_t attr = AbbrevDecl->getAttrByIndex(i);
-          uint16_t form = AbbrevDecl->getFormByIndex(i);
-
-          if (isCompileUnitTag &&
-              ((attr == DW_AT_entry_pc) || (attr == DW_AT_low_pc))) {
-            DWARFFormValue form_value(form);
-            if (form_value.extractValue(debug_info_data, &offset, cu)) {
-              if (attr == DW_AT_low_pc || attr == DW_AT_entry_pc)
-                const_cast<DWARFCompileUnit*>(cu)
-                  ->setBaseAddress(form_value.getUnsigned());
-            }
-          } else {
-            bool form_is_indirect = false;
-            do {
-              form_is_indirect = false;
-              register uint32_t form_size = 0;
-              switch (form) {
-              // Blocks if inlined data that have a length field and the data
-              // bytes // inlined in the .debug_info
-              case DW_FORM_exprloc:
-              case DW_FORM_block:
-                form_size = debug_info_data.getULEB128(&offset);
-                break;
-              case DW_FORM_block1:
-                form_size = debug_info_data.getU8(&offset);
-                break;
-              case DW_FORM_block2:
-                form_size = debug_info_data.getU16(&offset);
-                break;
-              case DW_FORM_block4:
-                form_size = debug_info_data.getU32(&offset);
-                break;
-
-              // Inlined NULL terminated C-strings
-              case DW_FORM_string:
-                debug_info_data.getCStr(&offset);
-                break;
-
-              // Compile unit address sized values
-              case DW_FORM_addr:
-              case DW_FORM_ref_addr:
-                form_size = cu_addr_size;
-                break;
-
-              // 0 byte value
-              case DW_FORM_flag_present:
-                form_size = 0;
-                break;
-
-              // 1 byte values
-              case DW_FORM_data1:
-              case DW_FORM_flag:
-              case DW_FORM_ref1:
-                form_size = 1;
-                break;
-
-              // 2 byte values
-              case DW_FORM_data2:
-              case DW_FORM_ref2:
-                form_size = 2;
-                break;
-
-                // 4 byte values
-              case DW_FORM_strp:
-                form_size = 4;
-                break;
-
-              case DW_FORM_data4:
-              case DW_FORM_ref4:
-                form_size = 4;
-                break;
-
-              // 8 byte values
-              case DW_FORM_data8:
-              case DW_FORM_ref8:
-              case DW_FORM_ref_sig8:
-                form_size = 8;
-                break;
-
-              // signed or unsigned LEB 128 values
-              case DW_FORM_sdata:
-              case DW_FORM_udata:
-              case DW_FORM_ref_udata:
-              case DW_FORM_GNU_str_index:
-              case DW_FORM_GNU_addr_index:
-                debug_info_data.getULEB128(&offset);
-                break;
-
-              case DW_FORM_indirect:
-                form = debug_info_data.getULEB128(&offset);
-                form_is_indirect = true;
-                break;
-
-                // FIXME: 64-bit for DWARF64.
-              case DW_FORM_sec_offset:
-                debug_info_data.getU32(offset_ptr);
-                break;
-
-              default:
-                *offset_ptr = offset;
-                return false;
-              }
-
-              offset += form_size;
-            } while (form_is_indirect);
-          }
-        }
-        *offset_ptr = offset;
-        return true;
+DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *CU,
+                                    uint32_t *OffsetPtr) {
+  DataExtractor DebugInfoData = CU->getDebugInfoExtractor();
+  const uint32_t CUEndOffset = CU->getNextCompileUnitOffset();
+  Offset = *OffsetPtr;
+  if ((Offset >= CUEndOffset) || !DebugInfoData.isValidOffset(Offset))
+    return false;
+  uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
+  if (0 == AbbrCode) {
+    // NULL debug tag entry.
+    AbbrevDecl = NULL;
+    return true;
+  }
+  AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
+  if (0 == AbbrevDecl) {
+    // Restore the original offset.
+    *OffsetPtr = Offset;
+    return false;
+  }
+  bool IsCompileUnitTag = (AbbrevDecl->getTag() == DW_TAG_compile_unit);
+  if (IsCompileUnitTag)
+    const_cast<DWARFCompileUnit*>(CU)->setBaseAddress(0);
+
+  // Skip all data in the .debug_info for the attributes
+  for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) {
+    uint16_t Attr = AbbrevDecl->getAttrByIndex(i);
+    uint16_t Form = AbbrevDecl->getFormByIndex(i);
+
+    if (IsCompileUnitTag &&
+        ((Attr == DW_AT_entry_pc) || (Attr == DW_AT_low_pc))) {
+      DWARFFormValue FormValue(Form);
+      if (FormValue.extractValue(DebugInfoData, OffsetPtr, CU)) {
+        if (Attr == DW_AT_low_pc || Attr == DW_AT_entry_pc)
+          const_cast<DWARFCompileUnit*>(CU)
+            ->setBaseAddress(FormValue.getUnsigned());
       }
-    } else {
-      AbbrevDecl = NULL;
-      *offset_ptr = offset;
-      return true;    // NULL debug tag entry
+    } else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr,
+                                          CU)) {
+      // Restore the original offset.
+      *OffsetPtr = Offset;
+      return false;
     }
   }
-
-  return false;
+  return true;
 }
 
 bool DWARFDebugInfoEntryMinimal::isSubprogramDIE() const {
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h
index 9c1b2be0a71f..9003591cbe10 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.h
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.h
@@ -45,12 +45,17 @@ class DWARFDebugInfoEntryMinimal {
                      uint32_t *offset_ptr, uint16_t attr, uint16_t form,
                      unsigned indent = 0) const;
 
-  bool extractFast(const DWARFCompileUnit *cu, const uint8_t *fixed_form_sizes,
-                   uint32_t *offset_ptr);
+  /// Extracts a debug info entry, which is a child of a given compile unit,
+  /// starting at a given offset. If DIE can't be extracted, returns false and
+  /// doesn't change OffsetPtr.
+  bool extractFast(const DWARFCompileUnit *CU, const uint8_t *FixedFormSizes,
+                   uint32_t *OffsetPtr);
 
   /// Extract a debug info entry for a given compile unit from the
   /// .debug_info and .debug_abbrev data starting at the given offset.
-  bool extract(const DWARFCompileUnit *cu, uint32_t *offset_ptr);
+  /// If compile unit can't be parsed, returns false and doesn't change
+  /// OffsetPtr.
+  bool extract(const DWARFCompileUnit *CU, uint32_t *OffsetPtr);
 
   uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; }
   bool isNULL() const { return AbbrevDecl == 0; }
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
index 9f807aac5fd4..02498733d55f 100644
--- a/lib/DebugInfo/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARFFormValue.h"
 #include "DWARFCompileUnit.h"
 #include "DWARFContext.h"
 #include "llvm/Support/Debug.h"
@@ -18,39 +18,16 @@
 using namespace llvm;
 using namespace dwarf;
 
-static const uint8_t form_sizes_addr4[] = {
-  0, // 0x00 unused
-  4, // 0x01 DW_FORM_addr
-  0, // 0x02 unused
-  0, // 0x03 DW_FORM_block2
-  0, // 0x04 DW_FORM_block4
-  2, // 0x05 DW_FORM_data2
-  4, // 0x06 DW_FORM_data4
-  8, // 0x07 DW_FORM_data8
-  0, // 0x08 DW_FORM_string
-  0, // 0x09 DW_FORM_block
-  0, // 0x0a DW_FORM_block1
-  1, // 0x0b DW_FORM_data1
-  1, // 0x0c DW_FORM_flag
-  0, // 0x0d DW_FORM_sdata
-  4, // 0x0e DW_FORM_strp
-  0, // 0x0f DW_FORM_udata
-  4, // 0x10 DW_FORM_ref_addr
-  1, // 0x11 DW_FORM_ref1
-  2, // 0x12 DW_FORM_ref2
-  4, // 0x13 DW_FORM_ref4
-  8, // 0x14 DW_FORM_ref8
-  0, // 0x15 DW_FORM_ref_udata
-  0, // 0x16 DW_FORM_indirect
-  4, // 0x17 DW_FORM_sec_offset
-  0, // 0x18 DW_FORM_exprloc
-  0, // 0x19 DW_FORM_flag_present
-  8, // 0x20 DW_FORM_ref_sig8
+namespace {
+template <uint8_t AddrSize, uint8_t RefAddrSize> struct FixedFormSizes {
+  static const uint8_t sizes[];
 };
+}
 
-static const uint8_t form_sizes_addr8[] = {
+template <uint8_t AddrSize, uint8_t RefAddrSize>
+const uint8_t FixedFormSizes<AddrSize, RefAddrSize>::sizes[] = {
   0, // 0x00 unused
-  8, // 0x01 DW_FORM_addr
+  AddrSize, // 0x01 DW_FORM_addr
   0, // 0x02 unused
   0, // 0x03 DW_FORM_block2
   0, // 0x04 DW_FORM_block4
@@ -65,7 +42,7 @@ static const uint8_t form_sizes_addr8[] = {
   0, // 0x0d DW_FORM_sdata
   4, // 0x0e DW_FORM_strp
   0, // 0x0f DW_FORM_udata
-  8, // 0x10 DW_FORM_ref_addr
+  RefAddrSize, // 0x10 DW_FORM_ref_addr
   1, // 0x11 DW_FORM_ref1
   2, // 0x12 DW_FORM_ref2
   4, // 0x13 DW_FORM_ref4
@@ -78,13 +55,23 @@ static const uint8_t form_sizes_addr8[] = {
   8, // 0x20 DW_FORM_ref_sig8
 };
 
+static uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) {
+  // FIXME: Support DWARF64.
+  return (Version == 2) ? AddrSize : 4;
+}
+
 const uint8_t *
-DWARFFormValue::getFixedFormSizesForAddressSize(uint8_t addr_size) {
-  switch (addr_size) {
-  case 4: return form_sizes_addr4;
-  case 8: return form_sizes_addr8;
-  }
-  return NULL;
+DWARFFormValue::getFixedFormSizes(uint8_t AddrSize, uint16_t Version) {
+  uint8_t RefAddrSize = getRefAddrSize(AddrSize, Version);
+  if (AddrSize == 4 && RefAddrSize == 4)
+    return FixedFormSizes<4, 4>::sizes;
+  if (AddrSize == 4 && RefAddrSize == 8)
+    return FixedFormSizes<4, 8>::sizes;
+  if (AddrSize == 8 && RefAddrSize == 4)
+    return FixedFormSizes<8, 4>::sizes;
+  if (AddrSize == 8 && RefAddrSize == 8)
+    return FixedFormSizes<8, 8>::sizes;
+  return 0;
 }
 
 bool
@@ -100,14 +87,16 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
     switch (Form) {
     case DW_FORM_addr:
     case DW_FORM_ref_addr: {
-      RelocAddrMap::const_iterator AI
-        = cu->getRelocMap()->find(*offset_ptr);
+      uint16_t AddrSize =
+          (Form == DW_FORM_addr)
+              ? cu->getAddressByteSize()
+              : getRefAddrSize(cu->getAddressByteSize(), cu->getVersion());
+      RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr);
       if (AI != cu->getRelocMap()->end()) {
         const std::pair<uint8_t, int64_t> &R = AI->second;
-        Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()) +
-                     R.second;
+        Value.uval = data.getUnsigned(offset_ptr, AddrSize) + R.second;
       } else
-        Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize());
+        Value.uval = data.getUnsigned(offset_ptr, AddrSize);
       break;
     }
     case DW_FORM_exprloc:
@@ -172,10 +161,17 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
       Form = data.getULEB128(offset_ptr);
       indirect = true;
       break;
-    case DW_FORM_sec_offset:
+    case DW_FORM_sec_offset: {
       // FIXME: This is 64-bit for DWARF64.
-      Value.uval = data.getU32(offset_ptr);
+      RelocAddrMap::const_iterator AI
+        = cu->getRelocMap()->find(*offset_ptr);
+      if (AI != cu->getRelocMap()->end()) {
+        const std::pair<uint8_t, int64_t> &R = AI->second;
+        Value.uval = data.getU32(offset_ptr) + R.second;
+      } else
+        Value.uval = data.getU32(offset_ptr);
       break;
+    }
     case DW_FORM_flag_present:
       Value.uval = 1;
       break;
@@ -249,9 +245,11 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
 
     // Compile unit address sized values
     case DW_FORM_addr:
-    case DW_FORM_ref_addr:
       *offset_ptr += cu->getAddressByteSize();
       return true;
+    case DW_FORM_ref_addr:
+      *offset_ptr += getRefAddrSize(cu->getAddressByteSize(), cu->getVersion());
+      return true;
 
     // 0 byte values - implied from the form.
     case DW_FORM_flag_present:
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 3d59d251a085..e43ba4f1dd02 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -535,6 +535,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
   if (isa<UndefValue>(C)) {
     GenericValue Result;
     switch (C->getType()->getTypeID()) {
+    default:
+      break;
     case Type::IntegerTyID:
     case Type::X86_FP80TyID:
     case Type::FP128TyID:
@@ -543,7 +545,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
       // with the correct bit width.
       Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0);
       break;
-    default:
+    case Type::VectorTyID:
+      // if the whole vector is 'undef' just reserve memory for the value.
+      const VectorType* VTy = dyn_cast<VectorType>(C->getType());
+      const Type *ElemTy = VTy->getElementType();
+      unsigned int elemNum = VTy->getNumElements();
+      Result.AggregateVal.resize(elemNum);
+      if (ElemTy->isIntegerTy())
+        for (unsigned int i = 0; i < elemNum; ++i)
+          Result.AggregateVal[i].IntVal = 
+            APInt(ElemTy->getPrimitiveSizeInBits(), 0);
       break;
     }
     return Result;
@@ -825,6 +836,101 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
     else
       llvm_unreachable("Unknown constant pointer type!");
     break;
+  case Type::VectorTyID: {
+    unsigned elemNum;
+    Type* ElemTy;
+    const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
+    const ConstantVector *CV = dyn_cast<ConstantVector>(C);
+    const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(C);
+
+    if (CDV) {
+        elemNum = CDV->getNumElements();
+        ElemTy = CDV->getElementType();
+    } else if (CV || CAZ) {
+        VectorType* VTy = dyn_cast<VectorType>(C->getType());
+        elemNum = VTy->getNumElements();
+        ElemTy = VTy->getElementType();
+    } else {
+        llvm_unreachable("Unknown constant vector type!");
+    }
+
+    Result.AggregateVal.resize(elemNum);
+    // Check if vector holds floats.
+    if(ElemTy->isFloatTy()) {
+      if (CAZ) {
+        GenericValue floatZero;
+        floatZero.FloatVal = 0.f;
+        std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+                  floatZero);
+        break;
+      }
+      if(CV) {
+        for (unsigned i = 0; i < elemNum; ++i)
+          if (!isa<UndefValue>(CV->getOperand(i)))
+            Result.AggregateVal[i].FloatVal = cast<ConstantFP>(
+              CV->getOperand(i))->getValueAPF().convertToFloat();
+        break;
+      }
+      if(CDV)
+        for (unsigned i = 0; i < elemNum; ++i)
+          Result.AggregateVal[i].FloatVal = CDV->getElementAsFloat(i);
+
+      break;
+    }
+    // Check if vector holds doubles.
+    if (ElemTy->isDoubleTy()) {
+      if (CAZ) {
+        GenericValue doubleZero;
+        doubleZero.DoubleVal = 0.0;
+        std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+                  doubleZero);
+        break;
+      }
+      if(CV) {
+        for (unsigned i = 0; i < elemNum; ++i)
+          if (!isa<UndefValue>(CV->getOperand(i)))
+            Result.AggregateVal[i].DoubleVal = cast<ConstantFP>(
+              CV->getOperand(i))->getValueAPF().convertToDouble();
+        break;
+      }
+      if(CDV)
+        for (unsigned i = 0; i < elemNum; ++i)
+          Result.AggregateVal[i].DoubleVal = CDV->getElementAsDouble(i);
+
+      break;
+    }
+    // Check if vector holds integers.
+    if (ElemTy->isIntegerTy()) {
+      if (CAZ) {
+        GenericValue intZero;     
+        intZero.IntVal = APInt(ElemTy->getScalarSizeInBits(), 0ull);
+        std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+                  intZero);
+        break;
+      }
+      if(CV) {
+        for (unsigned i = 0; i < elemNum; ++i)
+          if (!isa<UndefValue>(CV->getOperand(i)))
+            Result.AggregateVal[i].IntVal = cast<ConstantInt>(
+                                            CV->getOperand(i))->getValue();
+          else {
+            Result.AggregateVal[i].IntVal =
+              APInt(CV->getOperand(i)->getType()->getPrimitiveSizeInBits(), 0);
+          }
+        break;
+      }
+      if(CDV)
+        for (unsigned i = 0; i < elemNum; ++i)
+          Result.AggregateVal[i].IntVal = APInt(
+            CDV->getElementType()->getPrimitiveSizeInBits(),
+            CDV->getElementAsInteger(i));
+
+      break;
+    }
+    llvm_unreachable("Unknown constant pointer type!");
+  }
+  break;
+
   default:
     SmallString<256> Msg;
     raw_svector_ostream OS(Msg);
@@ -842,7 +948,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
 
-  if (sys::isLittleEndianHost()) {
+  if (sys::IsLittleEndianHost) {
     // Little-endian host - the source is ordered from LSB to MSB.  Order the
     // destination from LSB to MSB: Do a straight copy.
     memcpy(Dst, Src, StoreBytes);
@@ -866,6 +972,9 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
   const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty);
 
   switch (Ty->getTypeID()) {
+  default:
+    dbgs() << "Cannot store value of type " << *Ty << "!\n";
+    break;
   case Type::IntegerTyID:
     StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes);
     break;
@@ -885,11 +994,22 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
 
     *((PointerTy*)Ptr) = Val.PointerVal;
     break;
-  default:
-    dbgs() << "Cannot store value of type " << *Ty << "!\n";
+  case Type::VectorTyID:
+    for (unsigned i = 0; i < Val.AggregateVal.size(); ++i) {
+      if (cast<VectorType>(Ty)->getElementType()->isDoubleTy())
+        *(((double*)Ptr)+i) = Val.AggregateVal[i].DoubleVal;
+      if (cast<VectorType>(Ty)->getElementType()->isFloatTy())
+        *(((float*)Ptr)+i) = Val.AggregateVal[i].FloatVal;
+      if (cast<VectorType>(Ty)->getElementType()->isIntegerTy()) {
+        unsigned numOfBytes =(Val.AggregateVal[i].IntVal.getBitWidth()+7)/8;
+        StoreIntToMemory(Val.AggregateVal[i].IntVal, 
+          (uint8_t*)Ptr + numOfBytes*i, numOfBytes);
+      }
+    }
+    break;
   }
 
-  if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian())
+  if (sys::IsLittleEndianHost != getDataLayout()->isLittleEndian())
     // Host and target are different endian - reverse the stored bytes.
     std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr);
 }
@@ -901,7 +1021,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
   uint8_t *Dst = reinterpret_cast<uint8_t *>(
                    const_cast<uint64_t *>(IntVal.getRawData()));
 
-  if (sys::isLittleEndianHost())
+  if (sys::IsLittleEndianHost)
     // Little-endian host - the destination must be ordered from LSB to MSB.
     // The source is ordered from LSB to MSB: Do a straight copy.
     memcpy(Dst, Src, LoadBytes);
@@ -951,6 +1071,31 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
     Result.IntVal = APInt(80, y);
     break;
   }
+  case Type::VectorTyID: {
+    const VectorType *VT = cast<VectorType>(Ty);
+    const Type *ElemT = VT->getElementType();
+    const unsigned numElems = VT->getNumElements();
+    if (ElemT->isFloatTy()) {
+      Result.AggregateVal.resize(numElems);
+      for (unsigned i = 0; i < numElems; ++i)
+        Result.AggregateVal[i].FloatVal = *((float*)Ptr+i);
+    }
+    if (ElemT->isDoubleTy()) {
+      Result.AggregateVal.resize(numElems);
+      for (unsigned i = 0; i < numElems; ++i)
+        Result.AggregateVal[i].DoubleVal = *((double*)Ptr+i);
+    }
+    if (ElemT->isIntegerTy()) {
+      GenericValue intZero;
+      const unsigned elemBitWidth = cast<IntegerType>(ElemT)->getBitWidth();
+      intZero.IntVal = APInt(elemBitWidth, 0);
+      Result.AggregateVal.resize(numElems, intZero);
+      for (unsigned i = 0; i < numElems; ++i)
+        LoadIntFromMemory(Result.AggregateVal[i].IntVal,
+          (uint8_t*)Ptr+((elemBitWidth+7)/8)*i, (elemBitWidth+7)/8);
+    }
+  break;
+  }
   default:
     SmallString<256> Msg;
     raw_svector_ostream OS(Msg);
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index ec4f7f681364..526c04e082d2 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1187,6 +1187,39 @@ void Interpreter::visitVAArgInst(VAArgInst &I) {
   ++VAList.UIntPairVal.second;
 }
 
+void Interpreter::visitExtractElementInst(ExtractElementInst &I) {
+  ExecutionContext &SF = ECStack.back();
+  GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+  GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+  GenericValue Dest;
+
+  Type *Ty = I.getType();
+  const unsigned indx = unsigned(Src2.IntVal.getZExtValue());
+
+  if(Src1.AggregateVal.size() > indx) {
+    switch (Ty->getTypeID()) {
+    default:
+      dbgs() << "Unhandled destination type for extractelement instruction: "
+      << *Ty << "\n";
+      llvm_unreachable(0);
+      break;
+    case Type::IntegerTyID:
+      Dest.IntVal = Src1.AggregateVal[indx].IntVal;
+      break;
+    case Type::FloatTyID:
+      Dest.FloatVal = Src1.AggregateVal[indx].FloatVal;
+      break;
+    case Type::DoubleTyID:
+      Dest.DoubleVal = Src1.AggregateVal[indx].DoubleVal;
+      break;
+    }
+  } else {
+    dbgs() << "Invalid index in extractelement instruction\n";
+  }
+
+  SetValue(&I, Dest, SF);
+}
+
 GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
                                                 ExecutionContext &SF) {
   switch (CE->getOpcode()) {
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index e95db2fc4eba..2952d7eabe2b 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -178,6 +178,7 @@ class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
   void visitAShr(BinaryOperator &I);
 
   void visitVAArgInst(VAArgInst &I);
+  void visitExtractElementInst(ExtractElementInst &I);
   void visitInstruction(Instruction &I) {
     errs() << I << "\n";
     llvm_unreachable("Instruction not interpretable yet!");
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index b8537b1f2f9c..7e9407217b43 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -41,7 +41,7 @@ error_code check(error_code Err) {
 template<class ELFT>
 class DyldELFObject
   : public ELFObjectFile<ELFT> {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
 
   typedef Elf_Shdr_Impl<ELFT> Elf_Shdr;
   typedef Elf_Sym_Impl<ELFT> Elf_Sym;
@@ -617,14 +617,14 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
   Symbol.getType(SymType);
   if (lsi != Symbols.end()) {
     Value.SectionID = lsi->second.first;
-    Value.Addend = lsi->second.second;
+    Value.Addend = lsi->second.second + Addend;
   } else {
     // Search for the symbol in the global symbol table
     SymbolTableMap::const_iterator gsi =
         GlobalSymbolTable.find(TargetName.data());
     if (gsi != GlobalSymbolTable.end()) {
       Value.SectionID = gsi->second.first;
-      Value.Addend = gsi->second.second;
+      Value.Addend = gsi->second.second + Addend;
     } else {
       switch (SymType) {
         case SymbolRef::ST_Debug: {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index f1009945775c..555ea96943ec 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -202,14 +202,14 @@ class RuntimeDyldImpl {
 
 
   void writeInt16BE(uint8_t *Addr, uint16_t Value) {
-    if (sys::isLittleEndianHost())
+    if (sys::IsLittleEndianHost)
       Value = sys::SwapByteOrder(Value);
     *Addr     = (Value >> 8) & 0xFF;
     *(Addr+1) = Value & 0xFF;
   }
 
   void writeInt32BE(uint8_t *Addr, uint32_t Value) {
-    if (sys::isLittleEndianHost())
+    if (sys::IsLittleEndianHost)
       Value = sys::SwapByteOrder(Value);
     *Addr     = (Value >> 24) & 0xFF;
     *(Addr+1) = (Value >> 16) & 0xFF;
@@ -218,7 +218,7 @@ class RuntimeDyldImpl {
   }
 
   void writeInt64BE(uint8_t *Addr, uint64_t Value) {
-    if (sys::isLittleEndianHost())
+    if (sys::IsLittleEndianHost)
       Value = sys::SwapByteOrder(Value);
     *Addr     = (Value >> 56) & 0xFF;
     *(Addr+1) = (Value >> 48) & 0xFF;
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index fb591a891dae..5e9c52080fd3 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/TypeFinder.h"
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -42,6 +43,12 @@
 #include <cctype>
 using namespace llvm;
 
+static cl::opt<bool>
+OldStyleAttrSyntax("enable-old-style-attr-syntax",
+    cl::desc("Output attributes on functions rather than in attribute groups"),
+    cl::Hidden,
+    cl::init(false));
+
 // Make virtual table appear in this compilation unit.
 AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
 
@@ -1378,7 +1385,7 @@ void AssemblyWriter::printModule(const Module *M) {
     printFunction(I);
 
   // Output all attribute groups.
-  if (!Machine.as_empty()) {
+  if (!OldStyleAttrSyntax && !Machine.as_empty()) {
     Out << '\n';
     writeAllAttributeGroups();
   }
@@ -1605,6 +1612,14 @@ void AssemblyWriter::printFunction(const Function *F) {
   if (F->isMaterializable())
     Out << "; Materializable\n";
 
+  const AttributeSet &Attrs = F->getAttributes();
+  if (!OldStyleAttrSyntax && Attrs.hasAttributes(AttributeSet::FunctionIndex)) {
+    AttributeSet AS = Attrs.getFnAttributes();
+    std::string AttrStr = AS.getAsString(AttributeSet::FunctionIndex, false);
+    if (!AttrStr.empty())
+      Out << "; Function Attrs: " << AttrStr << '\n';
+  }
+
   if (F->isDeclaration())
     Out << "declare ";
   else
@@ -1620,7 +1635,6 @@ void AssemblyWriter::printFunction(const Function *F) {
   }
 
   FunctionType *FT = F->getFunctionType();
-  const AttributeSet &Attrs = F->getAttributes();
   if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
     Out <<  Attrs.getAsString(AttributeSet::ReturnIndex) << ' ';
   TypePrinter.print(F->getReturnType(), Out);
@@ -1663,8 +1677,15 @@ void AssemblyWriter::printFunction(const Function *F) {
   Out << ')';
   if (F->hasUnnamedAddr())
     Out << " unnamed_addr";
-  if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
-    Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttributes());
+  if (!OldStyleAttrSyntax) {
+    if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+      Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttributes());
+  } else {
+    AttributeSet AS = Attrs.getFnAttributes();
+    std::string AttrStr = AS.getAsString(AttributeSet::FunctionIndex, false);
+    if (!AttrStr.empty())
+      Out << ' ' << AttrStr;
+  }
   if (F->hasSection()) {
     Out << " section \"";
     PrintEscapedString(F->getSection(), Out);
@@ -1761,10 +1782,8 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
 /// which slot it occupies.
 ///
 void AssemblyWriter::printInfoComment(const Value &V) {
-  if (AnnotationWriter) {
+  if (AnnotationWriter)
     AnnotationWriter->printInfoComment(V, Out);
-    return;
-  }
 }
 
 // This member is called for each Instruction in a function..
@@ -2137,7 +2156,8 @@ void AssemblyWriter::writeAllAttributeGroups() {
   for (std::vector<std::pair<AttributeSet, unsigned> >::iterator
          I = asVec.begin(), E = asVec.end(); I != E; ++I)
     Out << "attributes #" << I->second << " = { "
-        << I->first.getAsString(AttributeSet::FunctionIndex, true) << " }\n";
+        << I->first.getAsString(AttributeSet::FunctionIndex, true, true)
+        << " }\n";
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index ad2670dade12..b17aa43b2f3d 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -178,7 +178,7 @@ class AttributeSetNode : public FoldingSetNode {
 
   unsigned getAlignment() const;
   unsigned getStackAlignment() const;
-  std::string getAsString(bool InAttrGrp) const;
+  std::string getAsString(bool TargetIndependent, bool InAttrGrp) const;
 
   typedef SmallVectorImpl<Attribute>::iterator       iterator;
   typedef SmallVectorImpl<Attribute>::const_iterator const_iterator;
@@ -248,15 +248,15 @@ class AttributeSetImpl : public FoldingSetNode {
   typedef AttributeSetNode::iterator       iterator;
   typedef AttributeSetNode::const_iterator const_iterator;
 
-  iterator begin(unsigned Idx)
-    { return AttrNodes[Idx].second->begin(); }
-  iterator end(unsigned Idx)
-    { return AttrNodes[Idx].second->end(); }
+  iterator begin(unsigned Slot)
+    { return AttrNodes[Slot].second->begin(); }
+  iterator end(unsigned Slot)
+    { return AttrNodes[Slot].second->end(); }
 
-  const_iterator begin(unsigned Idx) const
-    { return AttrNodes[Idx].second->begin(); }
-  const_iterator end(unsigned Idx) const
-    { return AttrNodes[Idx].second->end(); }
+  const_iterator begin(unsigned Slot) const
+    { return AttrNodes[Slot].second->begin(); }
+  const_iterator end(unsigned Slot) const
+    { return AttrNodes[Slot].second->end(); }
 
   void Profile(FoldingSetNodeID &ID) const {
     Profile(ID, AttrNodes);
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 2d828914cdca..44d720ecf054 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -211,6 +211,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "sanitize_thread";
   if (hasAttribute(Attribute::SanitizeMemory))
     return "sanitize_memory";
+  if (hasAttribute(Attribute::FixedStackSegment))
+    return "fixedstacksegment";
   if (hasAttribute(Attribute::UWTable))
     return "uwtable";
   if (hasAttribute(Attribute::ZExt))
@@ -393,6 +395,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
   case Attribute::SanitizeThread:  return 1ULL << 36;
   case Attribute::SanitizeMemory:  return 1ULL << 37;
   case Attribute::NoBuiltin:       return 1ULL << 38;
+  case Attribute::FixedStackSegment:  return 1ULL << 39;
   }
   llvm_unreachable("Unsupported attribute type");
 }
@@ -480,12 +483,16 @@ unsigned AttributeSetNode::getStackAlignment() const {
   return 0;
 }
 
-std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
-  std::string Str = "";
+std::string AttributeSetNode::getAsString(bool TargetIndependent,
+                                          bool InAttrGrp) const {
+  std::string Str;
   for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
-         E = AttrList.end(); I != E; ) {
-    Str += I->getAsString(InAttrGrp);
-    if (++I != E) Str += " ";
+         E = AttrList.end(); I != E; ++I) {
+    if (TargetIndependent || !I->isStringAttribute()) {
+      if (I != AttrList.begin())
+        Str += ' ';
+      Str += I->getAsString(InAttrGrp);
+    }
   }
   return Str;
 }
@@ -592,7 +599,7 @@ AttributeSet AttributeSet::get(LLVMContext &C,
   return getImpl(C, Attrs);
 }
 
-AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) {
+AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, AttrBuilder &B) {
   if (!B.hasAttributes())
     return AttributeSet();
 
@@ -604,29 +611,29 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) {
       continue;
 
     if (Kind == Attribute::Alignment)
-      Attrs.push_back(std::make_pair(Idx, Attribute::
+      Attrs.push_back(std::make_pair(Index, Attribute::
                                      getWithAlignment(C, B.getAlignment())));
     else if (Kind == Attribute::StackAlignment)
-      Attrs.push_back(std::make_pair(Idx, Attribute::
+      Attrs.push_back(std::make_pair(Index, Attribute::
                               getWithStackAlignment(C, B.getStackAlignment())));
     else
-      Attrs.push_back(std::make_pair(Idx, Attribute::get(C, Kind)));
+      Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind)));
   }
 
   // Add target-dependent (string) attributes.
   for (AttrBuilder::td_iterator I = B.td_begin(), E = B.td_end();
        I != E; ++I)
-    Attrs.push_back(std::make_pair(Idx, Attribute::get(C, I->first,I->second)));
+    Attrs.push_back(std::make_pair(Index, Attribute::get(C, I->first,I->second)));
 
   return get(C, Attrs);
 }
 
-AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
                                ArrayRef<Attribute::AttrKind> Kind) {
   SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
   for (ArrayRef<Attribute::AttrKind>::iterator I = Kind.begin(),
          E = Kind.end(); I != E; ++I)
-    Attrs.push_back(std::make_pair(Idx, Attribute::get(C, *I)));
+    Attrs.push_back(std::make_pair(Index, Attribute::get(C, *I)));
   return get(C, Attrs);
 }
 
@@ -643,20 +650,20 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) {
   return getImpl(C, AttrNodeVec);
 }
 
-AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index,
                                         Attribute::AttrKind Attr) const {
-  if (hasAttribute(Idx, Attr)) return *this;
-  return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
+  if (hasAttribute(Index, Attr)) return *this;
+  return addAttributes(C, Index, AttributeSet::get(C, Index, Attr));
 }
 
-AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index,
                                         StringRef Kind) const {
   llvm::AttrBuilder B;
   B.addAttribute(Kind);
-  return addAttributes(C, Idx, AttributeSet::get(C, Idx, B));
+  return addAttributes(C, Index, AttributeSet::get(C, Index, B));
 }
 
-AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Index,
                                          AttributeSet Attrs) const {
   if (!pImpl) return Attrs;
   if (!Attrs.pImpl) return *this;
@@ -664,8 +671,8 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
 #ifndef NDEBUG
   // FIXME it is not obvious how this should work for alignment. For now, say
   // we can't change a known alignment.
-  unsigned OldAlign = getParamAlignment(Idx);
-  unsigned NewAlign = Attrs.getParamAlignment(Idx);
+  unsigned OldAlign = getParamAlignment(Index);
+  unsigned NewAlign = Attrs.getParamAlignment(Index);
   assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
          "Attempt to change alignment!");
 #endif
@@ -676,8 +683,8 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
   AttributeSet AS;
   uint64_t LastIndex = 0;
   for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
-    if (getSlotIndex(I) >= Idx) {
-      if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++);
+    if (getSlotIndex(I) >= Index) {
+      if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++);
       break;
     }
     LastIndex = I + 1;
@@ -686,17 +693,17 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
 
   // Now add the attribute into the correct slot. There may already be an
   // AttributeSet there.
-  AttrBuilder B(AS, Idx);
+  AttrBuilder B(AS, Index);
 
   for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I)
-    if (Attrs.getSlotIndex(I) == Idx) {
+    if (Attrs.getSlotIndex(I) == Index) {
       for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I),
              IE = Attrs.pImpl->end(I); II != IE; ++II)
         B.addAttribute(*II);
       break;
     }
 
-  AttrSet.push_back(AttributeSet::get(C, Idx, B));
+  AttrSet.push_back(AttributeSet::get(C, Index, B));
 
   // Add the remaining attribute slots.
   for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
@@ -705,13 +712,13 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
   return get(C, AttrSet);
 }
 
-AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Index,
                                            Attribute::AttrKind Attr) const {
-  if (!hasAttribute(Idx, Attr)) return *this;
-  return removeAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
+  if (!hasAttribute(Index, Attr)) return *this;
+  return removeAttributes(C, Index, AttributeSet::get(C, Index, Attr));
 }
 
-AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
                                             AttributeSet Attrs) const {
   if (!pImpl) return AttributeSet();
   if (!Attrs.pImpl) return *this;
@@ -719,7 +726,7 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
 #ifndef NDEBUG
   // FIXME it is not obvious how this should work for alignment.
   // For now, say we can't pass in alignment, which no current use does.
-  assert(!Attrs.hasAttribute(Idx, Attribute::Alignment) &&
+  assert(!Attrs.hasAttribute(Index, Attribute::Alignment) &&
          "Attempt to change alignment!");
 #endif
 
@@ -729,8 +736,8 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
   AttributeSet AS;
   uint64_t LastIndex = 0;
   for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
-    if (getSlotIndex(I) >= Idx) {
-      if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++);
+    if (getSlotIndex(I) >= Index) {
+      if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++);
       break;
     }
     LastIndex = I + 1;
@@ -739,15 +746,15 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
 
   // Now remove the attribute from the correct slot. There may already be an
   // AttributeSet there.
-  AttrBuilder B(AS, Idx);
+  AttrBuilder B(AS, Index);
 
   for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I)
-    if (Attrs.getSlotIndex(I) == Idx) {
-      B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Idx);
+    if (Attrs.getSlotIndex(I) == Index) {
+      B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Index);
       break;
     }
 
-  AttrSet.push_back(AttributeSet::get(C, Idx, B));
+  AttrSet.push_back(AttributeSet::get(C, Index, B));
 
   // Add the remaining attribute slots.
   for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
@@ -764,11 +771,11 @@ LLVMContext &AttributeSet::getContext() const {
   return pImpl->getContext();
 }
 
-AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const {
-  return pImpl && hasAttributes(Idx) ?
+AttributeSet AttributeSet::getParamAttributes(unsigned Index) const {
+  return pImpl && hasAttributes(Index) ?
     AttributeSet::get(pImpl->getContext(),
                       ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
-                        std::make_pair(Idx, getAttributes(Idx)))) :
+                        std::make_pair(Index, getAttributes(Index)))) :
     AttributeSet();
 }
 
@@ -841,34 +848,35 @@ unsigned AttributeSet::getStackAlignment(unsigned Index) const {
   return ASN ? ASN->getStackAlignment() : 0;
 }
 
-std::string AttributeSet::getAsString(unsigned Index,
+std::string AttributeSet::getAsString(unsigned Index, bool TargetIndependent,
                                       bool InAttrGrp) const {
   AttributeSetNode *ASN = getAttributes(Index);
-  return ASN ? ASN->getAsString(InAttrGrp) : std::string("");
+  return ASN ? ASN->getAsString(TargetIndependent, InAttrGrp) :
+    std::string("");
 }
 
 /// \brief The attributes for the specified index are returned.
-AttributeSetNode *AttributeSet::getAttributes(unsigned Idx) const {
+AttributeSetNode *AttributeSet::getAttributes(unsigned Index) const {
   if (!pImpl) return 0;
 
   // Loop through to find the attribute node we want.
   for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I)
-    if (pImpl->getSlotIndex(I) == Idx)
+    if (pImpl->getSlotIndex(I) == Index)
       return pImpl->getSlotNode(I);
 
   return 0;
 }
 
-AttributeSet::iterator AttributeSet::begin(unsigned Idx) const {
+AttributeSet::iterator AttributeSet::begin(unsigned Slot) const {
   if (!pImpl)
     return ArrayRef<Attribute>().begin();
-  return pImpl->begin(Idx);
+  return pImpl->begin(Slot);
 }
 
-AttributeSet::iterator AttributeSet::end(unsigned Idx) const {
+AttributeSet::iterator AttributeSet::end(unsigned Slot) const {
   if (!pImpl)
     return ArrayRef<Attribute>().end();
-  return pImpl->end(Idx);
+  return pImpl->end(Slot);
 }
 
 //===----------------------------------------------------------------------===//
@@ -919,13 +927,13 @@ void AttributeSet::dump() const {
 // AttrBuilder Method Implementations
 //===----------------------------------------------------------------------===//
 
-AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx)
+AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
   : Attrs(0), Alignment(0), StackAlignment(0) {
   AttributeSetImpl *pImpl = AS.pImpl;
   if (!pImpl) return;
 
   for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) {
-    if (pImpl->getSlotIndex(I) != Idx) continue;
+    if (pImpl->getSlotIndex(I) != Index) continue;
 
     for (AttributeSetImpl::const_iterator II = pImpl->begin(I),
            IE = pImpl->end(I); II != IE; ++II)
@@ -982,16 +990,16 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
 }
 
 AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
-  unsigned Idx = ~0U;
+  unsigned Slot = ~0U;
   for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
     if (A.getSlotIndex(I) == Index) {
-      Idx = I;
+      Slot = I;
       break;
     }
 
-  assert(Idx != ~0U && "Couldn't find index in AttributeSet!");
+  assert(Slot != ~0U && "Couldn't find index in AttributeSet!");
 
-  for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); I != E; ++I) {
+  for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
     Attribute Attr = *I;
     if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
       Attribute::AttrKind Kind = I->getKindAsEnum();
@@ -1069,16 +1077,16 @@ bool AttrBuilder::hasAttributes() const {
 }
 
 bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const {
-  unsigned Idx = ~0U;
+  unsigned Slot = ~0U;
   for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
     if (A.getSlotIndex(I) == Index) {
-      Idx = I;
+      Slot = I;
       break;
     }
 
-  assert(Idx != ~0U && "Couldn't find the index!");
+  assert(Slot != ~0U && "Couldn't find the index!");
 
-  for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx);
+  for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot);
        I != E; ++I) {
     Attribute Attr = *I;
     if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
@@ -1109,33 +1117,6 @@ bool AttrBuilder::operator==(const AttrBuilder &B) {
   return Alignment == B.Alignment && StackAlignment == B.StackAlignment;
 }
 
-void AttrBuilder::removeFunctionOnlyAttrs() {
-  removeAttribute(Attribute::NoReturn)
-    .removeAttribute(Attribute::NoUnwind)
-    .removeAttribute(Attribute::ReadNone)
-    .removeAttribute(Attribute::ReadOnly)
-    .removeAttribute(Attribute::NoInline)
-    .removeAttribute(Attribute::AlwaysInline)
-    .removeAttribute(Attribute::OptimizeForSize)
-    .removeAttribute(Attribute::StackProtect)
-    .removeAttribute(Attribute::StackProtectReq)
-    .removeAttribute(Attribute::StackProtectStrong)
-    .removeAttribute(Attribute::NoRedZone)
-    .removeAttribute(Attribute::NoImplicitFloat)
-    .removeAttribute(Attribute::Naked)
-    .removeAttribute(Attribute::InlineHint)
-    .removeAttribute(Attribute::StackAlignment)
-    .removeAttribute(Attribute::UWTable)
-    .removeAttribute(Attribute::NonLazyBind)
-    .removeAttribute(Attribute::ReturnsTwice)
-    .removeAttribute(Attribute::SanitizeAddress)
-    .removeAttribute(Attribute::SanitizeThread)
-    .removeAttribute(Attribute::SanitizeMemory)
-    .removeAttribute(Attribute::MinSize)
-    .removeAttribute(Attribute::NoDuplicate)
-    .removeAttribute(Attribute::NoBuiltin);
-}
-
 AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
   // FIXME: Remove this in 4.0.
   if (!Val) return *this;
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 70f7e0176e85..2c6971c83e75 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -53,12 +53,9 @@ bool Constant::isNegativeZeroValue() const {
       if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative())
         return true;
 
-  // However, vectors of zeroes which are floating point represent +0.0's.
-  if (const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(this))
-    if (const VectorType *VT = dyn_cast<VectorType>(CAZ->getType()))
-      if (VT->getElementType()->isFloatingPointTy())
-        // As it's a CAZ, we know it's the zero bit-pattern (ie, +0.0) in each element.
-        return false;
+  // We've already handled true FP case; any other FP vectors can't represent -0.0.
+  if (getType()->isFPOrFPVectorTy())
+    return false;
 
   // Otherwise, just use +0.0.
   return isNullValue();
@@ -240,18 +237,21 @@ void Constant::destroyConstantImpl() {
   delete this;
 }
 
-/// canTrap - Return true if evaluation of this constant could trap.  This is
-/// true for things like constant expressions that could divide by zero.
-bool Constant::canTrap() const {
-  assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
+static bool canTrapImpl(const Constant *C,
+                        SmallPtrSet<const ConstantExpr *, 4> &NonTrappingOps) {
+  assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
   // The only thing that could possibly trap are constant exprs.
-  const ConstantExpr *CE = dyn_cast<ConstantExpr>(this);
-  if (!CE) return false;
+  const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+  if (!CE)
+    return false;
 
   // ConstantExpr traps if any operands can trap.
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
-    if (CE->getOperand(i)->canTrap())
-      return true;
+  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+    if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) {
+      if (NonTrappingOps.insert(Op) && canTrapImpl(Op, NonTrappingOps))
+        return true;
+    }
+  }
 
   // Otherwise, only specific operations can trap.
   switch (CE->getOpcode()) {
@@ -270,6 +270,13 @@ bool Constant::canTrap() const {
   }
 }
 
+/// canTrap - Return true if evaluation of this constant could trap.  This is
+/// true for things like constant expressions that could divide by zero.
+bool Constant::canTrap() const {
+  SmallPtrSet<const ConstantExpr *, 4> NonTrappingOps;
+  return canTrapImpl(this, NonTrappingOps);
+}
+
 /// isThreadDependent - Return true if the value can vary between threads.
 bool Constant::isThreadDependent() const {
   SmallPtrSet<const Constant*, 64> Visited;
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 983b49c628b4..14686eda8d96 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -1301,6 +1301,53 @@ void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) {
   unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0);
 }
 
+LLVMThreadLocalMode LLVMGetThreadLocalMode(LLVMValueRef GlobalVar) {
+  switch (unwrap<GlobalVariable>(GlobalVar)->getThreadLocalMode()) {
+  case GlobalVariable::NotThreadLocal:
+    return LLVMNotThreadLocal;
+  case GlobalVariable::GeneralDynamicTLSModel:
+    return LLVMGeneralDynamicTLSModel;
+  case GlobalVariable::LocalDynamicTLSModel:
+    return LLVMLocalDynamicTLSModel;
+  case GlobalVariable::InitialExecTLSModel:
+    return LLVMInitialExecTLSModel;
+  case GlobalVariable::LocalExecTLSModel:
+    return LLVMLocalExecTLSModel;
+  }
+
+  llvm_unreachable("Invalid GlobalVariable thread local mode");
+}
+
+void LLVMSetThreadLocalMode(LLVMValueRef GlobalVar, LLVMThreadLocalMode Mode) {
+  GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+
+  switch (Mode) {
+  case LLVMNotThreadLocal:
+    GV->setThreadLocalMode(GlobalVariable::NotThreadLocal);
+    break;
+  case LLVMGeneralDynamicTLSModel:
+    GV->setThreadLocalMode(GlobalVariable::GeneralDynamicTLSModel);
+    break;
+  case LLVMLocalDynamicTLSModel:
+    GV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
+    break;
+  case LLVMInitialExecTLSModel:
+    GV->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
+    break;
+  case LLVMLocalExecTLSModel:
+    GV->setThreadLocalMode(GlobalVariable::LocalExecTLSModel);
+    break;
+  }
+}
+
+LLVMBool LLVMIsExternallyInitialized(LLVMValueRef GlobalVar) {
+  return unwrap<GlobalVariable>(GlobalVar)->isExternallyInitialized();
+}
+
+void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) {
+  unwrap<GlobalVariable>(GlobalVar)->setExternallyInitialized(IsExtInit);
+}
+
 /*--.. Operations on aliases ......................................--*/
 
 LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
@@ -1385,10 +1432,11 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
     F->clearGC();
 }
 
-void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
+void LLVMAddFunctionAttr(LLVMValueRef Fn, unsigned PA, unsigned HighPA) {
   Function *Func = unwrap<Function>(Fn);
   const AttributeSet PAL = Func->getAttributes();
-  AttrBuilder B(PA);
+  AttrBuilder B(((unsigned long long)PA) |
+                (((unsigned long long)HighPA) << 32));
   const AttributeSet PALnew =
     PAL.addAttributes(Func->getContext(), AttributeSet::FunctionIndex,
                       AttributeSet::get(Func->getContext(),
@@ -1396,6 +1444,18 @@ void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
   Func->setAttributes(PALnew);
 }
 
+void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
+                                        const char *V) {
+  Function *Func = unwrap<Function>(Fn);
+  AttributeSet::AttrIndex Idx =
+    AttributeSet::AttrIndex(AttributeSet::FunctionIndex);
+  AttrBuilder B;
+
+  B.addAttribute(A, V);
+  AttributeSet Set = AttributeSet::get(Func->getContext(), Idx, B);
+  Func->addAttributes(Idx, Set);
+}
+
 void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
   Function *Func = unwrap<Function>(Fn);
   const AttributeSet PAL = Func->getAttributes();
@@ -2397,6 +2457,13 @@ LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(
       StringRef(BufferName)));
 }
 
+const char *LLVMGetBufferStart(LLVMMemoryBufferRef MemBuf) {
+  return unwrap(MemBuf)->getBufferStart();
+}
+
+size_t LLVMGetBufferSize(LLVMMemoryBufferRef MemBuf) {
+  return unwrap(MemBuf)->getBufferSize();
+}
 
 void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
   delete unwrap(MemBuf);
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 34921af645c0..ed9f98f514e2 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -71,6 +71,16 @@ static MDNode *getNonCompileUnitScope(MDNode *N) {
   return N;
 }
 
+static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename,
+                                  StringRef Directory) {
+  assert(!Filename.empty() && "Unable to create file without name");
+  Value *Pair[] = {
+    MDString::get(VMContext, Filename),
+    MDString::get(VMContext, Directory),
+  };
+  return MDNode::get(VMContext, Pair);
+}
+
 /// createCompileUnit - A CompileUnit provides an anchor for all debugging
 /// information generated during this instance of compilation.
 void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
@@ -93,9 +103,8 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
 
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
-    Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    createFilePathPair(VMContext, Filename, Directory),
     ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
-    createFile(Filename, Directory),
     MDString::get(VMContext, Producer),
     ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
     MDString::get(VMContext, Flags),
@@ -116,14 +125,9 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
 /// createFile - Create a file descriptor to hold debugging information
 /// for a file.
 DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
-  assert(!Filename.empty() && "Unable to create file without name");
-  Value *Pair[] = {
-    MDString::get(VMContext, Filename),
-    MDString::get(VMContext, Directory),
-  };
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
-    MDNode::get(VMContext, Pair)
+    createFilePathPair(VMContext, Filename, Directory)
   };
   return DIFile(MDNode::get(VMContext, Elts));
 }
@@ -146,9 +150,9 @@ DIType DIBuilder::createNullPtrType(StringRef Name) {
   // ,size, alignment, offset and flags are always empty here.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type),
+    NULL, // Filename
     NULL, //TheCU,
     MDString::get(VMContext, Name),
-    NULL, // Filename
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -169,9 +173,9 @@ DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
   // offset and flags are always empty here.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
+    NULL, // File/directory name
     NULL, //TheCU,
     MDString::get(VMContext, Name),
-    NULL, // Filename
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -188,9 +192,9 @@ DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
   // Qualified types are encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, Tag),
+    NULL, // Filename
     NULL, //TheCU,
     MDString::get(VMContext, StringRef()), // Empty name.
-    NULL, // Filename
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -208,9 +212,9 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
   // Pointer types are encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
+    NULL, // Filename
     NULL, //TheCU,
     MDString::get(VMContext, Name),
-    NULL, // Filename
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -221,13 +225,14 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
   return DIDerivedType(MDNode::get(VMContext, Elts));
 }
 
-DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base) {
+DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy,
+                                                 DIType Base) {
   // Pointer types are encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type),
+    NULL, // Filename
     NULL, //TheCU,
     NULL,
-    NULL, // Filename
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
     ConstantInt::get(Type::getInt64Ty(VMContext), 0),
     ConstantInt::get(Type::getInt64Ty(VMContext), 0),
@@ -246,9 +251,9 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
   // References are encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, Tag),
+    NULL, // Filename
     NULL, // TheCU,
     NULL, // Name
-    NULL, // Filename
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -266,9 +271,9 @@ DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
   assert(Ty.Verify() && "Invalid typedef type!");
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
+    File.getFileNode(),
     getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -286,9 +291,9 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
   assert(FriendTy.Verify() && "Invalid friend type!");
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_friend),
+    NULL,
     Ty,
     NULL, // Name
-    Ty.getFile(),
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -307,9 +312,9 @@ DIDerivedType DIBuilder::createInheritance(
   // TAG_inheritance is encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
+    NULL,
     Ty,
     NULL, // Name
-    Ty.getFile(),
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
     ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -328,9 +333,9 @@ DIDerivedType DIBuilder::createMemberType(
   // TAG_member is encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_member),
+    File.getFileNode(),
     getNonCompileUnitScope(Scope),
     MDString::get(VMContext, Name),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -351,9 +356,9 @@ DIType DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name,
   Flags |= DIDescriptor::FlagStaticMember;
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_member),
+    File.getFileNode(),
     getNonCompileUnitScope(Scope),
     MDString::get(VMContext, Name),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     ConstantInt::get(Type::getInt64Ty(VMContext), 0/*SizeInBits*/),
     ConstantInt::get(Type::getInt64Ty(VMContext), 0/*AlignInBits*/),
@@ -377,9 +382,9 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
   // TAG_member is encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_member),
+    File.getFileNode(),
     getNonCompileUnitScope(File),
     MDString::get(VMContext, Name),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -404,9 +409,9 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
   // TAG_member is encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_member),
+    File.getFileNode(),
     getNonCompileUnitScope(File),
     MDString::get(VMContext, Name),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -423,7 +428,7 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
 DIObjCProperty DIBuilder::createObjCProperty(StringRef Name,
                                              DIFile File, unsigned LineNumber,
                                              StringRef GetterName,
-                                             StringRef SetterName, 
+                                             StringRef SetterName,
                                              unsigned PropertyAttributes,
                                              DIType Ty) {
   Value *Elts[] = {
@@ -478,21 +483,23 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
 }
 
 /// createClassType - Create debugging information entry for a class.
-DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
-                                  DIFile File, unsigned LineNumber,
-                                  uint64_t SizeInBits, uint64_t AlignInBits,
-                                  uint64_t OffsetInBits, unsigned Flags,
-                                  DIType DerivedFrom, DIArray Elements,
-                                  MDNode *VTableHolder,
-                                  MDNode *TemplateParams) {
+DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
+                                           DIFile File, unsigned LineNumber,
+                                           uint64_t SizeInBits,
+                                           uint64_t AlignInBits,
+                                           uint64_t OffsetInBits,
+                                           unsigned Flags, DIType DerivedFrom,
+                                           DIArray Elements,
+                                           MDNode *VTableHolder,
+                                           MDNode *TemplateParams) {
   assert((!Context || Context.Verify()) &&
          "createClassType should be called with a valid Context");
   // TAG_class_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
+    File.getFileNode(),
     getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -504,7 +511,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
     VTableHolder,
     TemplateParams
   };
-  DIType R(MDNode::get(VMContext, Elts));
+  DICompositeType R(MDNode::get(VMContext, Elts));
   assert(R.Verify() && "createClassType should return a verifiable DIType");
   return R;
 }
@@ -522,9 +529,9 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context,
  // TAG_structure_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
+    File.getFileNode(),
     getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -542,16 +549,18 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context,
 }
 
 /// createUnionType - Create debugging information entry for an union.
-DICompositeType DIBuilder::createUnionType(
-    DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
-    uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, DIArray Elements,
-    unsigned RunTimeLang) {
+DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
+                                           DIFile File, unsigned LineNumber,
+                                           uint64_t SizeInBits,
+                                           uint64_t AlignInBits, unsigned Flags,
+                                           DIArray Elements,
+                                           unsigned RunTimeLang) {
   // TAG_union_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
+    File.getFileNode(),
     getNonCompileUnitScope(Scope),
     MDString::get(VMContext, Name),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -560,7 +569,8 @@ DICompositeType DIBuilder::createUnionType(
     NULL,
     Elements,
     ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
-    Constant::getNullValue(Type::getInt32Ty(VMContext))
+    Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    NULL
   };
   return DICompositeType(MDNode::get(VMContext, Elts));
 }
@@ -572,8 +582,8 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
     Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    MDString::get(VMContext, ""),
     Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    MDString::get(VMContext, ""),
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     ConstantInt::get(Type::getInt64Ty(VMContext), 0),
     ConstantInt::get(Type::getInt64Ty(VMContext), 0),
@@ -592,19 +602,19 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
 DICompositeType DIBuilder::createEnumerationType(
     DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
     uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
-    DIType ClassType) {
+    DIType UnderlyingType) {
   // TAG_enumeration_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
+    File.getFileNode(),
     getNonCompileUnitScope(Scope),
     MDString::get(VMContext, Name),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
-    ClassType,
+    UnderlyingType,
     Elements,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     Constant::getNullValue(Type::getInt32Ty(VMContext))
@@ -620,9 +630,9 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
   // TAG_array_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+    NULL, // Filename/Directory,
     NULL, //TheCU,
     MDString::get(VMContext, ""),
-    NULL, //TheCU,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     ConstantInt::get(Type::getInt64Ty(VMContext), Size),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -643,9 +653,9 @@ DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
   // A vector is an array type with the FlagVector flag applied.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+    NULL, // Filename/Directory,
     NULL, //TheCU,
     MDString::get(VMContext, ""),
-    NULL, //TheCU,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     ConstantInt::get(Type::getInt64Ty(VMContext), Size),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -723,29 +733,6 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() {
   return DIDescriptor(MDNode::get(VMContext, Elts));
 }
 
-/// createTemporaryType - Create a temporary forward-declared type.
-DIType DIBuilder::createTemporaryType() {
-  // Give the temporary MDNode a tag. It doesn't matter what tag we
-  // use here as long as DIType accepts it.
-  Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
-  MDNode *Node = MDNode::getTemporary(VMContext, Elts);
-  return DIType(Node);
-}
-
-/// createTemporaryType - Create a temporary forward-declared type.
-DIType DIBuilder::createTemporaryType(DIFile F) {
-  // Give the temporary MDNode a tag. It doesn't matter what tag we
-  // use here as long as DIType accepts it.
-  Value *Elts[] = {
-    GetTagConstant(VMContext, DW_TAG_base_type),
-    TheCU,
-    NULL,
-    F
-  };
-  MDNode *Node = MDNode::getTemporary(VMContext, Elts);
-  return DIType(Node);
-}
-
 /// createForwardDecl - Create a temporary forward-declared type that
 /// can be RAUW'd if the full type is seen.
 DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
@@ -756,9 +743,9 @@ DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
   // Create a temporary MDNode.
   Value *Elts[] = {
     GetTagConstant(VMContext, Tag),
+    F.getFileNode(),
     getNonCompileUnitScope(Scope),
     MDString::get(VMContext, Name),
-    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), Line),
     ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -796,17 +783,18 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
   return DISubrange(MDNode::get(VMContext, Elts));
 }
 
-/// createGlobalVariable - Create a new descriptor for the specified global.
+/// \brief Create a new descriptor for the specified global.
 DIGlobalVariable DIBuilder::
-createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
-                     DIType Ty, bool isLocalToUnit, Value *Val) {
+createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile F,
+                     unsigned LineNumber, DIType Ty, bool isLocalToUnit,
+                     Value *Val) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_variable),
     Constant::getNullValue(Type::getInt32Ty(VMContext)),
     NULL, // TheCU,
     MDString::get(VMContext, Name),
     MDString::get(VMContext, Name),
-    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
     F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     Ty,
@@ -820,6 +808,14 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
   return DIGlobalVariable(Node);
 }
 
+/// \brief Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+                     DIType Ty, bool isLocalToUnit, Value *Val) {
+  return createGlobalVariable(Name, Name, F, LineNumber, Ty, isLocalToUnit,
+                              Val);
+}
+
 /// createStaticVariable - Create a new descriptor for the specified static
 /// variable.
 DIGlobalVariable DIBuilder::
@@ -918,12 +914,11 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
   Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
-    Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    File.getFileNode(),
     getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
     MDString::get(VMContext, Name),
     MDString::get(VMContext, LinkageName),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     Ty,
     ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
@@ -944,7 +939,9 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
   // Create a named metadata so that we do not lose this mdnode.
   if (isDefinition)
     AllSubprograms.push_back(Node);
-  return DISubprogram(Node);
+  DISubprogram S(Node);
+  assert(S.Verify() && "createFunction should return a valid DISubprogram");
+  return S;
 }
 
 /// createMethod - Create a new descriptor for the specified C++ method.
@@ -964,12 +961,11 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
   Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
-    Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    F.getFileNode(),
     getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
     MDString::get(VMContext, Name),
     MDString::get(VMContext, LinkageName),
-    F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     Ty,
     ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
@@ -989,7 +985,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
   MDNode *Node = MDNode::get(VMContext, Elts);
   if (isDefinition)
     AllSubprograms.push_back(Node);
-  return DISubprogram(Node);
+  DISubprogram S(Node);
+  assert(S.Verify() && "createMethod should return a valid DISubprogram");
+  return S;
 }
 
 /// createNameSpace - This creates new descriptor for a namespace
@@ -998,9 +996,9 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
                                        DIFile File, unsigned LineNo) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+    File.getFileNode(),
     getNonCompileUnitScope(Scope),
     MDString::get(VMContext, Name),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
   };
   DINameSpace R(MDNode::get(VMContext, Elts));
@@ -1015,8 +1013,8 @@ DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
                                                      DIFile File) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
-    Scope,
-    File
+    File.getFileNode(),
+    Scope
   };
   DILexicalBlockFile R(MDNode::get(VMContext, Elts));
   assert(
@@ -1031,10 +1029,10 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
   static unsigned int unique_id = 0;
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+    File.getFileNode(),
     getNonCompileUnitScope(Scope),
     ConstantInt::get(Type::getInt32Ty(VMContext), Line),
     ConstantInt::get(Type::getInt32Ty(VMContext), Col),
-    File,
     ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
   };
   DILexicalBlock R(MDNode::get(VMContext, Elts));
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index 4100c4ff614b..5658f561144b 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -41,7 +41,7 @@ char DataLayout::ID = 0;
 // Support for StructLayout
 //===----------------------------------------------------------------------===//
 
-StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
+StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
   assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
   StructAlignment = 0;
   StructSize = 0;
@@ -50,7 +50,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
   // Loop over each of the elements, placing them in memory.
   for (unsigned i = 0, e = NumElements; i != e; ++i) {
     Type *Ty = ST->getElementType(i);
-    unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
+    unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty);
 
     // Add padding if necessary to align the data element properly.
     if ((StructSize & (TyAlign-1)) != 0)
@@ -60,7 +60,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
     StructAlignment = std::max(TyAlign, StructAlignment);
 
     MemberOffsets[i] = StructSize;
-    StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+    StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item
   }
 
   // Empty structures have alignment of 1 byte.
@@ -510,47 +510,6 @@ std::string DataLayout::getStringRepresentation() const {
 }
 
 
-uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
-  assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
-  switch (Ty->getTypeID()) {
-  case Type::LabelTyID:
-    return getPointerSizeInBits(0);
-  case Type::PointerTyID: {
-    unsigned AS = dyn_cast<PointerType>(Ty)->getAddressSpace();
-    return getPointerSizeInBits(AS);
-  }
-  case Type::ArrayTyID: {
-    ArrayType *ATy = cast<ArrayType>(Ty);
-    return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
-  }
-  case Type::StructTyID:
-    // Get the layout annotation... which is lazily created on demand.
-    return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
-  case Type::IntegerTyID:
-    return cast<IntegerType>(Ty)->getBitWidth();
-  case Type::HalfTyID:
-    return 16;
-  case Type::FloatTyID:
-    return 32;
-  case Type::DoubleTyID:
-  case Type::X86_MMXTyID:
-    return 64;
-  case Type::PPC_FP128TyID:
-  case Type::FP128TyID:
-    return 128;
-  // In memory objects this is always aligned to a higher boundary, but
-  // only 80 bits contain information.
-  case Type::X86_FP80TyID:
-    return 80;
-  case Type::VectorTyID: {
-    VectorType *VTy = cast<VectorType>(Ty);
-    return VTy->getNumElements()*getTypeSizeInBits(VTy->getElementType());
-  }
-  default:
-    llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type");
-  }
-}
-
 /*!
   \param abi_or_pref Flag that determines which alignment is returned. true
   returns the ABI alignment, false returns the preferred alignment.
@@ -662,6 +621,13 @@ Type *DataLayout::getIntPtrType(Type *Ty) const {
   return IntTy;
 }
 
+Type *DataLayout::getSmallestLegalIntType(LLVMContext &C, unsigned Width) const {
+  for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
+    if (Width <= LegalIntWidths[i])
+      return Type::getIntNTy(C, LegalIntWidths[i]);
+  return 0;
+}
+
 uint64_t DataLayout::getIndexedOffset(Type *ptrTy,
                                       ArrayRef<Value *> Indices) const {
   Type *Ty = ptrTy;
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index 1a5454e16fcf..10beb6982430 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -25,6 +25,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 using namespace llvm::dwarf;
@@ -331,7 +332,7 @@ bool DIDescriptor::isEnumerator() const {
   return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
 }
 
-/// isObjCProperty - Return true if the specified tag is DW_TAG
+/// isObjCProperty - Return true if the specified tag is DW_TAG_APPLE_property.
 bool DIDescriptor::isObjCProperty() const {
   return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
 }
@@ -417,7 +418,7 @@ bool DICompileUnit::Verify() const {
   if (N.empty())
     return false;
   // It is possible that directory and produce string is empty.
-  return DbgNode->getNumOperands() == 13;
+  return DbgNode->getNumOperands() == 12;
 }
 
 /// Verify - Verify that an ObjC property is well formed.
@@ -485,7 +486,7 @@ bool DISubprogram::Verify() const {
   DICompositeType Ty = getType();
   if (!Ty.Verify())
     return false;
-  return DbgNode->getNumOperands() == 21;
+  return DbgNode->getNumOperands() == 20;
 }
 
 /// Verify - Verify that a global variable descriptor is well formed.
@@ -539,6 +540,11 @@ bool DINameSpace::Verify() const {
   return DbgNode->getNumOperands() == 5;
 }
 
+/// \brief Retrieve the MDNode for the directory/file pair.
+MDNode *DIFile::getFileNode() const {
+  return const_cast<MDNode*>(getNodeField(DbgNode, 1));
+}
+
 /// \brief Verify that the file descriptor is well formed.
 bool DIFile::Verify() const {
   return isFile() && DbgNode->getNumOperands() == 2;
@@ -610,6 +616,25 @@ MDNode *DIDerivedType::getObjCProperty() const {
   return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10));
 }
 
+/// \brief Set the array of member DITypes.
+void DICompositeType::setTypeArray(DIArray Elements, DIArray TParams) {
+  assert((!TParams || DbgNode->getNumOperands() == 14) &&
+         "If you're setting the template parameters this should include a slot "
+         "for that!");
+  TrackingVH<MDNode> N(*this);
+  N->replaceOperandWith(10, Elements);
+  if (TParams)
+    N->replaceOperandWith(13, TParams);
+  DbgNode = N;
+}
+
+/// \brief Set the containing type.
+void DICompositeType::setContainingType(DICompositeType ContainingType) {
+  TrackingVH<MDNode> N(*this);
+  N->replaceOperandWith(12, ContainingType);
+  DbgNode = N;
+}
+
 /// isInlinedFnArgument - Return true if this variable provides debugging
 /// information for an inlined function arguments.
 bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
@@ -637,21 +662,21 @@ bool DISubprogram::describes(const Function *F) {
 
 unsigned DISubprogram::isOptimized() const {
   assert (DbgNode && "Invalid subprogram descriptor!");
-  if (DbgNode->getNumOperands() == 16)
-    return getUnsignedField(15);
+  if (DbgNode->getNumOperands() == 15)
+    return getUnsignedField(14);
   return 0;
 }
 
 MDNode *DISubprogram::getVariablesNodes() const {
-  if (!DbgNode || DbgNode->getNumOperands() <= 19)
+  if (!DbgNode || DbgNode->getNumOperands() <= 18)
     return NULL;
-  return dyn_cast_or_null<MDNode>(DbgNode->getOperand(19));
+  return dyn_cast_or_null<MDNode>(DbgNode->getOperand(18));
 }
 
 DIArray DISubprogram::getVariables() const {
-  if (!DbgNode || DbgNode->getNumOperands() <= 19)
+  if (!DbgNode || DbgNode->getNumOperands() <= 18)
     return DIArray();
-  if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)))
+  if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(18)))
     return DIArray(T);
   return DIArray();
 }
@@ -659,72 +684,48 @@ DIArray DISubprogram::getVariables() const {
 StringRef DIScope::getFilename() const {
   if (!DbgNode)
     return StringRef();
-  if (isLexicalBlockFile())
-    return DILexicalBlockFile(DbgNode).getFilename();
-  if (isLexicalBlock())
-    return DILexicalBlock(DbgNode).getFilename();
-  if (isSubprogram())
-    return DISubprogram(DbgNode).getFilename();
-  if (isCompileUnit())
-    return DICompileUnit(DbgNode).getFilename();
-  if (isNameSpace())
-    return DINameSpace(DbgNode).getFilename();
-  if (isType())
-    return DIType(DbgNode).getFilename();
   return ::getStringField(getNodeField(DbgNode, 1), 0);
 }
 
 StringRef DIScope::getDirectory() const {
   if (!DbgNode)
     return StringRef();
-  if (isLexicalBlockFile())
-    return DILexicalBlockFile(DbgNode).getDirectory();
-  if (isLexicalBlock())
-    return DILexicalBlock(DbgNode).getDirectory();
-  if (isSubprogram())
-    return DISubprogram(DbgNode).getDirectory();
-  if (isCompileUnit())
-    return DICompileUnit(DbgNode).getDirectory();
-  if (isNameSpace())
-    return DINameSpace(DbgNode).getDirectory();
-  if (isType())
-    return DIType(DbgNode).getDirectory();
   return ::getStringField(getNodeField(DbgNode, 1), 1);
 }
 
 DIArray DICompileUnit::getEnumTypes() const {
-  if (!DbgNode || DbgNode->getNumOperands() < 13)
+  if (!DbgNode || DbgNode->getNumOperands() < 12)
     return DIArray();
 
-  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8)))
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7)))
     return DIArray(N);
   return DIArray();
 }
 
 DIArray DICompileUnit::getRetainedTypes() const {
-  if (!DbgNode || DbgNode->getNumOperands() < 13)
+  if (!DbgNode || DbgNode->getNumOperands() < 12)
     return DIArray();
 
-  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9)))
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8)))
     return DIArray(N);
   return DIArray();
 }
 
 DIArray DICompileUnit::getSubprograms() const {
-  if (!DbgNode || DbgNode->getNumOperands() < 13)
+  if (!DbgNode || DbgNode->getNumOperands() < 12)
     return DIArray();
 
-  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9)))
     return DIArray(N);
   return DIArray();
 }
 
 
 DIArray DICompileUnit::getGlobalVariables() const {
-  if (!DbgNode || DbgNode->getNumOperands() < 13)
+  if (!DbgNode || DbgNode->getNumOperands() < 12)
     return DIArray();
 
-  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
     return DIArray(N);
   return DIArray();
 }
@@ -1032,6 +1033,8 @@ void DIDescriptor::print(raw_ostream &OS) const {
     DIVariable(DbgNode).printInternal(OS);
   } else if (this->isObjCProperty()) {
     DIObjCProperty(DbgNode).printInternal(OS);
+  } else if (this->isNameSpace()) {
+    DINameSpace(DbgNode).printInternal(OS);
   } else if (this->isScope()) {
     DIScope(DbgNode).printInternal(OS);
   }
@@ -1051,8 +1054,13 @@ void DIScope::printInternal(raw_ostream &OS) const {
 
 void DICompileUnit::printInternal(raw_ostream &OS) const {
   DIScope::printInternal(OS);
-  if (const char *Lang = dwarf::LanguageString(getLanguage()))
-    OS << " [" << Lang << ']';
+  OS << " [";
+  unsigned Lang = getLanguage();
+  if (const char *LangStr = dwarf::LanguageString(Lang))
+    OS << LangStr;
+  else
+    (OS << "lang 0x").write_hex(Lang);
+  OS << ']';
 }
 
 void DIEnumerator::printInternal(raw_ostream &OS) const {
@@ -1105,6 +1113,14 @@ void DICompositeType::printInternal(raw_ostream &OS) const {
   OS << " [" << A.getNumElements() << " elements]";
 }
 
+void DINameSpace::printInternal(raw_ostream &OS) const {
+  StringRef Name = getName();
+  if (!Name.empty())
+    OS << " [" << Name << ']';
+
+  OS << " [line " << getLineNumber() << ']';
+}
+
 void DISubprogram::printInternal(raw_ostream &OS) const {
   // TODO : Print context
   OS << " [line " << getLineNumber() << ']';
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index 5559a6c56e6e..1e72b90a13ce 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -211,7 +211,7 @@ Function::~Function() {
   clearGC();
 
   // Remove the intrinsicID from the Cache.
-  if(getValueName() && isIntrinsic())
+  if (getValueName() && isIntrinsic())
     getContext().pImpl->IntrinsicIDCache.erase(this);
 }
 
@@ -352,7 +352,7 @@ unsigned Function::getIntrinsicID() const {
 
   LLVMContextImpl::IntrinsicIDCacheTy &IntrinsicIDCache =
     getContext().pImpl->IntrinsicIDCache;
-  if(!IntrinsicIDCache.count(this)) {
+  if (!IntrinsicIDCache.count(this)) {
     unsigned Id = lookupIntrinsicID();
     IntrinsicIDCache[this]=Id;
     return Id;
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 2e3a52582688..d58877ef773a 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -3000,8 +3000,8 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
   uint32_t BitWidth = C.getBitWidth();
   switch (pred) {
   default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!");
-  case ICmpInst::ICMP_EQ: Upper++; break;
-  case ICmpInst::ICMP_NE: Lower++; break;
+  case ICmpInst::ICMP_EQ: ++Upper; break;
+  case ICmpInst::ICMP_NE: ++Lower; break;
   case ICmpInst::ICMP_ULT:
     Lower = APInt::getMinValue(BitWidth);
     // Check for an empty-set condition.
@@ -3015,25 +3015,25 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
       return ConstantRange(BitWidth, /*isFullSet=*/false);
     break;
   case ICmpInst::ICMP_UGT: 
-    Lower++; Upper = APInt::getMinValue(BitWidth);        // Min = Next(Max)
+    ++Lower; Upper = APInt::getMinValue(BitWidth);        // Min = Next(Max)
     // Check for an empty-set condition.
     if (Lower == Upper)
       return ConstantRange(BitWidth, /*isFullSet=*/false);
     break;
   case ICmpInst::ICMP_SGT:
-    Lower++; Upper = APInt::getSignedMinValue(BitWidth);  // Min = Next(Max)
+    ++Lower; Upper = APInt::getSignedMinValue(BitWidth);  // Min = Next(Max)
     // Check for an empty-set condition.
     if (Lower == Upper)
       return ConstantRange(BitWidth, /*isFullSet=*/false);
     break;
   case ICmpInst::ICMP_ULE: 
-    Lower = APInt::getMinValue(BitWidth); Upper++; 
+    Lower = APInt::getMinValue(BitWidth); ++Upper; 
     // Check for a full-set condition.
     if (Lower == Upper)
       return ConstantRange(BitWidth, /*isFullSet=*/true);
     break;
   case ICmpInst::ICMP_SLE: 
-    Lower = APInt::getSignedMinValue(BitWidth); Upper++; 
+    Lower = APInt::getSignedMinValue(BitWidth); ++Upper; 
     // Check for a full-set condition.
     if (Lower == Upper)
       return ConstantRange(BitWidth, /*isFullSet=*/true);
diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp
index 5a8df703dbef..3c968aac164f 100644
--- a/lib/IR/PassManager.cpp
+++ b/lib/IR/PassManager.cpp
@@ -1739,10 +1739,8 @@ bool PassManager::run(Module &M) {
 }
 
 //===----------------------------------------------------------------------===//
-// TimingInfo Class - This class is used to calculate information about the
-// amount of time each pass takes to execute.  This only happens with
-// -time-passes is enabled on the command line.
-//
+// TimingInfo implementation
+
 bool llvm::TimePassesIsEnabled = false;
 static cl::opt<bool,true>
 EnableTiming("time-passes", cl::location(TimePassesIsEnabled),
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index adc702e05e68..e9eb012e6cef 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -118,7 +118,7 @@ bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
   for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
     if (std::find(I->op_begin(), I->op_end(), this) != I->op_end())
       return true;
-    if (MaxBlockSize-- == 0) // If the block is larger fall back to use_iterator
+    if (--MaxBlockSize == 0) // If the block is larger fall back to use_iterator
       break;
   }
 
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 8bfbb322cf4c..620d600778bb 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -301,9 +301,12 @@ namespace {
     bool VerifyIntrinsicType(Type *Ty,
                              ArrayRef<Intrinsic::IITDescriptor> &Infos,
                              SmallVectorImpl<Type*> &ArgTys);
-    void VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
+    bool VerifyAttributeCount(AttributeSet Attrs, unsigned Params);
+    void VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
+                              bool isFunction, const Value *V);
+    void VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
                               bool isReturnValue, const Value *V);
-    void VerifyFunctionAttrs(FunctionType *FT, const AttributeSet &Attrs,
+    void VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
                              const Value *V);
 
     void WriteValue(const Value *V) {
@@ -626,37 +629,67 @@ void Verifier::visitModuleFlag(MDNode *Op, DenseMap<MDString*, MDNode*>&SeenIDs,
   }
 }
 
+void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
+                                    bool isFunction, const Value* V) {
+  unsigned Slot = ~0U;
+  for (unsigned I = 0, E = Attrs.getNumSlots(); I != E; ++I)
+    if (Attrs.getSlotIndex(I) == Idx) {
+      Slot = I;
+      break;
+    }
+
+  assert(Slot != ~0U && "Attribute set inconsistency!");
+
+  for (AttributeSet::iterator I = Attrs.begin(Slot), E = Attrs.end(Slot);
+         I != E; ++I) {
+    if (I->isStringAttribute())
+      continue;
+
+    if (I->getKindAsEnum() == Attribute::NoReturn ||
+        I->getKindAsEnum() == Attribute::NoUnwind ||
+        I->getKindAsEnum() == Attribute::ReadNone ||
+        I->getKindAsEnum() == Attribute::ReadOnly ||
+        I->getKindAsEnum() == Attribute::NoInline ||
+        I->getKindAsEnum() == Attribute::AlwaysInline ||
+        I->getKindAsEnum() == Attribute::OptimizeForSize ||
+        I->getKindAsEnum() == Attribute::StackProtect ||
+        I->getKindAsEnum() == Attribute::StackProtectReq ||
+        I->getKindAsEnum() == Attribute::StackProtectStrong ||
+        I->getKindAsEnum() == Attribute::NoRedZone ||
+        I->getKindAsEnum() == Attribute::NoImplicitFloat ||
+        I->getKindAsEnum() == Attribute::Naked ||
+        I->getKindAsEnum() == Attribute::InlineHint ||
+        I->getKindAsEnum() == Attribute::StackAlignment ||
+        I->getKindAsEnum() == Attribute::UWTable ||
+        I->getKindAsEnum() == Attribute::NonLazyBind ||
+        I->getKindAsEnum() == Attribute::ReturnsTwice ||
+        I->getKindAsEnum() == Attribute::SanitizeAddress ||
+        I->getKindAsEnum() == Attribute::SanitizeThread ||
+        I->getKindAsEnum() == Attribute::SanitizeMemory ||
+        I->getKindAsEnum() == Attribute::MinSize ||
+        I->getKindAsEnum() == Attribute::NoDuplicate ||
+        I->getKindAsEnum() == Attribute::NoBuiltin ||
+        I->getKindAsEnum() == Attribute::FixedStackSegment) {
+      if (!isFunction)
+          CheckFailed("Attribute '" + I->getKindAsString() +
+                      "' only applies to functions!", V);
+          return;
+    } else if (isFunction) {
+        CheckFailed("Attribute '" + I->getKindAsString() +
+                    "' does not apply to functions!", V);
+        return;
+    }
+  }
+}
+
 // VerifyParameterAttrs - Check the given attributes for an argument or return
 // value of the specified type.  The value V is printed in error messages.
-void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
+void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
                                     bool isReturnValue, const Value *V) {
   if (!Attrs.hasAttributes(Idx))
     return;
 
-  Assert1(!Attrs.hasAttribute(Idx, Attribute::NoReturn) &&
-          !Attrs.hasAttribute(Idx, Attribute::NoUnwind) &&
-          !Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
-          !Attrs.hasAttribute(Idx, Attribute::ReadOnly) &&
-          !Attrs.hasAttribute(Idx, Attribute::NoInline) &&
-          !Attrs.hasAttribute(Idx, Attribute::AlwaysInline) &&
-          !Attrs.hasAttribute(Idx, Attribute::OptimizeForSize) &&
-          !Attrs.hasAttribute(Idx, Attribute::StackProtect) &&
-          !Attrs.hasAttribute(Idx, Attribute::StackProtectReq) &&
-          !Attrs.hasAttribute(Idx, Attribute::NoRedZone) &&
-          !Attrs.hasAttribute(Idx, Attribute::NoImplicitFloat) &&
-          !Attrs.hasAttribute(Idx, Attribute::Naked) &&
-          !Attrs.hasAttribute(Idx, Attribute::InlineHint) &&
-          !Attrs.hasAttribute(Idx, Attribute::StackAlignment) &&
-          !Attrs.hasAttribute(Idx, Attribute::UWTable) &&
-          !Attrs.hasAttribute(Idx, Attribute::NonLazyBind) &&
-          !Attrs.hasAttribute(Idx, Attribute::ReturnsTwice) &&
-          !Attrs.hasAttribute(Idx, Attribute::SanitizeAddress) &&
-          !Attrs.hasAttribute(Idx, Attribute::SanitizeThread) &&
-          !Attrs.hasAttribute(Idx, Attribute::SanitizeMemory) &&
-          !Attrs.hasAttribute(Idx, Attribute::MinSize) &&
-          !Attrs.hasAttribute(Idx, Attribute::NoBuiltin),
-          "Some attributes in '" + Attrs.getAsString(Idx) +
-          "' only apply to functions!", V);
+  VerifyAttributeTypes(Attrs, Idx, false, V);
 
   if (isReturnValue)
     Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
@@ -712,8 +745,7 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
 
 // VerifyFunctionAttrs - Check parameter attributes against a function type.
 // The value V is printed in error messages.
-void Verifier::VerifyFunctionAttrs(FunctionType *FT,
-                                   const AttributeSet &Attrs,
+void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
                                    const Value *V) {
   if (Attrs.isEmpty())
     return;
@@ -721,72 +753,31 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT,
   bool SawNest = false;
 
   for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
-    unsigned Index = Attrs.getSlotIndex(i);
+    unsigned Idx = Attrs.getSlotIndex(i);
 
     Type *Ty;
-    if (Index == 0)
+    if (Idx == 0)
       Ty = FT->getReturnType();
-    else if (Index-1 < FT->getNumParams())
-      Ty = FT->getParamType(Index-1);
+    else if (Idx-1 < FT->getNumParams())
+      Ty = FT->getParamType(Idx-1);
     else
       break;  // VarArgs attributes, verified elsewhere.
 
-    VerifyParameterAttrs(Attrs, Index, Ty, Index == 0, V);
+    VerifyParameterAttrs(Attrs, Idx, Ty, Idx == 0, V);
 
     if (Attrs.hasAttribute(i, Attribute::Nest)) {
       Assert1(!SawNest, "More than one parameter has attribute nest!", V);
       SawNest = true;
     }
 
-    if (Attrs.hasAttribute(Index, Attribute::StructRet))
-      Assert1(Index == 1, "Attribute sret is not on first parameter!", V);
+    if (Attrs.hasAttribute(Idx, Attribute::StructRet))
+      Assert1(Idx == 1, "Attribute sret is not on first parameter!", V);
   }
 
   if (!Attrs.hasAttributes(AttributeSet::FunctionIndex))
     return;
 
-  AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex);
-  NotFn.removeFunctionOnlyAttrs();
-  Assert1(NotFn.empty(), "Attributes '" +
-          AttributeSet::get(V->getContext(),
-                            AttributeSet::FunctionIndex,
-                            NotFn).getAsString(AttributeSet::FunctionIndex) +
-          "' do not apply to the function!", V);
-
-  // Check for mutually incompatible attributes.
-  Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::ByVal) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::Nest)) ||
-            (Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::ByVal) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::StructRet)) ||
-            (Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::Nest) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::StructRet))),
-          "Attributes 'byval, nest, and sret' are incompatible!", V);
-
-  Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::ByVal) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::Nest)) ||
-            (Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::ByVal) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::InReg)) ||
-            (Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::Nest) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::InReg))),
-          "Attributes 'byval, nest, and inreg' are incompatible!", V);
-
-  Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                               Attribute::ZExt) &&
-            Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                               Attribute::SExt)),
-          "Attributes 'zeroext and signext' are incompatible!", V);
+  VerifyAttributeTypes(Attrs, AttributeSet::FunctionIndex, true, V);
 
   Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
                                Attribute::ReadNone) &&
@@ -801,7 +792,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT,
           "Attributes 'noinline and alwaysinline' are incompatible!", V);
 }
 
-static bool VerifyAttributeCount(const AttributeSet &Attrs, unsigned Params) {
+bool Verifier::VerifyAttributeCount(AttributeSet Attrs, unsigned Params) {
   if (Attrs.getNumSlots() == 0)
     return true;
 
@@ -837,7 +828,7 @@ void Verifier::visitFunction(Function &F) {
   Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
           "Invalid struct return type!", &F);
 
-  const AttributeSet &Attrs = F.getAttributes();
+  AttributeSet Attrs = F.getAttributes();
 
   Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
           "Attribute after last parameter!", &F);
@@ -1350,7 +1341,7 @@ void Verifier::VerifyCallSite(CallSite CS) {
             "Call parameter type does not match function signature!",
             CS.getArgument(i), FTy->getParamType(i), I);
 
-  const AttributeSet &Attrs = CS.getAttributes();
+  AttributeSet Attrs = CS.getAttributes();
 
   Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
           "Attribute after last parameter!", I);
diff --git a/lib/IRReader/CMakeLists.txt b/lib/IRReader/CMakeLists.txt
new file mode 100644
index 000000000000..cf10d8b7dba9
--- /dev/null
+++ b/lib/IRReader/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMIRReader
+  IRReader.cpp
+  )
diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp
new file mode 100644
index 000000000000..eeec14e834c1
--- /dev/null
+++ b/lib/IRReader/IRReader.cpp
@@ -0,0 +1,89 @@
+//===---- IRReader.cpp - Reader for LLVM IR files -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+namespace llvm {
+  extern bool TimePassesIsEnabled;
+}
+
+static const char *TimeIRParsingGroupName = "LLVM IR Parsing";
+static const char *TimeIRParsingName = "Parse IR";
+
+
+Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err,
+                              LLVMContext &Context) {
+  if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+                (const unsigned char *)Buffer->getBufferEnd())) {
+    std::string ErrMsg;
+    Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg);
+    if (M == 0) {
+      Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+                         ErrMsg);
+      // ParseBitcodeFile does not take ownership of the Buffer in the
+      // case of an error.
+      delete Buffer;
+    }
+    return M;
+  }
+
+  return ParseAssembly(Buffer, 0, Err, Context);
+}
+
+Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err,
+                                  LLVMContext &Context) {
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+    Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
+                       "Could not open input file: " + ec.message());
+    return 0;
+  }
+
+  return getLazyIRModule(File.take(), Err, Context);
+}
+
+Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err,
+                      LLVMContext &Context) {
+  NamedRegionTimer T(TimeIRParsingName, TimeIRParsingGroupName,
+                     TimePassesIsEnabled);
+  if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+                (const unsigned char *)Buffer->getBufferEnd())) {
+    std::string ErrMsg;
+    Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg);
+    if (M == 0)
+      Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+                         ErrMsg);
+    // ParseBitcodeFile does not take ownership of the Buffer.
+    delete Buffer;
+    return M;
+  }
+
+  return ParseAssembly(Buffer, 0, Err, Context);
+}
+
+Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
+                          LLVMContext &Context) {
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+    Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
+                       "Could not open input file: " + ec.message());
+    return 0;
+  }
+
+  return ParseIR(File.take(), Err, Context);
+}
diff --git a/lib/IRReader/LLVMBuild.txt b/lib/IRReader/LLVMBuild.txt
new file mode 100644
index 000000000000..b7bc74d61649
--- /dev/null
+++ b/lib/IRReader/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/IRReader/LLVMBuild.txt -----------------------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = IRReader
+parent = Libraries
+required_libraries = AsmParser BitReader Core Support
diff --git a/lib/IRReader/Makefile b/lib/IRReader/Makefile
new file mode 100644
index 000000000000..cf6bc1135427
--- /dev/null
+++ b/lib/IRReader/Makefile
@@ -0,0 +1,14 @@
+##===- lib/IRReader/Makefile -------------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME := LLVMIRReader
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index a31793c5fd0b..056544380698 100644
--- a/lib/LLVMBuild.txt
+++ b/lib/LLVMBuild.txt
@@ -16,7 +16,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker IR MC Object Option Support TableGen Target Transforms
+subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker IR IRReader MC Object Option Support TableGen Target Transforms
 
 [component_0]
 type = Group
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 0acbcfadafb2..74cbdadd61eb 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -17,13 +17,13 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/TypeFinder.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index c8ea8ff0a9e3..74d24f278b77 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -15,7 +15,6 @@
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
 using namespace llvm;
@@ -24,7 +23,6 @@ Linker::Linker(StringRef progname, StringRef modname,
                LLVMContext& C, unsigned flags):
   Context(C),
   Composite(new Module(modname, C)),
-  LibPaths(),
   Flags(flags),
   Error(),
   ProgramName(progname) { }
@@ -32,7 +30,6 @@ Linker::Linker(StringRef progname, StringRef modname,
 Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
   Context(aModule->getContext()),
   Composite(aModule),
-  LibPaths(),
   Flags(flags),
   Error(),
   ProgramName(progname) { }
@@ -63,27 +60,9 @@ Linker::verbose(StringRef message) {
     errs() << "  " << message << "\n";
 }
 
-void
-Linker::addPath(const sys::Path& path) {
-  LibPaths.push_back(path);
-}
-
-void
-Linker::addPaths(const std::vector<std::string>& paths) {
-  for (unsigned i = 0, e = paths.size(); i != e; ++i)
-    LibPaths.push_back(sys::Path(paths[i]));
-}
-
-void
-Linker::addSystemPaths() {
-  sys::Path::GetBitcodeLibraryPaths(LibPaths);
-  LibPaths.insert(LibPaths.begin(),sys::Path("./"));
-}
-
 Module*
 Linker::releaseModule() {
   Module* result = Composite;
-  LibPaths.clear();
   Error.clear();
   Composite = 0;
   Flags = 0;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 35613b411c24..9e867859da51 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -124,19 +124,15 @@ class MCAsmStreamer : public MCStreamer {
   /// @name MCStreamer Interface
   /// @{
 
-  virtual void ChangeSection(const MCSection *Section);
+  virtual void ChangeSection(const MCSection *Section,
+                             const MCExpr *Subsection);
 
   virtual void InitSections() {
     InitToTextSection();
   }
 
   virtual void InitToTextSection() {
-    // FIXME, this is MachO specific, but the testsuite
-    // expects this.
-    SwitchSection(getContext().getMachOSection(
-                                      "__TEXT", "__text",
-                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                                      0, SectionKind::getText()));
+    SwitchSection(getContext().getObjectFileInfo()->getTextSection());
   }
 
   virtual void EmitLabel(MCSymbol *Symbol);
@@ -333,9 +329,10 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
   return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
 }
 
-void MCAsmStreamer::ChangeSection(const MCSection *Section) {
+void MCAsmStreamer::ChangeSection(const MCSection *Section,
+                                  const MCExpr *Subsection) {
   assert(Section && "Cannot switch to a null section!");
-  Section->PrintSwitchToSection(MAI, OS);
+  Section->PrintSwitchToSection(MAI, OS, Subsection);
 }
 
 void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
@@ -642,7 +639,8 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
 
 
 void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
-  assert(getCurrentSection() && "Cannot emit contents before setting section!");
+  assert(getCurrentSection().first &&
+         "Cannot emit contents before setting section!");
   if (Data.empty()) return;
 
   if (Data.size() == 1) {
@@ -673,7 +671,8 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
 
 void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
                                   unsigned AddrSpace) {
-  assert(getCurrentSection() && "Cannot emit contents before setting section!");
+  assert(getCurrentSection().first &&
+         "Cannot emit contents before setting section!");
   const char *Directive = 0;
   switch (Size) {
   default: break;
@@ -1368,7 +1367,8 @@ void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) {
 }
 
 void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
-  assert(getCurrentSection() && "Cannot emit contents before setting section!");
+  assert(getCurrentSection().first &&
+         "Cannot emit contents before setting section!");
 
   // Show the encoding in a comment if we have a code emitter.
   if (Emitter)
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 1829266f96cb..fb5ab28bcf50 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -243,6 +243,36 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
     A->getSectionList().push_back(this);
 }
 
+MCSectionData::iterator
+MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) {
+  if (Subsection == 0 && SubsectionFragmentMap.empty())
+    return end();
+
+  SmallVectorImpl<std::pair<unsigned, MCFragment *> >::iterator MI =
+    std::lower_bound(SubsectionFragmentMap.begin(), SubsectionFragmentMap.end(),
+                     std::make_pair(Subsection, (MCFragment *)0));
+  bool ExactMatch = false;
+  if (MI != SubsectionFragmentMap.end()) {
+    ExactMatch = MI->first == Subsection;
+    if (ExactMatch)
+      ++MI;
+  }
+  iterator IP;
+  if (MI == SubsectionFragmentMap.end())
+    IP = end();
+  else
+    IP = MI->second;
+  if (!ExactMatch && Subsection != 0) {
+    // The GNU as documentation claims that subsections have an alignment of 4,
+    // although this appears not to be the case.
+    MCFragment *F = new MCDataFragment();
+    SubsectionFragmentMap.insert(MI, std::make_pair(Subsection, F));
+    getFragmentList().insert(IP, F);
+    F->setParent(this);
+  }
+  return IP;
+}
+
 /* *** */
 
 MCSymbolData::MCSymbolData() : Symbol(0) {}
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 0f8f0741bd7c..f91cb54e639f 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -197,6 +197,8 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
   // actually a DW_LNE_end_sequence.
 
   // Switch to the section to be able to create a symbol at its end.
+  // TODO: keep track of the last subsection so that this symbol appears in the
+  // correct place.
   MCOS->SwitchSection(Section);
 
   MCContext &context = MCOS->getContext();
@@ -787,7 +789,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
   if (Symbol->isTemporary())
     return;
   MCContext &context = MCOS->getContext();
-  if (context.getGenDwarfSection() != MCOS->getCurrentSection())
+  if (context.getGenDwarfSection() != MCOS->getCurrentSection().first)
     return;
 
   // The dwarf label's name does not have the symbol name's leading
@@ -899,7 +901,7 @@ namespace {
     /// EmitCompactUnwind - Emit the unwind information in a compact way. If
     /// we're successful, return 'true'. Otherwise, return 'false' and it will
     /// emit the normal CIE and FDE.
-    bool EmitCompactUnwind(MCStreamer &streamer,
+    void EmitCompactUnwind(MCStreamer &streamer,
                            const MCDwarfFrameInfo &frame);
 
     const MCSymbol &EmitCIE(MCStreamer &streamer,
@@ -1139,7 +1141,7 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
 /// EmitCompactUnwind - Emit the unwind information in a compact way. If we're
 /// successful, return 'true'. Otherwise, return 'false' and it will emit the
 /// normal CIE and FDE.
-bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
+void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
                                          const MCDwarfFrameInfo &Frame) {
   MCContext &Context = Streamer.getContext();
   const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
@@ -1168,10 +1170,11 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
   //   .quad except_tab1
 
   uint32_t Encoding = Frame.CompactUnwindEncoding;
-  if (!Encoding) return false;
+  if (!Encoding) return;
+  bool DwarfEHFrameOnly = (Encoding == MOFI->getCompactUnwindDwarfEHFrameOnly());
 
   // The encoding needs to know we have an LSDA.
-  if (Frame.Lsda)
+  if (!DwarfEHFrameOnly && Frame.Lsda)
     Encoding |= 0x40000000;
 
   Streamer.SwitchSection(MOFI->getCompactUnwindSection());
@@ -1194,11 +1197,10 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
                                       Twine::utohexstr(Encoding));
   Streamer.EmitIntValue(Encoding, Size);
 
-
   // Personality Function
   Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr);
   if (VerboseAsm) Streamer.AddComment("Personality Function");
-  if (Frame.Personality)
+  if (!DwarfEHFrameOnly && Frame.Personality)
     Streamer.EmitSymbolValue(Frame.Personality, Size);
   else
     Streamer.EmitIntValue(0, Size); // No personality fn
@@ -1206,12 +1208,10 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
   // LSDA
   Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding);
   if (VerboseAsm) Streamer.AddComment("LSDA");
-  if (Frame.Lsda)
+  if (!DwarfEHFrameOnly && Frame.Lsda)
     Streamer.EmitSymbolValue(Frame.Lsda, Size);
   else
     Streamer.EmitIntValue(0, Size); // No LSDA
-
-  return true;
 }
 
 const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
@@ -1485,8 +1485,7 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
   if (IsEH && MOFI->getCompactUnwindSection())
     for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
       const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
-      if (Frame.CompactUnwindEncoding)
-        Emitter.EmitCompactUnwind(Streamer, Frame);
+      Emitter.EmitCompactUnwind(Streamer, Frame);
     }
 
   const MCSection &Section = IsEH ? *MOFI->getEHFrameSection() :
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 7f5f1b63e5fe..116f86feb8a5 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/MC/MCELFStreamer.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
@@ -108,14 +109,15 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
   llvm_unreachable("invalid assembler flag!");
 }
 
-void MCELFStreamer::ChangeSection(const MCSection *Section) {
+void MCELFStreamer::ChangeSection(const MCSection *Section,
+                                  const MCExpr *Subsection) {
   MCSectionData *CurSection = getCurrentSectionData();
   if (CurSection && CurSection->isBundleLocked())
     report_fatal_error("Unterminated .bundle_lock when changing a section");
   const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup();
   if (Grp)
     getAssembler().getOrCreateSymbolData(*Grp);
-  this->MCObjectStreamer::ChangeSection(Section);
+  this->MCObjectStreamer::ChangeSection(Section, Subsection);
 }
 
 void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
@@ -126,6 +128,26 @@ void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
   Alias->setVariableValue(Value);
 }
 
+// When GNU as encounters more than one .type declaration for an object it seems
+// to use a mechanism similar to the one below to decide which type is actually
+// used in the object file.  The greater of T1 and T2 is selected based on the
+// following ordering:
+//  STT_NOTYPE < STT_OBJECT < STT_FUNC < STT_GNU_IFUNC < STT_TLS < anything else
+// If neither T1 < T2 nor T2 < T1 according to this ordering, use T2 (the user
+// provided type).
+static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) {
+  unsigned TypeOrdering[] = {ELF::STT_NOTYPE, ELF::STT_OBJECT, ELF::STT_FUNC,
+                             ELF::STT_GNU_IFUNC, ELF::STT_TLS};
+  for (unsigned i = 0; i != array_lengthof(TypeOrdering); ++i) {
+    if (T1 == TypeOrdering[i])
+      return T2;
+    if (T2 == TypeOrdering[i])
+      return T1;
+  }
+
+  return T2;
+}
+
 void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
                                           MCSymbolAttr Attribute) {
   // Indirect symbols are handled differently, to match how 'as' handles
@@ -187,27 +209,34 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
     break;
 
   case MCSA_ELF_TypeFunction:
-    MCELF::SetType(SD, ELF::STT_FUNC);
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_FUNC));
     break;
 
   case MCSA_ELF_TypeIndFunction:
-    MCELF::SetType(SD, ELF::STT_GNU_IFUNC);
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_GNU_IFUNC));
     break;
 
   case MCSA_ELF_TypeObject:
-    MCELF::SetType(SD, ELF::STT_OBJECT);
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_OBJECT));
     break;
 
   case MCSA_ELF_TypeTLS:
-    MCELF::SetType(SD, ELF::STT_TLS);
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_TLS));
     break;
 
   case MCSA_ELF_TypeCommon:
-    MCELF::SetType(SD, ELF::STT_COMMON);
+    // TODO: Emit these as a common symbol.
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_OBJECT));
     break;
 
   case MCSA_ELF_TypeNoType:
-    MCELF::SetType(SD, ELF::STT_NOTYPE);
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_NOTYPE));
     break;
 
   case MCSA_Protected:
@@ -290,7 +319,7 @@ void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
 // entry in the module's symbol table (the first being the null symbol).
 void MCELFStreamer::EmitFileDirective(StringRef Filename) {
   MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename);
-  Symbol->setSection(*getCurrentSection());
+  Symbol->setSection(*getCurrentSection().first);
   Symbol->setAbsolute();
 
   MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
@@ -406,11 +435,13 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
       // Optimize memory usage by emitting the instruction to a
       // MCCompactEncodedInstFragment when not in a bundle-locked group and
       // there are no fixups registered.
-      MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(SD);
+      MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment();
+      insert(CEIF);
       CEIF->getContents().append(Code.begin(), Code.end());
       return;
     } else {
-      DF = new MCDataFragment(SD);
+      DF = new MCDataFragment();
+      insert(DF);
       if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) {
         // If this is a new fragment created for a bundle-locked group, and the
         // group was marked as "align_to_end", set a flag in the fragment.
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 1a53934fef9d..d54c26418340 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -194,7 +194,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
   case VK_TPOFF: return "TPOFF";
   case VK_DTPOFF: return "DTPOFF";
   case VK_TLVP: return "TLVP";
-  case VK_SECREL: return "SECREL";
+  case VK_SECREL: return "SECREL32";
   case VK_ARM_NONE: return "(NONE)";
   case VK_ARM_PLT: return "(PLT)";
   case VK_ARM_GOT: return "(GOT)";
@@ -250,6 +250,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
   case VK_Mips_GOT_LO16: return "GOT_LO16";
   case VK_Mips_CALL_HI16: return "CALL_HI16";
   case VK_Mips_CALL_LO16: return "CALL_LO16";
+  case VK_COFF_IMGREL32: return "IMGREL32";
   }
   llvm_unreachable("Invalid variant kind");
 }
@@ -285,6 +286,8 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
     .Case("dtpoff", VK_DTPOFF)
     .Case("TLVP", VK_TLVP)
     .Case("tlvp", VK_TLVP)
+    .Case("IMGREL", VK_COFF_IMGREL32)
+    .Case("imgrel", VK_COFF_IMGREL32)
     .Default(VK_Invalid);
 }
 
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 7d08d0ecd5e0..e08b01b7aeaa 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -122,11 +122,11 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
 
   // isSymbolLinkerVisible uses the section.
-  Symbol->setSection(*getCurrentSection());
+  Symbol->setSection(*getCurrentSection().first);
   // We have to create a new fragment if this is an atom defining symbol,
   // fragments cannot span atoms.
   if (getAssembler().isSymbolLinkerVisible(*Symbol))
-    new MCDataFragment(getCurrentSectionData());
+    insert(new MCDataFragment());
 
   MCObjectStreamer::EmitLabel(Symbol);
 
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index c872b2203f87..659706a1d804 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -30,13 +30,14 @@ namespace {
     virtual void InitSections() {
     }
 
-    virtual void ChangeSection(const MCSection *Section) {
+    virtual void ChangeSection(const MCSection *Section,
+                               const MCExpr *Subsection) {
     }
 
     virtual void EmitLabel(MCSymbol *Symbol) {
       assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
-      assert(getCurrentSection() && "Cannot emit before setting section!");
-      Symbol->setSection(*getCurrentSection());
+      assert(getCurrentSection().first &&"Cannot emit before setting section!");
+      Symbol->setSection(*getCurrentSection().first);
     }
     virtual void EmitDebugLabel(MCSymbol *Symbol) {
       EmitLabel(Symbol);
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index bafa002e3d44..79ebad15e670 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -145,12 +145,16 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
   LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0,
                                      SectionKind::getReadOnlyWithRel());
 
-  if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+  if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) {
     CompactUnwindSection =
       Ctx->getMachOSection("__LD", "__compact_unwind",
                            MCSectionMachO::S_ATTR_DEBUG,
                            SectionKind::getReadOnly());
 
+    if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
+      CompactUnwindDwarfEHFrameOnly = 0x04000000;
+  }
+
   // Debug Information.
   DwarfAccelNamesSection =
     Ctx->getMachOSection("__DWARF", "__apple_names",
@@ -223,9 +227,13 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
 }
 
 void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
-  // FIXME: Check this. Mips64el is using the base values, which is most likely
-  // incorrect.
-  if (T.getArch() != Triple::mips64el)
+  if (T.getArch() == Triple::mips ||
+      T.getArch() == Triple::mipsel)
+    FDECFIEncoding = dwarf::DW_EH_PE_sdata4;
+  else if (T.getArch() == Triple::mips64 ||
+           T.getArch() == Triple::mips64el)
+    FDECFIEncoding = dwarf::DW_EH_PE_sdata8;
+  else
     FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
 
   if (T.getArch() == Triple::x86) {
@@ -625,6 +633,8 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
   PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding =
     TTypeEncoding = dwarf::DW_EH_PE_absptr;
 
+  CompactUnwindDwarfEHFrameOnly = 0;
+
   EHFrameSection = 0;             // Created on demand.
   CompactUnwindSection = 0;       // Used only by selected targets.
   DwarfAccelNamesSection = 0;     // Used only by selected targets.
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 0d2ce83a8a10..d21ce8d1a117 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
@@ -45,14 +46,15 @@ void MCObjectStreamer::reset() {
   if (Assembler)
     Assembler->reset();
   CurSectionData = 0;
+  CurInsertionPoint = MCSectionData::iterator();
   MCStreamer::reset();
 }
 
 MCFragment *MCObjectStreamer::getCurrentFragment() const {
   assert(getCurrentSectionData() && "No current section!");
 
-  if (!getCurrentSectionData()->empty())
-    return &getCurrentSectionData()->getFragmentList().back();
+  if (CurInsertionPoint != getCurrentSectionData()->getFragmentList().begin())
+    return prior(CurInsertionPoint);
 
   return 0;
 }
@@ -61,8 +63,10 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
   MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
   // When bundling is enabled, we don't want to add data to a fragment that
   // already has instructions (see MCELFStreamer::EmitInstToData for details)
-  if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions()))
-    F = new MCDataFragment(getCurrentSectionData());
+  if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) {
+    F = new MCDataFragment();
+    insert(F);
+  }
   return F;
 }
 
@@ -145,7 +149,7 @@ void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
     return;
   }
   Value = ForceExpAbs(Value);
-  new MCLEBFragment(*Value, false, getCurrentSectionData());
+  insert(new MCLEBFragment(*Value, false));
 }
 
 void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
@@ -155,7 +159,7 @@ void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
     return;
   }
   Value = ForceExpAbs(Value);
-  new MCLEBFragment(*Value, true, getCurrentSectionData());
+  insert(new MCLEBFragment(*Value, true));
 }
 
 void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
@@ -163,10 +167,20 @@ void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
   report_fatal_error("This file format doesn't support weak aliases.");
 }
 
-void MCObjectStreamer::ChangeSection(const MCSection *Section) {
+void MCObjectStreamer::ChangeSection(const MCSection *Section,
+                                     const MCExpr *Subsection) {
   assert(Section && "Cannot switch to a null section!");
 
   CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
+
+  int64_t IntSubsection = 0;
+  if (Subsection &&
+      !Subsection->EvaluateAsAbsolute(IntSubsection, getAssembler()))
+    report_fatal_error("Cannot evaluate subsection number");
+  if (IntSubsection < 0 || IntSubsection > 8192)
+    report_fatal_error("Subsection number out of range");
+  CurInsertionPoint =
+    CurSectionData->getSubsectionInsertionPoint(unsigned(IntSubsection));
 }
 
 void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
@@ -185,7 +199,7 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
 
   // Now that a machine instruction has been assembled into this section, make
   // a line entry for any .loc directive that has been seen.
-  MCLineEntry::Make(this, getCurrentSection());
+  MCLineEntry::Make(this, getCurrentSection().first);
 
   // If this instruction doesn't need relaxation, just emit it as data.
   MCAssembler &Assembler = getAssembler();
@@ -216,8 +230,8 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
 void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
   // Always create a new, separate fragment here, because its size can change
   // during relaxation.
-  MCRelaxableFragment *IF =
-    new MCRelaxableFragment(Inst, getCurrentSectionData());
+  MCRelaxableFragment *IF = new MCRelaxableFragment(Inst);
+  insert(IF);
 
   SmallString<128> Code;
   raw_svector_ostream VecOS(Code);
@@ -258,7 +272,7 @@ void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
     return;
   }
   AddrDelta = ForceExpAbs(AddrDelta);
-  new MCDwarfLineAddrFragment(LineDelta, *AddrDelta, getCurrentSectionData());
+  insert(new MCDwarfLineAddrFragment(LineDelta, *AddrDelta));
 }
 
 void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
@@ -270,7 +284,7 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
     return;
   }
   AddrDelta = ForceExpAbs(AddrDelta);
-  new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData());
+  insert(new MCDwarfCallFrameFragment(*AddrDelta));
 }
 
 void MCObjectStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
@@ -284,8 +298,7 @@ void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment,
                                             unsigned MaxBytesToEmit) {
   if (MaxBytesToEmit == 0)
     MaxBytesToEmit = ByteAlignment;
-  new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
-                      getCurrentSectionData());
+  insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit));
 
   // Update the maximum alignment on the current section if necessary.
   if (ByteAlignment > getCurrentSectionData()->getAlignment())
@@ -302,7 +315,7 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
                                          unsigned char Value) {
   int64_t Res;
   if (Offset->EvaluateAsAbsolute(Res, getAssembler())) {
-    new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+    insert(new MCOrgFragment(*Offset, Value));
     return false;
   }
 
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 9d523774e4eb..8783bc072b17 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -221,6 +221,7 @@ class AsmParser : public MCAsmParser {
 
   bool parseExpression(const MCExpr *&Res);
   virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc);
+  virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
   virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
   virtual bool parseAbsoluteExpression(int64_t &Res);
 
@@ -601,7 +602,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
   // If we are generating dwarf for assembly source files save the initial text
   // section and generate a .file directive.
   if (getContext().getGenDwarfForAssembly()) {
-    getContext().setGenDwarfSection(getStreamer().getCurrentSection());
+    getContext().setGenDwarfSection(getStreamer().getCurrentSection().first);
     MCSymbol *SectionStartSym = getContext().CreateTempSymbol();
     getStreamer().EmitLabel(SectionStartSym);
     getContext().setGenDwarfSectionStartSym(SectionStartSym);
@@ -666,7 +667,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
 }
 
 void AsmParser::checkForValidSection() {
-  if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) {
+  if (!ParsingInlineAsm && !getStreamer().getCurrentSection().first) {
     TokError("expected section directive before assembly directive");
     Out.InitToTextSection();
   }
@@ -869,6 +870,10 @@ bool AsmParser::parseExpression(const MCExpr *&Res) {
   return parseExpression(Res, EndLoc);
 }
 
+bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+  return ParsePrimaryExpr(Res, EndLoc);
+}
+
 const MCExpr *
 AsmParser::ApplyModifierToExpr(const MCExpr *E,
                                MCSymbolRefExpr::VariantKind Variant) {
@@ -1488,7 +1493,8 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
   // section is the initial text section then generate a .loc directive for
   // the instruction.
   if (!HadError && getContext().getGenDwarfForAssembly() &&
-      getContext().getGenDwarfSection() == getStreamer().getCurrentSection()) {
+      getContext().getGenDwarfSection() ==
+      getStreamer().getCurrentSection().first) {
 
     unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
 
@@ -2479,7 +2485,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
 
   // Check whether we should use optimal code alignment for this .align
   // directive.
-  bool UseCodeAlign = getStreamer().getCurrentSection()->UseCodeAlign();
+  bool UseCodeAlign = getStreamer().getCurrentSection().first->UseCodeAlign();
   if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
       ValueSize == 1 && UseCodeAlign) {
     getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
@@ -4046,19 +4052,17 @@ static int RewritesSort(const void *A, const void *B) {
   if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
     return 1;
 
-  // It's possible to have a SizeDirective rewrite and an Input/Output rewrite
-  // to the same location.  Make sure the SizeDirective rewrite is performed
-  // first.  This also ensure the sort algorithm is stable.
-  if (AsmRewriteA->Kind == AOK_SizeDirective) {
-    assert ((AsmRewriteB->Kind == AOK_Input || AsmRewriteB->Kind == AOK_Output) &&
-            "Expected an Input/Output rewrite!");
+  // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
+  // rewrite to the same location.  Make sure the SizeDirective rewrite is
+  // performed first, then the Imm/ImmPrefix and finally the Input/Output.  This
+  // ensures the sort algorithm is stable.
+  if (AsmRewritePrecedence [AsmRewriteA->Kind] >
+      AsmRewritePrecedence [AsmRewriteB->Kind])
     return -1;
-  }
-  if (AsmRewriteB->Kind == AOK_SizeDirective) {
-    assert ((AsmRewriteA->Kind == AOK_Input || AsmRewriteA->Kind == AOK_Output) &&
-            "Expected an Input/Output rewrite!");
+
+  if (AsmRewritePrecedence [AsmRewriteA->Kind] <
+      AsmRewritePrecedence [AsmRewriteB->Kind])
     return 1;
-  }
   llvm_unreachable ("Unstable rewrite sort.");
 }
 
@@ -4105,12 +4109,8 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
       MCParsedAsmOperand *Operand = Info.ParsedOperands[i];
 
       // Immediate.
-      if (Operand->isImm()) {
-        if (Operand->needAsmRewrite())
-          AsmStrRewrites.push_back(AsmRewrite(AOK_ImmPrefix,
-                                              Operand->getStartLoc()));
+      if (Operand->isImm())
         continue;
-      }
 
       // Register operand.
       if (Operand->isReg() && !Operand->needAddressOf()) {
@@ -4124,31 +4124,29 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
       // Expr/Input or Output.
       bool IsVarDecl;
       unsigned Length, Size, Type;
-      void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc,
+      StringRef SymName = Operand->getSymName();
+      if (SymName.empty())
+        continue;
+
+      void *OpDecl = SI.LookupInlineAsmIdentifier(SymName, AsmLoc,
                                                   Length, Size, Type,
                                                   IsVarDecl);
       if (!OpDecl)
         continue;
 
       bool isOutput = (i == 1) && Desc.mayStore();
-      if (Operand->isMem() && Operand->needSizeDirective())
-        AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective,
-                                            Operand->getStartLoc(), /*Len*/0,
-                                            Operand->getMemSize()));
-
+      SMLoc Start = SMLoc::getFromPointer(SymName.data());
       if (isOutput) {
         ++InputIdx;
         OutputDecls.push_back(OpDecl);
         OutputDeclsAddressOf.push_back(Operand->needAddressOf());
         OutputConstraints.push_back('=' + Operand->getConstraint().str());
-        AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Operand->getStartLoc(),
-                                            Operand->getNameLen()));
+        AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size()));
       } else {
         InputDecls.push_back(OpDecl);
         InputDeclsAddressOf.push_back(Operand->needAddressOf());
         InputConstraints.push_back(Operand->getConstraint().str());
-        AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Operand->getStartLoc(),
-                                            Operand->getNameLen()));
+        AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size()));
       }
     }
   }
@@ -4184,31 +4182,32 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
 
   // Build the IR assembly string.
   std::string AsmStringIR;
-  AsmRewriteKind PrevKind = AOK_Imm;
   raw_string_ostream OS(AsmStringIR);
-  const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+  const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+  const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
   array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort);
   for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(),
                                              E = AsmStrRewrites.end();
        I != E; ++I) {
-    const char *Loc = (*I).Loc.getPointer();
-    assert(Loc >= Start && "Expected Loc to be after Start!");
-
-    unsigned AdditionalSkip = 0;
     AsmRewriteKind Kind = (*I).Kind;
+    if (Kind == AOK_Delete)
+      continue;
 
-    // Emit everything up to the immediate/expression.  If the previous rewrite
-    // was a size directive, then this has already been done.
-    if (PrevKind != AOK_SizeDirective)
-      OS << StringRef(Start, Loc - Start);
-    PrevKind = Kind;
+    const char *Loc = (*I).Loc.getPointer();
+    assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
+
+    // Emit everything up to the immediate/expression.
+    unsigned Len = Loc - AsmStart;
+    if (Len)
+      OS << StringRef(AsmStart, Len);
 
     // Skip the original expression.
     if (Kind == AOK_Skip) {
-      Start = Loc + (*I).Len;
+      AsmStart = Loc + (*I).Len;
       continue;
     }
 
+    unsigned AdditionalSkip = 0;
     // Rewrite expressions in $N notation.
     switch (Kind) {
     default: break;
@@ -4254,14 +4253,12 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
     }
 
     // Skip the original expression.
-    if (Kind != AOK_SizeDirective)
-      Start = Loc + (*I).Len + AdditionalSkip;
+    AsmStart = Loc + (*I).Len + AdditionalSkip;
   }
 
   // Emit the remainder of the asm string.
-  const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
-  if (Start != AsmEnd)
-    OS << StringRef(Start, AsmEnd - Start);
+  if (AsmStart != AsmEnd)
+    OS << StringRef(AsmStart, AsmEnd - AsmStart);
 
   AsmString = OS.str();
   return false;
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 6d6409fb69e2..7eb8b748348e 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -566,10 +566,10 @@ bool DarwinAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
 /// ParseDirectivePrevious:
 ///   ::= .previous
 bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
-  const MCSection *PreviousSection = getStreamer().getPreviousSection();
-  if (PreviousSection == NULL)
+  MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
+  if (PreviousSection.first == NULL)
       return TokError(".previous without corresponding .section");
-  getStreamer().SwitchSection(PreviousSection);
+  getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
   return false;
 }
 
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 4c45e087445d..3134fc3d8597 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -76,6 +76,7 @@ class ELFAsmParser : public MCAsmParserExtension {
       &ELFAsmParser::ParseDirectiveSymbolAttribute>(".internal");
     addDirectiveHandler<
       &ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
+    addDirectiveHandler<&ELFAsmParser::ParseDirectiveSubsection>(".subsection");
   }
 
   // FIXME: Part of this logic is duplicated in the MCELFStreamer. What is
@@ -147,9 +148,11 @@ class ELFAsmParser : public MCAsmParserExtension {
   bool ParseDirectiveVersion(StringRef, SMLoc);
   bool ParseDirectiveWeakref(StringRef, SMLoc);
   bool ParseDirectiveSymbolAttribute(StringRef, SMLoc);
+  bool ParseDirectiveSubsection(StringRef, SMLoc);
 
 private:
   bool ParseSectionName(StringRef &SectionName);
+  bool ParseSectionArguments(bool IsPush);
 };
 
 }
@@ -191,12 +194,15 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
 
 bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
                                       unsigned Flags, SectionKind Kind) {
-  if (getLexer().isNot(AsmToken::EndOfStatement))
-    return TokError("unexpected token in section switching directive");
-  Lex();
+  const MCExpr *Subsection = 0;
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    if (getParser().parseExpression(Subsection))
+      return true;
+  }
 
   getStreamer().SwitchSection(getContext().getELFSection(
-                                Section, Type, Flags, Kind));
+                                Section, Type, Flags, Kind),
+                              Subsection);
 
   return false;
 }
@@ -316,7 +322,7 @@ static int parseSectionFlags(StringRef flagsStr) {
 bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
   getStreamer().PushSection();
 
-  if (ParseDirectiveSection(s, loc)) {
+  if (ParseSectionArguments(/*IsPush=*/true)) {
     getStreamer().PopSection();
     return true;
   }
@@ -332,6 +338,10 @@ bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
 
 // FIXME: This is a work in progress.
 bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+  return ParseSectionArguments(/*IsPush=*/false);
+}
+
+bool ELFAsmParser::ParseSectionArguments(bool IsPush) {
   StringRef SectionName;
 
   if (ParseSectionName(SectionName))
@@ -341,6 +351,7 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
   int64_t Size = 0;
   StringRef GroupName;
   unsigned Flags = 0;
+  const MCExpr *Subsection = 0;
 
   // Set the defaults first.
   if (SectionName == ".fini" || SectionName == ".init" ||
@@ -352,6 +363,14 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
   if (getLexer().is(AsmToken::Comma)) {
     Lex();
 
+    if (IsPush && getLexer().isNot(AsmToken::String)) {
+      if (getParser().parseExpression(Subsection))
+        return true;
+      if (getLexer().isNot(AsmToken::Comma))
+        goto EndStmt;
+      Lex();
+    }
+   
     if (getLexer().isNot(AsmToken::String))
       return TokError("expected string in directive");
 
@@ -408,6 +427,7 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
     }
   }
 
+EndStmt:
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in directive");
 
@@ -444,15 +464,16 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
   SectionKind Kind = computeSectionKind(Flags);
   getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
                                                          Flags, Kind, Size,
-                                                         GroupName));
+                                                         GroupName),
+                              Subsection);
   return false;
 }
 
 bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
-  const MCSection *PreviousSection = getStreamer().getPreviousSection();
-  if (PreviousSection == NULL)
+  MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
+  if (PreviousSection.first == NULL)
       return TokError(".previous without corresponding .section");
-  getStreamer().SwitchSection(PreviousSection);
+  getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
 
   return false;
 }
@@ -613,6 +634,20 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
   return false;
 }
 
+bool ELFAsmParser::ParseDirectiveSubsection(StringRef, SMLoc) {
+  const MCExpr *Subsection = 0;
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    if (getParser().parseExpression(Subsection))
+     return true;
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  getStreamer().SubSection(Subsection);
+  return false;
+}
+
 namespace llvm {
 
 MCAsmParserExtension *createELFAsmParser() {
diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp
index 0e04c5537acb..8ae724facb0c 100644
--- a/lib/MC/MCPureStreamer.cpp
+++ b/lib/MC/MCPureStreamer.cpp
@@ -12,9 +12,8 @@
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCObjectStreamer.h"
-// FIXME: Remove this.
-#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ErrorHandling.h"
 
@@ -113,25 +112,22 @@ void MCPureStreamer::InitSections() {
 }
 
 void MCPureStreamer::InitToTextSection() {
-  // FIMXE: To what!?
-  SwitchSection(getContext().getMachOSection("__TEXT", "__text",
-                                    MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                                    0, SectionKind::getText()));
+  SwitchSection(getContext().getObjectFileInfo()->getTextSection());
 }
 
 void MCPureStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
   assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(getCurrentSection() && "Cannot emit before setting section!");
+  assert(getCurrentSection().first && "Cannot emit before setting section!");
 
-  Symbol->setSection(*getCurrentSection());
+  Symbol->setSection(*getCurrentSection().first);
 
   MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
 
   // We have to create a new fragment if this is an atom defining symbol,
   // fragments cannot span atoms.
   if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()))
-    new MCDataFragment(getCurrentSectionData());
+    insert(new MCDataFragment());
 
   // FIXME: This is wasteful, we don't necessarily need to create a data
   // fragment. Instead, we should mark the symbol as pointing into the data
@@ -166,8 +162,7 @@ void MCPureStreamer::EmitValueToAlignment(unsigned ByteAlignment,
   // MCObjectStreamer.
   if (MaxBytesToEmit == 0)
     MaxBytesToEmit = ByteAlignment;
-  new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
-                      getCurrentSectionData());
+  insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit));
 
   // Update the maximum alignment on the current section if necessary.
   if (ByteAlignment > getCurrentSectionData()->getAlignment())
@@ -180,8 +175,8 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
   // MCObjectStreamer.
   if (MaxBytesToEmit == 0)
     MaxBytesToEmit = ByteAlignment;
-  MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
-                                           getCurrentSectionData());
+  MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit);
+  insert(F);
   F->setEmitNops(true);
 
   // Update the maximum alignment on the current section if necessary.
@@ -191,13 +186,13 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
 
 bool MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
                                        unsigned char Value) {
-  new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+  insert(new MCOrgFragment(*Offset, Value));
   return false;
 }
 
 void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
-  MCRelaxableFragment *IF =
-    new MCRelaxableFragment(Inst, getCurrentSectionData());
+  MCRelaxableFragment *IF = new MCRelaxableFragment(Inst);
+  insert(IF);
 
   // Add the fixups and data.
   //
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index aac93775aebe..6cedf0655cfd 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -29,7 +29,8 @@ bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
 }
 
 void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
-                                         raw_ostream &OS) const {
+                                         raw_ostream &OS,
+                                         const MCExpr *Subsection) const {
 
   // standard sections don't require the '.section'
   if (ShouldOmitSectionDirective(SectionName, MAI)) {
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index 0775cfa776d7..bf1a984a9bf6 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -10,6 +10,7 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/raw_ostream.h"
@@ -32,10 +33,14 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
 }
 
 void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
-                                        raw_ostream &OS) const {
+                                        raw_ostream &OS,
+                                        const MCExpr *Subsection) const {
 
   if (ShouldOmitSectionDirective(SectionName, MAI)) {
-    OS << '\t' << getSectionName() << '\n';
+    OS << '\t' << getSectionName();
+    if (Subsection)
+      OS << '\t' << *Subsection;
+    OS << '\n';
     return;
   }
 
@@ -129,6 +134,9 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
   if (Flags & ELF::SHF_GROUP)
     OS << "," << Group->getName() << ",comdat";
   OS << '\n';
+
+  if (Subsection)
+    OS << "\t.subsection\t" << *Subsection << '\n';
 }
 
 bool MCSectionELF::UseCodeAlign() const {
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index fc323155befa..870451313bb1 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -91,7 +91,8 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
 }
 
 void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
-                                          raw_ostream &OS) const {
+                                          raw_ostream &OS,
+                                          const MCExpr *Subsection) const {
   OS << "\t.section\t" << getSegmentName() << ',' << getSectionName();
 
   // Get the section type and attributes.
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 51ef41554266..4839c3470c76 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -24,8 +24,7 @@ using namespace llvm;
 MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx)
     : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
       CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) {
-  const MCSection *section = NULL;
-  SectionStack.push_back(std::make_pair(section, section));
+  SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
 }
 
 MCStreamer::~MCStreamer() {
@@ -40,9 +39,8 @@ void MCStreamer::reset() {
   EmitDebugFrame = false;
   CurrentW64UnwindInfo = 0;
   LastSymbol = 0;
-  const MCSection *section = NULL;
   SectionStack.clear();
-  SectionStack.push_back(std::make_pair(section, section));
+  SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
 }
 
 const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context,
@@ -172,7 +170,7 @@ void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
 
 MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
   if (FrameInfos.empty())
-    return NULL;
+    return 0;
   return &FrameInfos.back();
 }
 
@@ -188,15 +186,15 @@ void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
 
 void MCStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(getCurrentSection() && "Cannot emit before setting section!");
-  Symbol->setSection(*getCurrentSection());
+  assert(getCurrentSection().first && "Cannot emit before setting section!");
+  Symbol->setSection(*getCurrentSection().first);
   LastSymbol = Symbol;
 }
 
 void MCStreamer::EmitDebugLabel(MCSymbol *Symbol) {
   assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(getCurrentSection() && "Cannot emit before setting section!");
-  Symbol->setSection(*getCurrentSection());
+  assert(getCurrentSection().first && "Cannot emit before setting section!");
+  Symbol->setSection(*getCurrentSection().first);
   LastSymbol = Symbol;
 }
 
@@ -473,7 +471,7 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
     report_fatal_error("Frame register and offset already specified!");
   if (Offset & 0x0F)
     report_fatal_error("Misaligned frame pointer offset!");
-  MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, NULL, Register, Offset);
+  MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, 0, Register, Offset);
   CurFrame->LastFrameInst = CurFrame->Instructions.size();
   CurFrame->Instructions.push_back(Inst);
 }
@@ -623,5 +621,5 @@ void MCStreamer::Finish() {
 
 MCSymbolData &MCStreamer::getOrCreateSymbolData(MCSymbol *Symbol) {
   report_fatal_error("Not supported!");
-  return *(static_cast<MCSymbolData*> (NULL));
+  return *(static_cast<MCSymbolData*>(0));
 }
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 6dffed73dfb3..de7e67b4e37b 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -689,13 +689,8 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
   ++Reloc.Symb->Relocations;
 
   Reloc.Data.VirtualAddress += Fixup.getOffset();
-
-  unsigned FixupKind = Fixup.getKind();
-
-  if (CrossSection)
-    FixupKind = FK_PCRel_4;
-
-  Reloc.Data.Type = TargetObjectWriter->getRelocType(FixupKind);
+  Reloc.Data.Type = TargetObjectWriter->getRelocType(Target, Fixup,
+                                                     CrossSection);
 
   // FIXME: Can anyone explain what this does other than adjust for the size
   // of the offset?
diff --git a/lib/Makefile b/lib/Makefile
index 043eda6b99b7..57f016bc8905 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,8 @@ LEVEL = ..
 include $(LEVEL)/Makefile.config
 
 PARALLEL_DIRS := IR AsmParser Bitcode Archive Analysis Transforms CodeGen \
-                Target ExecutionEngine Linker MC Object Option DebugInfo
+                 Target ExecutionEngine Linker MC Object Option DebugInfo \
+								 IRReader
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index ca90e0e3c3fc..46acd4d53702 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -429,7 +429,7 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
 }
 
 COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
-  : ObjectFile(Binary::ID_COFF, Object, ec)
+  : ObjectFile(Binary::ID_COFF, Object)
   , Header(0)
   , SectionTable(0)
   , SymbolTable(0)
diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp
index c9c341a207c7..d88f96f298c3 100644
--- a/lib/Object/MachOObject.cpp
+++ b/lib/Object/MachOObject.cpp
@@ -61,7 +61,7 @@ static void ReadInMemoryStruct(const MachOObject &MOO,
 MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_,
                          bool Is64Bit_)
   : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_),
-    IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()),
+    IsSwappedEndian(IsLittleEndian != sys::IsLittleEndianHost),
     HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) {
   // Load the common header.
   memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header));
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 6501df9fb986..2f5048688c05 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -15,7 +15,9 @@
 #include "llvm/Object/MachO.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include <cctype>
 #include <cstring>
@@ -27,237 +29,523 @@ using namespace object;
 namespace llvm {
 namespace object {
 
-MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO,
-                                 error_code &ec)
-    : ObjectFile(Binary::ID_MachO, Object, ec),
-      MachOObj(MOO),
-      RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {
-  DataRefImpl DRI;
-  moveToNextSection(DRI);
-  uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
-  while (DRI.d.a < LoadCommandCount) {
-    Sections.push_back(DRI);
-    DRI.d.b++;
-    moveToNextSection(DRI);
+struct SymbolTableEntryBase {
+  uint32_t StringIndex;
+  uint8_t Type;
+  uint8_t SectionIndex;
+  uint16_t Flags;
+};
+
+struct SectionBase {
+  char Name[16];
+  char SegmentName[16];
+};
+
+template<typename T>
+static void SwapValue(T &Value) {
+  Value = sys::SwapByteOrder(Value);
+}
+
+template<typename T>
+static void SwapStruct(T &Value);
+
+template<>
+void SwapStruct(macho::RelocationEntry &H) {
+  SwapValue(H.Word0);
+  SwapValue(H.Word1);
+}
+
+template<>
+void SwapStruct(macho::LoadCommand &L) {
+  SwapValue(L.Type);
+  SwapValue(L.Size);
+}
+
+template<>
+void SwapStruct(SymbolTableEntryBase &S) {
+  SwapValue(S.StringIndex);
+  SwapValue(S.Flags);
+}
+
+template<>
+void SwapStruct(macho::Section &S) {
+  SwapValue(S.Address);
+  SwapValue(S.Size);
+  SwapValue(S.Offset);
+  SwapValue(S.Align);
+  SwapValue(S.RelocationTableOffset);
+  SwapValue(S.NumRelocationTableEntries);
+  SwapValue(S.Flags);
+  SwapValue(S.Reserved1);
+  SwapValue(S.Reserved2);
+}
+
+template<>
+void SwapStruct(macho::Section64 &S) {
+  SwapValue(S.Address);
+  SwapValue(S.Size);
+  SwapValue(S.Offset);
+  SwapValue(S.Align);
+  SwapValue(S.RelocationTableOffset);
+  SwapValue(S.NumRelocationTableEntries);
+  SwapValue(S.Flags);
+  SwapValue(S.Reserved1);
+  SwapValue(S.Reserved2);
+  SwapValue(S.Reserved3);
+}
+
+template<>
+void SwapStruct(macho::SymbolTableEntry &S) {
+  SwapValue(S.StringIndex);
+  SwapValue(S.Flags);
+  SwapValue(S.Value);
+}
+
+template<>
+void SwapStruct(macho::Symbol64TableEntry &S) {
+  SwapValue(S.StringIndex);
+  SwapValue(S.Flags);
+  SwapValue(S.Value);
+}
+
+template<>
+void SwapStruct(macho::Header &H) {
+  SwapValue(H.Magic);
+  SwapValue(H.CPUType);
+  SwapValue(H.CPUSubtype);
+  SwapValue(H.FileType);
+  SwapValue(H.NumLoadCommands);
+  SwapValue(H.SizeOfLoadCommands);
+  SwapValue(H.Flags);
+}
+
+template<>
+void SwapStruct(macho::SymtabLoadCommand &C) {
+  SwapValue(C.Type);
+  SwapValue(C.Size);
+  SwapValue(C.SymbolTableOffset);
+  SwapValue(C.NumSymbolTableEntries);
+  SwapValue(C.StringTableOffset);
+  SwapValue(C.StringTableSize);
+}
+
+template<>
+void SwapStruct(macho::LinkeditDataLoadCommand &C) {
+  SwapValue(C.Type);
+  SwapValue(C.Size);
+  SwapValue(C.DataOffset);
+  SwapValue(C.DataSize);
+}
+
+template<>
+void SwapStruct(macho::SegmentLoadCommand &C) {
+  SwapValue(C.Type);
+  SwapValue(C.Size);
+  SwapValue(C.VMAddress);
+  SwapValue(C.VMSize);
+  SwapValue(C.FileOffset);
+  SwapValue(C.FileSize);
+  SwapValue(C.MaxVMProtection);
+  SwapValue(C.InitialVMProtection);
+  SwapValue(C.NumSections);
+  SwapValue(C.Flags);
+}
+
+template<>
+void SwapStruct(macho::Segment64LoadCommand &C) {
+  SwapValue(C.Type);
+  SwapValue(C.Size);
+  SwapValue(C.VMAddress);
+  SwapValue(C.VMSize);
+  SwapValue(C.FileOffset);
+  SwapValue(C.FileSize);
+  SwapValue(C.MaxVMProtection);
+  SwapValue(C.InitialVMProtection);
+  SwapValue(C.NumSections);
+  SwapValue(C.Flags);
+}
+
+template<typename T>
+T getStruct(const MachOObjectFile *O, const char *P) {
+  T Cmd;
+  memcpy(&Cmd, P, sizeof(T));
+  if (O->isLittleEndian() != sys::IsLittleEndianHost)
+    SwapStruct(Cmd);
+  return Cmd;
+}
+
+static macho::SegmentLoadCommand
+getSegmentLoadCommand(const MachOObjectFile *O,
+                      const MachOObjectFile::LoadCommandInfo &L) {
+  return getStruct<macho::SegmentLoadCommand>(O, L.Ptr);
+}
+
+static macho::Segment64LoadCommand
+getSegment64LoadCommand(const MachOObjectFile *O,
+                        const MachOObjectFile::LoadCommandInfo &L) {
+  return getStruct<macho::Segment64LoadCommand>(O, L.Ptr);
+}
+
+static uint32_t
+getSegmentLoadCommandNumSections(const MachOObjectFile *O,
+                                 const MachOObjectFile::LoadCommandInfo &L) {
+  if (O->is64Bit()) {
+    macho::Segment64LoadCommand S = getSegment64LoadCommand(O, L);
+    return S.NumSections;
   }
+  macho::SegmentLoadCommand S = getSegmentLoadCommand(O, L);
+  return S.NumSections;
 }
 
+static const SectionBase *
+getSectionBase(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L,
+               unsigned Sec) {
+  uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr);
 
-ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+  bool Is64 = O->is64Bit();
+  unsigned SegmentLoadSize = Is64 ? sizeof(macho::Segment64LoadCommand) :
+                                    sizeof(macho::SegmentLoadCommand);
+  unsigned SectionSize = Is64 ? sizeof(macho::Section64) :
+                                sizeof(macho::Section);
+
+  uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + Sec * SectionSize;
+  return reinterpret_cast<const SectionBase*>(SectionAddr);
+}
+
+static const char *getPtr(const MachOObjectFile *O, size_t Offset) {
+  return O->getData().substr(Offset, 1).data();
+}
+
+static const char *getSymbolTableEntryPtr(const MachOObjectFile *O,
+                                          DataRefImpl DRI) {
+  macho::SymtabLoadCommand S = O->getSymtabLoadCommand();
+
+  unsigned Index = DRI.d.b;
+
+  unsigned SymbolTableEntrySize = O->is64Bit() ?
+    sizeof(macho::Symbol64TableEntry) :
+    sizeof(macho::SymbolTableEntry);
+
+  uint64_t Offset = S.SymbolTableOffset + Index * SymbolTableEntrySize;
+  return getPtr(O, Offset);
+}
+
+static SymbolTableEntryBase
+getSymbolTableEntryBase(const MachOObjectFile *O, DataRefImpl DRI) {
+  const char *P = getSymbolTableEntryPtr(O, DRI);
+  return getStruct<SymbolTableEntryBase>(O, P);
+}
+
+static StringRef parseSegmentOrSectionName(const char *P) {
+  if (P[15] == 0)
+    // Null terminated.
+    return P;
+  // Not null terminated, so this is a 16 char string.
+  return StringRef(P, 16);
+}
+
+// Helper to advance a section or symbol iterator multiple increments at a time.
+template<class T>
+static error_code advance(T &it, size_t Val) {
   error_code ec;
-  std::string Err;
-  MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err);
-  if (!MachOObj)
-    return NULL;
-  // MachOObject takes ownership of the Buffer we passed to it, and
-  // MachOObjectFile does, too, so we need to make sure they don't get the
-  // same object. A MemoryBuffer is cheap (it's just a reference to memory,
-  // not a copy of the memory itself), so just make a new copy here for
-  // the MachOObjectFile.
-  MemoryBuffer *NewBuffer =
-    MemoryBuffer::getMemBuffer(Buffer->getBuffer(),
-                               Buffer->getBufferIdentifier(), false);
-  return new MachOObjectFile(NewBuffer, MachOObj, ec);
-}
-
-/*===-- Symbols -----------------------------------------------------------===*/
-
-void MachOObjectFile::moveToNextSymbol(DataRefImpl &DRI) const {
-  uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
-  while (DRI.d.a < LoadCommandCount) {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-    if (LCI.Command.Type == macho::LCT_Symtab) {
-      InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
-      MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
-      if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries)
-        return;
+  while (Val--) {
+    it.increment(ec);
+  }
+  return ec;
+}
+
+template<class T>
+static void advanceTo(T &it, size_t Val) {
+  if (error_code ec = advance(it, Val))
+    report_fatal_error(ec.message());
+}
+
+static unsigned getCPUType(const MachOObjectFile *O) {
+  return O->getHeader().CPUType;
+}
+
+static void printRelocationTargetName(const MachOObjectFile *O,
+                                      const macho::RelocationEntry &RE,
+                                      raw_string_ostream &fmt) {
+  bool IsScattered = O->isRelocationScattered(RE);
+
+  // Target of a scattered relocation is an address.  In the interest of
+  // generating pretty output, scan through the symbol table looking for a
+  // symbol that aligns with that address.  If we find one, print it.
+  // Otherwise, we just print the hex address of the target.
+  if (IsScattered) {
+    uint32_t Val = O->getPlainRelocationSymbolNum(RE);
+
+    error_code ec;
+    for (symbol_iterator SI = O->begin_symbols(), SE = O->end_symbols();
+         SI != SE; SI.increment(ec)) {
+      if (ec) report_fatal_error(ec.message());
+
+      uint64_t Addr;
+      StringRef Name;
+
+      if ((ec = SI->getAddress(Addr)))
+        report_fatal_error(ec.message());
+      if (Addr != Val) continue;
+      if ((ec = SI->getName(Name)))
+        report_fatal_error(ec.message());
+      fmt << Name;
+      return;
     }
 
-    DRI.d.a++;
-    DRI.d.b = 0;
+    // If we couldn't find a symbol that this relocation refers to, try
+    // to find a section beginning instead.
+    for (section_iterator SI = O->begin_sections(), SE = O->end_sections();
+         SI != SE; SI.increment(ec)) {
+      if (ec) report_fatal_error(ec.message());
+
+      uint64_t Addr;
+      StringRef Name;
+
+      if ((ec = SI->getAddress(Addr)))
+        report_fatal_error(ec.message());
+      if (Addr != Val) continue;
+      if ((ec = SI->getName(Name)))
+        report_fatal_error(ec.message());
+      fmt << Name;
+      return;
+    }
+
+    fmt << format("0x%x", Val);
+    return;
   }
-}
 
-void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI,
-    InMemoryStruct<macho::SymbolTableEntry> &Res) const {
-  InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+  StringRef S;
+  bool isExtern = O->getPlainRelocationExternal(RE);
+  uint64_t Val = O->getAnyRelocationAddress(RE);
 
-  if (RegisteredStringTable != DRI.d.a) {
-    MachOObj->RegisterStringTable(*SymtabLoadCmd);
-    RegisteredStringTable = DRI.d.a;
+  if (isExtern) {
+    symbol_iterator SI = O->begin_symbols();
+    advanceTo(SI, Val);
+    SI->getName(S);
+  } else {
+    section_iterator SI = O->begin_sections();
+    advanceTo(SI, Val);
+    SI->getName(S);
   }
 
-  MachOObj->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
-                                 Res);
+  fmt << S;
 }
 
-void MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI,
-    InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
-  InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+static uint32_t getPlainRelocationAddress(const macho::RelocationEntry &RE) {
+  return RE.Word0;
+}
 
-  if (RegisteredStringTable != DRI.d.a) {
-    MachOObj->RegisterStringTable(*SymtabLoadCmd);
-    RegisteredStringTable = DRI.d.a;
-  }
+static unsigned
+getScatteredRelocationAddress(const macho::RelocationEntry &RE) {
+  return RE.Word0 & 0xffffff;
+}
+
+static bool getPlainRelocationPCRel(const MachOObjectFile *O,
+                                    const macho::RelocationEntry &RE) {
+  if (O->isLittleEndian())
+    return (RE.Word1 >> 24) & 1;
+  return (RE.Word1 >> 7) & 1;
+}
 
-  MachOObj->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
-                                   Res);
+static bool
+getScatteredRelocationPCRel(const MachOObjectFile *O,
+                            const macho::RelocationEntry &RE) {
+  return (RE.Word0 >> 30) & 1;
 }
 
+static unsigned getPlainRelocationLength(const MachOObjectFile *O,
+                                         const macho::RelocationEntry &RE) {
+  if (O->isLittleEndian())
+    return (RE.Word1 >> 25) & 3;
+  return (RE.Word1 >> 5) & 3;
+}
 
-error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI,
-                                          SymbolRef &Result) const {
-  DRI.d.b++;
-  moveToNextSymbol(DRI);
-  Result = SymbolRef(DRI, this);
-  return object_error::success;
+static unsigned
+getScatteredRelocationLength(const macho::RelocationEntry &RE) {
+  return (RE.Word0 >> 28) & 3;
 }
 
-error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
-                                          StringRef &Result) const {
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    Result = MachOObj->getStringAtIndex(Entry->StringIndex);
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+static unsigned getPlainRelocationType(const MachOObjectFile *O,
+                                       const macho::RelocationEntry &RE) {
+  if (O->isLittleEndian())
+    return RE.Word1 >> 28;
+  return RE.Word1 & 0xf;
+}
+
+static unsigned getScatteredRelocationType(const macho::RelocationEntry &RE) {
+  return (RE.Word0 >> 24) & 0xf;
+}
+
+static uint32_t getSectionFlags(const MachOObjectFile *O,
+                                DataRefImpl Sec) {
+  if (O->is64Bit()) {
+    macho::Section64 Sect = O->getSection64(Sec);
+    return Sect.Flags;
   }
-  return object_error::success;
+  macho::Section Sect = O->getSection(Sec);
+  return Sect.Flags;
 }
 
-error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl DRI,
-                                                uint64_t &Result) const {
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    Result = Entry->Value;
-    if (Entry->SectionIndex) {
-      InMemoryStruct<macho::Section64> Section;
-      getSection64(Sections[Entry->SectionIndex-1], Section);
-      Result += Section->Offset - Section->Address;
+MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
+                                 bool IsLittleEndian, bool Is64bits,
+                                 error_code &ec)
+    : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
+      SymtabLoadCmd(NULL) {
+  uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
+  macho::LoadCommandType SegmentLoadType = is64Bit() ?
+    macho::LCT_Segment64 : macho::LCT_Segment;
+
+  MachOObjectFile::LoadCommandInfo Load = getFirstLoadCommandInfo();
+  for (unsigned I = 0; ; ++I) {
+    if (Load.C.Type == macho::LCT_Symtab) {
+      assert(!SymtabLoadCmd && "Multiple symbol tables");
+      SymtabLoadCmd = Load.Ptr;
     }
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    Result = Entry->Value;
-    if (Entry->SectionIndex) {
-      InMemoryStruct<macho::Section> Section;
-      getSection(Sections[Entry->SectionIndex-1], Section);
-      Result += Section->Offset - Section->Address;
+
+    if (Load.C.Type == SegmentLoadType) {
+      uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
+      for (unsigned J = 0; J < NumSections; ++J) {
+        const SectionBase *Sec = getSectionBase(this, Load, J);
+        Sections.push_back(reinterpret_cast<const char*>(Sec));
+      }
     }
+
+    if (I == LoadCommandCount - 1)
+      break;
+    else
+      Load = getNextLoadCommandInfo(Load);
   }
+}
+
+error_code MachOObjectFile::getSymbolNext(DataRefImpl Symb,
+                                          SymbolRef &Res) const {
+  Symb.d.b++;
+  Res = SymbolRef(Symb, this);
+  return object_error::success;
+}
 
+error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
+                                          StringRef &Res) const {
+  macho::SymtabLoadCommand S = getSymtabLoadCommand();
+  const char *StringTable = getPtr(this, S.StringTableOffset);
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+  const char *Start = &StringTable[Entry.StringIndex];
+  Res = StringRef(Start);
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
-                                             uint64_t &Result) const {
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    Result = Entry->Value;
+error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
+                                             uint64_t &Res) const {
+  if (is64Bit()) {
+    macho::Symbol64TableEntry Entry = getSymbol64TableEntry(Symb);
+    Res = Entry.Value;
   } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    Result = Entry->Value;
+    macho::SymbolTableEntry Entry = getSymbolTableEntry(Symb);
+    Res = Entry.Value;
   }
   return object_error::success;
 }
 
+error_code
+MachOObjectFile::getSymbolFileOffset(DataRefImpl Symb,
+                                     uint64_t &Res) const {
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+  getSymbolAddress(Symb, Res);
+  if (Entry.SectionIndex) {
+    uint64_t Delta;
+    DataRefImpl SecRel;
+    SecRel.d.a = Entry.SectionIndex-1;
+    if (is64Bit()) {
+      macho::Section64 Sec = getSection64(SecRel);
+      Delta = Sec.Offset - Sec.Address;
+    } else {
+      macho::Section Sec = getSection(SecRel);
+      Delta = Sec.Offset - Sec.Address;
+    }
+
+    Res += Delta;
+  }
+
+  return object_error::success;
+}
+
 error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
                                           uint64_t &Result) const {
-  uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
   uint64_t BeginOffset;
   uint64_t EndOffset = 0;
   uint8_t SectionIndex;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    BeginOffset = Entry->Value;
-    SectionIndex = Entry->SectionIndex;
-    if (!SectionIndex) {
-      uint32_t flags = SymbolRef::SF_None;
-      getSymbolFlags(DRI, flags);
-      if (flags & SymbolRef::SF_Common)
-        Result = Entry->Value;
-      else
-        Result = UnknownAddressOrSize;
-      return object_error::success;
-    }
-    // Unfortunately symbols are unsorted so we need to touch all
-    // symbols from load command
-    DRI.d.b = 0;
-    uint32_t Command = DRI.d.a;
-    while (Command == DRI.d.a) {
-      moveToNextSymbol(DRI);
-      if (DRI.d.a < LoadCommandCount) {
-        getSymbol64TableEntry(DRI, Entry);
-        if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
-          if (!EndOffset || Entry->Value < EndOffset)
-            EndOffset = Entry->Value;
-      }
-      DRI.d.b++;
-    }
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    BeginOffset = Entry->Value;
-    SectionIndex = Entry->SectionIndex;
-    if (!SectionIndex) {
-      uint32_t flags = SymbolRef::SF_None;
-      getSymbolFlags(DRI, flags);
-      if (flags & SymbolRef::SF_Common)
-        Result = Entry->Value;
-      else
-        Result = UnknownAddressOrSize;
-      return object_error::success;
-    }
-    // Unfortunately symbols are unsorted so we need to touch all
-    // symbols from load command
-    DRI.d.b = 0;
-    uint32_t Command = DRI.d.a;
-    while (Command == DRI.d.a) {
-      moveToNextSymbol(DRI);
-      if (DRI.d.a < LoadCommandCount) {
-        getSymbolTableEntry(DRI, Entry);
-        if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
-          if (!EndOffset || Entry->Value < EndOffset)
-            EndOffset = Entry->Value;
-      }
-      DRI.d.b++;
-    }
+
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI);
+  uint64_t Value;
+  getSymbolAddress(DRI, Value);
+
+  BeginOffset = Value;
+
+  SectionIndex = Entry.SectionIndex;
+  if (!SectionIndex) {
+    uint32_t flags = SymbolRef::SF_None;
+    this->getSymbolFlags(DRI, flags);
+    if (flags & SymbolRef::SF_Common)
+      Result = Value;
+    else
+      Result = UnknownAddressOrSize;
+    return object_error::success;
+  }
+  // Unfortunately symbols are unsorted so we need to touch all
+  // symbols from load command
+  macho::SymtabLoadCommand Symtab = getSymtabLoadCommand();
+  DRI.d.b = 0;
+  while (DRI.d.b <= Symtab.NumSymbolTableEntries) {
+    Entry = getSymbolTableEntryBase(this, DRI);
+    getSymbolAddress(DRI, Value);
+    if (Entry.SectionIndex == SectionIndex && Value > BeginOffset)
+      if (!EndOffset || Value < EndOffset)
+        EndOffset = Value;
+    DRI.d.b++;
   }
   if (!EndOffset) {
     uint64_t Size;
-    getSectionSize(Sections[SectionIndex-1], Size);
-    getSectionAddress(Sections[SectionIndex-1], EndOffset);
+    DataRefImpl Sec;
+    Sec.d.a = SectionIndex-1;
+    getSectionSize(Sec, Size);
+    getSectionAddress(Sec, EndOffset);
     EndOffset += Size;
   }
   Result = EndOffset - BeginOffset;
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
-                                                char &Result) const {
-  uint8_t Type, Flags;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    Type = Entry->Type;
-    Flags = Entry->Flags;
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    Type = Entry->Type;
-    Flags = Entry->Flags;
+error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
+                                          SymbolRef::Type &Res) const {
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+  uint8_t n_type = Entry.Type;
+
+  Res = SymbolRef::ST_Other;
+
+  // If this is a STAB debugging symbol, we can do nothing more.
+  if (n_type & MachO::NlistMaskStab) {
+    Res = SymbolRef::ST_Debug;
+    return object_error::success;
   }
 
+  switch (n_type & MachO::NlistMaskType) {
+    case MachO::NListTypeUndefined :
+      Res = SymbolRef::ST_Unknown;
+      break;
+    case MachO::NListTypeSection :
+      Res = SymbolRef::ST_Function;
+      break;
+  }
+  return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
+                                                char &Res) const {
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+  uint8_t Type = Entry.Type;
+  uint16_t Flags = Entry.Flags;
+
   char Char;
   switch (Type & macho::STF_TypeMask) {
     case macho::STT_Undefined:
@@ -274,25 +562,16 @@ error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
 
   if (Flags & (macho::STF_External | macho::STF_PrivateExtern))
     Char = toupper(static_cast<unsigned char>(Char));
-  Result = Char;
+  Res = Char;
   return object_error::success;
 }
 
 error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
                                            uint32_t &Result) const {
-  uint16_t MachOFlags;
-  uint8_t MachOType;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    MachOFlags = Entry->Flags;
-    MachOType = Entry->Type;
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    MachOFlags = Entry->Flags;
-    MachOType = Entry->Type;
-  }
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI);
+
+  uint8_t MachOType = Entry.Type;
+  uint16_t MachOFlags = Entry.Flags;
 
   // TODO: Correctly set SF_ThreadLocal
   Result = SymbolRef::SF_None;
@@ -318,332 +597,153 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
-                                             section_iterator &Res) const {
-  uint8_t index;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(Symb, Entry);
-    index = Entry->SectionIndex;
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(Symb, Entry);
-    index = Entry->SectionIndex;
-  }
+error_code
+MachOObjectFile::getSymbolSection(DataRefImpl Symb,
+                                  section_iterator &Res) const {
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+  uint8_t index = Entry.SectionIndex;
 
-  if (index == 0)
+  if (index == 0) {
     Res = end_sections();
-  else
-    Res = section_iterator(SectionRef(Sections[index-1], this));
-
-  return object_error::success;
-}
-
-error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
-                                          SymbolRef::Type &Res) const {
-  uint8_t n_type;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(Symb, Entry);
-    n_type = Entry->Type;
   } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(Symb, Entry);
-    n_type = Entry->Type;
-  }
-  Res = SymbolRef::ST_Other;
-
-  // If this is a STAB debugging symbol, we can do nothing more.
-  if (n_type & MachO::NlistMaskStab) {
-    Res = SymbolRef::ST_Debug;
-    return object_error::success;
+    DataRefImpl DRI;
+    DRI.d.a = index - 1;
+    Res = section_iterator(SectionRef(DRI, this));
   }
 
-  switch (n_type & MachO::NlistMaskType) {
-    case MachO::NListTypeUndefined :
-      Res = SymbolRef::ST_Unknown;
-      break;
-    case MachO::NListTypeSection :
-      Res = SymbolRef::ST_Function;
-      break;
-  }
   return object_error::success;
 }
 
 error_code MachOObjectFile::getSymbolValue(DataRefImpl Symb,
-                                           uint64_t &Val) const {
+                                               uint64_t &Val) const {
   report_fatal_error("getSymbolValue unimplemented in MachOObjectFile");
 }
 
-symbol_iterator MachOObjectFile::begin_symbols() const {
-  // DRI.d.a = segment number; DRI.d.b = symbol index.
-  DataRefImpl DRI;
-  moveToNextSymbol(DRI);
-  return symbol_iterator(SymbolRef(DRI, this));
-}
-
-symbol_iterator MachOObjectFile::end_symbols() const {
-  DataRefImpl DRI;
-  DRI.d.a = MachOObj->getHeader().NumLoadCommands;
-  return symbol_iterator(SymbolRef(DRI, this));
-}
-
-symbol_iterator MachOObjectFile::begin_dynamic_symbols() const {
-  // TODO: implement
-  report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
-}
-
-symbol_iterator MachOObjectFile::end_dynamic_symbols() const {
-  // TODO: implement
-  report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
-}
-
-library_iterator MachOObjectFile::begin_libraries_needed() const {
-  // TODO: implement
-  report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
-}
-
-library_iterator MachOObjectFile::end_libraries_needed() const {
-  // TODO: implement
-  report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
-}
-
-StringRef MachOObjectFile::getLoadName() const {
-  // TODO: Implement
-  report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
-}
-
-/*===-- Sections ----------------------------------------------------------===*/
-
-void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const {
-  uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
-  while (DRI.d.a < LoadCommandCount) {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-    if (LCI.Command.Type == macho::LCT_Segment) {
-      InMemoryStruct<macho::SegmentLoadCommand> SegmentLoadCmd;
-      MachOObj->ReadSegmentLoadCommand(LCI, SegmentLoadCmd);
-      if (DRI.d.b < SegmentLoadCmd->NumSections)
-        return;
-    } else if (LCI.Command.Type == macho::LCT_Segment64) {
-      InMemoryStruct<macho::Segment64LoadCommand> Segment64LoadCmd;
-      MachOObj->ReadSegment64LoadCommand(LCI, Segment64LoadCmd);
-      if (DRI.d.b < Segment64LoadCmd->NumSections)
-        return;
-    }
-
-    DRI.d.a++;
-    DRI.d.b = 0;
-  }
-}
-
-error_code MachOObjectFile::getSectionNext(DataRefImpl DRI,
-                                           SectionRef &Result) const {
-  DRI.d.b++;
-  moveToNextSection(DRI);
-  Result = SectionRef(DRI, this);
+error_code MachOObjectFile::getSectionNext(DataRefImpl Sec,
+                                           SectionRef &Res) const {
+  Sec.d.a++;
+  Res = SectionRef(Sec, this);
   return object_error::success;
 }
 
-void
-MachOObjectFile::getSection(DataRefImpl DRI,
-                            InMemoryStruct<macho::Section> &Res) const {
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  MachOObj->ReadSection(LCI, DRI.d.b, Res);
-}
-
-std::size_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const {
-  SectionList::const_iterator loc =
-    std::find(Sections.begin(), Sections.end(), Sec);
-  assert(loc != Sections.end() && "Sec is not a valid section!");
-  return std::distance(Sections.begin(), loc);
-}
-
-void
-MachOObjectFile::getSection64(DataRefImpl DRI,
-                            InMemoryStruct<macho::Section64> &Res) const {
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  MachOObj->ReadSection64(LCI, DRI.d.b, Res);
-}
-
-static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) {
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  if (LCI.Command.Type == macho::LCT_Segment64)
-    return true;
-  assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type.");
-  return false;
-}
-
-static StringRef parseSegmentOrSectionName(const char *P) {
-  if (P[15] == 0)
-    // Null terminated.
-    return P;
-  // Not null terminated, so this is a 16 char string.
-  return StringRef(P, 16);
-}
-
-error_code MachOObjectFile::getSectionName(DataRefImpl DRI,
-                                           StringRef &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-    unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) +
-      DRI.d.b * sizeof(macho::Section64);
-    StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64));
-    const macho::Section64 *sec =
-      reinterpret_cast<const macho::Section64*>(Data.data());
-    Result = parseSegmentOrSectionName(sec->Name);
-  } else {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-    unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) +
-      DRI.d.b * sizeof(macho::Section);
-    StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section));
-    const macho::Section *sec =
-      reinterpret_cast<const macho::Section*>(Data.data());
-    Result = parseSegmentOrSectionName(sec->Name);
-  }
+error_code
+MachOObjectFile::getSectionName(DataRefImpl Sec,
+                                    StringRef &Result) const {
+  ArrayRef<char> Raw = getSectionRawName(Sec);
+  Result = parseSegmentOrSectionName(Raw.data());
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec,
-                                                       StringRef &Res) const {
-  if (is64BitLoadCommand(MachOObj.get(), Sec)) {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a);
-    unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) +
-      Sec.d.b * sizeof(macho::Section64);
-    StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64));
-    const macho::Section64 *sec =
-      reinterpret_cast<const macho::Section64*>(Data.data());
-    Res = parseSegmentOrSectionName(sec->SegmentName);
+error_code
+MachOObjectFile::getSectionAddress(DataRefImpl Sec,
+                                   uint64_t &Res) const {
+  if (is64Bit()) {
+    macho::Section64 Sect = getSection64(Sec);
+    Res = Sect.Address;
   } else {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a);
-    unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) +
-      Sec.d.b * sizeof(macho::Section);
-    StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section));
-    const macho::Section *sec =
-      reinterpret_cast<const macho::Section*>(Data.data());
-    Res = parseSegmentOrSectionName(sec->SegmentName);
+    macho::Section Sect = getSection(Sec);
+    Res = Sect.Address;
   }
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
-                                              uint64_t &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    Result = Sect->Address;
+error_code
+MachOObjectFile::getSectionSize(DataRefImpl Sec,
+                                uint64_t &Res) const {
+  if (is64Bit()) {
+    macho::Section64 Sect = getSection64(Sec);
+    Res = Sect.Size;
   } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    Result = Sect->Address;
+    macho::Section Sect = getSection(Sec);
+    Res = Sect.Size;
   }
-  return object_error::success;
-}
 
-error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
-                                           uint64_t &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    Result = Sect->Size;
-  } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    Result = Sect->Size;
-  }
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
-                                               StringRef &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    Result = MachOObj->getData(Sect->Offset, Sect->Size);
+error_code
+MachOObjectFile::getSectionContents(DataRefImpl Sec,
+                                    StringRef &Res) const {
+  uint32_t Offset;
+  uint64_t Size;
+
+  if (is64Bit()) {
+    macho::Section64 Sect = getSection64(Sec);
+    Offset = Sect.Offset;
+    Size = Sect.Size;
   } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    Result = MachOObj->getData(Sect->Offset, Sect->Size);
+    macho::Section Sect =getSection(Sec);
+    Offset = Sect.Offset;
+    Size = Sect.Size;
   }
+
+  Res = this->getData().substr(Offset, Size);
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI,
-                                                uint64_t &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    Result = uint64_t(1) << Sect->Align;
+error_code
+MachOObjectFile::getSectionAlignment(DataRefImpl Sec,
+                                         uint64_t &Res) const {
+  uint32_t Align;
+  if (is64Bit()) {
+    macho::Section64 Sect = getSection64(Sec);
+    Align = Sect.Align;
   } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    Result = uint64_t(1) << Sect->Align;
+    macho::Section Sect = getSection(Sec);
+    Align = Sect.Align;
   }
+
+  Res = uint64_t(1) << Align;
   return object_error::success;
 }
 
-error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
-                                          bool &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    Result = Sect->Flags & macho::SF_PureInstructions;
-  } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    Result = Sect->Flags & macho::SF_PureInstructions;
-  }
+error_code
+MachOObjectFile::isSectionText(DataRefImpl Sec, bool &Res) const {
+  uint32_t Flags = getSectionFlags(this, Sec);
+  Res = Flags & macho::SF_PureInstructions;
   return object_error::success;
 }
 
 error_code MachOObjectFile::isSectionData(DataRefImpl DRI,
-                                          bool &Result) const {
+                                              bool &Result) const {
   // FIXME: Unimplemented.
   Result = false;
   return object_error::success;
 }
 
 error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI,
-                                         bool &Result) const {
+                                             bool &Result) const {
   // FIXME: Unimplemented.
   Result = false;
   return object_error::success;
 }
 
-error_code MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
-                                                          bool &Result) const {
+error_code
+MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
+                                                   bool &Result) const {
   // FIXME: Unimplemented.
   Result = true;
   return object_error::success;
 }
 
 error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec,
-                                             bool &Result) const {
+                                                 bool &Result) const {
   // FIXME: Unimplemented.
   Result = false;
   return object_error::success;
 }
 
-error_code MachOObjectFile::isSectionZeroInit(DataRefImpl DRI,
-                                              bool &Result) const {
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType;
-    Result = (SectionType == MachO::SectionTypeZeroFill ||
-              SectionType == MachO::SectionTypeZeroFillLarge);
-  } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType;
-    Result = (SectionType == MachO::SectionTypeZeroFill ||
-              SectionType == MachO::SectionTypeZeroFillLarge);
-  }
-
+error_code
+MachOObjectFile::isSectionZeroInit(DataRefImpl Sec, bool &Res) const {
+  uint32_t Flags = getSectionFlags(this, Sec);
+  unsigned SectionType = Flags & MachO::SectionFlagMaskSectionType;
+  Res = SectionType == MachO::SectionTypeZeroFill ||
+    SectionType == MachO::SectionTypeZeroFillLarge;
   return object_error::success;
 }
 
 error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec,
-                                                  bool &Result) const {
+                                                      bool &Result) const {
   // Consider using the code from isSectionText to look for __const sections.
   // Alternately, emit S_ATTR_PURE_INSTRUCTIONS and/or S_ATTR_SOME_INSTRUCTIONS
   // to use section attributes to distinguish code from data.
@@ -653,11 +753,12 @@ error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec,
   return object_error::success;
 }
 
-error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
-                                                  DataRefImpl Symb,
-                                                  bool &Result) const {
+error_code
+MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+                                       DataRefImpl Symb,
+                                       bool &Result) const {
   SymbolRef::Type ST;
-  getSymbolType(Symb, ST);
+  this->getSymbolType(Symb, ST);
   if (ST == SymbolRef::ST_Unknown) {
     Result = false;
     return object_error::success;
@@ -668,164 +769,100 @@ error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
   getSectionSize(Sec, SectEnd);
   SectEnd += SectBegin;
 
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(Symb, Entry);
-    uint64_t SymAddr= Entry->Value;
-    Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(Symb, Entry);
-    uint64_t SymAddr= Entry->Value;
-    Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
-  }
+  uint64_t SymAddr;
+  getSymbolAddress(Symb, SymAddr);
+  Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
 
   return object_error::success;
 }
 
 relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
   DataRefImpl ret;
-  ret.d.b = getSectionIndex(Sec);
+  ret.d.b = Sec.d.a;
   return relocation_iterator(RelocationRef(ret, this));
 }
-relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
-  uint32_t last_reloc;
-  if (is64BitLoadCommand(MachOObj.get(), Sec)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(Sec, Sect);
-    last_reloc = Sect->NumRelocationTableEntries;
+
+relocation_iterator
+MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
+  uint32_t LastReloc;
+  if (is64Bit()) {
+    macho::Section64 Sect = getSection64(Sec);
+    LastReloc = Sect.NumRelocationTableEntries;
   } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(Sec, Sect);
-    last_reloc = Sect->NumRelocationTableEntries;
+    macho::Section Sect = getSection(Sec);
+    LastReloc = Sect.NumRelocationTableEntries;
   }
-  DataRefImpl ret;
-  ret.d.a = last_reloc;
-  ret.d.b = getSectionIndex(Sec);
-  return relocation_iterator(RelocationRef(ret, this));
-}
 
-section_iterator MachOObjectFile::begin_sections() const {
-  DataRefImpl DRI;
-  moveToNextSection(DRI);
-  return section_iterator(SectionRef(DRI, this));
+  DataRefImpl Ret;
+  Ret.d.a = LastReloc;
+  Ret.d.b = Sec.d.a;
+  return relocation_iterator(RelocationRef(Ret, this));
 }
 
-section_iterator MachOObjectFile::end_sections() const {
-  DataRefImpl DRI;
-  DRI.d.a = MachOObj->getHeader().NumLoadCommands;
-  return section_iterator(SectionRef(DRI, this));
-}
-
-/*===-- Relocations -------------------------------------------------------===*/
-
-void MachOObjectFile::
-getRelocation(DataRefImpl Rel,
-              InMemoryStruct<macho::RelocationEntry> &Res) const {
-  uint32_t relOffset;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(Sections[Rel.d.b], Sect);
-    relOffset = Sect->RelocationTableOffset;
-  } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(Sections[Rel.d.b], Sect);
-    relOffset = Sect->RelocationTableOffset;
-  }
-  MachOObj->ReadRelocationEntry(relOffset, Rel.d.a, Res);
-}
 error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel,
-                                              RelocationRef &Res) const {
+                                                  RelocationRef &Res) const {
   ++Rel.d.a;
   Res = RelocationRef(Rel, this);
   return object_error::success;
 }
-error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
-                                                 uint64_t &Res) const {
-  const uint8_t* sectAddress = 0;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(Sections[Rel.d.b], Sect);
-    sectAddress += Sect->Address;
+
+error_code
+MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
+                                      uint64_t &Res) const {
+  uint64_t SectAddress;
+  DataRefImpl Sec;
+  Sec.d.a = Rel.d.b;
+  if (is64Bit()) {
+    macho::Section64 Sect = getSection64(Sec);
+    SectAddress = Sect.Address;
   } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(Sections[Rel.d.b], Sect);
-    sectAddress += Sect->Address;
+    macho::Section Sect = getSection(Sec);
+    SectAddress = Sect.Address;
   }
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
-
-  unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
-  uint64_t RelAddr = 0;
-  if (isScattered)
-    RelAddr = RE->Word0 & 0xFFFFFF;
-  else
-    RelAddr = RE->Word0;
 
-  Res = reinterpret_cast<uintptr_t>(sectAddress + RelAddr);
+  macho::RelocationEntry RE = getRelocation(Rel);
+  uint64_t RelAddr = getAnyRelocationAddress(RE);
+  Res = SectAddress + RelAddr;
   return object_error::success;
 }
-error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
-                                                uint64_t &Res) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
 
-  unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
-  if (isScattered)
-    Res = RE->Word0 & 0xFFFFFF;
-  else
-    Res = RE->Word0;
+error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
+                                                    uint64_t &Res) const {
+  macho::RelocationEntry RE = getRelocation(Rel);
+  Res = getAnyRelocationAddress(RE);
   return object_error::success;
 }
-error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
-                                                SymbolRef &Res) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
-  uint32_t SymbolIdx = RE->Word1 & 0xffffff;
-  bool isExtern = (RE->Word1 >> 27) & 1;
+
+error_code
+MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
+                                         SymbolRef &Res) const {
+  macho::RelocationEntry RE = getRelocation(Rel);
+  uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE);
+  bool isExtern = getPlainRelocationExternal(RE);
 
   DataRefImpl Sym;
-  moveToNextSymbol(Sym);
   if (isExtern) {
-    for (unsigned i = 0; i < SymbolIdx; i++) {
-      Sym.d.b++;
-      moveToNextSymbol(Sym);
-      assert(Sym.d.a < MachOObj->getHeader().NumLoadCommands &&
-             "Relocation symbol index out of range!");
-    }
+    Sym.d.b = SymbolIdx;
   }
   Res = SymbolRef(Sym, this);
   return object_error::success;
 }
+
 error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
-                                              uint64_t &Res) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
-  Res = RE->Word0;
-  Res <<= 32;
-  Res |= RE->Word1;
+                                                  uint64_t &Res) const {
+  macho::RelocationEntry RE = getRelocation(Rel);
+  Res = getAnyRelocationType(RE);
   return object_error::success;
 }
-error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
-                                          SmallVectorImpl<char> &Result) const {
-  // TODO: Support scattered relocations.
-  StringRef res;
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
 
-  unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
+error_code
+MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
+                                       SmallVectorImpl<char> &Result) const {
+  StringRef res;
+  uint64_t RType;
+  getRelocationType(Rel, RType);
 
-  unsigned r_type;
-  if (isScattered)
-    r_type = (RE->Word0 >> 24) & 0xF;
-  else
-    r_type = (RE->Word1 >> 28) & 0xF;
+  unsigned Arch = this->getArch();
 
   switch (Arch) {
     case Triple::x86: {
@@ -837,10 +874,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
         "GENERIC_RELOC_LOCAL_SECTDIFF",
         "GENERIC_RELOC_TLV" };
 
-      if (r_type > 6)
+      if (RType > 6)
         res = "Unknown";
       else
-        res = Table[r_type];
+        res = Table[RType];
       break;
     }
     case Triple::x86_64: {
@@ -856,10 +893,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
         "X86_64_RELOC_SIGNED_4",
         "X86_64_RELOC_TLV" };
 
-      if (r_type > 9)
+      if (RType > 9)
         res = "Unknown";
       else
-        res = Table[r_type];
+        res = Table[RType];
       break;
     }
     case Triple::arm: {
@@ -875,10 +912,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
         "ARM_RELOC_HALF",
         "ARM_RELOC_HALF_SECTDIFF" };
 
-      if (r_type > 9)
+      if (RType > 9)
         res = "Unknown";
       else
-        res = Table[r_type];
+        res = Table[RType];
       break;
     }
     case Triple::ppc: {
@@ -900,7 +937,7 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
         "PPC_RELOC_LO14_SECTDIFF",
         "PPC_RELOC_LOCAL_SECTDIFF" };
 
-      res = Table[r_type];
+      res = Table[RType];
       break;
     }
     case Triple::UnknownArch:
@@ -910,193 +947,79 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
   Result.append(res.begin(), res.end());
   return object_error::success;
 }
+
 error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
                                                         int64_t &Res) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
-  bool isExtern = (RE->Word1 >> 27) & 1;
   Res = 0;
-  if (!isExtern) {
-    const uint8_t* sectAddress = base();
-    if (MachOObj->is64Bit()) {
-      InMemoryStruct<macho::Section64> Sect;
-      getSection64(Sections[Rel.d.b], Sect);
-      sectAddress += Sect->Offset;
-    } else {
-      InMemoryStruct<macho::Section> Sect;
-      getSection(Sections[Rel.d.b], Sect);
-      sectAddress += Sect->Offset;
-    }
-    Res = reinterpret_cast<uintptr_t>(sectAddress);
-  }
   return object_error::success;
 }
 
-// Helper to advance a section or symbol iterator multiple increments at a time.
-template<class T>
-error_code advance(T &it, size_t Val) {
-  error_code ec;
-  while (Val--) {
-    it.increment(ec);
-  }
-  return ec;
-}
-
-template<class T>
-void advanceTo(T &it, size_t Val) {
-  if (error_code ec = advance(it, Val))
-    report_fatal_error(ec.message());
-}
-
-void MachOObjectFile::printRelocationTargetName(
-                                     InMemoryStruct<macho::RelocationEntry>& RE,
-                                     raw_string_ostream &fmt) const {
-  unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
-
-  // Target of a scattered relocation is an address.  In the interest of
-  // generating pretty output, scan through the symbol table looking for a
-  // symbol that aligns with that address.  If we find one, print it.
-  // Otherwise, we just print the hex address of the target.
-  if (isScattered) {
-    uint32_t Val = RE->Word1;
-
-    error_code ec;
-    for (symbol_iterator SI = begin_symbols(), SE = end_symbols(); SI != SE;
-        SI.increment(ec)) {
-      if (ec) report_fatal_error(ec.message());
-
-      uint64_t Addr;
-      StringRef Name;
-
-      if ((ec = SI->getAddress(Addr)))
-        report_fatal_error(ec.message());
-      if (Addr != Val) continue;
-      if ((ec = SI->getName(Name)))
-        report_fatal_error(ec.message());
-      fmt << Name;
-      return;
-    }
-
-    // If we couldn't find a symbol that this relocation refers to, try
-    // to find a section beginning instead.
-    for (section_iterator SI = begin_sections(), SE = end_sections(); SI != SE;
-         SI.increment(ec)) {
-      if (ec) report_fatal_error(ec.message());
-
-      uint64_t Addr;
-      StringRef Name;
-
-      if ((ec = SI->getAddress(Addr)))
-        report_fatal_error(ec.message());
-      if (Addr != Val) continue;
-      if ((ec = SI->getName(Name)))
-        report_fatal_error(ec.message());
-      fmt << Name;
-      return;
-    }
-
-    fmt << format("0x%x", Val);
-    return;
-  }
-
-  StringRef S;
-  bool isExtern = (RE->Word1 >> 27) & 1;
-  uint32_t Val = RE->Word1 & 0xFFFFFF;
-
-  if (isExtern) {
-    symbol_iterator SI = begin_symbols();
-    advanceTo(SI, Val);
-    SI->getName(S);
-  } else {
-    section_iterator SI = begin_sections();
-    advanceTo(SI, Val);
-    SI->getName(S);
-  }
-
-  fmt << S;
-}
-
-error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
-                                          SmallVectorImpl<char> &Result) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
+error_code
+MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
+                                        SmallVectorImpl<char> &Result) const {
+  macho::RelocationEntry RE = getRelocation(Rel);
 
-  unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
+  unsigned Arch = this->getArch();
 
   std::string fmtbuf;
   raw_string_ostream fmt(fmtbuf);
-
-  unsigned Type;
-  if (isScattered)
-    Type = (RE->Word0 >> 24) & 0xF;
-  else
-    Type = (RE->Word1 >> 28) & 0xF;
-
-  bool isPCRel;
-  if (isScattered)
-    isPCRel = ((RE->Word0 >> 30) & 1);
-  else
-    isPCRel = ((RE->Word1 >> 24) & 1);
+  unsigned Type = this->getAnyRelocationType(RE);
+  bool IsPCRel = this->getAnyRelocationPCRel(RE);
 
   // Determine any addends that should be displayed with the relocation.
   // These require decoding the relocation type, which is triple-specific.
 
   // X86_64 has entirely custom relocation types.
   if (Arch == Triple::x86_64) {
-    bool isPCRel = ((RE->Word1 >> 24) & 1);
+    bool isPCRel = getAnyRelocationPCRel(RE);
 
     switch (Type) {
       case macho::RIT_X86_64_GOTLoad:   // X86_64_RELOC_GOT_LOAD
       case macho::RIT_X86_64_GOT: {     // X86_64_RELOC_GOT
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "@GOT";
         if (isPCRel) fmt << "PCREL";
         break;
       }
       case macho::RIT_X86_64_Subtractor: { // X86_64_RELOC_SUBTRACTOR
-        InMemoryStruct<macho::RelocationEntry> RENext;
         DataRefImpl RelNext = Rel;
         RelNext.d.a++;
-        getRelocation(RelNext, RENext);
+        macho::RelocationEntry RENext = getRelocation(RelNext);
 
         // X86_64_SUBTRACTOR must be followed by a relocation of type
         // X86_64_RELOC_UNSIGNED.
         // NOTE: Scattered relocations don't exist on x86_64.
-        unsigned RType = (RENext->Word1 >> 28) & 0xF;
+        unsigned RType = getAnyRelocationType(RENext);
         if (RType != 0)
           report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
                              "X86_64_RELOC_SUBTRACTOR.");
 
         // The X86_64_RELOC_UNSIGNED contains the minuend symbol,
         // X86_64_SUBTRACTOR contains to the subtrahend.
-        printRelocationTargetName(RENext, fmt);
+        printRelocationTargetName(this, RENext, fmt);
         fmt << "-";
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         break;
       }
       case macho::RIT_X86_64_TLV:
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "@TLV";
         if (isPCRel) fmt << "P";
         break;
       case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "-1";
         break;
       case macho::RIT_X86_64_Signed2: // X86_64_RELOC_SIGNED2
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "-2";
         break;
       case macho::RIT_X86_64_Signed4: // X86_64_RELOC_SIGNED4
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "-4";
         break;
       default:
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         break;
     }
   // X86 and ARM share some relocation types in common.
@@ -1106,27 +1029,21 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
       case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info
         return object_error::success;
       case macho::RIT_Difference: { // GENERIC_RELOC_SECTDIFF
-        InMemoryStruct<macho::RelocationEntry> RENext;
         DataRefImpl RelNext = Rel;
         RelNext.d.a++;
-        getRelocation(RelNext, RENext);
+        macho::RelocationEntry RENext = getRelocation(RelNext);
 
         // X86 sect diff's must be followed by a relocation of type
         // GENERIC_RELOC_PAIR.
-        bool isNextScattered = (Arch != Triple::x86_64) &&
-                               (RENext->Word0 & macho::RF_Scattered);
-        unsigned RType;
-        if (isNextScattered)
-          RType = (RENext->Word0 >> 24) & 0xF;
-        else
-          RType = (RENext->Word1 >> 28) & 0xF;
+        unsigned RType = getAnyRelocationType(RENext);
+
         if (RType != 1)
           report_fatal_error("Expected GENERIC_RELOC_PAIR after "
                              "GENERIC_RELOC_SECTDIFF.");
 
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "-";
-        printRelocationTargetName(RENext, fmt);
+        printRelocationTargetName(this, RENext, fmt);
         break;
       }
     }
@@ -1136,37 +1053,30 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
       // handled in the generic code.
       switch (Type) {
         case macho::RIT_Generic_LocalDifference:{// GENERIC_RELOC_LOCAL_SECTDIFF
-          InMemoryStruct<macho::RelocationEntry> RENext;
           DataRefImpl RelNext = Rel;
           RelNext.d.a++;
-          getRelocation(RelNext, RENext);
+          macho::RelocationEntry RENext = getRelocation(RelNext);
 
           // X86 sect diff's must be followed by a relocation of type
           // GENERIC_RELOC_PAIR.
-          bool isNextScattered = (Arch != Triple::x86_64) &&
-                               (RENext->Word0 & macho::RF_Scattered);
-          unsigned RType;
-          if (isNextScattered)
-            RType = (RENext->Word0 >> 24) & 0xF;
-          else
-            RType = (RENext->Word1 >> 28) & 0xF;
+          unsigned RType = getAnyRelocationType(RENext);
           if (RType != 1)
             report_fatal_error("Expected GENERIC_RELOC_PAIR after "
                                "GENERIC_RELOC_LOCAL_SECTDIFF.");
 
-          printRelocationTargetName(RE, fmt);
+          printRelocationTargetName(this, RE, fmt);
           fmt << "-";
-          printRelocationTargetName(RENext, fmt);
+          printRelocationTargetName(this, RENext, fmt);
           break;
         }
         case macho::RIT_Generic_TLV: {
-          printRelocationTargetName(RE, fmt);
+          printRelocationTargetName(this, RE, fmt);
           fmt << "@TLV";
-          if (isPCRel) fmt << "P";
+          if (IsPCRel) fmt << "P";
           break;
         }
         default:
-          printRelocationTargetName(RE, fmt);
+          printRelocationTargetName(this, RE, fmt);
       }
     } else { // ARM-specific relocations
       switch (Type) {
@@ -1174,33 +1084,21 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
         case macho::RIT_ARM_HalfDifference: { // ARM_RELOC_HALF_SECTDIFF
           // Half relocations steal a bit from the length field to encode
           // whether this is an upper16 or a lower16 relocation.
-          bool isUpper;
-          if (isScattered)
-            isUpper = (RE->Word0 >> 28) & 1;
-          else
-            isUpper = (RE->Word1 >> 25) & 1;
+          bool isUpper = getAnyRelocationLength(RE) >> 1;
 
           if (isUpper)
             fmt << ":upper16:(";
           else
             fmt << ":lower16:(";
-          printRelocationTargetName(RE, fmt);
+          printRelocationTargetName(this, RE, fmt);
 
-          InMemoryStruct<macho::RelocationEntry> RENext;
           DataRefImpl RelNext = Rel;
           RelNext.d.a++;
-          getRelocation(RelNext, RENext);
+          macho::RelocationEntry RENext = getRelocation(RelNext);
 
           // ARM half relocs must be followed by a relocation of type
           // ARM_RELOC_PAIR.
-          bool isNextScattered = (Arch != Triple::x86_64) &&
-                                 (RENext->Word0 & macho::RF_Scattered);
-          unsigned RType;
-          if (isNextScattered)
-            RType = (RENext->Word0 >> 24) & 0xF;
-          else
-            RType = (RENext->Word1 >> 28) & 0xF;
-
+          unsigned RType = getAnyRelocationType(RENext);
           if (RType != 1)
             report_fatal_error("Expected ARM_RELOC_PAIR after "
                                "GENERIC_RELOC_HALF");
@@ -1214,38 +1112,31 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
           // symbol/section pointer of the follow-on relocation.
           if (Type == macho::RIT_ARM_HalfDifference) {
             fmt << "-";
-            printRelocationTargetName(RENext, fmt);
+            printRelocationTargetName(this, RENext, fmt);
           }
 
           fmt << ")";
           break;
         }
         default: {
-          printRelocationTargetName(RE, fmt);
+          printRelocationTargetName(this, RE, fmt);
         }
       }
     }
   } else
-    printRelocationTargetName(RE, fmt);
+    printRelocationTargetName(this, RE, fmt);
 
   fmt.flush();
   Result.append(fmtbuf.begin(), fmtbuf.end());
   return object_error::success;
 }
 
-error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
-                                                bool &Result) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
-
+error_code
+MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
+                                     bool &Result) const {
   unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
-  unsigned Type;
-  if (isScattered)
-    Type = (RE->Word0 >> 24) & 0xF;
-  else
-    Type = (RE->Word1 >> 28) & 0xF;
+  uint64_t Type;
+  getRelocationType(Rel, Type);
 
   Result = false;
 
@@ -1259,12 +1150,10 @@ error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
     if (Type == macho::RIT_X86_64_Unsigned && Rel.d.a > 0) {
       DataRefImpl RelPrev = Rel;
       RelPrev.d.a--;
-      InMemoryStruct<macho::RelocationEntry> REPrev;
-      getRelocation(RelPrev, REPrev);
-
-      unsigned PrevType = (REPrev->Word1 >> 28) & 0xF;
-
-      if (PrevType == macho::RIT_X86_64_Subtractor) Result = true;
+      uint64_t PrevType;
+      getRelocationType(RelPrev, PrevType);
+      if (PrevType == macho::RIT_X86_64_Subtractor)
+        Result = true;
     }
   }
 
@@ -1272,25 +1161,69 @@ error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
 }
 
 error_code MachOObjectFile::getLibraryNext(DataRefImpl LibData,
-                                           LibraryRef &Res) const {
+                                               LibraryRef &Res) const {
   report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
 }
 
 error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData,
-                                           StringRef &Res) const {
+                                               StringRef &Res) const {
   report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
 }
 
+symbol_iterator MachOObjectFile::begin_symbols() const {
+  // DRI.d.a = segment number; DRI.d.b = symbol index.
+  DataRefImpl DRI;
+  return symbol_iterator(SymbolRef(DRI, this));
+}
 
-/*===-- Miscellaneous -----------------------------------------------------===*/
+symbol_iterator MachOObjectFile::end_symbols() const {
+  DataRefImpl DRI;
+  if (SymtabLoadCmd) {
+    macho::SymtabLoadCommand Symtab = getSymtabLoadCommand();
+    DRI.d.b = Symtab.NumSymbolTableEntries;
+  }
+  return symbol_iterator(SymbolRef(DRI, this));
+}
+
+symbol_iterator MachOObjectFile::begin_dynamic_symbols() const {
+  // TODO: implement
+  report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+symbol_iterator MachOObjectFile::end_dynamic_symbols() const {
+  // TODO: implement
+  report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+section_iterator MachOObjectFile::begin_sections() const {
+  DataRefImpl DRI;
+  return section_iterator(SectionRef(DRI, this));
+}
+
+section_iterator MachOObjectFile::end_sections() const {
+  DataRefImpl DRI;
+  DRI.d.a = Sections.size();
+  return section_iterator(SectionRef(DRI, this));
+}
+
+library_iterator MachOObjectFile::begin_libraries_needed() const {
+  // TODO: implement
+  report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+library_iterator MachOObjectFile::end_libraries_needed() const {
+  // TODO: implement
+  report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
 
 uint8_t MachOObjectFile::getBytesInAddress() const {
-  return MachOObj->is64Bit() ? 8 : 4;
+  return is64Bit() ? 8 : 4;
 }
 
 StringRef MachOObjectFile::getFileFormatName() const {
-  if (!MachOObj->is64Bit()) {
-    switch (MachOObj->getHeader().CPUType) {
+  unsigned CPUType = getCPUType(this);
+  if (!is64Bit()) {
+    switch (CPUType) {
     case llvm::MachO::CPUTypeI386:
       return "Mach-O 32-bit i386";
     case llvm::MachO::CPUTypeARM:
@@ -1298,18 +1231,18 @@ StringRef MachOObjectFile::getFileFormatName() const {
     case llvm::MachO::CPUTypePowerPC:
       return "Mach-O 32-bit ppc";
     default:
-      assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 0 &&
+      assert((CPUType & llvm::MachO::CPUArchABI64) == 0 &&
              "64-bit object file when we're not 64-bit?");
       return "Mach-O 32-bit unknown";
     }
   }
 
   // Make sure the cpu type has the correct mask.
-  assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64)
+  assert((CPUType & llvm::MachO::CPUArchABI64)
 	 == llvm::MachO::CPUArchABI64 &&
 	 "32-bit object file when we're 64-bit?");
 
-  switch (MachOObj->getHeader().CPUType) {
+  switch (CPUType) {
   case llvm::MachO::CPUTypeX86_64:
     return "Mach-O 64-bit x86-64";
   case llvm::MachO::CPUTypePowerPC64:
@@ -1320,7 +1253,7 @@ StringRef MachOObjectFile::getFileFormatName() const {
 }
 
 unsigned MachOObjectFile::getArch() const {
-  switch (MachOObj->getHeader().CPUType) {
+  switch (getCPUType(this)) {
   case llvm::MachO::CPUTypeI386:
     return Triple::x86;
   case llvm::MachO::CPUTypeX86_64:
@@ -1336,5 +1269,194 @@ unsigned MachOObjectFile::getArch() const {
   }
 }
 
+StringRef MachOObjectFile::getLoadName() const {
+  // TODO: Implement
+  report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
+}
+
+StringRef
+MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
+  ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
+  return parseSegmentOrSectionName(Raw.data());
+}
+
+ArrayRef<char>
+MachOObjectFile::getSectionRawName(DataRefImpl Sec) const {
+  const SectionBase *Base =
+    reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]);
+  return ArrayRef<char>(Base->Name);
+}
+
+ArrayRef<char>
+MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const {
+  const SectionBase *Base =
+    reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]);
+  return ArrayRef<char>(Base->SegmentName);
+}
+
+bool
+MachOObjectFile::isRelocationScattered(const macho::RelocationEntry &RE)
+  const {
+  if (getCPUType(this) == llvm::MachO::CPUTypeX86_64)
+    return false;
+  return getPlainRelocationAddress(RE) & macho::RF_Scattered;
+}
+
+unsigned MachOObjectFile::getPlainRelocationSymbolNum(const macho::RelocationEntry &RE) const {
+  if (isLittleEndian())
+    return RE.Word1 & 0xffffff;
+  return RE.Word1 >> 8;
+}
+
+bool MachOObjectFile::getPlainRelocationExternal(const macho::RelocationEntry &RE) const {
+  if (isLittleEndian())
+    return (RE.Word1 >> 27) & 1;
+  return (RE.Word1 >> 4) & 1;
+}
+
+bool
+MachOObjectFile::getScatteredRelocationScattered(const macho::RelocationEntry &RE) const {
+  return RE.Word0 >> 31;
+}
+
+uint32_t
+MachOObjectFile::getScatteredRelocationValue(const macho::RelocationEntry &RE) const {
+  return RE.Word1;
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationAddress(const macho::RelocationEntry &RE) const {
+  if (isRelocationScattered(RE))
+    return getScatteredRelocationAddress(RE);
+  return getPlainRelocationAddress(RE);
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationPCRel(const macho::RelocationEntry &RE) const {
+  if (isRelocationScattered(RE))
+    return getScatteredRelocationPCRel(this, RE);
+  return getPlainRelocationPCRel(this, RE);
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationLength(const macho::RelocationEntry &RE) const {
+  if (isRelocationScattered(RE))
+    return getScatteredRelocationLength(RE);
+  return getPlainRelocationLength(this, RE);
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationType(const macho::RelocationEntry &RE) const {
+  if (isRelocationScattered(RE))
+    return getScatteredRelocationType(RE);
+  return getPlainRelocationType(this, RE);
+}
+
+MachOObjectFile::LoadCommandInfo
+MachOObjectFile::getFirstLoadCommandInfo() const {
+  MachOObjectFile::LoadCommandInfo Load;
+
+  unsigned HeaderSize = is64Bit() ? macho::Header64Size : macho::Header32Size;
+  Load.Ptr = getPtr(this, HeaderSize);
+  Load.C = getStruct<macho::LoadCommand>(this, Load.Ptr);
+  return Load;
+}
+
+MachOObjectFile::LoadCommandInfo
+MachOObjectFile::getNextLoadCommandInfo(const LoadCommandInfo &L) const {
+  MachOObjectFile::LoadCommandInfo Next;
+  Next.Ptr = L.Ptr + L.C.Size;
+  Next.C = getStruct<macho::LoadCommand>(this, Next.Ptr);
+  return Next;
+}
+
+macho::Section MachOObjectFile::getSection(DataRefImpl DRI) const {
+  return getStruct<macho::Section>(this, Sections[DRI.d.a]);
+}
+
+macho::Section64 MachOObjectFile::getSection64(DataRefImpl DRI) const {
+  return getStruct<macho::Section64>(this, Sections[DRI.d.a]);
+}
+
+macho::SymbolTableEntry
+MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const {
+  const char *P = getSymbolTableEntryPtr(this, DRI);
+  return getStruct<macho::SymbolTableEntry>(this, P);
+}
+
+macho::Symbol64TableEntry
+MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const {
+  const char *P = getSymbolTableEntryPtr(this, DRI);
+  return getStruct<macho::Symbol64TableEntry>(this, P);
+}
+
+macho::LinkeditDataLoadCommand
+MachOObjectFile::getLinkeditDataLoadCommand(const MachOObjectFile::LoadCommandInfo &L) const {
+  return getStruct<macho::LinkeditDataLoadCommand>(this, L.Ptr);
+}
+
+macho::RelocationEntry
+MachOObjectFile::getRelocation(DataRefImpl Rel) const {
+  uint32_t RelOffset;
+  DataRefImpl Sec;
+  Sec.d.a = Rel.d.b;
+  if (is64Bit()) {
+    macho::Section64 Sect = getSection64(Sec);
+    RelOffset = Sect.RelocationTableOffset;
+  } else {
+    macho::Section Sect = getSection(Sec);
+    RelOffset = Sect.RelocationTableOffset;
+  }
+
+  uint64_t Offset = RelOffset + Rel.d.a * sizeof(macho::RelocationEntry);
+  return getStruct<macho::RelocationEntry>(this, getPtr(this, Offset));
+}
+
+macho::Header MachOObjectFile::getHeader() const {
+  return getStruct<macho::Header>(this, getPtr(this, 0));
+}
+
+macho::SymtabLoadCommand
+MachOObjectFile::getSymtabLoadCommand() const {
+  return getStruct<macho::SymtabLoadCommand>(this, SymtabLoadCmd);
+}
+
+bool MachOObjectFile::is64Bit() const {
+  return getType() == getMachOType(false, true) ||
+    getType() == getMachOType(true, true);
+}
+
+void MachOObjectFile::ReadULEB128s(uint64_t Index,
+                                   SmallVectorImpl<uint64_t> &Out) const {
+  DataExtractor extractor(ObjectFile::getData(), true, 0);
+
+  uint32_t offset = Index;
+  uint64_t data = 0;
+  while (uint64_t delta = extractor.getULEB128(&offset)) {
+    data += delta;
+    Out.push_back(data);
+  }
+}
+
+ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+  StringRef Magic = Buffer->getBuffer().slice(0, 4);
+  error_code ec;
+  ObjectFile *Ret;
+  if (Magic == "\xFE\xED\xFA\xCE")
+    Ret = new MachOObjectFile(Buffer, false, false, ec);
+  else if (Magic == "\xCE\xFA\xED\xFE")
+    Ret = new MachOObjectFile(Buffer, true, false, ec);
+  else if (Magic == "\xFE\xED\xFA\xCF")
+    Ret = new MachOObjectFile(Buffer, false, true, ec);
+  else if (Magic == "\xCF\xFA\xED\xFE")
+    Ret = new MachOObjectFile(Buffer, true, true, ec);
+  else
+    return NULL;
+
+  if (ec)
+    return NULL;
+  return Ret;
+}
+
 } // end namespace object
 } // end namespace llvm
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 860c87be9846..518959aa059d 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -23,7 +23,7 @@ using namespace object;
 
 void ObjectFile::anchor() { }
 
-ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec)
+ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source)
   : Binary(Type, source) {
 }
 
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 5b68fbb2705c..6182e3415005 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -3311,10 +3311,8 @@ namespace {
 
     significand = significand.udiv(divisor);
 
-    // Truncate the significand down to its active bit count, but
-    // don't try to drop below 32.
-    unsigned newPrecision = std::max(32U, significand.getActiveBits());
-    significand = significand.trunc(newPrecision);
+    // Truncate the significand down to its active bit count.
+    significand = significand.trunc(significand.getActiveBits());
   }
 
 
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 07cb057b4895..e8534753b46e 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -559,12 +559,12 @@ bool APInt::slt(const APInt& RHS) const {
   if (lhsNeg) {
     // Sign bit is set so perform two's complement to make it positive
     lhs.flipAllBits();
-    lhs++;
+    ++lhs;
   }
   if (rhsNeg) {
     // Sign bit is set so perform two's complement to make it positive
     rhs.flipAllBits();
-    rhs++;
+    ++rhs;
   }
 
   // Now we have unsigned values to compare so do the comparison if necessary
@@ -2116,7 +2116,7 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
   }
   // If its negative, put it in two's complement form
   if (isNeg) {
-    (*this)--;
+    --(*this);
     this->flipAllBits();
   }
 }
@@ -2197,7 +2197,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
     // Flip the bits and add one to turn it into the equivalent positive
     // value and put a '-' in the result.
     Tmp.flipAllBits();
-    Tmp++;
+    ++Tmp;
     Str.push_back('-');
   }
 
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 5ba69fc3c8c8..3746a810114f 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -83,6 +83,7 @@ add_llvm_library(LLVMSupport
   Threading.cpp
   TimeValue.cpp
   Valgrind.cpp
+  Watchdog.cpp
   Unix/Host.inc
   Unix/Memory.inc
   Unix/Mutex.inc
@@ -95,6 +96,7 @@ add_llvm_library(LLVMSupport
   Unix/system_error.inc
   Unix/ThreadLocal.inc
   Unix/TimeValue.inc
+  Unix/Watchdog.inc
   Windows/DynamicLibrary.inc
   Windows/Host.inc
   Windows/Memory.inc
@@ -108,4 +110,5 @@ add_llvm_library(LLVMSupport
   Windows/system_error.inc
   Windows/ThreadLocal.inc
   Windows/TimeValue.inc
+  Windows/Watchdog.inc
   )
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index 3d5cce05358c..a564d211b1d0 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -20,7 +20,7 @@ static T getU(uint32_t *offset_ptr, const DataExtractor *de,
   uint32_t offset = *offset_ptr;
   if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) {
     std::memcpy(&val, &Data[offset], sizeof(val));
-    if (sys::isLittleEndianHost() != isLittleEndian)
+    if (sys::IsLittleEndianHost != isLittleEndian)
       val = sys::SwapByteOrder(val);
 
     // Advance the offset
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index d4382e54e071..f4b591e777eb 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -49,21 +49,21 @@ void llvm::remove_fatal_error_handler() {
   ErrorHandler = 0;
 }
 
-void llvm::report_fatal_error(const char *Reason) {
-  report_fatal_error(Twine(Reason));
+void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) {
+  report_fatal_error(Twine(Reason), GenCrashDiag);
 }
 
-void llvm::report_fatal_error(const std::string &Reason) {
-  report_fatal_error(Twine(Reason));
+void llvm::report_fatal_error(const std::string &Reason, bool GenCrashDiag) {
+  report_fatal_error(Twine(Reason), GenCrashDiag);
 }
 
-void llvm::report_fatal_error(StringRef Reason) {
-  report_fatal_error(Twine(Reason));
+void llvm::report_fatal_error(StringRef Reason, bool GenCrashDiag) {
+  report_fatal_error(Twine(Reason), GenCrashDiag);
 }
 
-void llvm::report_fatal_error(const Twine &Reason) {
+void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
   if (ErrorHandler) {
-    ErrorHandler(ErrorHandlerUserData, Reason.str());
+    ErrorHandler(ErrorHandlerUserData, Reason.str(), GenCrashDiag);
   } else {
     // Blast the result out to stderr.  We don't try hard to make sure this
     // succeeds (e.g. handling EINTR) and we can't use errs() here because
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 36e33b5aafa3..145f12dc1e5d 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -101,7 +101,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
     // Otherwise do it the hard way.
     // To be compatible with above bulk transfer, we need to take endianness
     // into account.
-    if (sys::isBigEndianHost()) {
+    if (sys::IsBigEndianHost) {
       for (Pos += 4; Pos <= Size; Pos += 4) {
         unsigned V = ((unsigned char)String[Pos - 4] << 24) |
                      ((unsigned char)String[Pos - 3] << 16) |
@@ -110,7 +110,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
         Bits.push_back(V);
       }
     } else {
-      assert(sys::isLittleEndianHost() && "Unexpected host endianness");
+      assert(sys::IsLittleEndianHost && "Unexpected host endianness");
       for (Pos += 4; Pos <= Size; Pos += 4) {
         unsigned V = ((unsigned char)String[Pos - 1] << 24) |
                      ((unsigned char)String[Pos - 2] << 16) |
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index b9bbcb9322bb..73d98d148746 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -112,6 +112,21 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
 #endif
 }
 
+static bool OSHasAVXSupport() {
+#if defined(__GNUC__)
+  // Check xgetbv; this uses a .byte sequence instead of the instruction 
+  // directly because older assemblers do not include support for xgetbv and 
+  // there is no easy way to conditionally compile based on the assembler used.
+  int rEAX, rEDX;
+  __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
+#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219
+  unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+#else
+  int rEAX = 0; // Ensures we return false
+#endif
+  return (rEAX & 6) == 6;
+}
+
 static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
                                  unsigned &Model) {
   Family = (EAX >> 8) & 0xf; // Bits 8 - 11
@@ -134,6 +149,11 @@ std::string sys::getHostCPUName() {
   DetectX86FamilyModel(EAX, Family, Model);
 
   bool HasSSE3 = (ECX & 0x1);
+  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 
+  // indicates that the AVX registers will be saved and restored on context
+  // switch, then we have full AVX support.
+  const unsigned AVXBits = (1 << 27) | (1 << 28);
+  bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport();
   GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
   bool Em64T = (EDX >> 29) & 0x1;
 
@@ -243,11 +263,15 @@ std::string sys::getHostCPUName() {
       case 42: // Intel Core i7 processor. All processors are manufactured
                // using the 32 nm process.
       case 45:
-        return "corei7-avx";
+        // Not all Sandy Bridge processors support AVX (such as the Pentium
+        // versions instead of the i7 versions).
+        return HasAVX ? "corei7-avx" : "corei7";
 
       // Ivy Bridge:
       case 58:
-        return "core-avx-i";
+        // Not all Ivy Bridge processors support AVX (such as the Pentium
+        // versions instead of the i7 versions).
+        return HasAVX ? "core-avx-i" : "corei7";
 
       case 28: // Most 45 nm Intel Atom processors
       case 38: // 45 nm Atom Lincroft
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index 92d8b83cf94e..2917e273bce7 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -174,8 +174,8 @@ void LockFileManager::waitForUnlock() {
   Interval.tv_sec = 0;
   Interval.tv_nsec = 1000000;
 #endif
-  // Don't wait more than an hour for the file to appear.
-  const unsigned MaxSeconds = 3600;
+  // Don't wait more than five minutes for the file to appear.
+  unsigned MaxSeconds = 300;
   bool LockFileGone = false;
   do {
     // Sleep for the designated interval, to allow the owning process time to
@@ -187,21 +187,48 @@ void LockFileManager::waitForUnlock() {
 #else
     nanosleep(&Interval, NULL);
 #endif
-    // If the lock file no longer exists, wait for the actual file.
     bool Exists = false;
+    bool LockFileJustDisappeared = false;
+
+    // If the lock file is still expected to be there, check whether it still
+    // is.
     if (!LockFileGone) {
       if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) {
         LockFileGone = true;
+        LockFileJustDisappeared = true;
         Exists = false;
       }
     }
+
+    // If the lock file is no longer there, check if the original file is
+    // available now.
     if (LockFileGone) {
-      if (!sys::fs::exists(FileName.str(), Exists) && Exists)
+      if (!sys::fs::exists(FileName.str(), Exists) && Exists) {
         return;
+      }
+
+      // The lock file is gone, so now we're waiting for the original file to
+      // show up. If this just happened, reset our waiting intervals and keep
+      // waiting.
+      if (LockFileJustDisappeared) {
+        MaxSeconds = 5;
+
+#if LLVM_ON_WIN32
+        Interval = 1;
+#else
+        Interval.tv_sec = 0;
+        Interval.tv_nsec = 1000000;
+#endif
+        continue;
+      }
     }
 
-    if (!processStillExecuting((*Owner).first, (*Owner).second))
+    // If we're looking for the lock file to disappear, but the process
+    // owning the lock died without cleaning up, just bail out.
+    if (!LockFileGone &&
+        !processStillExecuting((*Owner).first, (*Owner).second)) {
       return;
+    }
 
     // Exponentially increase the time we wait for the lock to be removed.
 #if LLVM_ON_WIN32
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 804223725397..7c5ab96a764a 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -72,10 +72,12 @@ static void CopyStringRef(char *Memory, StringRef Data) {
   Memory[Data.size()] = 0; // Null terminate string.
 }
 
+namespace {
 struct NamedBufferAlloc {
   StringRef Name;
   NamedBufferAlloc(StringRef Name) : Name(Name) {}
 };
+}
 
 void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
   char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1));
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
index 41add96194f6..ac53a9e9e6b4 100644
--- a/lib/Support/PathV2.cpp
+++ b/lib/Support/PathV2.cpp
@@ -18,6 +18,9 @@
 #include <cctype>
 #include <cstdio>
 #include <cstring>
+#ifdef __APPLE__
+#include <unistd.h>
+#endif
 
 namespace {
   using llvm::StringRef;
@@ -493,6 +496,27 @@ bool is_separator(char value) {
 void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
   result.clear();
 
+#ifdef __APPLE__
+  // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
+  int ConfName = erasedOnReboot? _CS_DARWIN_USER_TEMP_DIR
+                               : _CS_DARWIN_USER_CACHE_DIR;
+  size_t ConfLen = confstr(ConfName, 0, 0);
+  if (ConfLen > 0) {
+    do {
+      result.resize(ConfLen);
+      ConfLen = confstr(ConfName, result.data(), result.size());
+    } while (ConfLen > 0 && ConfLen != result.size());
+
+    if (ConfLen > 0) {
+      assert(result.back() == 0);
+      result.pop_back();
+      return;
+    }
+
+    result.clear();
+  }
+#endif
+
   // Check whether the temporary directory is specified by an environment
   // variable.
   const char *EnvironmentVariable;
@@ -765,8 +789,11 @@ file_magic identify_magic(StringRef magic) {
 
     case '\177':
       if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
-        if (magic.size() >= 18 && magic[17] == 0)
-          switch (magic[16]) {
+        bool Data2MSB = magic[5] == 2;
+        unsigned high = Data2MSB ? 16 : 17;
+        unsigned low  = Data2MSB ? 17 : 16;
+        if (magic.size() >= 18 && magic[high] == 0)
+          switch (magic[low]) {
             default: break;
             case 1: return file_magic::elf_relocatable;
             case 2: return file_magic::elf_executable;
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 21d56adb5e08..23ee5ab105ae 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Config/config.h"     // Get autoconf configuration settings
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/ThreadLocal.h"
+#include "llvm/Support/Watchdog.h"
 #include "llvm/Support/raw_ostream.h"
 
 #ifdef HAVE_CRASHREPORTERCLIENT_H
@@ -37,7 +38,10 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
   if (Entry->getNextEntry())
     NextID = PrintStack(Entry->getNextEntry(), OS);
   OS << NextID << ".\t";
-  Entry->print(OS);
+  {
+    sys::Watchdog W(5);
+    Entry->print(OS);
+  }
   
   return NextID+1;
 }
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index 75bc282d9bd4..201d5c0d3056 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -29,12 +29,15 @@ Program::ExecuteAndWait(const Path& path,
                         const Path** redirects,
                         unsigned secondsToWait,
                         unsigned memoryLimit,
-                        std::string* ErrMsg) {
+                        std::string* ErrMsg,
+                        bool *ExecutionFailed) {
   Program prg;
-  if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg))
+  if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg)) {
+    if (ExecutionFailed) *ExecutionFailed = false;
     return prg.Wait(path, secondsToWait, ErrMsg);
-  else
-    return -1;
+  }
+  if (ExecutionFailed) *ExecutionFailed = true;
+  return -1;
 }
 
 void
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index 3b53e9ff49fe..f0fed7792ce6 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -29,13 +29,9 @@ void SmallPtrSetImpl::shrink_and_clear() {
   NumElements = NumTombstones = 0;
 
   // Install the new array.  Clear all the buckets to empty.
-  CurArray = (const void**)malloc(sizeof(void*) * (CurArraySize+1));
+  CurArray = (const void**)malloc(sizeof(void*) * CurArraySize);
   assert(CurArray && "Failed to allocate memory?");
   memset(CurArray, -1, CurArraySize*sizeof(void*));
-  
-  // The end pointer, always valid, is set to a valid element to help the
-  // iterator.
-  CurArray[CurArraySize] = 0;
 }
 
 bool SmallPtrSetImpl::insert_imp(const void * Ptr) {
@@ -139,15 +135,11 @@ void SmallPtrSetImpl::Grow(unsigned NewSize) {
   bool WasSmall = isSmall();
   
   // Install the new array.  Clear all the buckets to empty.
-  CurArray = (const void**)malloc(sizeof(void*) * (NewSize+1));
+  CurArray = (const void**)malloc(sizeof(void*) * NewSize);
   assert(CurArray && "Failed to allocate memory?");
   CurArraySize = NewSize;
   memset(CurArray, -1, NewSize*sizeof(void*));
   
-  // The end pointer, always valid, is set to a valid element to help the
-  // iterator.
-  CurArray[NewSize] = 0;
-  
   // Copy over all the elements.
   if (WasSmall) {
     // Small sets store their elements in order.
@@ -180,7 +172,7 @@ SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
     CurArray = SmallArray;
   // Otherwise, allocate new heap space (unless we were the same size)
   } else {
-    CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1));
+    CurArray = (const void**)malloc(sizeof(void*) * that.CurArraySize);
     assert(CurArray && "Failed to allocate memory?");
   }
   
@@ -188,7 +180,7 @@ SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
   CurArraySize = that.CurArraySize;
 
   // Copy over the contents from the other set
-  memcpy(CurArray, that.CurArray, sizeof(void*)*(CurArraySize+1));
+  memcpy(CurArray, that.CurArray, sizeof(void*)*CurArraySize);
   
   NumElements = that.NumElements;
   NumTombstones = that.NumTombstones;
@@ -200,7 +192,7 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
   if (isSmall() && RHS.isSmall())
     assert(CurArraySize == RHS.CurArraySize &&
            "Cannot assign sets with different small sizes");
-           
+
   // If we're becoming small, prepare to insert into our stack space
   if (RHS.isSmall()) {
     if (!isSmall())
@@ -209,9 +201,9 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
   // Otherwise, allocate new heap space (unless we were the same size)
   } else if (CurArraySize != RHS.CurArraySize) {
     if (isSmall())
-      CurArray = (const void**)malloc(sizeof(void*) * (RHS.CurArraySize+1));
+      CurArray = (const void**)malloc(sizeof(void*) * RHS.CurArraySize);
     else
-      CurArray = (const void**)realloc(CurArray, sizeof(void*)*(RHS.CurArraySize+1));
+      CurArray = (const void**)realloc(CurArray, sizeof(void*)*RHS.CurArraySize);
     assert(CurArray && "Failed to allocate memory?");
   }
   
@@ -219,7 +211,7 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
   CurArraySize = RHS.CurArraySize;
 
   // Copy over the contents from the other set
-  memcpy(CurArray, RHS.CurArray, sizeof(void*)*(CurArraySize+1));
+  memcpy(CurArray, RHS.CurArray, sizeof(void*)*CurArraySize);
   
   NumElements = RHS.NumElements;
   NumTombstones = RHS.NumTombstones;
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index a3dfd4b0a32d..7e0aead151b1 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -430,9 +430,7 @@ rety_open_create:
     if (SavedErrno == errc::file_exists)
       goto retry_random_path;
     // If path prefix doesn't exist, try to create it.
-    if (SavedErrno == errc::no_such_file_or_directory &&
-        !exists(path::parent_path(RandomPath)) &&
-        !TriedToCreateParent) {
+    if (SavedErrno == errc::no_such_file_or_directory && !TriedToCreateParent) {
       TriedToCreateParent = true;
       StringRef p(RandomPath);
       SmallString<64> dir_to_create;
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 117151c91d8b..aa03d48438ec 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -32,6 +32,9 @@
 #if HAVE_FCNTL_H
 #include <fcntl.h>
 #endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
 #ifdef HAVE_POSIX_SPAWN
 #include <spawn.h>
 #if !defined(__APPLE__)
@@ -409,4 +412,25 @@ error_code Program::ChangeStderrToBinary(){
   return make_error_code(errc::success);
 }
 
+bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
+  static long ArgMax = sysconf(_SC_ARG_MAX);
+
+  // System says no practical limit.
+  if (ArgMax == -1)
+    return true;
+
+  // Conservatively account for space required by environment variables.
+  ArgMax /= 2;
+
+  size_t ArgLength = 0;
+  for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
+       I != E; ++I) {
+    ArgLength += strlen(*I) + 1;
+    if (ArgLength > size_t(ArgMax)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 }
diff --git a/lib/Support/Unix/Watchdog.inc b/lib/Support/Unix/Watchdog.inc
new file mode 100644
index 000000000000..5d89c0e51b11
--- /dev/null
+++ b/lib/Support/Unix/Watchdog.inc
@@ -0,0 +1,32 @@
+//===--- Unix/Watchdog.inc - Unix Watchdog Implementation -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Unix implementation of the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+namespace llvm {
+  namespace sys {
+    Watchdog::Watchdog(unsigned int seconds) {
+#ifdef HAVE_UNISTD_H
+      alarm(seconds);
+#endif
+    }
+
+    Watchdog::~Watchdog() {
+#ifdef HAVE_UNISTD_H
+      alarm(0);
+#endif
+    }
+  }
+}
diff --git a/lib/Support/Watchdog.cpp b/lib/Support/Watchdog.cpp
new file mode 100644
index 000000000000..724aa001f16e
--- /dev/null
+++ b/lib/Support/Watchdog.cpp
@@ -0,0 +1,23 @@
+//===---- Watchdog.cpp - Implement Watchdog ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Watchdog.h"
+#include "llvm/Config/config.h"
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Watchdog.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Watchdog.inc"
+#endif
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index 691d6d455501..994a09764e3b 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -396,4 +396,20 @@ error_code Program::ChangeStderrToBinary(){
   return make_error_code(errc::success);
 }
 
+bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
+  // The documented max length of the command line passed to CreateProcess.
+  static const size_t MaxCommandStringLength = 32768;
+  size_t ArgLength = 0;
+  for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
+       I != E; ++I) {
+    // Account for the trailing space for every arg but the last one and the
+    // trailing NULL of the last argument.
+    ArgLength += ArgLenWithQuotes(*I) + 1;
+    if (ArgLength > MaxCommandStringLength) {
+      return false;
+    }
+  }
+  return true;
+}
+
 }
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 3dd6660b031d..b18b4d1dacac 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -178,6 +178,19 @@ namespace llvm {
 //===----------------------------------------------------------------------===//
 
 #ifdef _MSC_VER
+/// AvoidMessageBoxHook - Emulates hitting "retry" from an "abort, retry,
+/// ignore" CRT debug report dialog.  "retry" raises an exception which
+/// ultimately triggers our stack dumper.
+static int AvoidMessageBoxHook(int ReportType, char *Message, int *Return) {
+  // Set *Return to the retry code for the return value of _CrtDbgReport:
+  // http://msdn.microsoft.com/en-us/library/8hyw4sy7(v=vs.71).aspx
+  // This may also trigger just-in-time debugging via DebugBreak().
+  if (Return)
+    *Return = 1;
+  // Don't call _CrtDbgReport.
+  return TRUE;
+}
+
 /// CRTReportHook - Function called on a CRT debugging event.
 static int CRTReportHook(int ReportType, char *Message, int *Return) {
   // Don't cause a DebugBreak() on return.
@@ -238,6 +251,15 @@ static void RegisterHandler() {
   OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
   SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
 
+#ifdef _MSC_VER
+  const char *EnableMsgbox = getenv("LLVM_ENABLE_CRT_REPORT");
+  if (!EnableMsgbox || strcmp("0", EnableMsgbox) == 0) {
+    // Setting a report hook overrides the default behavior of popping an "abort,
+    // retry, or ignore" dialog.
+    _CrtSetReportHook(AvoidMessageBoxHook);
+  }
+#endif
+
   // Environment variable to disable any kind of crash dialog.
   if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
 #ifdef _MSC_VER
diff --git a/lib/Support/Windows/Watchdog.inc b/lib/Support/Windows/Watchdog.inc
new file mode 100644
index 000000000000..fab2bdf2a941
--- /dev/null
+++ b/lib/Support/Windows/Watchdog.inc
@@ -0,0 +1,24 @@
+//===--- Windows/Watchdog.inc - Windows Watchdog Implementation -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Windows implementation of the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+// TODO: implement.
+// Currently this is only used by PrettyStackTrace which is also unimplemented
+// on Windows. Roughly, a Windows implementation would use CreateWaitableTimer
+// and a second thread to run the TimerAPCProc.
+
+namespace llvm {
+  namespace sys {
+    Watchdog::Watchdog(unsigned int seconds) {}
+    Watchdog::~Watchdog() {}
+  }
+}
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index da26a371a762..a433088b1930 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -517,7 +517,7 @@ raw_fd_ostream::~raw_fd_ostream() {
   // has_error() and clear the error flag with clear_error() before
   // destructing raw_ostream objects which may have errors.
   if (has_error())
-    report_fatal_error("IO failure on output stream.");
+    report_fatal_error("IO failure on output stream.", /*GenCrashDiag=*/false);
 }
 
 
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
index ec84a72454f6..928b1203cd8f 100644
--- a/lib/TableGen/Error.cpp
+++ b/lib/TableGen/Error.cpp
@@ -20,9 +20,15 @@
 namespace llvm {
 
 SourceMgr SrcMgr;
+unsigned ErrorsPrinted = 0;
 
 static void PrintMessage(ArrayRef<SMLoc> Loc, SourceMgr::DiagKind Kind,
                          const Twine &Msg) {
+  // Count the total number of errors printed.
+  // This is used to exit with an error code if there were any errors.
+  if (Kind == SourceMgr::DK_Error)
+    ++ErrorsPrinted;
+
   SMLoc NullLoc;
   if (Loc.empty())
     Loc = NullLoc;
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
index e1cd6237832c..dc4167b305ca 100644
--- a/lib/TableGen/Main.cpp
+++ b/lib/TableGen/Main.cpp
@@ -117,11 +117,14 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) {
   if (MainFn(Out.os(), Records))
     return 1;
 
+  if (ErrorsPrinted > 0) {
+    errs() << argv0 << ": " << ErrorsPrinted << " errors.\n";
+    return 1;
+  }
+
   // Declare success.
   Out.keep();
   return 0;
-
-  return 1;
 }
 
 }
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index c4b48fe5e895..86ad2a6e3c09 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -1547,29 +1547,39 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
 
 /// ParseDagArgList - Parse the argument list for a dag literal expression.
 ///
-///    ParseDagArgList ::= Value (':' VARNAME)?
-///    ParseDagArgList ::= ParseDagArgList ',' Value (':' VARNAME)?
+///    DagArg     ::= Value (':' VARNAME)?
+///    DagArg     ::= VARNAME
+///    DagArgList ::= DagArg
+///    DagArgList ::= DagArgList ',' DagArg
 std::vector<std::pair<llvm::Init*, std::string> >
 TGParser::ParseDagArgList(Record *CurRec) {
   std::vector<std::pair<llvm::Init*, std::string> > Result;
 
   while (1) {
-    Init *Val = ParseValue(CurRec);
-    if (Val == 0) return std::vector<std::pair<llvm::Init*, std::string> >();
-
-    // If the variable name is present, add it.
-    std::string VarName;
-    if (Lex.getCode() == tgtok::colon) {
-      if (Lex.Lex() != tgtok::VarName) { // eat the ':'
-        TokError("expected variable name in dag literal");
+    // DagArg ::= VARNAME
+    if (Lex.getCode() == tgtok::VarName) {
+      // A missing value is treated like '?'.
+      Result.push_back(std::make_pair(UnsetInit::get(), Lex.getCurStrVal()));
+      Lex.Lex();
+    } else {
+      // DagArg ::= Value (':' VARNAME)?
+      Init *Val = ParseValue(CurRec);
+      if (Val == 0)
         return std::vector<std::pair<llvm::Init*, std::string> >();
-      }
-      VarName = Lex.getCurStrVal();
-      Lex.Lex();  // eat the VarName.
-    }
 
-    Result.push_back(std::make_pair(Val, VarName));
+      // If the variable name is present, add it.
+      std::string VarName;
+      if (Lex.getCode() == tgtok::colon) {
+        if (Lex.Lex() != tgtok::VarName) { // eat the ':'
+          TokError("expected variable name in dag literal");
+          return std::vector<std::pair<llvm::Init*, std::string> >();
+        }
+        VarName = Lex.getCurStrVal();
+        Lex.Lex();  // eat the VarName.
+      }
 
+      Result.push_back(std::make_pair(Val, VarName));
+    }
     if (Lex.getCode() != tgtok::comma) break;
     Lex.Lex(); // eat the ','
   }
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 572617cc0781..dc41f2f60525 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -396,7 +396,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
     // helpfully places it near either SP or FP for us to avoid
     // infinitely-regression during scavenging.
     const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
-    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+    RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                        RC->getAlignment(),
                                                        false));
   }
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 46b822152a00..468c56156a4f 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -88,6 +88,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
 
   bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
 
+  SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64);
+
   SDNode *TrySelectToMoveImm(SDNode *N);
   SDNode *LowerToFPLitPool(SDNode *Node);
   SDNode *SelectToLitPool(SDNode *N);
@@ -318,6 +320,38 @@ AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
   return true;
 }
 
+SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
+                                          unsigned Op16,unsigned Op32,
+                                          unsigned Op64) {
+  // Mostly direct translation to the given operations, except that we preserve
+  // the AtomicOrdering for use later on.
+  AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+  EVT VT = AN->getMemoryVT();
+
+  unsigned Op;
+  if (VT == MVT::i8)
+    Op = Op8;
+  else if (VT == MVT::i16)
+    Op = Op16;
+  else if (VT == MVT::i32)
+    Op = Op32;
+  else if (VT == MVT::i64)
+    Op = Op64;
+  else
+    llvm_unreachable("Unexpected atomic operation");
+
+  SmallVector<SDValue, 4> Ops;
+  for (unsigned i = 1; i < AN->getNumOperands(); ++i)
+      Ops.push_back(AN->getOperand(i));
+
+  Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
+  Ops.push_back(AN->getOperand(0)); // Chain moves to the end
+
+  return CurDAG->SelectNodeTo(Node, Op,
+                              AN->getValueType(0), MVT::Other,
+                              &Ops[0], Ops.size());
+}
+
 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   // Dump information about the Node being selected
   DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
@@ -328,6 +362,78 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   }
 
   switch (Node->getOpcode()) {
+  case ISD::ATOMIC_LOAD_ADD:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_ADD_I8,
+                        AArch64::ATOMIC_LOAD_ADD_I16,
+                        AArch64::ATOMIC_LOAD_ADD_I32,
+                        AArch64::ATOMIC_LOAD_ADD_I64);
+  case ISD::ATOMIC_LOAD_SUB:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_SUB_I8,
+                        AArch64::ATOMIC_LOAD_SUB_I16,
+                        AArch64::ATOMIC_LOAD_SUB_I32,
+                        AArch64::ATOMIC_LOAD_SUB_I64);
+  case ISD::ATOMIC_LOAD_AND:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_AND_I8,
+                        AArch64::ATOMIC_LOAD_AND_I16,
+                        AArch64::ATOMIC_LOAD_AND_I32,
+                        AArch64::ATOMIC_LOAD_AND_I64);
+  case ISD::ATOMIC_LOAD_OR:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_OR_I8,
+                        AArch64::ATOMIC_LOAD_OR_I16,
+                        AArch64::ATOMIC_LOAD_OR_I32,
+                        AArch64::ATOMIC_LOAD_OR_I64);
+  case ISD::ATOMIC_LOAD_XOR:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_XOR_I8,
+                        AArch64::ATOMIC_LOAD_XOR_I16,
+                        AArch64::ATOMIC_LOAD_XOR_I32,
+                        AArch64::ATOMIC_LOAD_XOR_I64);
+  case ISD::ATOMIC_LOAD_NAND:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_NAND_I8,
+                        AArch64::ATOMIC_LOAD_NAND_I16,
+                        AArch64::ATOMIC_LOAD_NAND_I32,
+                        AArch64::ATOMIC_LOAD_NAND_I64);
+  case ISD::ATOMIC_LOAD_MIN:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_MIN_I8,
+                        AArch64::ATOMIC_LOAD_MIN_I16,
+                        AArch64::ATOMIC_LOAD_MIN_I32,
+                        AArch64::ATOMIC_LOAD_MIN_I64);
+  case ISD::ATOMIC_LOAD_MAX:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_MAX_I8,
+                        AArch64::ATOMIC_LOAD_MAX_I16,
+                        AArch64::ATOMIC_LOAD_MAX_I32,
+                        AArch64::ATOMIC_LOAD_MAX_I64);
+  case ISD::ATOMIC_LOAD_UMIN:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_UMIN_I8,
+                        AArch64::ATOMIC_LOAD_UMIN_I16,
+                        AArch64::ATOMIC_LOAD_UMIN_I32,
+                        AArch64::ATOMIC_LOAD_UMIN_I64);
+  case ISD::ATOMIC_LOAD_UMAX:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_UMAX_I8,
+                        AArch64::ATOMIC_LOAD_UMAX_I16,
+                        AArch64::ATOMIC_LOAD_UMAX_I32,
+                        AArch64::ATOMIC_LOAD_UMAX_I64);
+  case ISD::ATOMIC_SWAP:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_SWAP_I8,
+                        AArch64::ATOMIC_SWAP_I16,
+                        AArch64::ATOMIC_SWAP_I32,
+                        AArch64::ATOMIC_SWAP_I64);
+  case ISD::ATOMIC_CMP_SWAP:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_CMP_SWAP_I8,
+                        AArch64::ATOMIC_CMP_SWAP_I16,
+                        AArch64::ATOMIC_CMP_SWAP_I32,
+                        AArch64::ATOMIC_CMP_SWAP_I64);
   case ISD::FrameIndex: {
     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
     EVT PtrTy = TLI.getPointerTy();
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index e9f449709c40..6deae75488ed 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -59,12 +59,9 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
 
   computeRegisterProperties();
 
-  // Some atomic operations can be folded into load-acquire or store-release
-  // instructions on AArch64. It's marginally simpler to let LLVM expand
-  // everything out to a barrier and then recombine the (few) barriers we can.
-  setInsertFencesForAtomic(true);
-  setTargetDAGCombine(ISD::ATOMIC_FENCE);
-  setTargetDAGCombine(ISD::ATOMIC_STORE);
+  // We have particularly efficient implementations of atomic fences if they can
+  // be combined with nearby atomic loads and stores.
+  setShouldFoldAtomicFences(true);
 
   // We combine OR nodes for bitfield and NEON BSL operations.
   setTargetDAGCombine(ISD::OR);
@@ -275,27 +272,34 @@ EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
   return VT.changeVectorElementTypeToInteger();
 }
 
-static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
-                                  unsigned &strOpc) {
-  switch (Size) {
-  default: llvm_unreachable("unsupported size for atomic binary op!");
-  case 1:
-    ldrOpc = AArch64::LDXR_byte;
-    strOpc = AArch64::STXR_byte;
-    break;
-  case 2:
-    ldrOpc = AArch64::LDXR_hword;
-    strOpc = AArch64::STXR_hword;
-    break;
-  case 4:
-    ldrOpc = AArch64::LDXR_word;
-    strOpc = AArch64::STXR_word;
-    break;
-  case 8:
-    ldrOpc = AArch64::LDXR_dword;
-    strOpc = AArch64::STXR_dword;
-    break;
-  }
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+                                  unsigned &LdrOpc,
+                                  unsigned &StrOpc) {
+  static unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
+                                 AArch64::LDXR_word, AArch64::LDXR_dword};
+  static unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
+                                AArch64::LDAXR_word, AArch64::LDAXR_dword};
+  static unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
+                                  AArch64::STXR_word, AArch64::STXR_dword};
+  static unsigned StoreRels[] = {AArch64::STLXR_byte, AArch64::STLXR_hword,
+                                 AArch64::STLXR_word, AArch64::STLXR_dword};
+
+  unsigned *LoadOps, *StoreOps;
+  if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    LoadOps = LoadAcqs;
+  else
+    LoadOps = LoadBares;
+
+  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    StoreOps = StoreRels;
+  else
+    StoreOps = StoreBares;
+
+  assert(isPowerOf2_32(Size) && Size <= 8 &&
+         "unsupported size for atomic binary op!");
+
+  LdrOpc = LoadOps[Log2_32(Size)];
+  StrOpc = StoreOps[Log2_32(Size)];
 }
 
 MachineBasicBlock *
@@ -313,12 +317,13 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   unsigned dest = MI->getOperand(0).getReg();
   unsigned ptr = MI->getOperand(1).getReg();
   unsigned incr = MI->getOperand(2).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
   DebugLoc dl = MI->getDebugLoc();
 
   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -397,6 +402,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
   unsigned dest = MI->getOperand(0).getReg();
   unsigned ptr = MI->getOperand(1).getReg();
   unsigned incr = MI->getOperand(2).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+
   unsigned oldval = dest;
   DebugLoc dl = MI->getDebugLoc();
 
@@ -411,7 +418,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
   }
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -479,6 +486,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
   unsigned ptr     = MI->getOperand(1).getReg();
   unsigned oldval  = MI->getOperand(2).getReg();
   unsigned newval  = MI->getOperand(3).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   DebugLoc dl = MI->getDebugLoc();
 
@@ -487,7 +495,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
   TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineFunction *MF = BB->getParent();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -2377,78 +2385,6 @@ static SDValue PerformANDCombine(SDNode *N,
                      DAG.getConstant(LSB + Width - 1, MVT::i64));
 }
 
-static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
-                                         TargetLowering::DAGCombinerInfo &DCI) {
-  // An atomic operation followed by an acquiring atomic fence can be reduced to
-  // an acquiring load. The atomic operation provides a convenient pointer to
-  // load from. If the original operation was a load anyway we can actually
-  // combine the two operations into an acquiring load.
-  SelectionDAG &DAG = DCI.DAG;
-  SDValue AtomicOp = FenceNode->getOperand(0);
-  AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
-
-  // A fence on its own can't be optimised
-  if (!AtomicNode)
-    return SDValue();
-
-  AtomicOrdering FenceOrder
-    = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1));
-  SynchronizationScope FenceScope
-    = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2));
-
-  if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
-    return SDValue();
-
-  // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
-  // the chain we use should be its input, otherwise we'll put our store after
-  // it so we use its output chain.
-  SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
-    AtomicNode->getChain() : AtomicOp;
-
-  // We have an acquire fence with a handy atomic operation nearby, we can
-  // convert the fence into a load-acquire, discarding the result.
-  DebugLoc DL = FenceNode->getDebugLoc();
-  SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
-                             AtomicNode->getValueType(0),
-                             Chain,                  // Chain
-                             AtomicOp.getOperand(1), // Pointer
-                             AtomicNode->getMemOperand(), Acquire,
-                             FenceScope);
-
-  if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
-    DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
-
-  return Op.getValue(1);
-}
-
-static SDValue PerformATOMIC_STORECombine(SDNode *N,
-                                         TargetLowering::DAGCombinerInfo &DCI) {
-  // A releasing atomic fence followed by an atomic store can be combined into a
-  // single store operation.
-  SelectionDAG &DAG = DCI.DAG;
-  AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
-  SDValue FenceOp = AtomicNode->getOperand(0);
-
-  if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
-    return SDValue();
-
-  AtomicOrdering FenceOrder
-    = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1));
-  SynchronizationScope FenceScope
-    = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2));
-
-  if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
-    return SDValue();
-
-  DebugLoc DL = AtomicNode->getDebugLoc();
-  return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
-                       FenceOp.getOperand(0),  // Chain
-                       AtomicNode->getOperand(1),       // Pointer
-                       AtomicNode->getOperand(2),       // Value
-                       AtomicNode->getMemOperand(), Release,
-                       FenceScope);
-}
-
 /// For a true bitfield insert, the bits getting into that contiguous mask
 /// should come from the low part of an existing value: they must be formed from
 /// a compatible SHL operation (unless they're already low). This function
@@ -2804,8 +2740,6 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   switch (N->getOpcode()) {
   default: break;
   case ISD::AND: return PerformANDCombine(N, DCI);
-  case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
-  case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
   case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
   case ISD::SRA: return PerformSRACombine(N, DCI);
   }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 7b9346324400..cf3a2c3707d9 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -618,11 +618,11 @@ void llvm::emitRegUpdate(MachineBasicBlock &MBB,
                          int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
   if (NumBytes == 0 && DstReg == SrcReg)
     return;
-  else if (abs(NumBytes) & ~0xffffff) {
+  else if (abs64(NumBytes) & ~0xffffff) {
     // Generically, we have to materialize the offset into a temporary register
     // and subtract it. There are a couple of ways this could be done, for now
     // we'll use a movz/movk or movn/movk sequence.
-    uint64_t Bits = static_cast<uint64_t>(abs(NumBytes));
+    uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
     BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
       .addImm(0xffff & Bits).addImm(0)
       .setMIFlags(MIFlags);
@@ -673,7 +673,7 @@ void llvm::emitRegUpdate(MachineBasicBlock &MBB,
   } else {
     LowOp = AArch64::SUBxxi_lsl0_s;
     HighOp = AArch64::SUBxxi_lsl12_s;
-    NumBytes = abs(NumBytes);
+    NumBytes = abs64(NumBytes);
   }
 
   // If we're here, at the very least a move needs to be produced, which just
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 319ec97cfcde..ff21c223e9ab 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -159,53 +159,55 @@ let Defs = [XSP], Uses = [XSP] in {
 // Atomic operation pseudo-instructions
 //===----------------------------------------------------------------------===//
 
-let usesCustomInserter = 1 in {
-multiclass AtomicSizes<string opname> {
-  def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-    [(set GPR32:$dst, (!cast<SDNode>(opname # "_8") GPR64:$ptr, GPR32:$incr))]>;
-  def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-   [(set GPR32:$dst, (!cast<SDNode>(opname # "_16") GPR64:$ptr, GPR32:$incr))]>;
-  def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-   [(set GPR32:$dst, (!cast<SDNode>(opname # "_32") GPR64:$ptr, GPR32:$incr))]>;
-  def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr),
-   [(set GPR64:$dst, (!cast<SDNode>(opname # "_64") GPR64:$ptr, GPR64:$incr))]>;
-}
-}
-
-defm ATOMIC_LOAD_ADD  : AtomicSizes<"atomic_load_add">;
-defm ATOMIC_LOAD_SUB  : AtomicSizes<"atomic_load_sub">;
-defm ATOMIC_LOAD_AND  : AtomicSizes<"atomic_load_and">;
-defm ATOMIC_LOAD_OR   : AtomicSizes<"atomic_load_or">;
-defm ATOMIC_LOAD_XOR  : AtomicSizes<"atomic_load_xor">;
-defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
-defm ATOMIC_SWAP      : AtomicSizes<"atomic_swap">;
+// These get selected from C++ code as a pretty much direct translation from the
+// generic DAG nodes. The one exception is the AtomicOrdering is added as an
+// operand so that the eventual lowering can make use of it and choose
+// acquire/release operations when required.
+
+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
+multiclass AtomicSizes {
+  def _I8 : PseudoInst<(outs GPR32:$dst),
+                       (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I16 : PseudoInst<(outs GPR32:$dst),
+                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I32 : PseudoInst<(outs GPR32:$dst),
+                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I64 : PseudoInst<(outs GPR64:$dst),
+                        (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
+}
+}
+
+defm ATOMIC_LOAD_ADD  : AtomicSizes;
+defm ATOMIC_LOAD_SUB  : AtomicSizes;
+defm ATOMIC_LOAD_AND  : AtomicSizes;
+defm ATOMIC_LOAD_OR   : AtomicSizes;
+defm ATOMIC_LOAD_XOR  : AtomicSizes;
+defm ATOMIC_LOAD_NAND : AtomicSizes;
+defm ATOMIC_SWAP      : AtomicSizes;
 let Defs = [NZCV] in {
   // These operations need a CMP to calculate the correct value
-  defm ATOMIC_LOAD_MIN  : AtomicSizes<"atomic_load_min">;
-  defm ATOMIC_LOAD_MAX  : AtomicSizes<"atomic_load_max">;
-  defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
-  defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
-}
-
-let usesCustomInserter = 1, Defs = [NZCV] in {
-def ATOMIC_CMP_SWAP_I8
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-               [(set GPR32:$dst,
-                     (atomic_cmp_swap_8 GPR64:$ptr, GPR32:$old, GPR32:$new))]>;
-def ATOMIC_CMP_SWAP_I16
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-               [(set GPR32:$dst,
-                     (atomic_cmp_swap_16 GPR64:$ptr, GPR32:$old, GPR32:$new))]>;
-def ATOMIC_CMP_SWAP_I32
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-               [(set GPR32:$dst,
-                     (atomic_cmp_swap_32 GPR64:$ptr, GPR32:$old, GPR32:$new))]>;
-def ATOMIC_CMP_SWAP_I64
-  : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new),
-               [(set GPR64:$dst,
-                     (atomic_cmp_swap_64 GPR64:$ptr, GPR64:$old, GPR64:$new))]>;
+  defm ATOMIC_LOAD_MIN  : AtomicSizes;
+  defm ATOMIC_LOAD_MAX  : AtomicSizes;
+  defm ATOMIC_LOAD_UMIN : AtomicSizes;
+  defm ATOMIC_LOAD_UMAX : AtomicSizes;
 }
 
+class AtomicCmpSwap<RegisterClass GPRData>
+  : PseudoInst<(outs GPRData:$dst),
+               (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new,
+                    i32imm:$ordering), []> {
+  let usesCustomInserter = 1;
+  let hasCtrlDep = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let Defs = [NZCV];
+}
+
+def ATOMIC_CMP_SWAP_I8  : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
+
 //===----------------------------------------------------------------------===//
 // Add-subtract (extended register) instructions
 //===----------------------------------------------------------------------===//
@@ -264,31 +266,39 @@ def LSL_extoperand : Operand<i64> {
 class extend_types {
     dag uxtb; dag uxth; dag uxtw; dag uxtx;
     dag sxtb; dag sxth; dag sxtw; dag sxtx;
+    ValueType ty;
+    RegisterClass GPR;
 }
 
 def extends_to_i64 : extend_types {
-    let uxtb = (and (anyext GPR32:$Rm), 255);
-    let uxth = (and (anyext GPR32:$Rm), 65535);
-    let uxtw = (zext GPR32:$Rm);
-    let uxtx = (i64 GPR64:$Rm);
+    let uxtb = (and (anyext i32:$Rm), 255);
+    let uxth = (and (anyext i32:$Rm), 65535);
+    let uxtw = (zext i32:$Rm);
+    let uxtx = (i64 $Rm);
+
+    let sxtb = (sext_inreg (anyext i32:$Rm), i8);
+    let sxth = (sext_inreg (anyext i32:$Rm), i16);
+    let sxtw = (sext i32:$Rm);
+    let sxtx = (i64 $Rm);
 
-    let sxtb = (sext_inreg (anyext GPR32:$Rm), i8);
-    let sxth = (sext_inreg (anyext GPR32:$Rm), i16);
-    let sxtw = (sext GPR32:$Rm);
-    let sxtx = (i64 GPR64:$Rm);
+    let ty = i64;
+    let GPR = GPR64xsp;
 }
 
 
 def extends_to_i32 : extend_types {
-    let uxtb = (and GPR32:$Rm, 255);
-    let uxth = (and GPR32:$Rm, 65535);
-    let uxtw = (i32 GPR32:$Rm);
-    let uxtx = (i32 GPR32:$Rm);
+    let uxtb = (and i32:$Rm, 255);
+    let uxth = (and i32:$Rm, 65535);
+    let uxtw = (i32 i32:$Rm);
+    let uxtx = (i32 i32:$Rm);
+
+    let sxtb = (sext_inreg i32:$Rm, i8);
+    let sxth = (sext_inreg i32:$Rm, i16);
+    let sxtw = (i32 i32:$Rm);
+    let sxtx = (i32 i32:$Rm);
 
-    let sxtb = (sext_inreg GPR32:$Rm, i8);
-    let sxth = (sext_inreg GPR32:$Rm, i16);
-    let sxtw = (i32 GPR32:$Rm);
-    let sxtx = (i32 GPR32:$Rm);
+    let ty = i32;
+    let GPR = GPR32wsp;
 }
 
 // Now, six of the extensions supported are easy and uniform: if the source size
@@ -303,44 +313,38 @@ def extends_to_i32 : extend_types {
 //       would probably be the best option).
 multiclass addsub_exts<bit sf, bit op, bit S, string asmop,
                        SDPatternOperator opfrag,
-                       dag outs, extend_types exts, RegisterClass GPRsp> {
+                       dag outs, extend_types exts> {
     def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000,
-                      outs,
-                      (ins GPRsp:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
-                      !strconcat(asmop, "$Rn, $Rm, $Imm3"),
-                      [(opfrag GPRsp:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
-                      NoItinerary>;
+                    outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
+                    !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+                    [(opfrag exts.ty:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
+                    NoItinerary>;
     def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001,
-                      outs,
-                      (ins GPRsp:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
-                      !strconcat(asmop, "$Rn, $Rm, $Imm3"),
-                      [(opfrag GPRsp:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
-                      NoItinerary>;
+                    outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
+                    !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+                    [(opfrag exts.ty:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
+                    NoItinerary>;
     def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010,
-                      outs,
-                      (ins GPRsp:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
-                      !strconcat(asmop, "$Rn, $Rm, $Imm3"),
-                      [(opfrag GPRsp:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
-                      NoItinerary>;
+                    outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
+                    !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+                    [(opfrag exts.ty:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
+                    NoItinerary>;
 
     def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100,
-                      outs,
-                      (ins GPRsp:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
-                      !strconcat(asmop, "$Rn, $Rm, $Imm3"),
-                      [(opfrag GPRsp:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
-                      NoItinerary>;
+                    outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
+                    !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+                    [(opfrag exts.ty:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
+                    NoItinerary>;
     def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101,
-                      outs,
-                      (ins GPRsp:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
-                      !strconcat(asmop, "$Rn, $Rm, $Imm3"),
-                      [(opfrag GPRsp:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
-                      NoItinerary>;
+                    outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
+                    !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+                    [(opfrag exts.ty:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
+                    NoItinerary>;
     def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110,
-                      outs,
-                      (ins GPRsp:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
-                      !strconcat(asmop, "$Rn, $Rm, $Imm3"),
-                      [(opfrag GPRsp:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
-                      NoItinerary>;
+                    outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
+                    !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+                    [(opfrag exts.ty:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
+                    NoItinerary>;
 }
 
 // These two could be merge in with the above, but their patterns aren't really
@@ -351,7 +355,7 @@ multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag,
                    outs,
                    (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3),
                    !strconcat(asmop, "$Rn, $Rm, $Imm3"),
-                   [(opfrag GPR64xsp:$Rn, (shl GPR64:$Rm, UXTX_operand:$Imm3))],
+                   [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))],
                    NoItinerary>;
 
     def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111,
@@ -384,53 +388,53 @@ class SetNZCV<SDPatternOperator op>
   : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>;
 
 defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
-                        (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>,
+                        (outs GPR64xsp:$Rd), extends_to_i64>,
             addsub_xxtx<     0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
                         (outs GPR64xsp:$Rd)>;
 defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>,
-                        (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>,
+                        (outs GPR32wsp:$Rd), extends_to_i32>,
             addsub_wxtx<     0b0, 0b0, "add\t$Rd, ",
                         (outs GPR32wsp:$Rd)>;
 defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
-                        (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>,
+                        (outs GPR64xsp:$Rd), extends_to_i64>,
             addsub_xxtx<     0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
                         (outs GPR64xsp:$Rd)>;
 defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>,
-                        (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>,
+                        (outs GPR32wsp:$Rd), extends_to_i32>,
             addsub_wxtx<     0b1, 0b0, "sub\t$Rd, ",
                         (outs GPR32wsp:$Rd)>;
 
 let Defs = [NZCV] in {
 defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
-                         (outs GPR64:$Rd), extends_to_i64, GPR64xsp>,
+                         (outs GPR64:$Rd), extends_to_i64>,
              addsub_xxtx<     0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
                          (outs GPR64:$Rd)>;
 defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>,
-                         (outs GPR32:$Rd), extends_to_i32, GPR32wsp>,
+                         (outs GPR32:$Rd), extends_to_i32>,
              addsub_wxtx<     0b0, 0b1, "adds\t$Rd, ",
                          (outs GPR32:$Rd)>;
 defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
-                         (outs GPR64:$Rd), extends_to_i64, GPR64xsp>,
+                         (outs GPR64:$Rd), extends_to_i64>,
              addsub_xxtx<     0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
                          (outs GPR64:$Rd)>;
 defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
-                         (outs GPR32:$Rd), extends_to_i32, GPR32wsp>,
+                         (outs GPR32:$Rd), extends_to_i32>,
              addsub_wxtx<     0b1, 0b1, "subs\t$Rd, ",
                          (outs GPR32:$Rd)>;
 
 
 let Rd = 0b11111, isCompare = 1 in {
 defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
-                        (outs), extends_to_i64, GPR64xsp>,
+                        (outs), extends_to_i64>,
             addsub_xxtx<     0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
 defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
-                        (outs), extends_to_i32, GPR32wsp>,
+                        (outs), extends_to_i32>,
             addsub_wxtx<     0b0, 0b1, "cmn\t", (outs)>;
 defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
-                        (outs), extends_to_i64, GPR64xsp>,
+                        (outs), extends_to_i64>,
             addsub_xxtx<     0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>;
 defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
-                        (outs), extends_to_i32, GPR32wsp>,
+                        (outs), extends_to_i32>,
             addsub_wxtx<     0b1, 0b1, "cmp\t", (outs)>;
 }
 }
@@ -439,31 +443,31 @@ defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
 // created for uxtx/sxtx since they're non-uniform and it's expected that
 // add/sub (shifted register) will handle those cases anyway.
 multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop,
-                                      RegisterClass GPRsp, extend_types exts> {
-    def : Pat<(nodeop GPRsp:$Rn, exts.uxtb),
-              (!cast<Instruction>(prefix # "w_uxtb") GPRsp:$Rn, GPR32:$Rm, 0)>;
-    def : Pat<(nodeop GPRsp:$Rn, exts.uxth),
-              (!cast<Instruction>(prefix # "w_uxth") GPRsp:$Rn, GPR32:$Rm, 0)>;
-    def : Pat<(nodeop GPRsp:$Rn, exts.uxtw),
-              (!cast<Instruction>(prefix # "w_uxtw") GPRsp:$Rn, GPR32:$Rm, 0)>;
-
-    def : Pat<(nodeop GPRsp:$Rn, exts.sxtb),
-              (!cast<Instruction>(prefix # "w_sxtb") GPRsp:$Rn, GPR32:$Rm, 0)>;
-    def : Pat<(nodeop GPRsp:$Rn, exts.sxth),
-              (!cast<Instruction>(prefix # "w_sxth") GPRsp:$Rn, GPR32:$Rm, 0)>;
-    def : Pat<(nodeop GPRsp:$Rn, exts.sxtw),
-              (!cast<Instruction>(prefix # "w_sxtw") GPRsp:$Rn, GPR32:$Rm, 0)>;
-}
-
-defm : addsubext_noshift_patterns<"ADDxx", add, GPR64xsp, extends_to_i64>;
-defm : addsubext_noshift_patterns<"ADDww", add, GPR32wsp, extends_to_i32>;
-defm : addsubext_noshift_patterns<"SUBxx", sub, GPR64xsp, extends_to_i64>;
-defm : addsubext_noshift_patterns<"SUBww", sub, GPR32wsp, extends_to_i32>;
-
-defm : addsubext_noshift_patterns<"CMNx", A64cmn, GPR64xsp, extends_to_i64>;
-defm : addsubext_noshift_patterns<"CMNw", A64cmn, GPR32wsp, extends_to_i32>;
-defm : addsubext_noshift_patterns<"CMPx", A64cmp, GPR64xsp, extends_to_i64>;
-defm : addsubext_noshift_patterns<"CMPw", A64cmp, GPR32wsp, extends_to_i32>;
+                                      extend_types exts> {
+    def : Pat<(nodeop exts.ty:$Rn, exts.uxtb),
+              (!cast<Instruction>(prefix # "w_uxtb") $Rn, $Rm, 0)>;
+    def : Pat<(nodeop exts.ty:$Rn, exts.uxth),
+              (!cast<Instruction>(prefix # "w_uxth") $Rn, $Rm, 0)>;
+    def : Pat<(nodeop exts.ty:$Rn, exts.uxtw),
+              (!cast<Instruction>(prefix # "w_uxtw") $Rn, $Rm, 0)>;
+
+    def : Pat<(nodeop exts.ty:$Rn, exts.sxtb),
+              (!cast<Instruction>(prefix # "w_sxtb") $Rn, $Rm, 0)>;
+    def : Pat<(nodeop exts.ty:$Rn, exts.sxth),
+              (!cast<Instruction>(prefix # "w_sxth") $Rn, $Rm, 0)>;
+    def : Pat<(nodeop exts.ty:$Rn, exts.sxtw),
+              (!cast<Instruction>(prefix # "w_sxtw") $Rn, $Rm, 0)>;
+}
+
+defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>;
+defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>;
+defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>;
+defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>;
+
+defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>;
+defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>;
 
 // An extend of "lsl #imm" is valid if and only if one of Rn and Rd is
 // sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the
@@ -614,14 +618,13 @@ multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
                                string asmop, string cmpasmop,
                                Operand imm_operand, Operand cmp_imm_operand,
                                RegisterClass GPR, RegisterClass GPRsp,
-                               AArch64Reg ZR> {
+                               AArch64Reg ZR, ValueType Ty> {
     // All registers for non-S variants allow SP
   def _s : A64I_addsubimm<sf, op, 0b0, shift,
                          (outs GPRsp:$Rd),
                          (ins GPRsp:$Rn, imm_operand:$Imm12),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
-                         [(set GPRsp:$Rd,
-                               (add GPRsp:$Rn, imm_operand:$Imm12))],
+                         [(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))],
                          NoItinerary>;
 
 
@@ -630,7 +633,7 @@ multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
                          (outs GPR:$Rd),
                          (ins GPRsp:$Rn, imm_operand:$Imm12),
                          !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
-                         [(set GPR:$Rd, (addc GPRsp:$Rn, imm_operand:$Imm12))],
+                         [(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))],
                          NoItinerary> {
     let Defs = [NZCV];
   }
@@ -642,7 +645,7 @@ multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
                             (outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
                             !strconcat(cmpasmop, " $Rn, $Imm12"),
                             [(set NZCV,
-                              (A64cmp GPRsp:$Rn, cmp_imm_operand:$Imm12))],
+                                  (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))],
                             NoItinerary> {
     let Rd = 0b11111;
     let Defs = [NZCV];
@@ -653,36 +656,37 @@ multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
 
 multiclass addsubimm_shifts<string prefix, bit sf, bit op,
            string asmop, string cmpasmop, string operand, string cmpoperand,
-           RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR> {
+           RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR,
+           ValueType Ty> {
   defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
                                    asmop, cmpasmop,
                                    !cast<Operand>(operand # "_lsl0"),
                                    !cast<Operand>(cmpoperand # "_lsl0"),
-                                   GPR, GPRsp, ZR>;
+                                   GPR, GPRsp, ZR, Ty>;
 
   defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
                                     asmop, cmpasmop,
                                     !cast<Operand>(operand # "_lsl12"),
                                     !cast<Operand>(cmpoperand # "_lsl12"),
-                                    GPR, GPRsp, ZR>;
+                                    GPR, GPRsp, ZR, Ty>;
 }
 
 defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
                               "addsubimm_operand_i32_posimm",
                               "addsubimm_operand_i32_negimm",
-                              GPR32, GPR32wsp, WZR>;
+                              GPR32, GPR32wsp, WZR, i32>;
 defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
                               "addsubimm_operand_i64_posimm",
                               "addsubimm_operand_i64_negimm",
-                              GPR64, GPR64xsp, XZR>;
+                              GPR64, GPR64xsp, XZR, i64>;
 defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
                               "addsubimm_operand_i32_negimm",
                               "addsubimm_operand_i32_posimm",
-                              GPR32, GPR32wsp, WZR>;
+                              GPR32, GPR32wsp, WZR, i32>;
 defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
                               "addsubimm_operand_i64_negimm",
                               "addsubimm_operand_i64_posimm",
-                              GPR64, GPR64xsp, XZR>;
+                              GPR64, GPR64xsp, XZR, i64>;
 
 multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> {
   def _fromsp : InstAlias<"mov $Rd, $Rn",
@@ -753,36 +757,36 @@ defm ror_operand : shift_operands<"ror_operand", "ROR">;
 // N.b. the commutable parameter is just !N. It will be first against the wall
 // when the revolution comes.
 multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
-                         string asmop, SDPatternOperator opfrag, string sty,
+                         string asmop, SDPatternOperator opfrag, ValueType ty,
                          RegisterClass GPR, list<Register> defs> {
   let isCommutable = commutable, Defs = defs in {
   def _lsl : A64I_addsubshift<sf, op, s, 0b00,
                        (outs GPR:$Rd),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("lsl_operand_" # sty):$Imm6),
+                            !cast<Operand>("lsl_operand_" # ty):$Imm6),
                        !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
-                       [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm,
-                            !cast<Operand>("lsl_operand_" # sty):$Imm6))
+                       [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
+                            !cast<Operand>("lsl_operand_" # ty):$Imm6))
                        )],
                        NoItinerary>;
 
   def _lsr : A64I_addsubshift<sf, op, s, 0b01,
                        (outs GPR:$Rd),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("lsr_operand_" # sty):$Imm6),
+                            !cast<Operand>("lsr_operand_" # ty):$Imm6),
                        !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
-                       [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm,
-                            !cast<Operand>("lsr_operand_" # sty):$Imm6))
+                       [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
+                            !cast<Operand>("lsr_operand_" # ty):$Imm6))
                        )],
                        NoItinerary>;
 
   def _asr : A64I_addsubshift<sf, op, s, 0b10,
                        (outs GPR:$Rd),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("asr_operand_" # sty):$Imm6),
+                            !cast<Operand>("asr_operand_" # ty):$Imm6),
                        !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
-                       [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm,
-                            !cast<Operand>("asr_operand_" # sty):$Imm6))
+                       [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
+                            !cast<Operand>("asr_operand_" # ty):$Imm6))
                        )],
                        NoItinerary>;
   }
@@ -792,17 +796,17 @@ multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
                  (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
                                                       GPR:$Rm, 0)>;
 
-  def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
-            (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+  def : Pat<(opfrag ty:$Rn, ty:$Rm),
+            (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
 }
 
 multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
                          string asmop, SDPatternOperator opfrag,
                          list<Register> defs> {
   defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
-                           commutable, asmop, opfrag, "i64", GPR64, defs>;
+                           commutable, asmop, opfrag, i64, GPR64, defs>;
   defm www : addsub_shifts<prefix # "www", 0b0, op, s,
-                           commutable, asmop, opfrag, "i32", GPR32, defs>;
+                           commutable, asmop, opfrag, i32, GPR32, defs>;
 }
 
 
@@ -816,26 +820,26 @@ defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
 // 1. The NEG/NEGS aliases
 //===-------------------------------
 
-multiclass neg_alias<Instruction INST, RegisterClass GPR,
-                     Register ZR, Operand shift_operand, SDNode shiftop> {
+multiclass neg_alias<Instruction INST, RegisterClass GPR, Register ZR,
+                     ValueType ty, Operand shift_operand, SDNode shiftop> {
    def : InstAlias<"neg $Rd, $Rm, $Imm6",
                    (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
 
-   def : Pat<(sub 0, (shiftop GPR:$Rm, shift_operand:$Imm6)),
-             (INST ZR, GPR:$Rm, shift_operand:$Imm6)>;
+   def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)),
+             (INST ZR, $Rm, shift_operand:$Imm6)>;
 }
 
-defm : neg_alias<SUBwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
-defm : neg_alias<SUBwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
-defm : neg_alias<SUBwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+defm : neg_alias<SUBwww_lsl, GPR32, WZR, i32, lsl_operand_i32, shl>;
+defm : neg_alias<SUBwww_lsr, GPR32, WZR, i32, lsr_operand_i32, srl>;
+defm : neg_alias<SUBwww_asr, GPR32, WZR, i32, asr_operand_i32, sra>;
 def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
-def : Pat<(sub 0, GPR32:$Rm), (SUBwww_lsl WZR, GPR32:$Rm, 0)>;
+def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>;
 
-defm : neg_alias<SUBxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
-defm : neg_alias<SUBxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
-defm : neg_alias<SUBxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+defm : neg_alias<SUBxxx_lsl, GPR64, XZR, i64, lsl_operand_i64, shl>;
+defm : neg_alias<SUBxxx_lsr, GPR64, XZR, i64, lsr_operand_i64, srl>;
+defm : neg_alias<SUBxxx_asr, GPR64, XZR, i64, asr_operand_i64, sra>;
 def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
-def : Pat<(sub 0, GPR64:$Rm), (SUBxxx_lsl XZR, GPR64:$Rm, 0)>;
+def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>;
 
 // NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
 // be involved.
@@ -859,36 +863,36 @@ def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
 //===-------------------------------
 
 multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
-                      string asmop, SDPatternOperator opfrag, string sty,
+                      string asmop, SDPatternOperator opfrag, ValueType ty,
                       RegisterClass GPR> {
   let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
   def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
                        (outs),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("lsl_operand_" # sty):$Imm6),
+                            !cast<Operand>("lsl_operand_" # ty):$Imm6),
                        !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
-                       [(set NZCV, (opfrag GPR:$Rn, (shl GPR:$Rm,
-                            !cast<Operand>("lsl_operand_" # sty):$Imm6))
+                       [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm,
+                            !cast<Operand>("lsl_operand_" # ty):$Imm6))
                        )],
                        NoItinerary>;
 
   def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
                        (outs),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("lsr_operand_" # sty):$Imm6),
+                            !cast<Operand>("lsr_operand_" # ty):$Imm6),
                        !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
-                       [(set NZCV, (opfrag GPR:$Rn, (srl GPR:$Rm,
-                            !cast<Operand>("lsr_operand_" # sty):$Imm6))
+                       [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm,
+                            !cast<Operand>("lsr_operand_" # ty):$Imm6))
                        )],
                        NoItinerary>;
 
   def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
                        (outs),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("asr_operand_" # sty):$Imm6),
+                            !cast<Operand>("asr_operand_" # ty):$Imm6),
                        !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
-                       [(set NZCV, (opfrag GPR:$Rn, (sra GPR:$Rm,
-                            !cast<Operand>("asr_operand_" # sty):$Imm6))
+                       [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm,
+                            !cast<Operand>("asr_operand_" # ty):$Imm6))
                        )],
                        NoItinerary>;
   }
@@ -897,15 +901,15 @@ multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
       : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
                  (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
 
-  def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
-            (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+  def : Pat<(opfrag ty:$Rn, ty:$Rm),
+            (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
 }
 
-defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, "i32", GPR32>;
-defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, "i64", GPR64>;
+defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>;
+defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>;
 
-defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, "i32", GPR32>;
-defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, "i64", GPR64>;
+defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>;
+defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>;
 
 //===----------------------------------------------------------------------===//
 // Add-subtract (with carry) instructions
@@ -947,10 +951,10 @@ def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
 
 // Note that adde and sube can form a chain longer than two (e.g. for 256-bit
 // addition). So the flag-setting instructions are appropriate.
-def : Pat<(adde GPR32:$Rn, GPR32:$Rm), (ADCSwww GPR32:$Rn, GPR32:$Rm)>;
-def : Pat<(adde GPR64:$Rn, GPR64:$Rm), (ADCSxxx GPR64:$Rn, GPR64:$Rm)>;
-def : Pat<(sube GPR32:$Rn, GPR32:$Rm), (SBCSwww GPR32:$Rn, GPR32:$Rm)>;
-def : Pat<(sube GPR64:$Rn, GPR64:$Rm), (SBCSxxx GPR64:$Rn, GPR64:$Rm)>;
+def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>;
+def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>;
+def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>;
+def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>;
 
 //===----------------------------------------------------------------------===//
 // Bitfield
@@ -1053,52 +1057,52 @@ def BFMxxii :
 
 // Note that these instructions are strictly more specific than the
 // BFM ones (in ImmR) so they can handle their own decoding.
-class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, string asmop,
-                    bits<6> imms, dag pattern>
+class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, ValueType dty,
+                    string asmop, bits<6> imms, dag pattern>
   : A64I_bitfield<sf, opc, sf,
                   (outs GPRDest:$Rd), (ins GPR32:$Rn),
                   !strconcat(asmop, "\t$Rd, $Rn"),
-                  [(set GPRDest:$Rd, pattern)], NoItinerary> {
+                  [(set dty:$Rd, pattern)], NoItinerary> {
   let ImmR = 0b000000;
   let ImmS = imms;
 }
 
 // Signed extensions
-def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtb", 7,
-                         (sext_inreg (anyext GPR32:$Rn), i8)>;
-def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, "sxtb", 7,
-                         (sext_inreg GPR32:$Rn, i8)>;
-def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxth", 15,
-                         (sext_inreg (anyext GPR32:$Rn), i16)>;
-def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, "sxth", 15,
-                         (sext_inreg GPR32:$Rn, i16)>;
-def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtw", 31, (sext GPR32:$Rn)>;
+def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7,
+                         (sext_inreg (anyext i32:$Rn), i8)>;
+def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7,
+                         (sext_inreg i32:$Rn, i8)>;
+def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15,
+                         (sext_inreg (anyext i32:$Rn), i16)>;
+def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15,
+                         (sext_inreg i32:$Rn, i16)>;
+def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>;
 
 // Unsigned extensions
-def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, "uxtb", 7,
-                         (and GPR32:$Rn, 255)>;
-def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, "uxth", 15,
-                         (and GPR32:$Rn, 65535)>;
+def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7,
+                         (and i32:$Rn, 255)>;
+def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15,
+                         (and i32:$Rn, 65535)>;
 
 // The 64-bit unsigned variants are not strictly architectural but recommended
 // for consistency.
 let isAsmParserOnly = 1 in {
-  def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxtb", 7,
-                           (and (anyext GPR32:$Rn), 255)>;
-  def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxth", 15,
-                           (and (anyext GPR32:$Rn), 65535)>;
+  def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7,
+                           (and (anyext i32:$Rn), 255)>;
+  def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15,
+                           (and (anyext i32:$Rn), 65535)>;
 }
 
 // Extra patterns for when the source register is actually 64-bits
 // too. There's no architectural difference here, it's just LLVM
 // shinanigans. There's no need for equivalent zero-extension patterns
 // because they'll already be caught by logical (immediate) matching.
-def : Pat<(sext_inreg GPR64:$Rn, i8),
-          (SXTBxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
-def : Pat<(sext_inreg GPR64:$Rn, i16),
-          (SXTHxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
-def : Pat<(sext_inreg GPR64:$Rn, i32),
-          (SXTWxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+def : Pat<(sext_inreg i64:$Rn, i8),
+          (SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>;
+def : Pat<(sext_inreg i64:$Rn, i16),
+          (SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>;
+def : Pat<(sext_inreg i64:$Rn, i32),
+          (SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>;
 
 
 //===-------------------------------
@@ -1111,7 +1115,7 @@ multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> {
   def wwi : A64I_bitfield<0b0, opc, 0b0,
                     (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
                     !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
-                    [(set GPR32:$Rd, (opnode GPR32:$Rn, bitfield32_imm:$ImmR))],
+                    [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))],
                     NoItinerary> {
     let ImmS = 31;
   }
@@ -1119,7 +1123,7 @@ multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> {
   def xxi : A64I_bitfield<0b1, opc, 0b1,
                     (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
                     !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
-                    [(set GPR64:$Rd, (opnode GPR64:$Rn, bitfield64_imm:$ImmR))],
+                    [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))],
                     NoItinerary> {
     let ImmS = 63;
   }
@@ -1156,10 +1160,11 @@ def bitfield64_lsl_imm : Operand<i64>,
   let EncoderMethod = "getBitfield64LSLOpValue";
 }
 
-class A64I_bitfield_lsl<bit sf, RegisterClass GPR, Operand operand>
+class A64I_bitfield_lsl<bit sf, RegisterClass GPR, ValueType ty,
+                        Operand operand>
   : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
                   "lsl\t$Rd, $Rn, $FullImm",
-                  [(set GPR:$Rd, (shl GPR:$Rn, operand:$FullImm))],
+                  [(set ty:$Rd, (shl ty:$Rn, operand:$FullImm))],
                   NoItinerary> {
   bits<12> FullImm;
   let ImmR = FullImm{5-0};
@@ -1170,8 +1175,8 @@ class A64I_bitfield_lsl<bit sf, RegisterClass GPR, Operand operand>
   let isAsmParserOnly = 1;
 }
 
-def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, bitfield32_lsl_imm>;
-def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, bitfield64_lsl_imm>;
+def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>;
+def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>;
 
 //===-------------------------------
 // 5. Aliases for bitfield extract instructions
@@ -1206,7 +1211,7 @@ multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> {
   def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
                        (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
                        !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
-                       [(set GPR32:$Rd, (op GPR32:$Rn, imm:$ImmR, imm:$ImmS))],
+                       [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))],
                        NoItinerary> {
     // As above, no disassembler allowed.
     let isAsmParserOnly = 1;
@@ -1215,7 +1220,7 @@ multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> {
   def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
                        (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
                        !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
-                       [(set GPR64:$Rd, (op GPR64:$Rn, imm:$ImmR, imm:$ImmS))],
+                       [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))],
                        NoItinerary> {
     // As above, no disassembler allowed.
     let isAsmParserOnly = 1;
@@ -1243,15 +1248,15 @@ def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
 }
 
 // SBFX instructions can do a 1-instruction sign-extension of boolean values.
-def : Pat<(sext_inreg GPR64:$Rn, i1), (SBFXxxii GPR64:$Rn, 0, 0)>;
-def : Pat<(sext_inreg GPR32:$Rn, i1), (SBFXwwii GPR32:$Rn, 0, 0)>;
-def : Pat<(i64 (sext_inreg (anyext GPR32:$Rn), i1)),
-          (SBFXxxii (SUBREG_TO_REG (i64 0), GPR32:$Rn, sub_32), 0, 0)>;
+def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>;
+def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>;
+def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
+          (SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>;
 
 // UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
 // use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
-def : Pat<(zext GPR32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii GPR32:$Rn, 0, 31),
-                                           sub_32)>;
+def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
+                                         sub_32)>;
 
 //===-------------------------------
 // 6. Aliases for bitfield insert instructions
@@ -1380,14 +1385,14 @@ multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> {
                      (outs),
                      (ins GPR64:$Rt, bcc_target:$Label),
                      !strconcat(asmop,"\t$Rt, $Label"),
-                     [(A64br_cc (A64cmp GPR64:$Rt, 0), SETOP, bb:$Label)],
+                     [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)],
                      NoItinerary>;
 
   def w : A64I_cmpbr<0b0, op,
                      (outs),
                      (ins GPR32:$Rt, bcc_target:$Label),
                      !strconcat(asmop,"\t$Rt, $Label"),
-                     [(A64br_cc (A64cmp GPR32:$Rt, 0), SETOP, bb:$Label)],
+                     [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)],
                      NoItinerary>;
   }
 }
@@ -1530,7 +1535,7 @@ multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
                             (outs GPR32:$Rd),
                             (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
                             !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
-                            [(set GPR32:$Rd, (select GPR32:$Rn, GPR32:$Rm))],
+                            [(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
                             NoItinerary>;
 
 
@@ -1538,7 +1543,7 @@ multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
                             (outs GPR64:$Rd),
                             (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
                             !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
-                            [(set GPR64:$Rd, (select GPR64:$Rn, GPR64:$Rm))],
+                            [(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
                             NoItinerary>;
   }
 }
@@ -1613,24 +1618,22 @@ def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
 // No commutable pattern for CSEL since the commuted version is isomorphic.
 
 // CSINC
-def :Pat<(A64select_cc NZCV, (add GPR32:$Rm, 1), GPR32:$Rn,
-         inv_cond_code:$Cond),
-         (CSINCwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (add GPR64:$Rm, 1), GPR64:$Rn,
-         inv_cond_code:$Cond),
-         (CSINCxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond),
+         (CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond),
+         (CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
 
 // CSINV
-def :Pat<(A64select_cc NZCV, (not GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
-         (CSINVwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (not GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
-         (CSINVxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
+         (CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
+         (CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
 
 // CSNEG
-def :Pat<(A64select_cc NZCV, (ineg GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
-         (CSNEGwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (ineg GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
-         (CSNEGxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
+         (CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
+         (CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
 
 //===----------------------------------------------------------------------===//
 // Data Processing (1 source) instructions
@@ -1664,28 +1667,28 @@ defm RBIT  : A64I_dp_1src<0b000000, "rbit">;
 defm CLS   : A64I_dp_1src<0b000101, "cls">;
 defm CLZ   : A64I_dp_1src<0b000100, "clz">;
 
-def : Pat<(ctlz GPR32:$Rn), (CLZww GPR32:$Rn)>;
-def : Pat<(ctlz GPR64:$Rn), (CLZxx GPR64:$Rn)>;
-def : Pat<(ctlz_zero_undef GPR32:$Rn), (CLZww GPR32:$Rn)>;
-def : Pat<(ctlz_zero_undef GPR64:$Rn), (CLZxx GPR64:$Rn)>;
+def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>;
+def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>;
+def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>;
+def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>;
 
-def : Pat<(cttz GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>;
-def : Pat<(cttz GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>;
-def : Pat<(cttz_zero_undef GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>;
-def : Pat<(cttz_zero_undef GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>;
+def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>;
+def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>;
+def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>;
+def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>;
 
 
 def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
-                              [(set GPR32:$Rd, (bswap GPR32:$Rn))],
+                              [(set i32:$Rd, (bswap i32:$Rn))],
                               GPR32, NoItinerary>;
 def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
-                              [(set GPR64:$Rd, (bswap GPR64:$Rn))],
+                              [(set i64:$Rd, (bswap i64:$Rn))],
                               GPR64, NoItinerary>;
 def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
-                          [(set GPR64:$Rd, (bswap (rotr GPR64:$Rn, (i64 32))))],
+                          [(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))],
                           GPR64, NoItinerary>;
 def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
-                          [(set GPR32:$Rd, (bswap (rotr GPR32:$Rn, (i64 16))))],
+                          [(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))],
                           GPR32,
                           NoItinerary>;
 def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
@@ -1726,14 +1729,14 @@ multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
    def www : dp_2src_impl<0b0,
                          opcode,
                          asmop,
-                         [(set GPR32:$Rd,
-                               (op GPR32:$Rn, (i64 (zext GPR32:$Rm))))],
+                         [(set i32:$Rd,
+                               (op i32:$Rn, (i64 (zext i32:$Rm))))],
                          GPR32,
                          NoItinerary>;
    def xxx : dp_2src_impl<0b1,
                          opcode,
                          asmop,
-                         [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))],
+                         [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
                          GPR64,
                          NoItinerary>;
 }
@@ -1743,13 +1746,13 @@ multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> {
     def www : dp_2src_impl<0b0,
                          opcode,
                          asmop,
-                         [(set GPR32:$Rd, (op GPR32:$Rn, GPR32:$Rm))],
+                         [(set i32:$Rd, (op i32:$Rn, i32:$Rm))],
                          GPR32,
                          NoItinerary>;
    def xxx : dp_2src_impl<0b1,
                          opcode,
                          asmop,
-                         [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))],
+                         [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
                          GPR64,
                          NoItinerary>;
 }
@@ -1770,14 +1773,14 @@ defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
 // operation. Since the LLVM operations are undefined (as in C) if the
 // RHS is out of range, it's perfectly permissible to discard the high
 // bits of the GPR64.
-def : Pat<(shl GPR32:$Rn, GPR64:$Rm),
-          (LSLVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
-def : Pat<(srl GPR32:$Rn, GPR64:$Rm),
-          (LSRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
-def : Pat<(sra GPR32:$Rn, GPR64:$Rm),
-          (ASRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
-def : Pat<(rotr GPR32:$Rn, GPR64:$Rm),
-          (RORVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+def : Pat<(shl i32:$Rn, i64:$Rm),
+          (LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(srl i32:$Rn, i64:$Rm),
+          (LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(sra i32:$Rn, i64:$Rm),
+          (ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(rotr i32:$Rn, i64:$Rm),
+          (RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
 
 // Here we define the aliases for the data processing 2 source instructions.
 def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
@@ -1792,46 +1795,47 @@ def ROR_menmonic : MnemonicAlias<"rorv", "ror">;
 //    + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
 
 class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
-                        RegisterClass SrcReg, string asmop, dag pattern>
+                        ValueType AccTy, RegisterClass SrcReg,
+                        string asmop, dag pattern>
   : A64I_dp3<sf, opcode,
              (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
              !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
-             [(set AccReg:$Rd, pattern)], NoItinerary> {
+             [(set AccTy:$Rd, pattern)], NoItinerary> {
   RegisterClass AccGPR = AccReg;
   RegisterClass SrcGPR = SrcReg;
 }
 
-def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, GPR32, "madd",
-                                 (add GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>;
-def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, GPR64, "madd",
-                                 (add GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>;
+def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd",
+                                 (add i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
+def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd",
+                                 (add i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
 
-def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, GPR32, "msub",
-                                 (sub GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>;
-def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, GPR64, "msub",
-                                 (sub GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>;
+def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub",
+                                 (sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
+def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub",
+                                 (sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
 
-def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, GPR32, "smaddl",
-               (add GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
-def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, GPR32, "smsubl",
-               (sub GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
+def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl",
+                     (add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
+def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl",
+                     (sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
 
-def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, GPR32, "umaddl",
-               (add GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
-def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, GPR32, "umsubl",
-               (sub GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
+def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl",
+                     (add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
+def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl",
+                     (sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
 
 let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
   def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
                           (ins GPR64:$Rn, GPR64:$Rm),
                           "umulh\t$Rd, $Rn, $Rm",
-                          [(set GPR64:$Rd, (mulhu GPR64:$Rn, GPR64:$Rm))],
+                          [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))],
                           NoItinerary>;
 
   def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
                           (ins GPR64:$Rn, GPR64:$Rm),
                           "smulh\t$Rd, $Rn, $Rm",
-                          [(set GPR64:$Rd, (mulhs GPR64:$Rn, GPR64:$Rm))],
+                          [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))],
                           NoItinerary>;
 }
 
@@ -1840,26 +1844,26 @@ multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
   def : InstAlias<asmop # " $Rd, $Rn, $Rm",
                   (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
 
-  def : Pat<pattern, (INST INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
+  def : Pat<pattern, (INST $Rn, $Rm, ZR)>;
 }
 
-defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul GPR32:$Rn, GPR32:$Rm)>;
-defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul GPR64:$Rn, GPR64:$Rm)>;
+defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>;
+defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>;
 
 defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR,
-                         (sub 0, (mul GPR32:$Rn, GPR32:$Rm))>;
+                         (sub 0, (mul i32:$Rn, i32:$Rm))>;
 defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR,
-                         (sub 0, (mul GPR64:$Rn, GPR64:$Rm))>;
+                         (sub 0, (mul i64:$Rn, i64:$Rm))>;
 
 defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR,
-                         (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm))>;
+                         (mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>;
 defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR,
-                       (sub 0, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
+                       (sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
 
 defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR,
-                         (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm))>;
+                         (mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>;
 defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR,
-                       (sub 0, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
+                       (sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
 
 
 //===----------------------------------------------------------------------===//
@@ -1909,15 +1913,15 @@ def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
                             (outs GPR32:$Rd),
                             (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
                             "extr\t$Rd, $Rn, $Rm, $LSB",
-                            [(set GPR32:$Rd,
-                                  (A64Extr GPR32:$Rn, GPR32:$Rm, imm:$LSB))],
+                            [(set i32:$Rd,
+                                  (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))],
                             NoItinerary>;
 def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
                             (outs GPR64:$Rd),
                             (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
                             "extr\t$Rd, $Rn, $Rm, $LSB",
-                            [(set GPR64:$Rd,
-                                  (A64Extr GPR64:$Rn, GPR64:$Rm, imm:$LSB))],
+                            [(set i64:$Rd,
+                                  (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))],
                             NoItinerary>;
 
 def : InstAlias<"ror $Rd, $Rs, $LSB",
@@ -1925,10 +1929,10 @@ def : InstAlias<"ror $Rd, $Rs, $LSB",
 def : InstAlias<"ror $Rd, $Rs, $LSB",
                (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;
 
-def : Pat<(rotr GPR32:$Rn, bitfield32_imm:$LSB),
-          (EXTRwwwi GPR32:$Rn, GPR32:$Rn, bitfield32_imm:$LSB)>;
-def : Pat<(rotr GPR64:$Rn, bitfield64_imm:$LSB),
-          (EXTRxxxi GPR64:$Rn, GPR64:$Rn, bitfield64_imm:$LSB)>;
+def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB),
+          (EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>;
+def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB),
+          (EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>;
 
 //===----------------------------------------------------------------------===//
 // Floating-point compare instructions
@@ -1969,17 +1973,17 @@ multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
 }
 
 defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
-                               (set NZCV, (A64cmp (f32 FPR32:$Rn), FPR32:$Rm))>;
+                               (set NZCV, (A64cmp f32:$Rn, f32:$Rm))>;
 defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
-                               (set NZCV, (A64cmp (f64 FPR64:$Rn), FPR64:$Rm))>;
+                               (set NZCV, (A64cmp f64:$Rn, f64:$Rm))>;
 
 // What would be Rm should be written as 0; note that even though it's called
 // "$Rm" here to fit in with the InstrFormats, it's actually an immediate.
 defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
-                               (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Rm))>;
+                               (set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>;
 
 defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
-                               (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Rm))>;
+                               (set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>;
 
 
 //===----------------------------------------------------------------------===//
@@ -2010,18 +2014,16 @@ let Uses = [NZCV] in {
   def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
                                  (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
                                  "fcsel\t$Rd, $Rn, $Rm, $Cond",
-                                 [(set FPR32:$Rd,
-                                       (simple_select (f32 FPR32:$Rn),
-                                                      FPR32:$Rm))],
+                                 [(set f32:$Rd, 
+                                       (simple_select f32:$Rn, f32:$Rm))],
                                  NoItinerary>;
 
 
   def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
                                  (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
                                  "fcsel\t$Rd, $Rn, $Rm, $Cond",
-                                 [(set FPR64:$Rd,
-                                       (simple_select (f64 FPR64:$Rn),
-                                                      FPR64:$Rm))],
+                                 [(set f64:$Rd,
+                                       (simple_select f64:$Rn, f64:$Rm))],
                                  NoItinerary>;
 }
 
@@ -2039,12 +2041,12 @@ multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
                            SDPatternOperator opnode = FPNoUnop> {
   def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
                      !strconcat(asmstr, "\t$Rd, $Rn"),
-                     [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn))],
+                     [(set f32:$Rd, (opnode f32:$Rn))],
                      NoItinerary>;
 
   def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
                      !strconcat(asmstr, "\t$Rd, $Rn"),
-                     [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn))],
+                     [(set f64:$Rd, (opnode f64:$Rn))],
                      NoItinerary>;
 }
 
@@ -2080,8 +2082,7 @@ class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode>
                {0,0,0,1, DestReg.t1, DestReg.t0},
                (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
                "fcvt\t$Rd, $Rn",
-               [(set (DestReg.VT DestReg.Class:$Rd),
-                     (opnode (SrcReg.VT SrcReg.Class:$Rn)))], NoItinerary>;
+               [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>;
 
 def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
 def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
@@ -2105,14 +2106,14 @@ multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
                       (outs FPR32:$Rd),
                       (ins FPR32:$Rn, FPR32:$Rm),
                       !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
-                      [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn, FPR32:$Rm))],
+                      [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))],
                       NoItinerary>;
 
   def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
                       (outs FPR64:$Rd),
                       (ins FPR64:$Rn, FPR64:$Rm),
                       !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
-                      [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn, FPR64:$Rm))],
+                      [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))],
                       NoItinerary>;
 }
 
@@ -2151,7 +2152,7 @@ class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
   : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
                (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
                !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
-               [(set FPR:$Rd, (fmakind (VT FPR:$Rn), FPR:$Rm, FPR:$Ra))],
+               [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))],
                NoItinerary>;
 
 def FMADDssss  : A64I_fpdp3Impl<"fmadd",  FPR32, f32, 0b00, 0b0, 0b0, fma>;
@@ -2208,57 +2209,59 @@ class cvtfix_i64_op<ValueType FloatVT>
 // worth going for a multiclass here. Oh well.
 
 class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
-                   RegisterClass GPR, RegisterClass FPR, Operand scale_op,
-                   string asmop, SDNode cvtop>
+                   RegisterClass GPR, RegisterClass FPR, 
+                   ValueType DstTy, ValueType SrcTy, 
+                   Operand scale_op, string asmop, SDNode cvtop>
   : A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
                  (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
                  !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
-                 [(set GPR:$Rd, (cvtop (fmul FPR:$Rn, scale_op:$Scale)))],
+                 [(set DstTy:$Rd, (cvtop (fmul SrcTy:$Rn, scale_op:$Scale)))],
                  NoItinerary>;
 
-def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32,
+def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32,
                              cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
-def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32,
+def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32,
                              cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
-def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32,
+def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32,
                              cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
-def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32,
+def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32,
                              cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;
 
-def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64,
+def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64,
                              cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
-def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64,
+def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64,
                              cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
-def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64,
+def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64,
                              cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
-def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64,
+def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64,
                              cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;
 
 
 class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
-                   RegisterClass FPR, RegisterClass GPR, Operand scale_op,
-                   string asmop, SDNode cvtop>
+                   RegisterClass FPR, RegisterClass GPR,
+                   ValueType DstTy, ValueType SrcTy,
+                   Operand scale_op, string asmop, SDNode cvtop>
   : A64I_fpfixed<sf, 0b0, type, 0b00, opcode,
                  (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
                  !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
-                 [(set FPR:$Rd, (fdiv (cvtop GPR:$Rn), scale_op:$Scale))],
+                 [(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))],
                  NoItinerary>;
 
-def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32,
+def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32,
                             cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
-def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64,
+def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64,
                             cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
-def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32,
+def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32,
                             cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
-def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64,
+def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64,
                             cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
-def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32,
+def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32,
                             cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
-def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64,
+def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64,
                             cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
-def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32,
+def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, f64, i32,
                             cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
-def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64,
+def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64,
                             cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;
 
 //===----------------------------------------------------------------------===//
@@ -2297,14 +2300,14 @@ defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
 defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
 defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
 
-def : Pat<(i32 (fp_to_sint FPR32:$Rn)), (FCVTZSws FPR32:$Rn)>;
-def : Pat<(i64 (fp_to_sint FPR32:$Rn)), (FCVTZSxs FPR32:$Rn)>;
-def : Pat<(i32 (fp_to_uint FPR32:$Rn)), (FCVTZUws FPR32:$Rn)>;
-def : Pat<(i64 (fp_to_uint FPR32:$Rn)), (FCVTZUxs FPR32:$Rn)>;
-def : Pat<(i32 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSwd FPR64:$Rn)>;
-def : Pat<(i64 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSxd FPR64:$Rn)>;
-def : Pat<(i32 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUwd FPR64:$Rn)>;
-def : Pat<(i64 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUxd FPR64:$Rn)>;
+def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>;
+def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>;
+def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>;
+def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>;
+def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>;
+def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>;
+def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>;
+def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>;
 
 multiclass A64I_inttofp<bit o0, string asmop> {
   def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
@@ -2316,24 +2319,24 @@ multiclass A64I_inttofp<bit o0, string asmop> {
 defm S : A64I_inttofp<0b0, "scvtf">;
 defm U : A64I_inttofp<0b1, "ucvtf">;
 
-def : Pat<(f32 (sint_to_fp GPR32:$Rn)), (SCVTFsw GPR32:$Rn)>;
-def : Pat<(f32 (sint_to_fp GPR64:$Rn)), (SCVTFsx GPR64:$Rn)>;
-def : Pat<(f64 (sint_to_fp GPR32:$Rn)), (SCVTFdw GPR32:$Rn)>;
-def : Pat<(f64 (sint_to_fp GPR64:$Rn)), (SCVTFdx GPR64:$Rn)>;
-def : Pat<(f32 (uint_to_fp GPR32:$Rn)), (UCVTFsw GPR32:$Rn)>;
-def : Pat<(f32 (uint_to_fp GPR64:$Rn)), (UCVTFsx GPR64:$Rn)>;
-def : Pat<(f64 (uint_to_fp GPR32:$Rn)), (UCVTFdw GPR32:$Rn)>;
-def : Pat<(f64 (uint_to_fp GPR64:$Rn)), (UCVTFdx GPR64:$Rn)>;
+def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>;
+def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>;
+def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>;
+def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>;
+def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>;
+def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>;
+def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>;
+def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>;
 
 def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
 def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
 def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
 def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
 
-def : Pat<(i32 (bitconvert (f32 FPR32:$Rn))), (FMOVws FPR32:$Rn)>;
-def : Pat<(f32 (bitconvert (i32 GPR32:$Rn))), (FMOVsw GPR32:$Rn)>;
-def : Pat<(i64 (bitconvert (f64 FPR64:$Rn))), (FMOVxd FPR64:$Rn)>;
-def : Pat<(f64 (bitconvert (i64 GPR64:$Rn))), (FMOVdx GPR64:$Rn)>;
+def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>;
+def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>;
+def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>;
+def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>;
 
 def lane1_asmoperand : AsmOperandClass {
   let Name = "Lane1";
@@ -2397,7 +2400,7 @@ class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
                (outs Reg:$Rd),
                (ins fmov_operand:$Imm8),
                "fmov\t$Rd, $Imm8",
-               [(set (VT Reg:$Rd), fmov_operand:$Imm8)],
+               [(set VT:$Rd, fmov_operand:$Imm8)],
                NoItinerary>;
 
 def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
@@ -2582,7 +2585,8 @@ defm LDAR  : A64I_LRex<"ldar",  0b101>;
 
 class acquiring_load<PatFrag base>
   : PatFrag<(ops node:$ptr), (base node:$ptr), [{
-  return cast<AtomicSDNode>(N)->getOrdering() == Acquire;
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  return Ordering == Acquire || Ordering == SequentiallyConsistent;
 }]>;
 
 def atomic_load_acquire_8  : acquiring_load<atomic_load_8>;
@@ -2590,10 +2594,10 @@ def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
 def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
 def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
 
-def : Pat<(atomic_load_acquire_8  GPR64xsp:$Rn), (LDAR_byte  GPR64xsp0:$Rn)>;
-def : Pat<(atomic_load_acquire_16 GPR64xsp:$Rn), (LDAR_hword GPR64xsp0:$Rn)>;
-def : Pat<(atomic_load_acquire_32 GPR64xsp:$Rn), (LDAR_word  GPR64xsp0:$Rn)>;
-def : Pat<(atomic_load_acquire_64 GPR64xsp:$Rn), (LDAR_dword GPR64xsp0:$Rn)>;
+def : Pat<(atomic_load_acquire_8  i64:$Rn), (LDAR_byte  $Rn)>;
+def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>;
+def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word  $Rn)>;
+def : Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>;
 
 //===----------------------------------
 // Store-release (no exclusivity)
@@ -2613,7 +2617,8 @@ class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
 
 class releasing_store<PatFrag base>
   : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
-  return cast<AtomicSDNode>(N)->getOrdering() == Release;
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  return Ordering == Release || Ordering == SequentiallyConsistent;
 }]>;
 
 def atomic_store_release_8  : releasing_store<atomic_store_8>;
@@ -2624,22 +2629,22 @@ def atomic_store_release_64 : releasing_store<atomic_store_64>;
 multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
   def _byte:  A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
                             (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                            [(atomic_store_release_8 GPR64xsp0:$Rn, GPR32:$Rt)],
+                            [(atomic_store_release_8 i64:$Rn, i32:$Rt)],
                             NoItinerary>;
 
   def _hword:  A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
                            (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                           [(atomic_store_release_16 GPR64xsp0:$Rn, GPR32:$Rt)],
+                           [(atomic_store_release_16 i64:$Rn, i32:$Rt)],
                            NoItinerary>;
 
   def _word:  A64I_SLexs_impl<0b10, opcode, asmstr,
                            (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                           [(atomic_store_release_32 GPR64xsp0:$Rn, GPR32:$Rt)],
+                           [(atomic_store_release_32 i64:$Rn, i32:$Rt)],
                            NoItinerary>;
 
   def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
                            (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
-                           [(atomic_store_release_64 GPR64xsp0:$Rn, GPR64:$Rt)],
+                           [(atomic_store_release_64 i64:$Rn, i64:$Rt)],
                            NoItinerary>;
 }
 
@@ -3596,15 +3601,15 @@ multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> {
   def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd),
                          (ins GPR32:$Rn, logical_imm32_operand:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
-                         [(set GPR32wsp:$Rd,
-                               (opnode GPR32:$Rn, logical_imm32_operand:$Imm))],
+                         [(set i32:$Rd,
+                               (opnode i32:$Rn, logical_imm32_operand:$Imm))],
                          NoItinerary>;
 
   def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd),
                          (ins GPR64:$Rn, logical_imm64_operand:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
-                         [(set GPR64xsp:$Rd,
-                               (opnode GPR64:$Rn, logical_imm64_operand:$Imm))],
+                         [(set i64:$Rd,
+                               (opnode i64:$Rn, logical_imm64_operand:$Imm))],
                          NoItinerary>;
 }
 
@@ -3655,46 +3660,46 @@ def signed_cond : PatLeaf<(cond), [{
 // when the revolution comes.
 multiclass logical_shifts<string prefix, bit sf, bits<2> opc,
                           bit N, bit commutable,
-                          string asmop, SDPatternOperator opfrag, string sty,
+                          string asmop, SDPatternOperator opfrag, ValueType ty,
                           RegisterClass GPR, list<Register> defs> {
   let isCommutable = commutable, Defs = defs in {
   def _lsl : A64I_logicalshift<sf, opc, 0b00, N,
                        (outs GPR:$Rd),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("lsl_operand_" # sty):$Imm6),
+                            !cast<Operand>("lsl_operand_" # ty):$Imm6),
                        !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
-                       [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm,
-                            !cast<Operand>("lsl_operand_" # sty):$Imm6))
+                       [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
+                            !cast<Operand>("lsl_operand_" # ty):$Imm6))
                        )],
                        NoItinerary>;
 
   def _lsr : A64I_logicalshift<sf, opc, 0b01, N,
                        (outs GPR:$Rd),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("lsr_operand_" # sty):$Imm6),
+                            !cast<Operand>("lsr_operand_" # ty):$Imm6),
                        !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
-                       [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm,
-                            !cast<Operand>("lsr_operand_" # sty):$Imm6))
+                       [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
+                            !cast<Operand>("lsr_operand_" # ty):$Imm6))
                        )],
                        NoItinerary>;
 
   def _asr : A64I_logicalshift<sf, opc, 0b10, N,
                        (outs GPR:$Rd),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("asr_operand_" # sty):$Imm6),
+                            !cast<Operand>("asr_operand_" # ty):$Imm6),
                        !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
-                       [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm,
-                            !cast<Operand>("asr_operand_" # sty):$Imm6))
+                       [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
+                            !cast<Operand>("asr_operand_" # ty):$Imm6))
                        )],
                        NoItinerary>;
 
   def _ror : A64I_logicalshift<sf, opc, 0b11, N,
                        (outs GPR:$Rd),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("ror_operand_" # sty):$Imm6),
+                            !cast<Operand>("ror_operand_" # ty):$Imm6),
                        !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
-                       [(set GPR:$Rd, (opfrag GPR:$Rn, (rotr GPR:$Rm,
-                            !cast<Operand>("ror_operand_" # sty):$Imm6))
+                       [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm,
+                            !cast<Operand>("ror_operand_" # ty):$Imm6))
                        )],
                        NoItinerary>;
   }
@@ -3704,17 +3709,17 @@ multiclass logical_shifts<string prefix, bit sf, bits<2> opc,
                  (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
                                                       GPR:$Rm, 0)>;
 
-  def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
-            (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+  def : Pat<(opfrag ty:$Rn, ty:$Rm),
+            (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
 }
 
 multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable,
                          string asmop, SDPatternOperator opfrag,
                          list<Register> defs> {
   defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N,
-                            commutable, asmop, opfrag, "i64", GPR64, defs>;
+                            commutable, asmop, opfrag, i64, GPR64, defs>;
   defm www : logical_shifts<prefix # "www", 0b0, opc, N,
-                            commutable, asmop, opfrag, "i32", GPR32, defs>;
+                            commutable, asmop, opfrag, i32, GPR32, defs>;
 }
 
 
@@ -3741,15 +3746,15 @@ defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics",
                                   [{ (void)N; return false; }]>,
                           [NZCV]>;
 
-multiclass tst_shifts<string prefix, bit sf, string sty, RegisterClass GPR> {
+multiclass tst_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
   let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in {
   def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0,
                        (outs),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("lsl_operand_" # sty):$Imm6),
+                            !cast<Operand>("lsl_operand_" # ty):$Imm6),
                        "tst\t$Rn, $Rm, $Imm6",
-                       [(set NZCV, (A64setcc (and GPR:$Rn, (shl GPR:$Rm,
-                           !cast<Operand>("lsl_operand_" # sty):$Imm6)),
+                       [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm,
+                           !cast<Operand>("lsl_operand_" # ty):$Imm6)),
                                           0, signed_cond))],
                        NoItinerary>;
 
@@ -3757,30 +3762,30 @@ multiclass tst_shifts<string prefix, bit sf, string sty, RegisterClass GPR> {
   def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0,
                        (outs),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("lsr_operand_" # sty):$Imm6),
+                            !cast<Operand>("lsr_operand_" # ty):$Imm6),
                        "tst\t$Rn, $Rm, $Imm6",
-                       [(set NZCV, (A64setcc (and GPR:$Rn, (srl GPR:$Rm,
-                           !cast<Operand>("lsr_operand_" # sty):$Imm6)),
+                       [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm,
+                           !cast<Operand>("lsr_operand_" # ty):$Imm6)),
                                           0, signed_cond))],
                        NoItinerary>;
 
   def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0,
                        (outs),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("asr_operand_" # sty):$Imm6),
+                            !cast<Operand>("asr_operand_" # ty):$Imm6),
                        "tst\t$Rn, $Rm, $Imm6",
-                       [(set NZCV, (A64setcc (and GPR:$Rn, (sra GPR:$Rm,
-                           !cast<Operand>("asr_operand_" # sty):$Imm6)),
+                       [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm,
+                           !cast<Operand>("asr_operand_" # ty):$Imm6)),
                                           0, signed_cond))],
                        NoItinerary>;
 
   def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0,
                        (outs),
                        (ins GPR:$Rn, GPR:$Rm,
-                            !cast<Operand>("ror_operand_" # sty):$Imm6),
+                            !cast<Operand>("ror_operand_" # ty):$Imm6),
                        "tst\t$Rn, $Rm, $Imm6",
-                       [(set NZCV, (A64setcc (and GPR:$Rn, (rotr GPR:$Rm,
-                           !cast<Operand>("ror_operand_" # sty):$Imm6)),
+                       [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm,
+                           !cast<Operand>("ror_operand_" # ty):$Imm6)),
                                           0, signed_cond))],
                        NoItinerary>;
   }
@@ -3788,63 +3793,63 @@ multiclass tst_shifts<string prefix, bit sf, string sty, RegisterClass GPR> {
   def _noshift : InstAlias<"tst $Rn, $Rm",
                      (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
 
-  def : Pat<(A64setcc (and GPR:$Rn, GPR:$Rm), 0, signed_cond),
-            (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+  def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond),
+            (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
 }
 
-defm TSTxx : tst_shifts<"TSTxx", 0b1, "i64", GPR64>;
-defm TSTww : tst_shifts<"TSTww", 0b0, "i32", GPR32>;
+defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>;
+defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>;
 
 
-multiclass mvn_shifts<string prefix, bit sf, string sty, RegisterClass GPR> {
+multiclass mvn_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
   let isCommutable = 0, Rn = 0b11111 in {
   def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1,
                        (outs GPR:$Rd),
                        (ins GPR:$Rm,
-                            !cast<Operand>("lsl_operand_" # sty):$Imm6),
+                            !cast<Operand>("lsl_operand_" # ty):$Imm6),
                        "mvn\t$Rd, $Rm, $Imm6",
-                       [(set GPR:$Rd, (not (shl GPR:$Rm,
-                         !cast<Operand>("lsl_operand_" # sty):$Imm6)))],
+                       [(set ty:$Rd, (not (shl ty:$Rm,
+                         !cast<Operand>("lsl_operand_" # ty):$Imm6)))],
                        NoItinerary>;
 
 
   def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1,
                        (outs GPR:$Rd),
                        (ins GPR:$Rm,
-                            !cast<Operand>("lsr_operand_" # sty):$Imm6),
+                            !cast<Operand>("lsr_operand_" # ty):$Imm6),
                        "mvn\t$Rd, $Rm, $Imm6",
-                       [(set GPR:$Rd, (not (srl GPR:$Rm,
-                         !cast<Operand>("lsr_operand_" # sty):$Imm6)))],
+                       [(set ty:$Rd, (not (srl ty:$Rm,
+                         !cast<Operand>("lsr_operand_" # ty):$Imm6)))],
                        NoItinerary>;
 
   def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1,
                        (outs GPR:$Rd),
                        (ins GPR:$Rm,
-                            !cast<Operand>("asr_operand_" # sty):$Imm6),
+                            !cast<Operand>("asr_operand_" # ty):$Imm6),
                        "mvn\t$Rd, $Rm, $Imm6",
-                       [(set GPR:$Rd, (not (sra GPR:$Rm,
-                         !cast<Operand>("asr_operand_" # sty):$Imm6)))],
+                       [(set ty:$Rd, (not (sra ty:$Rm,
+                         !cast<Operand>("asr_operand_" # ty):$Imm6)))],
                        NoItinerary>;
 
   def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1,
                        (outs GPR:$Rd),
                        (ins GPR:$Rm,
-                            !cast<Operand>("ror_operand_" # sty):$Imm6),
+                            !cast<Operand>("ror_operand_" # ty):$Imm6),
                        "mvn\t$Rd, $Rm, $Imm6",
-                       [(set GPR:$Rd, (not (rotr GPR:$Rm,
-                         !cast<Operand>("lsl_operand_" # sty):$Imm6)))],
+                       [(set ty:$Rd, (not (rotr ty:$Rm,
+                         !cast<Operand>("lsl_operand_" # ty):$Imm6)))],
                        NoItinerary>;
   }
 
   def _noshift : InstAlias<"mvn $Rn, $Rm",
                      (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
 
-  def : Pat<(not GPR:$Rm),
-            (!cast<Instruction>(prefix # "_lsl") GPR:$Rm, 0)>;
+  def : Pat<(not ty:$Rm),
+            (!cast<Instruction>(prefix # "_lsl") $Rm, 0)>;
 }
 
-defm MVNxx : mvn_shifts<"MVNxx", 0b1, "i64", GPR64>;
-defm MVNww : mvn_shifts<"MVNww", 0b0, "i32", GPR32>;
+defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>;
+defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>;
 
 def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
 def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
@@ -4279,14 +4284,14 @@ let isBranch = 1, isTerminator = 1 in {
   def TBZxii : A64I_TBimm<0b0, (outs),
                         (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
                         "tbz\t$Rt, $Imm, $Label",
-                        [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0),
+                        [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
                                    A64eq, bb:$Label)],
                         NoItinerary>;
 
   def TBNZxii : A64I_TBimm<0b1, (outs),
                         (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
                         "tbnz\t$Rt, $Imm, $Label",
-                        [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0),
+                        [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
                                    A64ne, bb:$Label)],
                         NoItinerary>;
 
@@ -4298,7 +4303,7 @@ let isBranch = 1, isTerminator = 1 in {
   def TBZwii : A64I_TBimm<0b0, (outs),
                         (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
                         "tbz\t$Rt, $Imm, $Label",
-                        [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0),
+                        [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
                                    A64eq, bb:$Label)],
                         NoItinerary> {
     let Imm{5} = 0b0;
@@ -4307,7 +4312,7 @@ let isBranch = 1, isTerminator = 1 in {
   def TBNZwii : A64I_TBimm<0b1, (outs),
                         (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
                         "tbnz\t$Rt, $Imm, $Label",
-                        [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0),
+                        [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
                                    A64ne, bb:$Label)],
                         NoItinerary> {
     let Imm{5} = 0b0;
@@ -4383,13 +4388,13 @@ class A64I_BregImpl<bits<4> opc,
 
 let isBranch = 1 in {
   def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn),
-                          "br\t$Rn", [(brind GPR64:$Rn)]> {
+                          "br\t$Rn", [(brind i64:$Rn)]> {
     let isBarrier = 1;
     let isTerminator = 1;
   }
 
   def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
-                           "blr\t$Rn", [(AArch64Call GPR64:$Rn)]> {
+                           "blr\t$Rn", [(AArch64Call i64:$Rn)]> {
     let isBarrier = 0;
     let isCall = 1;
     let Defs = [X30];
@@ -4478,7 +4483,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in {
 
   def TC_RETURNxi
     : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff),
-                 [(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff))]>;
+                 [(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>;
 }
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
@@ -4510,13 +4515,13 @@ def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> {
 }
 
 def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var),
-                            [(A64tlsdesc_blr GPR64:$Rn, tglobaltlsaddr:$Var)]> {
+                            [(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> {
   let isCall = 1;
   let Defs = [X30];
 }
 
-def : Pat<(A64tlsdesc_blr GPR64:$Rn, texternalsym:$Var),
-          (TLSDESC_BLRx GPR64:$Rn, texternalsym:$Var)>;
+def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var),
+          (TLSDESC_BLRx $Rn, texternalsym:$Var)>;
 
 //===----------------------------------------------------------------------===//
 // Bitfield patterns
@@ -4539,22 +4544,22 @@ def bfi_width_to_imms : SDNodeXForm<imm, [{
 // (either all bits are used or the low 32 bits are used).
 let AddedComplexity = 10 in {
 
-def : Pat<(A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS),
-           (BFIxxii GPR64:$src, GPR64:$Rn,
+def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
+           (BFIxxii $src, $Rn,
                     (bfi64_lsb_to_immr (i64 imm:$ImmR)),
                     (bfi_width_to_imms (i64 imm:$ImmS)))>;
 
-def : Pat<(A64Bfi GPR32:$src, GPR32:$Rn, imm:$ImmR, imm:$ImmS),
-          (BFIwwii GPR32:$src, GPR32:$Rn,
+def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS),
+          (BFIwwii $src, $Rn,
                    (bfi32_lsb_to_immr (i64 imm:$ImmR)),
                    (bfi_width_to_imms (i64 imm:$ImmS)))>;
 
 
-def : Pat<(and (A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS),
+def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
                (i64 4294967295)),
           (SUBREG_TO_REG (i64 0),
-                         (BFIwwii (EXTRACT_SUBREG GPR64:$src, sub_32),
-                                  (EXTRACT_SUBREG GPR64:$Rn, sub_32),
+                         (BFIwwii (EXTRACT_SUBREG $src, sub_32),
+                                  (EXTRACT_SUBREG $Rn, sub_32),
                                   (bfi32_lsb_to_immr (i64 imm:$ImmR)),
                                   (bfi_width_to_imms (i64 imm:$ImmS))),
                          sub_32)>;
@@ -4566,20 +4571,19 @@ def : Pat<(and (A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS),
 //===----------------------------------------------------------------------===//
 
 // Truncation from 64 to 32-bits just involves renaming your register.
-def : Pat<(i32 (trunc (i64 GPR64:$val))), (EXTRACT_SUBREG GPR64:$val, sub_32)>;
+def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>;
 
 // Similarly, extension where we don't care about the high bits is
 // just a rename.
-def : Pat<(i64 (anyext (i32 GPR32:$val))),
-          (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$val, sub_32)>;
+def : Pat<(i64 (anyext i32:$val)),
+          (INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>;
 
 // SELECT instructions providing f128 types need to be handled by a
 // pseudo-instruction since the eventual code will need to introduce basic
 // blocks and control flow.
 def F128CSEL : PseudoInst<(outs FPR128:$Rd),
-                          (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
-                          [(set FPR128:$Rd, (simple_select (f128 FPR128:$Rn),
-                                                           FPR128:$Rm))]> {
+                         (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
+                         [(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> {
   let Uses = [NZCV];
   let usesCustomInserter = 1;
 }
@@ -4691,13 +4695,13 @@ def atomic_store_simple_i64 : simple_store<atomic_store_64>;
 // Atomic patterns can be shared between integer operations of all sizes, a
 // quick multiclass here allows reuse.
 multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
-                          dag Offset, dag address, RegisterClass TPR,
+                          dag Offset, dag address, ValueType transty,
                           ValueType sty> {
   def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
             (LOAD Base, Offset)>;
 
-  def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, TPR:$Rt),
-            (STORE TPR:$Rt, Base, Offset)>;
+  def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
+            (STORE $Rt, Base, Offset)>;
 }
 
 // Instructions accessing a memory chunk smaller than a register (or, in a
@@ -4709,7 +4713,7 @@ multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
 multiclass ls_small_pats<Instruction LOAD, Instruction STORE,
                          dag Base, dag Offset,
                          dag address, ValueType sty>
-  : ls_atomic_pats<LOAD, STORE, Base, Offset, address, GPR32, sty> {
+  : ls_atomic_pats<LOAD, STORE, Base, Offset, address, i32, sty> {
   def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>;
 
   def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>;
@@ -4722,13 +4726,13 @@ multiclass ls_small_pats<Instruction LOAD, Instruction STORE,
   def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
             (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
 
-  def : Pat<(!cast<SDNode>(truncstore # sty) GPR32:$Rt, address),
-            (STORE GPR32:$Rt, Base, Offset)>;
+  def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
+            (STORE $Rt, Base, Offset)>;
 
   // For truncating store from 64-bits, we have to manually tell LLVM to
   // ignore the high bits of the x register.
-  def : Pat<(!cast<SDNode>(truncstore # sty) GPR64:$Rt, address),
-            (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>;
+  def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
+            (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
 }
 
 // Next come patterns for sign-extending loads.
@@ -4744,18 +4748,16 @@ multiclass load_signed_pats<string T, string U, dag Base, dag Offset,
 
 // and finally "natural-width" loads and stores come next.
 multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
-                           dag Offset, dag address, RegisterClass TPR,
-                           ValueType sty> {
+                           dag Offset, dag address, ValueType sty> {
   def : Pat<(sty (load address)), (LOAD Base, Offset)>;
-  def : Pat<(store (sty TPR:$Rt), address), (STORE TPR:$Rt, Base, Offset)>;
+  def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>;
 }
 
 // Integer operations also get atomic instructions to select for.
 multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
-                           dag Offset, dag address, RegisterClass TPR,
-                           ValueType sty>
-  : ls_neutral_pats<LOAD, STORE, Base, Offset, address, TPR, sty>,
-    ls_atomic_pats<LOAD, STORE, Base, Offset, address, TPR, sty>;
+                           dag Offset, dag address, ValueType sty>
+  : ls_neutral_pats<LOAD, STORE, Base, Offset, address, sty>,
+    ls_atomic_pats<LOAD, STORE, Base, Offset, address, sty, sty>;
 
 //===------------------------------
 // 2.2. Addressing-mode instantiations
@@ -4790,7 +4792,7 @@ multiclass uimm12_pats<dag address, dag Base, dag Offset> {
                           !foreach(decls.pattern, address,
                                    !subst(OFFSET, word_uimm12,
                                    !subst(ALIGN, min_align4, decls.pattern))),
-                          GPR32, i32>;
+                          i32>;
 
   defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base,
                           !foreach(decls.pattern, Offset,
@@ -4798,7 +4800,7 @@ multiclass uimm12_pats<dag address, dag Base, dag Offset> {
                           !foreach(decls.pattern, address,
                                    !subst(OFFSET, dword_uimm12,
                                    !subst(ALIGN, min_align8, decls.pattern))),
-                          GPR64, i64>;
+                          i64>;
 
   defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base,
                           !foreach(decls.pattern, Offset,
@@ -4806,7 +4808,7 @@ multiclass uimm12_pats<dag address, dag Base, dag Offset> {
                           !foreach(decls.pattern, address,
                                    !subst(OFFSET, hword_uimm12,
                                    !subst(ALIGN, min_align2, decls.pattern))),
-                          FPR16, f16>;
+                          f16>;
 
   defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base,
                           !foreach(decls.pattern, Offset,
@@ -4814,7 +4816,7 @@ multiclass uimm12_pats<dag address, dag Base, dag Offset> {
                           !foreach(decls.pattern, address,
                                    !subst(OFFSET, word_uimm12,
                                    !subst(ALIGN, min_align4, decls.pattern))),
-                          FPR32, f32>;
+                          f32>;
 
   defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
                           !foreach(decls.pattern, Offset,
@@ -4822,7 +4824,7 @@ multiclass uimm12_pats<dag address, dag Base, dag Offset> {
                           !foreach(decls.pattern, address,
                                    !subst(OFFSET, dword_uimm12,
                                    !subst(ALIGN, min_align8, decls.pattern))),
-                          FPR64, f64>;
+                          f64>;
 
   defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
                           !foreach(decls.pattern, Offset,
@@ -4830,7 +4832,7 @@ multiclass uimm12_pats<dag address, dag Base, dag Offset> {
                           !foreach(decls.pattern, address,
                                    !subst(OFFSET, qword_uimm12,
                                    !subst(ALIGN, min_align16, decls.pattern))),
-                          FPR128, f128>;
+                          f128>;
 
   defm : load_signed_pats<"B", "", Base,
                           !foreach(decls.pattern, Offset,
@@ -4857,13 +4859,13 @@ multiclass uimm12_pats<dag address, dag Base, dag Offset> {
 
 // Straightforward patterns of last resort: a pointer with or without an
 // appropriate offset.
-defm : uimm12_pats<(i64 GPR64xsp:$Rn), (i64 GPR64xsp:$Rn), (i64 0)>;
-defm : uimm12_pats<(add GPR64xsp:$Rn, OFFSET:$UImm12),
-                   (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>;
+defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>;
+defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12),
+                   (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
 
 // The offset could be hidden behind an "or", of course:
-defm : uimm12_pats<(add_like_or GPR64xsp:$Rn, OFFSET:$UImm12),
-                   (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>;
+defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12),
+                   (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
 
 // Global addresses under the small-absolute model should use these
 // instructions. There are ELF relocations specifically for it.
@@ -4897,36 +4899,31 @@ multiclass simm9_pats<dag address, dag Base, dag Offset> {
   defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
   defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
 
-  defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address,
-                             GPR32, i32>;
-  defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address,
-                             GPR64, i64>;
-
-  defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address,
-                         FPR16, f16>;
-  defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address,
-                         FPR32, f32>;
-  defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address,
-                         FPR64, f64>;
+  defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address, i32>;
+  defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address, i64>;
+
+  defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address, f16>;
+  defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address, f32>;
+  defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address, f64>;
   defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
-                         FPR128, f128>;
+                         f128>;
 
   def : Pat<(i64 (zextloadi32 address)),
             (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
 
-  def : Pat<(truncstorei32 GPR64:$Rt, address),
-            (LS32_STUR (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>;
+  def : Pat<(truncstorei32 i64:$Rt, address),
+            (LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
 
   defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
   defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
   def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
 }
 
-defm : simm9_pats<(add GPR64xsp:$Rn, simm9:$SImm9),
-                  (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>;
+defm : simm9_pats<(add i64:$Rn, simm9:$SImm9),
+                  (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
 
-defm : simm9_pats<(add_like_or GPR64xsp:$Rn, simm9:$SImm9),
-                  (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>;
+defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9),
+                  (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
 
 
 //===------------------------------
@@ -4937,12 +4934,12 @@ defm : simm9_pats<(add_like_or GPR64xsp:$Rn, simm9:$SImm9),
 // quick multiclass here allows reuse.
 multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
                           dag Offset, dag Extend, dag address,
-                          RegisterClass TPR, ValueType sty> {
+                          ValueType transty, ValueType sty> {
   def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
             (LOAD Base, Offset, Extend)>;
 
-  def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, TPR:$Rt),
-            (STORE TPR:$Rt, Base, Offset, Extend)>;
+  def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
+            (STORE $Rt, Base, Offset, Extend)>;
 }
 
 // The register offset instructions take three operands giving the instruction,
@@ -4953,7 +4950,7 @@ multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
 multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
                          dag Base, dag Offset, dag Extend,
                          dag address, ValueType sty>
-  : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, GPR32, sty> {
+  : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, i32, sty> {
   def : Pat<(!cast<SDNode>(zextload # sty) address),
             (LOAD Base, Offset, Extend)>;
 
@@ -4968,13 +4965,13 @@ multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
   def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
             (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
 
-  def : Pat<(!cast<SDNode>(truncstore # sty) GPR32:$Rt, address),
-            (STORE GPR32:$Rt, Base, Offset, Extend)>;
+  def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
+            (STORE $Rt, Base, Offset, Extend)>;
 
   // For truncating store from 64-bits, we have to manually tell LLVM to
   // ignore the high bits of the x register.
-  def : Pat<(!cast<SDNode>(truncstore # sty) GPR64:$Rt, address),
-            (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset, Extend)>;
+  def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
+            (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>;
 
 }
 
@@ -4993,17 +4990,17 @@ multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
 // and finally "natural-width" loads and stores come next.
 multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE,
                            dag Base, dag Offset, dag Extend, dag address,
-                           RegisterClass TPR, ValueType sty> {
+                           ValueType sty> {
   def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>;
-  def : Pat<(store (sty TPR:$Rt), address),
-            (STORE TPR:$Rt, Base, Offset, Extend)>;
+  def : Pat<(store sty:$Rt, address),
+            (STORE $Rt, Base, Offset, Extend)>;
 }
 
 multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE,
                                dag Base, dag Offset, dag Extend, dag address,
-                               RegisterClass TPR, ValueType sty>
-  : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, TPR, sty>,
-    ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, TPR, sty>;
+                               ValueType sty>
+  : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, sty>,
+    ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, sty, sty>;
 
 multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
                        dag Extend> {
@@ -5032,7 +5029,7 @@ multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
                             Base, Offset, Extend,
                             !foreach(decls.pattern, address,
                                      !subst(SHIFT, imm_eq2, decls.pattern)),
-                            GPR32, i32>;
+                            i32>;
 
   defm : ro_int_neutral_pats<
                             !cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"),
@@ -5040,45 +5037,45 @@ multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
                             Base, Offset, Extend,
                             !foreach(decls.pattern, address,
                                      !subst(SHIFT, imm_eq3, decls.pattern)),
-                            GPR64, i64>;
+                            i64>;
 
   defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"),
                          !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"),
                          Base, Offset, Extend,
                          !foreach(decls.pattern, address,
                                   !subst(SHIFT, imm_eq1, decls.pattern)),
-                         FPR16, f16>;
+                         f16>;
 
   defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"),
                          !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"),
                          Base, Offset, Extend,
                          !foreach(decls.pattern, address,
                                   !subst(SHIFT, imm_eq2, decls.pattern)),
-                         FPR32, f32>;
+                         f32>;
 
   defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"),
                          !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"),
                          Base, Offset, Extend,
                          !foreach(decls.pattern, address,
                                   !subst(SHIFT, imm_eq3, decls.pattern)),
-                         FPR64, f64>;
+                         f64>;
 
   defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"),
                          !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"),
                          Base, Offset, Extend,
                          !foreach(decls.pattern, address,
                                   !subst(SHIFT, imm_eq4, decls.pattern)),
-                         FPR128, f128>;
+                         f128>;
 
   defm : ro_signed_pats<"B", Rm, Base, Offset, Extend,
-                          !foreach(decls.pattern, address,
-                                   !subst(SHIFT, imm_eq0, decls.pattern)),
-                          i8>;
+                        !foreach(decls.pattern, address,
+                                 !subst(SHIFT, imm_eq0, decls.pattern)),
+                        i8>;
 
   defm : ro_signed_pats<"H", Rm, Base, Offset, Extend,
-                          !foreach(decls.pattern, address,
-                                   !subst(SHIFT, imm_eq1, decls.pattern)),
-                          i16>;
+                        !foreach(decls.pattern, address,
+                                 !subst(SHIFT, imm_eq1, decls.pattern)),
+                        i16>;
 
   def : Pat<(sextloadi32 !foreach(decls.pattern, address,
                                   !subst(SHIFT, imm_eq2, decls.pattern))),
@@ -5091,20 +5088,20 @@ multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
 // using register-offset instructions. Essentially a base plus a possibly
 // extended, possibly shifted (by access size) offset.
 
-defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (sext GPR32:$Rm)),
-                   (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 6)>;
+defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)),
+                   (i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>;
 
-defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (sext GPR32:$Rm), SHIFT)),
-                   (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 7)>;
+defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)),
+                   (i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>;
 
-defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (zext GPR32:$Rm)),
-                   (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 2)>;
+defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)),
+                   (i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>;
 
-defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (zext GPR32:$Rm), SHIFT)),
-                   (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 3)>;
+defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)),
+                   (i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>;
 
-defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, GPR64:$Rm),
-                   (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 2)>;
+defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
+                   (i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>;
 
-defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, (shl GPR64:$Rm, SHIFT)),
-                   (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 3)>;
+defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
+                   (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 2e9205fc9924..3501cf45ebc7 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -47,6 +47,9 @@ class AArch64Subtarget : public AArch64GenSubtargetInfo {
 
   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
   bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+  bool isTargetAndroid() const {
+    return TargetTriple.getEnvironment() == Triple::Android;
+  }
 
 };
 } // End llvm namespace
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index b83577af45c6..3b811df212d1 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -63,14 +63,15 @@ class AArch64ELFStreamer : public MCELFStreamer {
 
   ~AArch64ELFStreamer() {}
 
-  virtual void ChangeSection(const MCSection *Section) {
+  virtual void ChangeSection(const MCSection *Section,
+                             const MCExpr *Subsection) {
     // We have to keep track of the mapping symbol state of any sections we
     // use. Each one should start off as EMS_None, which is provided as the
     // default constructor by DenseMap::lookup.
-    LastMappingSymbols[getPreviousSection()] = LastEMS;
+    LastMappingSymbols[getPreviousSection().first] = LastEMS;
     LastEMS = LastMappingSymbols.lookup(Section);
 
-    MCELFStreamer::ChangeSection(Section);
+    MCELFStreamer::ChangeSection(Section, Subsection);
   }
 
   /// This function is the one used to emit instruction data into the ELF
@@ -129,7 +130,7 @@ class AArch64ELFStreamer : public MCELFStreamer {
     MCELF::SetType(SD, ELF::STT_NOTYPE);
     MCELF::SetBinding(SD, ELF::STB_LOCAL);
     SD.setExternal(false);
-    Symbol->setSection(*getCurrentSection());
+    Symbol->setSection(*getCurrentSection().first);
 
     const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
     Symbol->setVariableValue(Value);
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index ab9bba183690..bedccb5438f6 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -194,7 +194,55 @@ const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = {
   {"rvbar_el3", RVBAR_EL3},
   {"isr_el1", ISR_EL1},
   {"cntpct_el0", CNTPCT_EL0},
-  {"cntvct_el0", CNTVCT_EL0}
+  {"cntvct_el0", CNTVCT_EL0},
+
+  // Trace registers
+  {"trcstatr", TRCSTATR},
+  {"trcidr8", TRCIDR8},
+  {"trcidr9", TRCIDR9},
+  {"trcidr10", TRCIDR10},
+  {"trcidr11", TRCIDR11},
+  {"trcidr12", TRCIDR12},
+  {"trcidr13", TRCIDR13},
+  {"trcidr0", TRCIDR0},
+  {"trcidr1", TRCIDR1},
+  {"trcidr2", TRCIDR2},
+  {"trcidr3", TRCIDR3},
+  {"trcidr4", TRCIDR4},
+  {"trcidr5", TRCIDR5},
+  {"trcidr6", TRCIDR6},
+  {"trcidr7", TRCIDR7},
+  {"trcoslsr", TRCOSLSR},
+  {"trcpdsr", TRCPDSR},
+  {"trcdevaff0", TRCDEVAFF0},
+  {"trcdevaff1", TRCDEVAFF1},
+  {"trclsr", TRCLSR},
+  {"trcauthstatus", TRCAUTHSTATUS},
+  {"trcdevarch", TRCDEVARCH},
+  {"trcdevid", TRCDEVID},
+  {"trcdevtype", TRCDEVTYPE},
+  {"trcpidr4", TRCPIDR4},
+  {"trcpidr5", TRCPIDR5},
+  {"trcpidr6", TRCPIDR6},
+  {"trcpidr7", TRCPIDR7},
+  {"trcpidr0", TRCPIDR0},
+  {"trcpidr1", TRCPIDR1},
+  {"trcpidr2", TRCPIDR2},
+  {"trcpidr3", TRCPIDR3},
+  {"trccidr0", TRCCIDR0},
+  {"trccidr1", TRCCIDR1},
+  {"trccidr2", TRCCIDR2},
+  {"trccidr3", TRCCIDR3},
+
+  // GICv3 registers
+  {"icc_iar1_el1", ICC_IAR1_EL1},
+  {"icc_iar0_el1", ICC_IAR0_EL1},
+  {"icc_hppir1_el1", ICC_HPPIR1_EL1},
+  {"icc_hppir0_el1", ICC_HPPIR0_EL1},
+  {"icc_rpr_el1", ICC_RPR_EL1},
+  {"ich_vtr_el2", ICH_VTR_EL2},
+  {"ich_eisr_el2", ICH_EISR_EL2},
+  {"ich_elsr_el2", ICH_ELSR_EL2}
 };
 
 A64SysReg::MRSMapper::MRSMapper() {
@@ -205,7 +253,19 @@ A64SysReg::MRSMapper::MRSMapper() {
 const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = {
   {"dbgdtrtx_el0", DBGDTRTX_EL0},
   {"oslar_el1", OSLAR_EL1},
-  {"pmswinc_el0", PMSWINC_EL0}
+  {"pmswinc_el0", PMSWINC_EL0},
+
+  // Trace registers
+  {"trcoslar", TRCOSLAR},
+  {"trclar", TRCLAR},
+
+  // GICv3 registers
+  {"icc_eoir1_el1", ICC_EOIR1_EL1},
+  {"icc_eoir0_el1", ICC_EOIR0_EL1},
+  {"icc_dir_el1", ICC_DIR_EL1},
+  {"icc_sgi1r_el1", ICC_SGI1R_EL1},
+  {"icc_asgi1r_el1", ICC_ASGI1R_EL1},
+  {"icc_sgi0r_el1", ICC_SGI0R_EL1}
 };
 
 A64SysReg::MSRMapper::MSRMapper() {
@@ -467,6 +527,230 @@ const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = {
   {"pmevtyper28_el0", PMEVTYPER28_EL0},
   {"pmevtyper29_el0", PMEVTYPER29_EL0},
   {"pmevtyper30_el0", PMEVTYPER30_EL0},
+
+  // Trace registers
+  {"trcprgctlr", TRCPRGCTLR},
+  {"trcprocselr", TRCPROCSELR},
+  {"trcconfigr", TRCCONFIGR},
+  {"trcauxctlr", TRCAUXCTLR},
+  {"trceventctl0r", TRCEVENTCTL0R},
+  {"trceventctl1r", TRCEVENTCTL1R},
+  {"trcstallctlr", TRCSTALLCTLR},
+  {"trctsctlr", TRCTSCTLR},
+  {"trcsyncpr", TRCSYNCPR},
+  {"trcccctlr", TRCCCCTLR},
+  {"trcbbctlr", TRCBBCTLR},
+  {"trctraceidr", TRCTRACEIDR},
+  {"trcqctlr", TRCQCTLR},
+  {"trcvictlr", TRCVICTLR},
+  {"trcviiectlr", TRCVIIECTLR},
+  {"trcvissctlr", TRCVISSCTLR},
+  {"trcvipcssctlr", TRCVIPCSSCTLR},
+  {"trcvdctlr", TRCVDCTLR},
+  {"trcvdsacctlr", TRCVDSACCTLR},
+  {"trcvdarcctlr", TRCVDARCCTLR},
+  {"trcseqevr0", TRCSEQEVR0},
+  {"trcseqevr1", TRCSEQEVR1},
+  {"trcseqevr2", TRCSEQEVR2},
+  {"trcseqrstevr", TRCSEQRSTEVR},
+  {"trcseqstr", TRCSEQSTR},
+  {"trcextinselr", TRCEXTINSELR},
+  {"trccntrldvr0", TRCCNTRLDVR0},
+  {"trccntrldvr1", TRCCNTRLDVR1},
+  {"trccntrldvr2", TRCCNTRLDVR2},
+  {"trccntrldvr3", TRCCNTRLDVR3},
+  {"trccntctlr0", TRCCNTCTLR0},
+  {"trccntctlr1", TRCCNTCTLR1},
+  {"trccntctlr2", TRCCNTCTLR2},
+  {"trccntctlr3", TRCCNTCTLR3},
+  {"trccntvr0", TRCCNTVR0},
+  {"trccntvr1", TRCCNTVR1},
+  {"trccntvr2", TRCCNTVR2},
+  {"trccntvr3", TRCCNTVR3},
+  {"trcimspec0", TRCIMSPEC0},
+  {"trcimspec1", TRCIMSPEC1},
+  {"trcimspec2", TRCIMSPEC2},
+  {"trcimspec3", TRCIMSPEC3},
+  {"trcimspec4", TRCIMSPEC4},
+  {"trcimspec5", TRCIMSPEC5},
+  {"trcimspec6", TRCIMSPEC6},
+  {"trcimspec7", TRCIMSPEC7},
+  {"trcrsctlr2", TRCRSCTLR2},
+  {"trcrsctlr3", TRCRSCTLR3},
+  {"trcrsctlr4", TRCRSCTLR4},
+  {"trcrsctlr5", TRCRSCTLR5},
+  {"trcrsctlr6", TRCRSCTLR6},
+  {"trcrsctlr7", TRCRSCTLR7},
+  {"trcrsctlr8", TRCRSCTLR8},
+  {"trcrsctlr9", TRCRSCTLR9},
+  {"trcrsctlr10", TRCRSCTLR10},
+  {"trcrsctlr11", TRCRSCTLR11},
+  {"trcrsctlr12", TRCRSCTLR12},
+  {"trcrsctlr13", TRCRSCTLR13},
+  {"trcrsctlr14", TRCRSCTLR14},
+  {"trcrsctlr15", TRCRSCTLR15},
+  {"trcrsctlr16", TRCRSCTLR16},
+  {"trcrsctlr17", TRCRSCTLR17},
+  {"trcrsctlr18", TRCRSCTLR18},
+  {"trcrsctlr19", TRCRSCTLR19},
+  {"trcrsctlr20", TRCRSCTLR20},
+  {"trcrsctlr21", TRCRSCTLR21},
+  {"trcrsctlr22", TRCRSCTLR22},
+  {"trcrsctlr23", TRCRSCTLR23},
+  {"trcrsctlr24", TRCRSCTLR24},
+  {"trcrsctlr25", TRCRSCTLR25},
+  {"trcrsctlr26", TRCRSCTLR26},
+  {"trcrsctlr27", TRCRSCTLR27},
+  {"trcrsctlr28", TRCRSCTLR28},
+  {"trcrsctlr29", TRCRSCTLR29},
+  {"trcrsctlr30", TRCRSCTLR30},
+  {"trcrsctlr31", TRCRSCTLR31},
+  {"trcssccr0", TRCSSCCR0},
+  {"trcssccr1", TRCSSCCR1},
+  {"trcssccr2", TRCSSCCR2},
+  {"trcssccr3", TRCSSCCR3},
+  {"trcssccr4", TRCSSCCR4},
+  {"trcssccr5", TRCSSCCR5},
+  {"trcssccr6", TRCSSCCR6},
+  {"trcssccr7", TRCSSCCR7},
+  {"trcsscsr0", TRCSSCSR0},
+  {"trcsscsr1", TRCSSCSR1},
+  {"trcsscsr2", TRCSSCSR2},
+  {"trcsscsr3", TRCSSCSR3},
+  {"trcsscsr4", TRCSSCSR4},
+  {"trcsscsr5", TRCSSCSR5},
+  {"trcsscsr6", TRCSSCSR6},
+  {"trcsscsr7", TRCSSCSR7},
+  {"trcsspcicr0", TRCSSPCICR0},
+  {"trcsspcicr1", TRCSSPCICR1},
+  {"trcsspcicr2", TRCSSPCICR2},
+  {"trcsspcicr3", TRCSSPCICR3},
+  {"trcsspcicr4", TRCSSPCICR4},
+  {"trcsspcicr5", TRCSSPCICR5},
+  {"trcsspcicr6", TRCSSPCICR6},
+  {"trcsspcicr7", TRCSSPCICR7},
+  {"trcpdcr", TRCPDCR},
+  {"trcacvr0", TRCACVR0},
+  {"trcacvr1", TRCACVR1},
+  {"trcacvr2", TRCACVR2},
+  {"trcacvr3", TRCACVR3},
+  {"trcacvr4", TRCACVR4},
+  {"trcacvr5", TRCACVR5},
+  {"trcacvr6", TRCACVR6},
+  {"trcacvr7", TRCACVR7},
+  {"trcacvr8", TRCACVR8},
+  {"trcacvr9", TRCACVR9},
+  {"trcacvr10", TRCACVR10},
+  {"trcacvr11", TRCACVR11},
+  {"trcacvr12", TRCACVR12},
+  {"trcacvr13", TRCACVR13},
+  {"trcacvr14", TRCACVR14},
+  {"trcacvr15", TRCACVR15},
+  {"trcacatr0", TRCACATR0},
+  {"trcacatr1", TRCACATR1},
+  {"trcacatr2", TRCACATR2},
+  {"trcacatr3", TRCACATR3},
+  {"trcacatr4", TRCACATR4},
+  {"trcacatr5", TRCACATR5},
+  {"trcacatr6", TRCACATR6},
+  {"trcacatr7", TRCACATR7},
+  {"trcacatr8", TRCACATR8},
+  {"trcacatr9", TRCACATR9},
+  {"trcacatr10", TRCACATR10},
+  {"trcacatr11", TRCACATR11},
+  {"trcacatr12", TRCACATR12},
+  {"trcacatr13", TRCACATR13},
+  {"trcacatr14", TRCACATR14},
+  {"trcacatr15", TRCACATR15},
+  {"trcdvcvr0", TRCDVCVR0},
+  {"trcdvcvr1", TRCDVCVR1},
+  {"trcdvcvr2", TRCDVCVR2},
+  {"trcdvcvr3", TRCDVCVR3},
+  {"trcdvcvr4", TRCDVCVR4},
+  {"trcdvcvr5", TRCDVCVR5},
+  {"trcdvcvr6", TRCDVCVR6},
+  {"trcdvcvr7", TRCDVCVR7},
+  {"trcdvcmr0", TRCDVCMR0},
+  {"trcdvcmr1", TRCDVCMR1},
+  {"trcdvcmr2", TRCDVCMR2},
+  {"trcdvcmr3", TRCDVCMR3},
+  {"trcdvcmr4", TRCDVCMR4},
+  {"trcdvcmr5", TRCDVCMR5},
+  {"trcdvcmr6", TRCDVCMR6},
+  {"trcdvcmr7", TRCDVCMR7},
+  {"trccidcvr0", TRCCIDCVR0},
+  {"trccidcvr1", TRCCIDCVR1},
+  {"trccidcvr2", TRCCIDCVR2},
+  {"trccidcvr3", TRCCIDCVR3},
+  {"trccidcvr4", TRCCIDCVR4},
+  {"trccidcvr5", TRCCIDCVR5},
+  {"trccidcvr6", TRCCIDCVR6},
+  {"trccidcvr7", TRCCIDCVR7},
+  {"trcvmidcvr0", TRCVMIDCVR0},
+  {"trcvmidcvr1", TRCVMIDCVR1},
+  {"trcvmidcvr2", TRCVMIDCVR2},
+  {"trcvmidcvr3", TRCVMIDCVR3},
+  {"trcvmidcvr4", TRCVMIDCVR4},
+  {"trcvmidcvr5", TRCVMIDCVR5},
+  {"trcvmidcvr6", TRCVMIDCVR6},
+  {"trcvmidcvr7", TRCVMIDCVR7},
+  {"trccidcctlr0", TRCCIDCCTLR0},
+  {"trccidcctlr1", TRCCIDCCTLR1},
+  {"trcvmidcctlr0", TRCVMIDCCTLR0},
+  {"trcvmidcctlr1", TRCVMIDCCTLR1},
+  {"trcitctrl", TRCITCTRL},
+  {"trcclaimset", TRCCLAIMSET},
+  {"trcclaimclr", TRCCLAIMCLR},
+
+  // GICv3 registers
+  {"icc_bpr1_el1", ICC_BPR1_EL1},
+  {"icc_bpr0_el1", ICC_BPR0_EL1},
+  {"icc_pmr_el1", ICC_PMR_EL1},
+  {"icc_ctlr_el1", ICC_CTLR_EL1},
+  {"icc_ctlr_el3", ICC_CTLR_EL3},
+  {"icc_sre_el1", ICC_SRE_EL1},
+  {"icc_sre_el2", ICC_SRE_EL2},
+  {"icc_sre_el3", ICC_SRE_EL3},
+  {"icc_igrpen0_el1", ICC_IGRPEN0_EL1},
+  {"icc_igrpen1_el1", ICC_IGRPEN1_EL1},
+  {"icc_igrpen1_el3", ICC_IGRPEN1_EL3},
+  {"icc_seien_el1", ICC_SEIEN_EL1},
+  {"icc_ap0r0_el1", ICC_AP0R0_EL1},
+  {"icc_ap0r1_el1", ICC_AP0R1_EL1},
+  {"icc_ap0r2_el1", ICC_AP0R2_EL1},
+  {"icc_ap0r3_el1", ICC_AP0R3_EL1},
+  {"icc_ap1r0_el1", ICC_AP1R0_EL1},
+  {"icc_ap1r1_el1", ICC_AP1R1_EL1},
+  {"icc_ap1r2_el1", ICC_AP1R2_EL1},
+  {"icc_ap1r3_el1", ICC_AP1R3_EL1},
+  {"ich_ap0r0_el2", ICH_AP0R0_EL2},
+  {"ich_ap0r1_el2", ICH_AP0R1_EL2},
+  {"ich_ap0r2_el2", ICH_AP0R2_EL2},
+  {"ich_ap0r3_el2", ICH_AP0R3_EL2},
+  {"ich_ap1r0_el2", ICH_AP1R0_EL2},
+  {"ich_ap1r1_el2", ICH_AP1R1_EL2},
+  {"ich_ap1r2_el2", ICH_AP1R2_EL2},
+  {"ich_ap1r3_el2", ICH_AP1R3_EL2},
+  {"ich_hcr_el2", ICH_HCR_EL2},
+  {"ich_misr_el2", ICH_MISR_EL2},
+  {"ich_vmcr_el2", ICH_VMCR_EL2},
+  {"ich_vseir_el2", ICH_VSEIR_EL2},
+  {"ich_lr0_el2", ICH_LR0_EL2},
+  {"ich_lr1_el2", ICH_LR1_EL2},
+  {"ich_lr2_el2", ICH_LR2_EL2},
+  {"ich_lr3_el2", ICH_LR3_EL2},
+  {"ich_lr4_el2", ICH_LR4_EL2},
+  {"ich_lr5_el2", ICH_LR5_EL2},
+  {"ich_lr6_el2", ICH_LR6_EL2},
+  {"ich_lr7_el2", ICH_LR7_EL2},
+  {"ich_lr8_el2", ICH_LR8_EL2},
+  {"ich_lr9_el2", ICH_LR9_EL2},
+  {"ich_lr10_el2", ICH_LR10_EL2},
+  {"ich_lr11_el2", ICH_LR11_EL2},
+  {"ich_lr12_el2", ICH_LR12_EL2},
+  {"ich_lr13_el2", ICH_LR13_EL2},
+  {"ich_lr14_el2", ICH_LR14_EL2},
+  {"ich_lr15_el2", ICH_LR15_EL2}
 };
 
 uint32_t
@@ -697,8 +981,11 @@ bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) {
       Rotation = RepeatWidth - Rotation;
     }
 
-    uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation)
-      | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
+    uint64_t ReplicatedOnes = ReplicatedMask;
+    if (Rotation != 0 && Rotation != 64)
+      ReplicatedOnes = (ReplicatedMask >> Rotation)
+        | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
+
     // Of course, they may not actually be ones, so we have to check that:
     if (!isMask_64(ReplicatedOnes))
       continue;
@@ -767,13 +1054,14 @@ bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits,
   int Rotation = (ImmR & (Width - 1));
   uint64_t Mask = (1ULL << Num1s) - 1;
   uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1;
-  Mask = (Mask >> Rotation)
-    | ((Mask << (Width - Rotation)) & WidthMask);
+  if (Rotation != 0 && Rotation != 64)
+    Mask = (Mask >> Rotation)
+      | ((Mask << (Width - Rotation)) & WidthMask);
 
-  Imm = 0;
-  for (unsigned i = 0; i < RegWidth / Width; ++i) {
-    Imm |= Mask;
+  Imm = Mask;
+  for (unsigned i = 1; i < RegWidth / Width; ++i) {
     Mask <<= Width;
+    Imm |= Mask;
   }
 
   return true;
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 5eebf4443177..1b773d632ebe 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -354,13 +354,73 @@ namespace A64SysReg {
     RVBAR_EL3         = 0xf601, // 11  110  1100  0000  001
     ISR_EL1           = 0xc608, // 11  000  1100  0001  000
     CNTPCT_EL0        = 0xdf01, // 11  011  1110  0000  001
-    CNTVCT_EL0        = 0xdf02  // 11  011  1110  0000  010
+    CNTVCT_EL0        = 0xdf02,  // 11  011  1110  0000  010
+
+    // Trace registers
+    TRCSTATR          = 0x8818, // 10  001  0000  0011  000
+    TRCIDR8           = 0x8806, // 10  001  0000  0000  110
+    TRCIDR9           = 0x880e, // 10  001  0000  0001  110
+    TRCIDR10          = 0x8816, // 10  001  0000  0010  110
+    TRCIDR11          = 0x881e, // 10  001  0000  0011  110
+    TRCIDR12          = 0x8826, // 10  001  0000  0100  110
+    TRCIDR13          = 0x882e, // 10  001  0000  0101  110
+    TRCIDR0           = 0x8847, // 10  001  0000  1000  111
+    TRCIDR1           = 0x884f, // 10  001  0000  1001  111
+    TRCIDR2           = 0x8857, // 10  001  0000  1010  111
+    TRCIDR3           = 0x885f, // 10  001  0000  1011  111
+    TRCIDR4           = 0x8867, // 10  001  0000  1100  111
+    TRCIDR5           = 0x886f, // 10  001  0000  1101  111
+    TRCIDR6           = 0x8877, // 10  001  0000  1110  111
+    TRCIDR7           = 0x887f, // 10  001  0000  1111  111
+    TRCOSLSR          = 0x888c, // 10  001  0001  0001  100
+    TRCPDSR           = 0x88ac, // 10  001  0001  0101  100
+    TRCDEVAFF0        = 0x8bd6, // 10  001  0111  1010  110
+    TRCDEVAFF1        = 0x8bde, // 10  001  0111  1011  110
+    TRCLSR            = 0x8bee, // 10  001  0111  1101  110
+    TRCAUTHSTATUS     = 0x8bf6, // 10  001  0111  1110  110
+    TRCDEVARCH        = 0x8bfe, // 10  001  0111  1111  110
+    TRCDEVID          = 0x8b97, // 10  001  0111  0010  111
+    TRCDEVTYPE        = 0x8b9f, // 10  001  0111  0011  111
+    TRCPIDR4          = 0x8ba7, // 10  001  0111  0100  111
+    TRCPIDR5          = 0x8baf, // 10  001  0111  0101  111
+    TRCPIDR6          = 0x8bb7, // 10  001  0111  0110  111
+    TRCPIDR7          = 0x8bbf, // 10  001  0111  0111  111
+    TRCPIDR0          = 0x8bc7, // 10  001  0111  1000  111
+    TRCPIDR1          = 0x8bcf, // 10  001  0111  1001  111
+    TRCPIDR2          = 0x8bd7, // 10  001  0111  1010  111
+    TRCPIDR3          = 0x8bdf, // 10  001  0111  1011  111
+    TRCCIDR0          = 0x8be7, // 10  001  0111  1100  111
+    TRCCIDR1          = 0x8bef, // 10  001  0111  1101  111
+    TRCCIDR2          = 0x8bf7, // 10  001  0111  1110  111
+    TRCCIDR3          = 0x8bff, // 10  001  0111  1111  111
+
+    // GICv3 registers
+    ICC_IAR1_EL1      = 0xc660, // 11  000  1100  1100  000
+    ICC_IAR0_EL1      = 0xc640, // 11  000  1100  1000  000
+    ICC_HPPIR1_EL1    = 0xc662, // 11  000  1100  1100  010
+    ICC_HPPIR0_EL1    = 0xc642, // 11  000  1100  1000  010
+    ICC_RPR_EL1       = 0xc65b, // 11  000  1100  1011  011
+    ICH_VTR_EL2       = 0xe659, // 11  100  1100  1011  001
+    ICH_EISR_EL2      = 0xe65b, // 11  100  1100  1011  011
+    ICH_ELSR_EL2      = 0xe65d  // 11  100  1100  1011  101
   };
 
   enum SysRegWOValues {
     DBGDTRTX_EL0      = 0x9828, // 10  011  0000  0101  000
     OSLAR_EL1         = 0x8084, // 10  000  0001  0000  100
-    PMSWINC_EL0       = 0xdce4  // 11  011  1001  1100  100
+    PMSWINC_EL0       = 0xdce4,  // 11  011  1001  1100  100
+
+    // Trace Registers
+    TRCOSLAR          = 0x8884, // 10  001  0001  0000  100
+    TRCLAR            = 0x8be6, // 10  001  0111  1100  110
+
+    // GICv3 registers
+    ICC_EOIR1_EL1     = 0xc661, // 11  000  1100  1100  001
+    ICC_EOIR0_EL1     = 0xc641, // 11  000  1100  1000  001
+    ICC_DIR_EL1       = 0xc659, // 11  000  1100  1011  001
+    ICC_SGI1R_EL1     = 0xc65d, // 11  000  1100  1011  101
+    ICC_ASGI1R_EL1    = 0xc65e, // 11  000  1100  1011  110
+    ICC_SGI0R_EL1     = 0xc65f  // 11  000  1100  1011  111
   };
 
   enum SysRegValues {
@@ -616,7 +676,231 @@ namespace A64SysReg {
     PMEVTYPER27_EL0   = 0xdf7b, // 11  011  1110  1111  011
     PMEVTYPER28_EL0   = 0xdf7c, // 11  011  1110  1111  100
     PMEVTYPER29_EL0   = 0xdf7d, // 11  011  1110  1111  101
-    PMEVTYPER30_EL0   = 0xdf7e  // 11  011  1110  1111  110
+    PMEVTYPER30_EL0   = 0xdf7e, // 11  011  1110  1111  110
+
+    // Trace registers
+    TRCPRGCTLR        = 0x8808, // 10  001  0000  0001  000
+    TRCPROCSELR       = 0x8810, // 10  001  0000  0010  000
+    TRCCONFIGR        = 0x8820, // 10  001  0000  0100  000
+    TRCAUXCTLR        = 0x8830, // 10  001  0000  0110  000
+    TRCEVENTCTL0R     = 0x8840, // 10  001  0000  1000  000
+    TRCEVENTCTL1R     = 0x8848, // 10  001  0000  1001  000
+    TRCSTALLCTLR      = 0x8858, // 10  001  0000  1011  000
+    TRCTSCTLR         = 0x8860, // 10  001  0000  1100  000
+    TRCSYNCPR         = 0x8868, // 10  001  0000  1101  000
+    TRCCCCTLR         = 0x8870, // 10  001  0000  1110  000
+    TRCBBCTLR         = 0x8878, // 10  001  0000  1111  000
+    TRCTRACEIDR       = 0x8801, // 10  001  0000  0000  001
+    TRCQCTLR          = 0x8809, // 10  001  0000  0001  001
+    TRCVICTLR         = 0x8802, // 10  001  0000  0000  010
+    TRCVIIECTLR       = 0x880a, // 10  001  0000  0001  010
+    TRCVISSCTLR       = 0x8812, // 10  001  0000  0010  010
+    TRCVIPCSSCTLR     = 0x881a, // 10  001  0000  0011  010
+    TRCVDCTLR         = 0x8842, // 10  001  0000  1000  010
+    TRCVDSACCTLR      = 0x884a, // 10  001  0000  1001  010
+    TRCVDARCCTLR      = 0x8852, // 10  001  0000  1010  010
+    TRCSEQEVR0        = 0x8804, // 10  001  0000  0000  100
+    TRCSEQEVR1        = 0x880c, // 10  001  0000  0001  100
+    TRCSEQEVR2        = 0x8814, // 10  001  0000  0010  100
+    TRCSEQRSTEVR      = 0x8834, // 10  001  0000  0110  100
+    TRCSEQSTR         = 0x883c, // 10  001  0000  0111  100
+    TRCEXTINSELR      = 0x8844, // 10  001  0000  1000  100
+    TRCCNTRLDVR0      = 0x8805, // 10  001  0000  0000  101
+    TRCCNTRLDVR1      = 0x880d, // 10  001  0000  0001  101
+    TRCCNTRLDVR2      = 0x8815, // 10  001  0000  0010  101
+    TRCCNTRLDVR3      = 0x881d, // 10  001  0000  0011  101
+    TRCCNTCTLR0       = 0x8825, // 10  001  0000  0100  101
+    TRCCNTCTLR1       = 0x882d, // 10  001  0000  0101  101
+    TRCCNTCTLR2       = 0x8835, // 10  001  0000  0110  101
+    TRCCNTCTLR3       = 0x883d, // 10  001  0000  0111  101
+    TRCCNTVR0         = 0x8845, // 10  001  0000  1000  101
+    TRCCNTVR1         = 0x884d, // 10  001  0000  1001  101
+    TRCCNTVR2         = 0x8855, // 10  001  0000  1010  101
+    TRCCNTVR3         = 0x885d, // 10  001  0000  1011  101
+    TRCIMSPEC0        = 0x8807, // 10  001  0000  0000  111
+    TRCIMSPEC1        = 0x880f, // 10  001  0000  0001  111
+    TRCIMSPEC2        = 0x8817, // 10  001  0000  0010  111
+    TRCIMSPEC3        = 0x881f, // 10  001  0000  0011  111
+    TRCIMSPEC4        = 0x8827, // 10  001  0000  0100  111
+    TRCIMSPEC5        = 0x882f, // 10  001  0000  0101  111
+    TRCIMSPEC6        = 0x8837, // 10  001  0000  0110  111
+    TRCIMSPEC7        = 0x883f, // 10  001  0000  0111  111
+    TRCRSCTLR2        = 0x8890, // 10  001  0001  0010  000
+    TRCRSCTLR3        = 0x8898, // 10  001  0001  0011  000
+    TRCRSCTLR4        = 0x88a0, // 10  001  0001  0100  000
+    TRCRSCTLR5        = 0x88a8, // 10  001  0001  0101  000
+    TRCRSCTLR6        = 0x88b0, // 10  001  0001  0110  000
+    TRCRSCTLR7        = 0x88b8, // 10  001  0001  0111  000
+    TRCRSCTLR8        = 0x88c0, // 10  001  0001  1000  000
+    TRCRSCTLR9        = 0x88c8, // 10  001  0001  1001  000
+    TRCRSCTLR10       = 0x88d0, // 10  001  0001  1010  000
+    TRCRSCTLR11       = 0x88d8, // 10  001  0001  1011  000
+    TRCRSCTLR12       = 0x88e0, // 10  001  0001  1100  000
+    TRCRSCTLR13       = 0x88e8, // 10  001  0001  1101  000
+    TRCRSCTLR14       = 0x88f0, // 10  001  0001  1110  000
+    TRCRSCTLR15       = 0x88f8, // 10  001  0001  1111  000
+    TRCRSCTLR16       = 0x8881, // 10  001  0001  0000  001
+    TRCRSCTLR17       = 0x8889, // 10  001  0001  0001  001
+    TRCRSCTLR18       = 0x8891, // 10  001  0001  0010  001
+    TRCRSCTLR19       = 0x8899, // 10  001  0001  0011  001
+    TRCRSCTLR20       = 0x88a1, // 10  001  0001  0100  001
+    TRCRSCTLR21       = 0x88a9, // 10  001  0001  0101  001
+    TRCRSCTLR22       = 0x88b1, // 10  001  0001  0110  001
+    TRCRSCTLR23       = 0x88b9, // 10  001  0001  0111  001
+    TRCRSCTLR24       = 0x88c1, // 10  001  0001  1000  001
+    TRCRSCTLR25       = 0x88c9, // 10  001  0001  1001  001
+    TRCRSCTLR26       = 0x88d1, // 10  001  0001  1010  001
+    TRCRSCTLR27       = 0x88d9, // 10  001  0001  1011  001
+    TRCRSCTLR28       = 0x88e1, // 10  001  0001  1100  001
+    TRCRSCTLR29       = 0x88e9, // 10  001  0001  1101  001
+    TRCRSCTLR30       = 0x88f1, // 10  001  0001  1110  001
+    TRCRSCTLR31       = 0x88f9, // 10  001  0001  1111  001
+    TRCSSCCR0         = 0x8882, // 10  001  0001  0000  010
+    TRCSSCCR1         = 0x888a, // 10  001  0001  0001  010
+    TRCSSCCR2         = 0x8892, // 10  001  0001  0010  010
+    TRCSSCCR3         = 0x889a, // 10  001  0001  0011  010
+    TRCSSCCR4         = 0x88a2, // 10  001  0001  0100  010
+    TRCSSCCR5         = 0x88aa, // 10  001  0001  0101  010
+    TRCSSCCR6         = 0x88b2, // 10  001  0001  0110  010
+    TRCSSCCR7         = 0x88ba, // 10  001  0001  0111  010
+    TRCSSCSR0         = 0x88c2, // 10  001  0001  1000  010
+    TRCSSCSR1         = 0x88ca, // 10  001  0001  1001  010
+    TRCSSCSR2         = 0x88d2, // 10  001  0001  1010  010
+    TRCSSCSR3         = 0x88da, // 10  001  0001  1011  010
+    TRCSSCSR4         = 0x88e2, // 10  001  0001  1100  010
+    TRCSSCSR5         = 0x88ea, // 10  001  0001  1101  010
+    TRCSSCSR6         = 0x88f2, // 10  001  0001  1110  010
+    TRCSSCSR7         = 0x88fa, // 10  001  0001  1111  010
+    TRCSSPCICR0       = 0x8883, // 10  001  0001  0000  011
+    TRCSSPCICR1       = 0x888b, // 10  001  0001  0001  011
+    TRCSSPCICR2       = 0x8893, // 10  001  0001  0010  011
+    TRCSSPCICR3       = 0x889b, // 10  001  0001  0011  011
+    TRCSSPCICR4       = 0x88a3, // 10  001  0001  0100  011
+    TRCSSPCICR5       = 0x88ab, // 10  001  0001  0101  011
+    TRCSSPCICR6       = 0x88b3, // 10  001  0001  0110  011
+    TRCSSPCICR7       = 0x88bb, // 10  001  0001  0111  011
+    TRCPDCR           = 0x88a4, // 10  001  0001  0100  100
+    TRCACVR0          = 0x8900, // 10  001  0010  0000  000
+    TRCACVR1          = 0x8910, // 10  001  0010  0010  000
+    TRCACVR2          = 0x8920, // 10  001  0010  0100  000
+    TRCACVR3          = 0x8930, // 10  001  0010  0110  000
+    TRCACVR4          = 0x8940, // 10  001  0010  1000  000
+    TRCACVR5          = 0x8950, // 10  001  0010  1010  000
+    TRCACVR6          = 0x8960, // 10  001  0010  1100  000
+    TRCACVR7          = 0x8970, // 10  001  0010  1110  000
+    TRCACVR8          = 0x8901, // 10  001  0010  0000  001
+    TRCACVR9          = 0x8911, // 10  001  0010  0010  001
+    TRCACVR10         = 0x8921, // 10  001  0010  0100  001
+    TRCACVR11         = 0x8931, // 10  001  0010  0110  001
+    TRCACVR12         = 0x8941, // 10  001  0010  1000  001
+    TRCACVR13         = 0x8951, // 10  001  0010  1010  001
+    TRCACVR14         = 0x8961, // 10  001  0010  1100  001
+    TRCACVR15         = 0x8971, // 10  001  0010  1110  001
+    TRCACATR0         = 0x8902, // 10  001  0010  0000  010
+    TRCACATR1         = 0x8912, // 10  001  0010  0010  010
+    TRCACATR2         = 0x8922, // 10  001  0010  0100  010
+    TRCACATR3         = 0x8932, // 10  001  0010  0110  010
+    TRCACATR4         = 0x8942, // 10  001  0010  1000  010
+    TRCACATR5         = 0x8952, // 10  001  0010  1010  010
+    TRCACATR6         = 0x8962, // 10  001  0010  1100  010
+    TRCACATR7         = 0x8972, // 10  001  0010  1110  010
+    TRCACATR8         = 0x8903, // 10  001  0010  0000  011
+    TRCACATR9         = 0x8913, // 10  001  0010  0010  011
+    TRCACATR10        = 0x8923, // 10  001  0010  0100  011
+    TRCACATR11        = 0x8933, // 10  001  0010  0110  011
+    TRCACATR12        = 0x8943, // 10  001  0010  1000  011
+    TRCACATR13        = 0x8953, // 10  001  0010  1010  011
+    TRCACATR14        = 0x8963, // 10  001  0010  1100  011
+    TRCACATR15        = 0x8973, // 10  001  0010  1110  011
+    TRCDVCVR0         = 0x8904, // 10  001  0010  0000  100
+    TRCDVCVR1         = 0x8924, // 10  001  0010  0100  100
+    TRCDVCVR2         = 0x8944, // 10  001  0010  1000  100
+    TRCDVCVR3         = 0x8964, // 10  001  0010  1100  100
+    TRCDVCVR4         = 0x8905, // 10  001  0010  0000  101
+    TRCDVCVR5         = 0x8925, // 10  001  0010  0100  101
+    TRCDVCVR6         = 0x8945, // 10  001  0010  1000  101
+    TRCDVCVR7         = 0x8965, // 10  001  0010  1100  101
+    TRCDVCMR0         = 0x8906, // 10  001  0010  0000  110
+    TRCDVCMR1         = 0x8926, // 10  001  0010  0100  110
+    TRCDVCMR2         = 0x8946, // 10  001  0010  1000  110
+    TRCDVCMR3         = 0x8966, // 10  001  0010  1100  110
+    TRCDVCMR4         = 0x8907, // 10  001  0010  0000  111
+    TRCDVCMR5         = 0x8927, // 10  001  0010  0100  111
+    TRCDVCMR6         = 0x8947, // 10  001  0010  1000  111
+    TRCDVCMR7         = 0x8967, // 10  001  0010  1100  111
+    TRCCIDCVR0        = 0x8980, // 10  001  0011  0000  000
+    TRCCIDCVR1        = 0x8990, // 10  001  0011  0010  000
+    TRCCIDCVR2        = 0x89a0, // 10  001  0011  0100  000
+    TRCCIDCVR3        = 0x89b0, // 10  001  0011  0110  000
+    TRCCIDCVR4        = 0x89c0, // 10  001  0011  1000  000
+    TRCCIDCVR5        = 0x89d0, // 10  001  0011  1010  000
+    TRCCIDCVR6        = 0x89e0, // 10  001  0011  1100  000
+    TRCCIDCVR7        = 0x89f0, // 10  001  0011  1110  000
+    TRCVMIDCVR0       = 0x8981, // 10  001  0011  0000  001
+    TRCVMIDCVR1       = 0x8991, // 10  001  0011  0010  001
+    TRCVMIDCVR2       = 0x89a1, // 10  001  0011  0100  001
+    TRCVMIDCVR3       = 0x89b1, // 10  001  0011  0110  001
+    TRCVMIDCVR4       = 0x89c1, // 10  001  0011  1000  001
+    TRCVMIDCVR5       = 0x89d1, // 10  001  0011  1010  001
+    TRCVMIDCVR6       = 0x89e1, // 10  001  0011  1100  001
+    TRCVMIDCVR7       = 0x89f1, // 10  001  0011  1110  001
+    TRCCIDCCTLR0      = 0x8982, // 10  001  0011  0000  010
+    TRCCIDCCTLR1      = 0x898a, // 10  001  0011  0001  010
+    TRCVMIDCCTLR0     = 0x8992, // 10  001  0011  0010  010
+    TRCVMIDCCTLR1     = 0x899a, // 10  001  0011  0011  010
+    TRCITCTRL         = 0x8b84, // 10  001  0111  0000  100
+    TRCCLAIMSET       = 0x8bc6, // 10  001  0111  1000  110
+    TRCCLAIMCLR       = 0x8bce, // 10  001  0111  1001  110
+
+    // GICv3 registers
+    ICC_BPR1_EL1      = 0xc663, // 11  000  1100  1100  011
+    ICC_BPR0_EL1      = 0xc643, // 11  000  1100  1000  011
+    ICC_PMR_EL1       = 0xc230, // 11  000  0100  0110  000
+    ICC_CTLR_EL1      = 0xc664, // 11  000  1100  1100  100
+    ICC_CTLR_EL3      = 0xf664, // 11  110  1100  1100  100
+    ICC_SRE_EL1       = 0xc665, // 11  000  1100  1100  101
+    ICC_SRE_EL2       = 0xe64d, // 11  100  1100  1001  101
+    ICC_SRE_EL3       = 0xf665, // 11  110  1100  1100  101
+    ICC_IGRPEN0_EL1   = 0xc666, // 11  000  1100  1100  110
+    ICC_IGRPEN1_EL1   = 0xc667, // 11  000  1100  1100  111
+    ICC_IGRPEN1_EL3   = 0xf667, // 11  110  1100  1100  111
+    ICC_SEIEN_EL1     = 0xc668, // 11  000  1100  1101  000
+    ICC_AP0R0_EL1     = 0xc644, // 11  000  1100  1000  100
+    ICC_AP0R1_EL1     = 0xc645, // 11  000  1100  1000  101
+    ICC_AP0R2_EL1     = 0xc646, // 11  000  1100  1000  110
+    ICC_AP0R3_EL1     = 0xc647, // 11  000  1100  1000  111
+    ICC_AP1R0_EL1     = 0xc648, // 11  000  1100  1001  000
+    ICC_AP1R1_EL1     = 0xc649, // 11  000  1100  1001  001
+    ICC_AP1R2_EL1     = 0xc64a, // 11  000  1100  1001  010
+    ICC_AP1R3_EL1     = 0xc64b, // 11  000  1100  1001  011
+    ICH_AP0R0_EL2     = 0xe640, // 11  100  1100  1000  000
+    ICH_AP0R1_EL2     = 0xe641, // 11  100  1100  1000  001
+    ICH_AP0R2_EL2     = 0xe642, // 11  100  1100  1000  010
+    ICH_AP0R3_EL2     = 0xe643, // 11  100  1100  1000  011
+    ICH_AP1R0_EL2     = 0xe648, // 11  100  1100  1001  000
+    ICH_AP1R1_EL2     = 0xe649, // 11  100  1100  1001  001
+    ICH_AP1R2_EL2     = 0xe64a, // 11  100  1100  1001  010
+    ICH_AP1R3_EL2     = 0xe64b, // 11  100  1100  1001  011
+    ICH_HCR_EL2       = 0xe658, // 11  100  1100  1011  000
+    ICH_MISR_EL2      = 0xe65a, // 11  100  1100  1011  010
+    ICH_VMCR_EL2      = 0xe65f, // 11  100  1100  1011  111
+    ICH_VSEIR_EL2     = 0xe64c, // 11  100  1100  1001  100
+    ICH_LR0_EL2       = 0xe660, // 11  100  1100  1100  000
+    ICH_LR1_EL2       = 0xe661, // 11  100  1100  1100  001
+    ICH_LR2_EL2       = 0xe662, // 11  100  1100  1100  010
+    ICH_LR3_EL2       = 0xe663, // 11  100  1100  1100  011
+    ICH_LR4_EL2       = 0xe664, // 11  100  1100  1100  100
+    ICH_LR5_EL2       = 0xe665, // 11  100  1100  1100  101
+    ICH_LR6_EL2       = 0xe666, // 11  100  1100  1100  110
+    ICH_LR7_EL2       = 0xe667, // 11  100  1100  1100  111
+    ICH_LR8_EL2       = 0xe668, // 11  100  1100  1101  000
+    ICH_LR9_EL2       = 0xe669, // 11  100  1100  1101  001
+    ICH_LR10_EL2      = 0xe66a, // 11  100  1100  1101  010
+    ICH_LR11_EL2      = 0xe66b, // 11  100  1100  1101  011
+    ICH_LR12_EL2      = 0xe66c, // 11  100  1100  1101  100
+    ICH_LR13_EL2      = 0xe66d, // 11  100  1100  1101  101
+    ICH_LR14_EL2      = 0xe66e, // 11  100  1100  1101  110
+    ICH_LR15_EL2      = 0xe66f  // 11  100  1100  1101  111
   };
 
   // Note that these do not inherit from NamedImmMapper. This class is
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 46915eecf6fb..2d7470919dc4 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -59,6 +59,8 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
                                          "FP compare + branch is slow">;
 def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
                           "Floating point unit supports single precision only">;
+def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
+                          "Enable support for TrustZone security extensions">;
 
 // Some processors have FP multiply-accumulate instructions that don't
 // play nicely with other VFP / NEON instructions, and it's generally better
@@ -143,32 +145,34 @@ include "ARMSchedule.td"
 // ARM processor families.
 def ProcA5      : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
                                    "Cortex-A5 ARM processors",
-                                   [FeatureSlowFPBrcc, FeatureNEONForFP,
-                                    FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
-                                    FeatureT2XtPk]>;
+                                   [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+                                    FeatureVMLxForwarding, FeatureT2XtPk,
+                                    FeatureTrustZone]>;
 def ProcA8      : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
                                    "Cortex-A8 ARM processors",
-                                   [FeatureSlowFPBrcc, FeatureNEONForFP,
-                                    FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
-                                    FeatureT2XtPk]>;
+                                   [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+                                    FeatureVMLxForwarding, FeatureT2XtPk,
+                                    FeatureTrustZone]>;
 def ProcA9      : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
                                    "Cortex-A9 ARM processors",
                                    [FeatureVMLxForwarding,
                                     FeatureT2XtPk, FeatureFP16,
-                                    FeatureAvoidPartialCPSR]>;
+                                    FeatureAvoidPartialCPSR,
+                                    FeatureTrustZone]>;
 def ProcSwift   : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
                                    "Swift ARM processors",
                                    [FeatureNEONForFP, FeatureT2XtPk,
                                     FeatureVFP4, FeatureMP, FeatureHWDiv,
                                     FeatureHWDivARM, FeatureAvoidPartialCPSR,
                                     FeatureAvoidMOVsShOp,
-                                    FeatureHasSlowFPVMLx]>;
+                                    FeatureHasSlowFPVMLx, FeatureTrustZone]>;
 
 // FIXME: It has not been determined if A15 has these features.
 def ProcA15      : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
                                    "Cortex-A15 ARM processors",
                                    [FeatureT2XtPk, FeatureFP16,
-                                    FeatureAvoidPartialCPSR]>;
+                                    FeatureAvoidPartialCPSR,
+                                    FeatureTrustZone]>;
 def ProcR5      : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
                                    "Cortex-R5 ARM processors",
                                    [FeatureSlowFPBrcc, FeatureHWDivARM,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index ed8b9cd9a1c0..9e68ff44890e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -3734,9 +3734,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
   if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
     return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
 
-  // A9-like cores are particularly picky about mixing the two and want these
+  // CortexA9 is particularly picky about mixing the two and wants these
   // converted.
-  if (Subtarget.isLikeA9() && !isPredicated(MI) &&
+  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
       (MI->getOpcode() == ARM::VMOVRS ||
        MI->getOpcode() == ARM::VMOVSR ||
        MI->getOpcode() == ARM::VMOVS))
@@ -4023,14 +4023,12 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
 //
 // FCONSTD can be used as a dependency-breaking instruction.
-
-
 unsigned ARMBaseInstrInfo::
 getPartialRegUpdateClearance(const MachineInstr *MI,
                              unsigned OpNum,
                              const TargetRegisterInfo *TRI) const {
-  // Only Swift has partial register update problems.
-  if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+  if (!SwiftPartialUpdateClearance ||
+      !(Subtarget.isSwift() || Subtarget.isCortexA15()))
     return 0;
 
   assert(TRI && "Need TRI instance");
@@ -4056,7 +4054,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
 
     // Explicitly reads the dependency.
   case ARM::VLD1LNd32:
-    UseOp = 1;
+    UseOp = 3;
     break;
   default:
     return 0;
@@ -4125,3 +4123,15 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
 bool ARMBaseInstrInfo::hasNOP() const {
   return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
 }
+
+bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
+  unsigned ShOpVal = MI->getOperand(3).getImm();
+  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
+  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
+  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
+      ((ShImm == 1 || ShImm == 2) &&
+       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
+    return true;
+
+  return false;
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 2698132d2d57..7c107bb41951 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -314,6 +314,10 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
   bool canCauseFpMLxStall(unsigned Opcode) const {
     return MLxHazardOpcodes.count(Opcode);
   }
+
+  /// Returns true if the instruction has a shift by immediate that can be
+  /// executed in one cycle less.
+  bool isSwiftFastImmShift(const MachineInstr *MI) const;
 };
 
 static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index abdd25174339..b6b27f849a23 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -680,7 +680,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // means the stack pointer cannot be used to access the emergency spill slot
   // when !hasReservedCallFrame().
 #ifndef NDEBUG
-  if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+  if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
     assert(TFI->hasReservedCallFrame(MF) &&
            "Cannot use SP to access the emergency spill slot in "
            "functions without a reserved call frame");
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 29fcd4009af3..c5b9e50e0c05 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -144,8 +144,8 @@ class ARMFastISel : public FastISel {
     virtual bool TargetSelectInstruction(const Instruction *I);
     virtual unsigned TargetMaterializeConstant(const Constant *C);
     virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
-    virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
-                               const LoadInst *LI);
+    virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+                                     const LoadInst *LI);
     virtual bool FastLowerArguments();
   private:
   #include "ARMGenFastISel.inc"
@@ -2797,12 +2797,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
   return false;
 }
 
-/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// \brief The specified machine instr operand is a vreg, and that
 /// vreg is being provided by the specified load instruction.  If possible,
 /// try to fold the load as an operand to the instruction, returning true if
 /// successful.
-bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
-                                const LoadInst *LI) {
+bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+                                      const LoadInst *LI) {
   // Verify we have a legal type before going any further.
   MVT VT;
   if (!isLoadTypeLegal(LI->getType(), VT))
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 3b124087683e..982d043c188e 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -14,7 +14,9 @@
 #include "ARMFrameLowering.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMBaseRegisterInfo.h"
+#include "ARMInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -1368,7 +1370,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
         // note: Thumb1 functions spill to R12, not the stack.  Reserve a slot
         // closest to SP or frame pointer.
         const TargetRegisterClass *RC = &ARM::GPRRegClass;
-        RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+        RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                            RC->getAlignment(),
                                                            false));
       }
@@ -1428,3 +1430,221 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
   MBB.erase(I);
 }
 
+// Get minimum constant for ARM instruction set that is greator than 
+// or equal to the argument.
+// In ARM instruction, constant can have any value that can be
+// produced by rotating an 8-bit value right by and even number
+// of bits within a 32-bit word.
+static uint32_t AlignToARMConstant(uint32_t Value) {
+  unsigned Shifted = 0;
+
+  if (Value == 0)
+      return 0;
+
+  while (!(Value & 0xC0000000)) {
+      Value = Value << 2;
+      Shifted += 2;
+  }
+
+  bool Carry = (Value & 0x00FFFFFF);
+  Value = ((Value & 0xFF000000) >> 24) + Carry;
+
+  if (Value & 0x0000100)
+      Value = Value & 0x000001FC;
+
+  if (Shifted > 24)
+      Value = Value >> (Shifted - 24);
+  else 
+      Value = Value << (24 - Shifted);
+
+  return Value;
+}
+
+// The stack limit in the TCB is set to this manyu bytes above the actual 
+// stack limit.
+static const uint64_t kSplitStackAvailable = 256;
+
+// Adjust function prologue to enable split stack.
+// Only support android.
+void 
+ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
+  const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>();
+
+  // Doesn't support vararg function.
+  if (MF.getFunction()->isVarArg())
+    report_fatal_error("Segmented stacks do not support vararg functions.");
+  // Doesn't support other than android.
+  if (!ST->isTargetAndroid())
+    report_fatal_error("Segmented statks not supported on this platfrom.");
+  
+  MachineBasicBlock &prologueMBB = MF.front();
+  MachineFrameInfo* MFI = MF.getFrameInfo();
+  const ARMBaseInstrInfo &TII = *TM.getInstrInfo();
+  ARMFunctionInfo* ARMFI = MF.getInfo<ARMFunctionInfo>();
+  DebugLoc DL;
+
+  // Use R4 and R5 as scratch register.
+  // We should save R4 and R5 before use it and restore before
+  // leave the function.
+  unsigned ScratchReg0 = ARM::R4;
+  unsigned ScratchReg1 = ARM::R5;
+  // Use the last tls slot.
+  unsigned TlsOffset = 63;
+  uint64_t AlignedStackSize;
+
+  MachineBasicBlock* prevStackMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock* postStackMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock* allocMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock* checkMBB = MF.CreateMachineBasicBlock();
+
+  for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
+         e = prologueMBB.livein_end(); i != e; ++i) {
+    allocMBB->addLiveIn(*i);
+    checkMBB->addLiveIn(*i);
+    prevStackMBB->addLiveIn(*i);
+    postStackMBB->addLiveIn(*i);
+  }
+
+  MF.push_front(postStackMBB);
+  MF.push_front(allocMBB);
+  MF.push_front(checkMBB);
+  MF.push_front(prevStackMBB);
+
+  // The required stack size that is aligend to ARM constant critarion.
+  uint64_t StackSize = MFI->getStackSize();
+
+  // If the front-end requested a fixed stack segment size, use that.
+  const Function *Fn = MF.getFunction();
+  if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                       Attribute::FixedStackSegment)) {
+    StackSize = MF.getTarget().Options.FixedStackSegmentSize;
+  }
+
+  AlignedStackSize = AlignToARMConstant(StackSize);
+
+  // When the frame size is less than 256 we just compare the stack
+  // boundary directly to the value of the stack pointer, per gcc.
+  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
+
+  // We will use two of callee save registers as scratch register so we
+  // need to save those registers into stack frame before use it.
+  // We will use SR0 to hold stack limit and SR1 to stack size requested.
+  // and arguments for __morestack().
+  // SR0: Scratch Register #0
+  // SR1: Scratch Register #1
+  // push {SR0, SR1}
+  AddDefaultPred(BuildMI(prevStackMBB, DL, TII.get(ARM::STMDB_UPD))
+                 .addReg(ARM::SP, RegState::Define)
+                 .addReg(ARM::SP))
+    .addReg(ScratchReg0)
+    .addReg(ScratchReg1);
+
+  if (CompareStackPointer) {
+    // mov SR1, sp
+    AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
+                   .addReg(ARM::SP)).addReg(0);
+  } else {
+    // sub SR1, sp, #StackSize
+    AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
+                   .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
+  }
+ 
+  // Get TLS base address.
+  // mrc p15, #0, SR0, c13, c0, #3
+  AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::MRC), ScratchReg0)
+                 .addImm(15)
+                 .addImm(0)
+                 .addImm(13)
+                 .addImm(0)
+                 .addImm(3));
+
+  // The last slot, assume that the last tls slot holds the stack limit
+  // add SR0, SR0, #252
+  AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::ADDri), ScratchReg0)
+                 .addReg(ScratchReg0).addImm(4*TlsOffset)).addReg(0);
+
+  // Get stack limit.
+  // ldr SR0, [sr0]
+  AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
+                 .addReg(ScratchReg0).addImm(0));
+
+  // Compare stack limit with stack size requested.
+  // cmp SR0, SR1
+  AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::CMPrr))
+                 .addReg(ScratchReg0)
+                 .addReg(ScratchReg1));
+
+  // This jump is taken if StackLimit < SP - stack required.
+  BuildMI(checkMBB, DL, TII.get(ARM::Bcc)).addMBB(postStackMBB)
+    .addImm(ARMCC::LO)
+    .addReg(ARM::CPSR);
+
+
+  // Calling __morestack(StackSize, Size of stack arguments).
+  // __morestack knows that the stack size requested is in SR0(r4)
+  // and amount size of stack arguments is in SR1(r5).
+
+  // Pass first argument for the __morestack by Scratch Register #0.
+  //   The amount size of stack required
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
+                 .addImm(AlignedStackSize)).addReg(0);
+  // Pass second argument for the __morestack by Scratch Register #1.
+  //   The amount size of stack consumed to save function arguments.
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
+                 .addImm(AlignToARMConstant(ARMFI->getArgumentStackSize())))
+                 .addReg(0);
+
+  // push {lr} - Save return address of this function.
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::STMDB_UPD))
+                 .addReg(ARM::SP, RegState::Define)
+                 .addReg(ARM::SP))
+    .addReg(ARM::LR);
+
+  // Call __morestack().
+  BuildMI(allocMBB, DL, TII.get(ARM::BL))
+    .addExternalSymbol("__morestack");
+  
+  // Restore return address of this original function.
+  // pop {lr}
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::LDMIA_UPD))
+                 .addReg(ARM::SP, RegState::Define)
+                 .addReg(ARM::SP))
+    .addReg(ARM::LR);
+
+
+  // Restore SR0 and SR1 in case of __morestack() was called.
+  // __morestack() will skip postStackMBB block so we need to restore
+  // scratch registers from here.
+  // pop {SR0, SR1}
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::LDMIA_UPD))
+                 .addReg(ARM::SP, RegState::Define)
+                 .addReg(ARM::SP))
+    .addReg(ScratchReg0)
+    .addReg(ScratchReg1);
+
+  // Return from this function.
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::MOVr), ARM::PC)
+                 .addReg(ARM::LR)).addReg(0);
+
+  // Restore SR0 and SR1 in case of __morestack() was not called.
+  // pop {SR0, SR1}
+  AddDefaultPred(BuildMI(postStackMBB, DL, TII.get(ARM::LDMIA_UPD))
+                 .addReg(ARM::SP, RegState::Define)
+                 .addReg(ARM::SP))
+    .addReg(ScratchReg0)
+    .addReg(ScratchReg1);
+
+  // Organizing MBB lists
+  postStackMBB->addSuccessor(&prologueMBB);
+
+  allocMBB->addSuccessor(postStackMBB);
+  
+  checkMBB->addSuccessor(postStackMBB);
+  checkMBB->addSuccessor(allocMBB);
+  
+  prevStackMBB->addSuccessor(checkMBB);
+
+#ifdef XDEBUG
+  MF.verify();
+#endif
+}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index efa255a5574a..16b477ae3cd2 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -23,12 +23,14 @@ namespace llvm {
 
 class ARMFrameLowering : public TargetFrameLowering {
 protected:
+  const ARMBaseTargetMachine &TM;
   const ARMSubtarget &STI;
 
 public:
-  explicit ARMFrameLowering(const ARMSubtarget &sti)
+  explicit ARMFrameLowering(const ARMBaseTargetMachine& tm,
+                            const ARMSubtarget &sti)
     : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
-      STI(sti) {
+      TM(tm), STI(sti) {
   }
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
@@ -59,6 +61,8 @@ class ARMFrameLowering : public TargetFrameLowering {
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                             RegScavenger *RS) const;
 
+  void adjustForSegmentedStacks(MachineFunction &MF) const;
+
  private:
   void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 2c51de23f7dc..9e1782e1191c 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1469,14 +1469,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
                        CurDAG->getRegister(0, MVT::i32), Chain };
       return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
-                                    MVT::i32, MVT::Other, Ops, 5);
+                                    MVT::i32, MVT::Other, Ops);
     } else {
       SDValue Chain = LD->getChain();
       SDValue Base = LD->getBasePtr();
       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
                        CurDAG->getRegister(0, MVT::i32), Chain };
       return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
-                                    MVT::i32, MVT::Other, Ops, 6);
+                                    MVT::i32, MVT::Other, Ops);
     }
   }
 
@@ -1525,7 +1525,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
     SDValue Ops[]= { Base, Offset, getAL(CurDAG),
                      CurDAG->getRegister(0, MVT::i32), Chain };
     return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
-                                  MVT::Other, Ops, 5);
+                                  MVT::Other, Ops);
   }
 
   return NULL;
@@ -1539,7 +1539,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form a D register from a pair of S registers.
@@ -1550,7 +1550,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form a quad register from a pair of D registers.
@@ -1560,7 +1560,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form 4 consecutive D registers from a pair of Q registers.
@@ -1570,7 +1570,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form 4 consecutive S registers.
@@ -1585,7 +1585,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                     V2, SubReg2, V3, SubReg3 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form 4 consecutive D registers.
@@ -1599,7 +1599,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                     V2, SubReg2, V3, SubReg3 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form 4 consecutive Q registers.
@@ -1613,7 +1613,7 @@ SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                     V2, SubReg2, V3, SubReg3 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
@@ -1761,7 +1761,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
     Ops.push_back(Pred);
     Ops.push_back(Reg0);
     Ops.push_back(Chain);
-    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
 
   } else {
     // Otherwise, quad registers are loaded with two separate instructions,
@@ -1774,7 +1774,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
-                                          ResTy, AddrTy, MVT::Other, OpsA, 7);
+                                          ResTy, AddrTy, MVT::Other, OpsA);
     Chain = SDValue(VLdA, 2);
 
     // Load the odd subregs.
@@ -1791,8 +1791,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
     Ops.push_back(Pred);
     Ops.push_back(Reg0);
     Ops.push_back(Chain);
-    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
-                                 Ops.data(), Ops.size());
+    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
   }
 
   // Transfer memoperands.
@@ -1913,8 +1912,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
     Ops.push_back(Pred);
     Ops.push_back(Reg0);
     Ops.push_back(Chain);
-    SDNode *VSt =
-      CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
 
     // Transfer memoperands.
     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
@@ -1939,7 +1937,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                         MemAddr.getValueType(),
-                                        MVT::Other, OpsA, 7);
+                                        MVT::Other, OpsA);
   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
   Chain = SDValue(VStA, 1);
 
@@ -1958,7 +1956,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
   Ops.push_back(Reg0);
   Ops.push_back(Chain);
   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
-                                        Ops.data(), Ops.size());
+                                        Ops);
   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
   return VStB;
 }
@@ -2063,8 +2061,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
 
   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes[OpcodeIndex]);
-  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
-                                         Ops.data(), Ops.size());
+  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
   if (!IsLoad)
     return VLdLn;
@@ -2150,8 +2147,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
   if (isUpdating)
     ResTys.push_back(MVT::i32);
   ResTys.push_back(MVT::Other);
-  SDNode *VLdDup =
-    CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
   SuperReg = SDValue(VLdDup, 0);
 
@@ -2197,7 +2193,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
   Ops.push_back(getAL(CurDAG)); // predicate
   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
-  return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
 }
 
 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
@@ -2542,7 +2538,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
   SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
                                            MVT::i32, MVT::i32, MVT::Other,
-                                           Ops.data() ,Ops.size());
+                                           Ops);
   cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
   return ResNode;
 }
@@ -2599,7 +2595,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
-                                         Ops, 4);
+                                         Ops);
       } else {
         SDValue Ops[] = {
           CPIdx,
@@ -2609,7 +2605,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
           CurDAG->getEntryNode()
         };
         ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
-                                       Ops, 5);
+                                       Ops);
       }
       ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
       return NULL;
@@ -2719,7 +2715,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                                                   MVT::i32);
         SDValue Ops[] = { N0.getOperand(0), Imm16,
                           getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
-        return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
+        return CurDAG->getMachineNode(Opc, dl, VT, Ops);
       }
     }
     break;
@@ -2733,16 +2729,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       break;
     if (Subtarget->isThumb()) {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
-                        getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
-                        CurDAG->getRegister(0, MVT::i32) };
-      return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
+                        getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+      return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
     } else {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
                         CurDAG->getRegister(0, MVT::i32) };
       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
                                     ARM::UMULL : ARM::UMULLv5,
-                                    dl, MVT::i32, MVT::i32, Ops, 5);
+                                    dl, MVT::i32, MVT::i32, Ops);
     }
   }
   case ISD::SMUL_LOHI: {
@@ -2751,14 +2746,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     if (Subtarget->isThumb()) {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
-      return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
+      return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
     } else {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
                         CurDAG->getRegister(0, MVT::i32) };
       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
                                     ARM::SMULL : ARM::SMULLv5,
-                                    dl, MVT::i32, MVT::i32, Ops, 5);
+                                    dl, MVT::i32, MVT::i32, Ops);
     }
   }
   case ARMISD::UMLAL:{
@@ -2766,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                         N->getOperand(3), getAL(CurDAG),
                         CurDAG->getRegister(0, MVT::i32)};
-      return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+      return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
     }else{
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                         N->getOperand(3), getAL(CurDAG),
@@ -2774,7 +2769,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                         CurDAG->getRegister(0, MVT::i32) };
       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
                                       ARM::UMLAL : ARM::UMLALv5,
-                                      dl, MVT::i32, MVT::i32, Ops, 7);
+                                      dl, MVT::i32, MVT::i32, Ops);
     }
   }
   case ARMISD::SMLAL:{
@@ -2782,7 +2777,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                         N->getOperand(3), getAL(CurDAG),
                         CurDAG->getRegister(0, MVT::i32)};
-      return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+      return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
     }else{
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                         N->getOperand(3), getAL(CurDAG),
@@ -2790,7 +2785,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                         CurDAG->getRegister(0, MVT::i32) };
       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
                                       ARM::SMLAL : ARM::SMLALv5,
-                                      dl, MVT::i32, MVT::i32, Ops, 7);
+                                      dl, MVT::i32, MVT::i32, Ops);
     }
   }
   case ISD::LOAD: {
@@ -2833,7 +2828,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                                MVT::i32);
     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
-                                             MVT::Glue, Ops, 5);
+                                             MVT::Glue, Ops);
     Chain = SDValue(ResNode, 0);
     if (N->getNumValues() == 2) {
       InFlag = SDValue(ResNode, 1);
@@ -2863,7 +2858,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     SDValue Pred = getAL(CurDAG);
     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
-    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
   }
   case ARMISD::VUZP: {
     unsigned Opc = 0;
@@ -2883,7 +2878,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     SDValue Pred = getAL(CurDAG);
     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
-    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
   }
   case ARMISD::VTRN: {
     unsigned Opc = 0;
@@ -2902,7 +2897,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     SDValue Pred = getAL(CurDAG);
     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
-    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
   }
   case ARMISD::BUILD_VECTOR: {
     EVT VecVT = N->getValueType(0);
@@ -3147,8 +3142,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       Ops.push_back(getAL(CurDAG));
       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
       Ops.push_back(Chain);
-      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
-                                          Ops.size());
+      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
       // Transfer memoperands.
       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -3211,8 +3205,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
 
       unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
 
-      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
-                                          Ops.size());
+      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
       // Transfer memoperands.
       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -3398,7 +3391,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     Ops.push_back(N->getOperand(1));
     Ops.push_back(getAL(CurDAG));                    // Predicate
     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
-    return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+    return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
   }
   case ARMISD::VTBL2: {
     DebugLoc dl = N->getDebugLoc();
@@ -3414,8 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     Ops.push_back(N->getOperand(2));
     Ops.push_back(getAL(CurDAG));                    // Predicate
     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
-    return CurDAG->getMachineNode(ARM::VTBL2, dl, VT,
-                                  Ops.data(), Ops.size());
+    return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
   }
 
   case ISD::CONCAT_VECTORS:
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 514971f01ee8..d4d67e99c5f0 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -564,6 +564,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
     setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);
 
+    // Custom expand long extensions to vectors.
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32,  Custom);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32,  Custom);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64,  Custom);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64,  Custom);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64,  Custom);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64,  Custom);
+
     // NEON does not have single instruction CTPOP for vectors with element
     // types wider than 8-bits.  However, custom lowering can leverage the
     // v8i8/v16i8 vcnt instruction.
@@ -870,8 +880,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   // are at least 4 bytes aligned.
   setMinStackArgumentAlignment(4);
 
-  BenefitFromCodePlacementOpt = true;
-
   // Prefer likely predicted branches to selects on out-of-order cores.
   PredictableSelectIsExpensive = Subtarget->isLikeA9();
 
@@ -1531,7 +1539,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                RegsToPass[i].second, InFlag);
       InFlag = Chain.getValue(1);
     }
-    InFlag =SDValue();
+    InFlag = SDValue();
   }
 
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -2819,6 +2827,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
     VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
                          CCInfo.getNextStackOffset());
 
+  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
+
   return Chain;
 }
 
@@ -3433,6 +3443,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   return FrameAddr;
 }
 
+/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec),
+/// and size(DestVec) > 128-bits.
+/// This is achieved by doing the one extension from the SrcVec, splitting the
+/// result, extending these parts, and then concatenating these into the
+/// destination.
+static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
+  SDValue Op = N->getOperand(0);
+  EVT SrcVT = Op.getValueType();
+  EVT DestVT = N->getValueType(0);
+
+  assert(DestVT.getSizeInBits() > 128 &&
+         "Custom sext/zext expansion needs >128-bit vector.");
+  // If this is a normal length extension, use the default expansion.
+  if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
+      SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
+    return SDValue();
+
+  DebugLoc dl = N->getDebugLoc();
+  unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
+  unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
+  unsigned NumElts = SrcVT.getVectorNumElements();
+  LLVMContext &Ctx = *DAG.getContext();
+  SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
+
+  EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+                               NumElts);
+  EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+                                 NumElts/2);
+  EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
+                               NumElts/2);
+
+  Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
+  SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+                        DAG.getIntPtrConstant(0));
+  SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+                        DAG.getIntPtrConstant(NumElts/2));
+  ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
+  ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
+}
+
 /// ExpandBITCAST - If the target supports VFP, this function is called to
 /// expand a bit convert where either the source or destination type is i64 to
 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
@@ -5621,6 +5672,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::BITCAST:
     Res = ExpandBITCAST(N, DAG);
     break;
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+    Res = ExpandVectorExtension(N, DAG);
+    break;
   case ISD::SRL:
   case ISD::SRA:
     Res = Expand64BitShift(N, DAG, Subtarget);
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 9409f359743c..8aa608bc97af 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -221,6 +221,9 @@ def HasDB            : Predicate<"Subtarget->hasDataBarrier()">,
 def HasMP            : Predicate<"Subtarget->hasMPExtension()">,
                                  AssemblerPredicate<"FeatureMP",
                                                     "mp-extensions">;
+def HasTrustZone     : Predicate<"Subtarget->hasTrustZone()">,
+                                 AssemblerPredicate<"FeatureTrustZone",
+                                                    "TrustZone">;
 def UseNEONForFP     : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
 def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
 def IsThumb          : Predicate<"Subtarget->isThumb()">,
@@ -578,6 +581,13 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
 def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
 def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
 
+/// imm0_4 predicate - Immediate in the range [0,4].
+def Imm0_4AsmOperand : ImmAsmOperand { let Name = "Imm0_4"; }
+def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> {
+  let ParserMatchClass = Imm0_4AsmOperand;
+  let DecoderMethod = "DecodeImm0_4";
+}
+
 /// imm0_7 predicate - Immediate in the range [0,7].
 def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
 def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -741,18 +751,26 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
 // addrmode_imm12 := reg +/- imm12
 //
 def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
-def addrmode_imm12 : Operand<i32>,
+class AddrMode_Imm12 : Operand<i32>,
                      ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
   // 12-bit immediate operand. Note that instructions using this encode
   // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
   // immediate values are as normal.
 
   let EncoderMethod = "getAddrModeImm12OpValue";
-  let PrintMethod = "printAddrModeImm12Operand";
   let DecoderMethod = "DecodeAddrModeImm12Operand";
   let ParserMatchClass = MemImm12OffsetAsmOperand;
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
 }
+
+def addrmode_imm12 : AddrMode_Imm12 {
+  let PrintMethod = "printAddrModeImm12Operand<false>";
+}
+
+def addrmode_imm12_pre : AddrMode_Imm12 {
+  let PrintMethod = "printAddrModeImm12Operand<true>";
+}
+
 // ldst_so_reg := reg +/- reg shop imm
 //
 def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
@@ -852,14 +870,23 @@ def am2offset_imm : Operand<i32>,
 //
 // FIXME: split into imm vs. reg versions.
 def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
-def addrmode3 : Operand<i32>,
-                ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+class AddrMode3 : Operand<i32>,
+                  ComplexPattern<i32, 3, "SelectAddrMode3", []> {
   let EncoderMethod = "getAddrMode3OpValue";
-  let PrintMethod = "printAddrMode3Operand";
   let ParserMatchClass = AddrMode3AsmOperand;
   let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
 }
 
+def addrmode3 : AddrMode3
+{
+  let PrintMethod = "printAddrMode3Operand<false>";
+}
+
+def addrmode3_pre : AddrMode3
+{
+  let PrintMethod = "printAddrMode3Operand<true>";
+}
+
 // FIXME: split into imm vs. reg versions.
 // FIXME: parser method to handle +/- register.
 def AM3OffsetAsmOperand : AsmOperandClass {
@@ -885,15 +912,22 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
 // addrmode5 := reg +/- imm8*4
 //
 def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
-def addrmode5 : Operand<i32>,
-                ComplexPattern<i32, 2, "SelectAddrMode5", []> {
-  let PrintMethod = "printAddrMode5Operand";
+class AddrMode5 : Operand<i32>,
+                  ComplexPattern<i32, 2, "SelectAddrMode5", []> {
   let EncoderMethod = "getAddrMode5OpValue";
   let DecoderMethod = "DecodeAddrMode5Operand";
   let ParserMatchClass = AddrMode5AsmOperand;
   let MIOperandInfo = (ops GPR:$base, i32imm);
 }
 
+def addrmode5 : AddrMode5 {
+   let PrintMethod = "printAddrMode5Operand<false>";
+}
+
+def addrmode5_pre : AddrMode5 {
+   let PrintMethod = "printAddrMode5Operand<true>";
+}
+
 // addrmode6 := reg with optional alignment
 //
 def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
@@ -1010,7 +1044,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
   let isReMaterializable = 1 in {
   def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
                iii, opc, "\t$Rd, $Rn, $imm",
-               [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+           Sched<[WriteALU, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> imm;
@@ -1022,7 +1057,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
   }
   def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
                iir, opc, "\t$Rd, $Rn, $Rm",
-               [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+               [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+           Sched<[WriteALU, ReadALU, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<4> Rm;
@@ -1037,7 +1073,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
   def rsi : AsI1<opcod, (outs GPR:$Rd),
                (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
                iis, opc, "\t$Rd, $Rn, $shift",
-               [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> {
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]>,
+            Sched<[WriteALUsi, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> shift;
@@ -1052,7 +1089,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
   def rsr : AsI1<opcod, (outs GPR:$Rd),
                (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
                iis, opc, "\t$Rd, $Rn, $shift",
-               [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> {
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]>,
+            Sched<[WriteALUsr, ReadALUsr]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> shift;
@@ -1079,7 +1117,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
   let isReMaterializable = 1 in {
   def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
                iii, opc, "\t$Rd, $Rn, $imm",
-               [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> {
+               [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>,
+           Sched<[WriteALU, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> imm;
@@ -1091,7 +1130,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
   }
   def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
                iir, opc, "\t$Rd, $Rn, $Rm",
-               [/* pattern left blank */]> {
+               [/* pattern left blank */]>,
+           Sched<[WriteALU, ReadALU, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<4> Rm;
@@ -1105,7 +1145,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
   def rsi : AsI1<opcod, (outs GPR:$Rd),
                (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
                iis, opc, "\t$Rd, $Rn, $shift",
-               [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> {
+               [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]>,
+            Sched<[WriteALUsi, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> shift;
@@ -1120,7 +1161,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
   def rsr : AsI1<opcod, (outs GPR:$Rd),
                (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
                iis, opc, "\t$Rd, $Rn, $shift",
-               [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
+               [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]>,
+            Sched<[WriteALUsr, ReadALUsr]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> shift;
@@ -1145,24 +1187,28 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
                           bit Commutable = 0> {
   def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
                          4, iii,
-                         [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
+                         [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>,
+                         Sched<[WriteALU, ReadALU]>;
 
   def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
                          4, iir,
-                         [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> {
+                         [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>,
+                         Sched<[WriteALU, ReadALU, ReadALU]> {
     let isCommutable = Commutable;
   }
   def rsi : ARMPseudoInst<(outs GPR:$Rd),
                           (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
                           4, iis,
                           [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
-                                                so_reg_imm:$shift))]>;
+                                                so_reg_imm:$shift))]>,
+                          Sched<[WriteALUsi, ReadALU]>;
 
   def rsr : ARMPseudoInst<(outs GPR:$Rd),
                           (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
                           4, iis,
                           [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
-                                                so_reg_reg:$shift))]>;
+                                                so_reg_reg:$shift))]>,
+                          Sched<[WriteALUSsr, ReadALUsr]>;
 }
 }
 
@@ -1174,19 +1220,22 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
                           bit Commutable = 0> {
   def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
                          4, iii,
-                         [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
+                         [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>,
+           Sched<[WriteALU, ReadALU]>;
 
   def rsi : ARMPseudoInst<(outs GPR:$Rd),
                           (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
                           4, iis,
                           [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift,
-                                             GPR:$Rn))]>;
+                                             GPR:$Rn))]>,
+            Sched<[WriteALUsi, ReadALU]>;
 
   def rsr : ARMPseudoInst<(outs GPR:$Rd),
                           (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
                           4, iis,
                           [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift,
-                                             GPR:$Rn))]>;
+                                             GPR:$Rn))]>,
+            Sched<[WriteALUSsr, ReadALUsr]>;
 }
 }
 
@@ -1199,7 +1248,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
                        PatFrag opnode, bit Commutable = 0> {
   def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
                opc, "\t$Rn, $imm",
-               [(opnode GPR:$Rn, so_imm:$imm)]> {
+               [(opnode GPR:$Rn, so_imm:$imm)]>,
+           Sched<[WriteCMP, ReadALU]> {
     bits<4> Rn;
     bits<12> imm;
     let Inst{25} = 1;
@@ -1212,7 +1262,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
   }
   def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
                opc, "\t$Rn, $Rm",
-               [(opnode GPR:$Rn, GPR:$Rm)]> {
+               [(opnode GPR:$Rn, GPR:$Rm)]>,
+           Sched<[WriteCMP, ReadALU, ReadALU]> {
     bits<4> Rn;
     bits<4> Rm;
     let isCommutable = Commutable;
@@ -1228,7 +1279,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
   def rsi : AI1<opcod, (outs),
                (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis,
                opc, "\t$Rn, $shift",
-               [(opnode GPR:$Rn, so_reg_imm:$shift)]> {
+               [(opnode GPR:$Rn, so_reg_imm:$shift)]>,
+            Sched<[WriteCMPsi, ReadALU]> {
     bits<4> Rn;
     bits<12> shift;
     let Inst{25} = 0;
@@ -1244,7 +1296,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
   def rsr : AI1<opcod, (outs),
                (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis,
                opc, "\t$Rn, $shift",
-               [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]> {
+               [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]>,
+            Sched<[WriteCMPsr, ReadALU]> {
     bits<4> Rn;
     bits<12> shift;
     let Inst{25} = 0;
@@ -1326,7 +1379,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
   def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
                 DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
                [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>,
-               Requires<[IsARM]> {
+               Requires<[IsARM]>,
+           Sched<[WriteALU, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> imm;
@@ -1338,7 +1392,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
   def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
                 DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
                [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>,
-               Requires<[IsARM]> {
+               Requires<[IsARM]>,
+           Sched<[WriteALU, ReadALU, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<4> Rm;
@@ -1353,7 +1408,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
                 (ins GPR:$Rn, so_reg_imm:$shift),
                 DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
               [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>,
-               Requires<[IsARM]> {
+               Requires<[IsARM]>,
+            Sched<[WriteALUsi, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> shift;
@@ -1369,7 +1425,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
                 DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
               [(set GPRnopc:$Rd, CPSR,
                     (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
-               Requires<[IsARM]> {
+               Requires<[IsARM]>,
+            Sched<[WriteALUsr, ReadALUsr]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> shift;
@@ -1392,7 +1449,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
   def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
                 DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
                [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>,
-               Requires<[IsARM]> {
+               Requires<[IsARM]>,
+           Sched<[WriteALU, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> imm;
@@ -1403,7 +1461,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
   }
   def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
                 DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
-               [/* pattern left blank */]> {
+               [/* pattern left blank */]>,
+           Sched<[WriteALU, ReadALU, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<4> Rm;
@@ -1416,7 +1475,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
   def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift),
                 DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
               [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>,
-               Requires<[IsARM]> {
+               Requires<[IsARM]>,
+            Sched<[WriteALUsi, ReadALU]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> shift;
@@ -1430,7 +1490,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
   def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift),
                 DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
               [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>,
-               Requires<[IsARM]> {
+               Requires<[IsARM]>,
+            Sched<[WriteALUsr, ReadALUsr]> {
     bits<4> Rd;
     bits<4> Rn;
     bits<12> shift;
@@ -1641,11 +1702,11 @@ def ATOMUMAX6432  : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
                               NoItinerary, []>;
 }
 
-def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
+def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary,
               "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
-  bits<8> imm;
-  let Inst{27-8} = 0b00110010000011110000;
-  let Inst{7-0} = imm;
+  bits<3> imm;
+  let Inst{27-3} = 0b0011001000001111000000000;
+  let Inst{2-0} = imm;
 }
 
 def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
@@ -1842,7 +1903,8 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in
 // the instruction. The {24-21} opcode bits are set by the fixup, as we don't
 // know until then which form of the instruction will be used.
 def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
-                 MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []> {
+                 MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []>,
+                 Sched<[WriteALU, ReadALU]> {
   bits<4> Rd;
   bits<14> label;
   let Inst{27-25} = 0b001;
@@ -2049,7 +2111,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
 
 // Secure Monitor Call is a system instruction.
 def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
-              []> {
+              []>, Requires<[IsARM, HasTrustZone]> {
   bits<4> opt;
   let Inst{23-4} = 0b01100000000000000111;
   let Inst{3-0} = opt;
@@ -2210,7 +2272,7 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
 multiclass AI2_ldridx<bit isByte, string opc,
                       InstrItinClass iii, InstrItinClass iir> {
   def _PRE_IMM  : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
-                      (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii,
+                      (ins addrmode_imm12_pre:$addr), IndexModePre, LdFrm, iii,
                       opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
     bits<17> addr;
     let Inst{25} = 0;
@@ -2279,7 +2341,7 @@ defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
 
 multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
   def _PRE  : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
-                        (ins addrmode3:$addr), IndexModePre,
+                        (ins addrmode3_pre:$addr), IndexModePre,
                         LdMiscFrm, itin,
                         opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
     bits<14> addr;
@@ -2313,7 +2375,7 @@ defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
 defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
 let hasExtraDefRegAllocReq = 1 in {
 def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
-                          (ins addrmode3:$addr), IndexModePre,
+                          (ins addrmode3_pre:$addr), IndexModePre,
                           LdMiscFrm, IIC_iLoad_d_ru,
                           "ldrd", "\t$Rt, $Rt2, $addr!",
                           "$addr.base = $Rn_wb", []> {
@@ -2469,7 +2531,7 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
 multiclass AI2_stridx<bit isByte, string opc,
                       InstrItinClass iii, InstrItinClass iir> {
   def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
-                            (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
+                            (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre,
                             StFrm, iii,
                             opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
     bits<17> addr;
@@ -2591,7 +2653,7 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
 
 
 def STRH_PRE  : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
-                           (ins GPR:$Rt, addrmode3:$addr), IndexModePre,
+                           (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre,
                            StMiscFrm, IIC_iStore_bh_ru,
                            "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
   bits<14> addr;
@@ -2623,7 +2685,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
 
 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
-                          (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
+                          (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr),
                           IndexModePre, StMiscFrm, IIC_iStore_d_ru,
                           "strd", "\t$Rt, $Rt2, $addr!",
                           "$addr.base = $Rn_wb", []> {
@@ -3866,28 +3928,33 @@ def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
 
 def CLZ  : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
               IIC_iUNAr, "clz", "\t$Rd, $Rm",
-              [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+              [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>,
+           Sched<[WriteALU]>;
 
 def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
               IIC_iUNAr, "rbit", "\t$Rd, $Rm",
               [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
-           Requires<[IsARM, HasV6T2]>;
+           Requires<[IsARM, HasV6T2]>,
+           Sched<[WriteALU]>;
 
 def REV  : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
               IIC_iUNAr, "rev", "\t$Rd, $Rm",
-              [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+              [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>,
+           Sched<[WriteALU]>;
 
 let AddedComplexity = 5 in
 def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
                IIC_iUNAr, "rev16", "\t$Rd, $Rm",
                [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
-               Requires<[IsARM, HasV6]>;
+               Requires<[IsARM, HasV6]>,
+           Sched<[WriteALU]>;
 
 let AddedComplexity = 5 in
 def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
                IIC_iUNAr, "revsh", "\t$Rd, $Rm",
                [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
-               Requires<[IsARM, HasV6]>;
+               Requires<[IsARM, HasV6]>,
+           Sched<[WriteALU]>;
 
 def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
                    (and (srl GPR:$Rm, (i32 8)), 0xFF)),
@@ -3899,7 +3966,8 @@ def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd),
                [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF),
                                       (and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh),
                                            0xFFFF0000)))]>,
-               Requires<[IsARM, HasV6]>;
+               Requires<[IsARM, HasV6]>,
+           Sched<[WriteALUsi, ReadALU]>;
 
 // Alternate cases for PKHBT where identities eliminate some nodes.
 def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)),
@@ -3915,7 +3983,8 @@ def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd),
                [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000),
                                       (and (sra GPRnopc:$Rm, pkh_asr_amt:$sh),
                                            0xFFFF)))]>,
-               Requires<[IsARM, HasV6]>;
+               Requires<[IsARM, HasV6]>,
+           Sched<[WriteALUsi, ReadALU]>;
 
 // Alternate cases for PKHTB where identities eliminate some nodes.  Note that
 // a shift amount of 0 is *not legal* here, it is PKHBT instead.
@@ -4391,7 +4460,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> {
     let Inst{7-0} = addr{7-0};
     let DecoderMethod = "DecodeCopMemInstruction";
   }
-  def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+  def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
                  asm, "\t$cop, $CRd, $addr!", IndexModePre> {
     bits<13> addr;
     bits<4> cop;
@@ -4462,7 +4531,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
     let Inst{7-0} = addr{7-0};
     let DecoderMethod = "DecodeCopMemInstruction";
   }
-  def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+  def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
                     asm, "\t$cop, $CRd, $addr!", IndexModePre> {
     bits<13> addr;
     bits<4> cop;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 0411ac4e282a..44018fc48b6e 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4316,6 +4316,24 @@ def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
 defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                         IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
 
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+
 // Vector Bitwise Operations.
 
 def vnotd : PatFrag<(ops node:$in),
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c9d709eb5222..ad57de54c132 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -150,7 +150,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
 def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
 def t2addrmode_imm12 : Operand<i32>,
                        ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
-  let PrintMethod = "printAddrModeImm12Operand";
+  let PrintMethod = "printAddrModeImm12Operand<false>";
   let EncoderMethod = "getAddrModeImm12OpValue";
   let DecoderMethod = "DecodeT2AddrModeImm12";
   let ParserMatchClass = t2addrmode_imm12_asmoperand;
@@ -3449,7 +3449,8 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
 
 // Secure Monitor Call is a system instruction.
 // Option = Inst{19-16}
-def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> {
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", 
+                []>, Requires<[IsThumb2, HasTrustZone]> {
   let Inst{31-27} = 0b11110;
   let Inst{26-20} = 0b1111111;
   let Inst{15-12} = 0b1000;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 98bd6c168eaf..c8ed5760f935 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -865,7 +865,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
   bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
   // Can't do the merge if the destination register is the same as the would-be
   // writeback register.
-  if (isLd && MI->getOperand(0).getReg() == Base)
+  if (MI->getOperand(0).getReg() == Base)
     return false;
 
   unsigned PredReg = 0;
@@ -1258,6 +1258,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
       // merge the ldr's so far, including this one. But don't try to
       // combine the following ldr(s).
       Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+
+      // Watch out for:
+      // r4 := ldr [r0, #8]
+      // r4 := ldr [r0, #4]
+      //
+      // The optimization may reorder the second ldr in front of the first
+      // ldr, which violates write after write(WAW) dependence. The same as
+      // str. Try to merge inst(s) already in MemOps.
+      bool Overlap = false;
+      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
+        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
+          Overlap = true;
+          break;
+        }
+      }
+
       if (CurrBase == 0 && !Clobber) {
         // Start of a new chain.
         CurrBase = Base;
@@ -1268,7 +1284,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
         MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
         ++NumMemOps;
         Advance = true;
-      } else {
+      } else if (!Overlap) {
         if (Clobber) {
           TryMerge = true;
           Advance = true;
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 88d96c0be8a7..63bbb3213646 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -113,6 +113,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
   /// relocation models.
   unsigned GlobalBaseReg;
 
+  /// ArgumentStackSize - amount of bytes on stack consumed by the arguments
+  /// being passed on the stack
+  unsigned ArgumentStackSize;
+
 public:
   ARMFunctionInfo() :
     isThumb(false),
@@ -175,6 +179,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
   void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
   void setDPRCalleeSavedAreaSize(unsigned s)  { DPRCSSize = s; }
 
+  unsigned getArgumentStackSize() const { return ArgumentStackSize; }
+  void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
+
   bool isGPRCalleeSavedArea1Frame(int fi) const {
     if (fi < 0 || fi >= (int)GPRCS1Frames.size())
       return false;
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 02196d06bfd3..2d088de96e27 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -6,6 +6,77 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction scheduling annotations for out-of-order CPUs.
+// These annotations are independent of the itinerary class defined below.
+// Here we define the subtarget independent read/write per-operand resources.
+// The subtarget schedule definitions will then map these to the subtarget's
+// resource usages.
+// For example:
+// The instruction cycle timings table might contain an entry for an operation
+// like the following:
+// Rd <- ADD Rn, Rm, <shift> Rs
+//  Uops | Latency from register | Uops - resource requirements - latency
+//  2    | Rn: 1 Rm: 4 Rs: 4     | uop T0, Rm, Rs - P01 - 3
+//       |                       | uopc Rd, Rn, T0 -  P01 - 1
+// This is telling us that the result will be available in destination register
+// Rd after a minimum of three cycles after the result in Rm and Rs is available
+// and one cycle after the result in Rn is available. The micro-ops can execute
+// on resource P01.
+// To model this, we need to express that we need to dispatch two micro-ops,
+// that the resource P01 is needed and that the latency to Rn is different than
+// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
+// two.
+// We will do this by assigning (abstract) resources to register defs/uses.
+// ARMSchedule.td:
+//   def WriteALUsr : SchedWrite;
+//   def ReadAdvanceALUsr : ScheRead;
+//
+// ARMInstrInfo.td:
+//   def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
+//                           ReadDefault]> { ...}
+// ReadAdvance read resources allow us to define "pipeline by-passes" or
+// shorter latencies to certain registers as needed in the example above.
+// The "ReadDefault" can be omitted.
+// Next, the subtarget td file assigns resources to the abstract resources
+// defined here.
+// ARMScheduleSubtarget.td:
+//  // Resources.
+//  def P01 : ProcResource<3>; // ALU unit (3 of it).
+//  ...
+//  // Resource usages.
+//  def : WriteRes<WriteALUsr, [P01, P01]> {
+//    Latency = 4; // Latency of 4.
+//    NumMicroOps = 2; // Dispatch 2 micro-ops.
+//    // The two instances of resource P01 are occupied for one cycle. It is one
+//    // cycle because these resources happen to be pipelined.
+//    ResourceCycles = [1, 1];
+//  }
+//  def : ReadAdvance<ReadAdvanceALUsr, 3>;
+
+// Basic ALU operation.
+def WriteALU : SchedWrite;
+def ReadALU : SchedRead;
+
+// Basic ALU with shifts.
+def WriteALUsi : SchedWrite; // Shift by immediate.
+def WriteALUsr : SchedWrite; // Shift by register.
+def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
+def ReadALUsr : SchedRead; // Some operands are read later.
+
+// Compares.
+def WriteCMP : SchedWrite;
+def WriteCMPsi : SchedWrite;
+def WriteCMPsr : SchedWrite;
+
+// Define TII for use in SchedVariant Predicates.
+def : PredicateProlog<[{
+  const ARMBaseInstrInfo *TII =
+    static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
+  (void)TII;
+}]>;
+
+def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(MI)}]>;
 
 //===----------------------------------------------------------------------===//
 // Instruction Itinerary classes used for ARM
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 4191931a5ad3..9739ed20ce2e 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1898,6 +1898,8 @@ def CortexA9Model : SchedMachineModel {
 //===----------------------------------------------------------------------===//
 // Define each kind of processor resource and number available.
 
+let SchedModel = CortexA9Model in {
+
 def A9UnitALU : ProcResource<2>;
 def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
 def A9UnitAGU : ProcResource<1>;
@@ -1918,11 +1920,11 @@ def A9WriteI : SchedWriteRes<[A9UnitALU]>;
 def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
 
 // Basic ALU.
-def A9WriteA : SchedWriteRes<[A9UnitALU]>;
+def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
 // ALU with operand shifted by immediate.
-def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
+def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
 // ALU with operand shifted by register.
-def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
+def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
 
 // Multiplication
 def A9WriteM   : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
@@ -2003,13 +2005,6 @@ foreach NumCycles = 2-8 in {
 def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
 } // foreach NumCycles
 
-// Define TII for use in SchedVariant Predicates.
-def : PredicateProlog<[{
-  const ARMBaseInstrInfo *TII =
-    static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
-  (void)TII;
-}]>;
-
 // Define address generation sequences and predicates for 8 flavors of LDMs.
 foreach NumAddr = 1-8 in {
 
@@ -2254,11 +2249,11 @@ def A9WriteLMfp : SchedWriteVariant<[
 // These mov immediate writers are unconditionally expanded with
 // additive latency.
 def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
-def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>;
+def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
 def A9WriteI2ld  : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
 
 // Some ALU operations can read loaded integer values one cycle early.
-def A9ReadA : SchedReadAdvance<1,
+def A9ReadALU : SchedReadAdvance<1,
   [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
    A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
    A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
@@ -2279,26 +2274,25 @@ def A9Read4 : SchedReadAdvance<3>;
 
 // This table follows the ARM Cortex-A9 Technical Reference Manuals,
 // mostly in order.
-let SchedModel = CortexA9Model in {
 
 def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
                          IIC_iMVNi,IIC_iMVNsi,
                          IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
-def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>;
+def :ItinRW<[A9WriteI,A9ReadALU],[IIC_iMVNr]>;
 def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
 
 def :ItinRW<[A9WriteI2],   [IIC_iMOVix2,IIC_iCMOVix2]>;
 def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
 def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
 
-def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
-def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
-def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>;
-def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
-def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>;
-def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB
-def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
-def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>;
+def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
+def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
+def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>;
+def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
+def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
+def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
+def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
+def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
 
 // A9WriteHi ignored for MUL32.
 def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
@@ -2371,7 +2365,7 @@ def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
                                                       IIC_iStore_m,
                                                       IIC_iStore_mu]>;
 def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
-def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>;
+def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
 
 def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
 
@@ -2486,4 +2480,17 @@ def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
 def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
 def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
 def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
+
+// Map SchedRWs that are identical for cortexa9 to existing resources.
+def : SchedAlias<WriteALU, A9WriteALU>;
+def : SchedAlias<WriteALUsr, A9WriteALUsr>;
+def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
+def : SchedAlias<ReadALU, A9ReadALU>;
+def : SchedAlias<ReadALUsr, A9ReadALU>;
+// FIXME: need to special case AND, ORR, EOR, BIC because they don't read
+// advance. But our instrinfo claims it does.
+
+def : SchedAlias<WriteCMP, A9WriteALU>;
+def : SchedAlias<WriteCMPsi, A9WriteALU>;
+def : SchedAlias<WriteCMPsr, A9WriteALU>;
 } // SchedModel = CortexA9Model
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index e9bc3e0f3955..7c6df410706e 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1078,8 +1078,67 @@ def SwiftModel : SchedMachineModel {
   let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
   let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
   let LoadLatency = 3;
+  let MispredictPenalty = 14; // A branch direction mispredict.
 
   let Itineraries = SwiftItineraries;
 }
 
-// TODO: Add Swift processor and scheduler resources.
+// Swift predicates.
+def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
+
+// Swift resource mapping.
+let SchedModel = SwiftModel in {
+  // Processor resources.
+  def SwiftUnitP01 : ProcResource<2>; // ALU unit.
+  def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
+  def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
+  def SwiftUnitP2 : ProcResource<1>; // LS unit.
+  def SwiftUnitDiv : ProcResource<1>;
+
+  // Generic resource requirements.
+  def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
+  def SwiftWriteP01ThreeCycleTwoUops :
+    SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> {
+    let Latency = 3;
+    let NumMicroOps = 2;
+  }
+  def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
+    let Latency = 3;
+    let NumMicroOps = 3;
+    let ResourceCycles = [3];
+  }
+
+  // 4.2.4 Arithmetic and Logical.
+  // ALU operation register shifted by immediate variant.
+  def SwiftWriteALUsi : SchedWriteVariant<[
+    // lsl #2, lsl #1, or lsr #1.
+    SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
+    SchedVar<NoSchedPred,             [WriteALU]>
+  ]>;
+  def SwiftWriteALUsr : SchedWriteVariant<[
+    SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
+    SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
+  ]>;
+  def SwiftWriteALUSsr : SchedWriteVariant<[
+    SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
+    SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
+  ]>;
+  def SwiftReadAdvanceALUsr : SchedReadVariant<[
+    SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
+    SchedVar<NoSchedPred,      [NoReadAdvance]>
+  ]>;
+  // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
+  // AND,BIC,EOR,ORN,ORR
+  // CLZ,RBIT,REV,REV16,REVSH,PKH
+  def : WriteRes<WriteALU, [SwiftUnitP01]>;
+  def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
+  def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
+  def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
+  def : ReadAdvance<ReadALU, 0>;
+  def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
+
+  // 4.2.5 Integer comparison
+  def : WriteRes<WriteCMP, [SwiftUnitP01]>;
+  def : WriteRes<WriteCMPsi, [SwiftUnitP01]>;
+  def : WriteRes<WriteCMPsr, [SwiftUnitP01]>;
+}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e11314d4fcd2..8653c462f06b 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -19,6 +19,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
 
 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
@@ -42,12 +43,13 @@ StrictAlign("arm-strict-align", cl::Hidden,
             cl::desc("Disallow all unaligned memory accesses"));
 
 ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
-                           const std::string &FS)
+                           const std::string &FS, const TargetOptions &Options)
   : ARMGenSubtargetInfo(TT, CPU, FS)
   , ARMProcFamily(Others)
   , stackAlignment(4)
   , CPUString(CPU)
   , TargetTriple(TT)
+  , Options(Options)
   , TargetABI(ARM_ABI_APCS) {
   initializeEnvironment();
   resetSubtargetFeatures(CPU, FS);
@@ -89,9 +91,11 @@ void ARMSubtarget::initializeEnvironment() {
   HasRAS = false;
   HasMPExtension = false;
   FPOnlySP = false;
+  HasTrustZone = false;
   AllowsUnalignedMem = false;
   Thumb2DSP = false;
   UseNaClTrap = false;
+  UnsafeFPMath = false;
 }
 
 void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
@@ -162,6 +166,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
   // configuration.
   if (!StrictAlign && hasV6Ops() && isTargetDarwin())
     AllowsUnalignedMem = true;
+
+  // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
+  uint64_t Bits = getFeatureBits();
+  if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
+      (Options.UnsafeFPMath || isTargetDarwin()))
+    UseNEONForSinglePrecisionFP = true;
 }
 
 /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8ce22e1de2cb..f111a0ace7b9 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,6 +26,7 @@
 namespace llvm {
 class GlobalValue;
 class StringRef;
+class TargetOptions;
 
 class ARMSubtarget : public ARMGenSubtargetInfo {
 protected:
@@ -147,6 +148,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   /// precision.
   bool FPOnlySP;
 
+  /// HasTrustZone - if true, processor supports TrustZone security extensions
+  bool HasTrustZone;
+
   /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
   /// accesses for some types.  For details, see
   /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
@@ -159,6 +163,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   /// NaCl TRAP instruction is generated instead of the regular TRAP.
   bool UseNaClTrap;
 
+  /// Target machine allowed unsafe FP math (such as use of NEON fp)
+  bool UnsafeFPMath;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -175,6 +182,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   /// Selected instruction itineraries (one entry per itinerary class.)
   InstrItineraryData InstrItins;
 
+  /// Options passed via command line that could influence the target
+  const TargetOptions &Options;
+
  public:
   enum {
     isELF, isDarwin
@@ -189,7 +199,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   /// of the specified triple.
   ///
   ARMSubtarget(const std::string &TT, const std::string &CPU,
-               const std::string &FS);
+               const std::string &FS, const TargetOptions &Options);
 
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
@@ -244,6 +254,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   bool hasVMLxForwarding() const { return HasVMLxForwarding; }
   bool isFPBrccSlow() const { return SlowFPBrcc; }
   bool isFPOnlySP() const { return FPOnlySP; }
+  bool hasTrustZone() const { return HasTrustZone; }
   bool prefers32BitThumb() const { return Pref32BitThumb; }
   bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
   bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
@@ -263,6 +274,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     return TargetTriple.getOS() == Triple::NaCl;
   }
   bool isTargetELF() const { return !isTargetDarwin(); }
+  bool isTargetAndroid() const {
+    return TargetTriple.getEnvironment() == Triple::Android;
+  }
 
   bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
   bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 3003760df736..d42f41d44e0d 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -48,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
                                            Reloc::Model RM, CodeModel::Model CM,
                                            CodeGenOpt::Level OL)
   : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-    Subtarget(TT, CPU, FS),
+    Subtarget(TT, CPU, FS, Options),
     JITInfo(),
     InstrItins(Subtarget.getInstrItineraryData()) {
   // Default to soft float ABI
@@ -84,7 +84,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
                            "v128:64:128-v64:64:64-n32-S32")),
     TLInfo(*this),
     TSInfo(*this),
-    FrameLowering(Subtarget) {
+    FrameLowering(*this, Subtarget) {
   if (!Subtarget.hasARMOps())
     report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
                        "support ARM mode execution!");
@@ -115,8 +115,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
     TLInfo(*this),
     TSInfo(*this),
     FrameLowering(Subtarget.hasThumb2()
-              ? new ARMFrameLowering(Subtarget)
-              : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+              ? new ARMFrameLowering(*this, Subtarget)
+              : (ARMFrameLowering*)new Thumb1FrameLowering(*this, Subtarget)) {
 }
 
 namespace {
@@ -185,8 +185,7 @@ bool ARMPassConfig::addPreSched2() {
       addPass(createARMLoadStoreOptimizationPass());
       printAndVerify("After ARM load / store optimizer");
     }
-    if ((DisableA15SDOptimization || !getARMSubtarget().isCortexA15()) &&
-      getARMSubtarget().hasNEON())
+    if (getARMSubtarget().hasNEON())
       addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
   }
 
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index d4caf5ca6e19..7312b564c864 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -61,6 +61,10 @@ class ARMBaseTargetMachine : public LLVMTargetMachine {
   virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
 
   virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE);
+
+  virtual const ARMBaseInstrInfo *getInstrInfo() const {
+    llvm_unreachable("getInstrInfo not implemented");
+  }
 };
 
 /// ARMTargetMachine - ARM target machine.
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 7a32ffb7878b..1019b972e957 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -211,25 +211,71 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
     { ISD::TRUNCATE,    MVT::v4i32, MVT::v4i64, 0 },
     { ISD::TRUNCATE,    MVT::v4i16, MVT::v4i32, 1 },
 
+    // The number of vmovl instructions for the extension.
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+
     // Operations that we legalize using load/stores to the stack.
-    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 16*2 + 4*4 },
-    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 16*2 + 4*3 },
-    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 8*2 + 2*4 },
-    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 8*2 + 2*3 },
     { ISD::TRUNCATE,    MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
     { ISD::TRUNCATE,    MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
 
     // Vector float <-> i32 conversions.
     { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
+
+    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
+    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
+    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
+    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
+    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
+    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
+    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
+    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
+    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },
+    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },
+
     { ISD::FP_TO_SINT,  MVT::v4i32, MVT::v4f32, 1 },
     { ISD::FP_TO_UINT,  MVT::v4i32, MVT::v4f32, 1 },
+    { ISD::FP_TO_SINT,  MVT::v4i8, MVT::v4f32, 3 },
+    { ISD::FP_TO_UINT,  MVT::v4i8, MVT::v4f32, 3 },
+    { ISD::FP_TO_SINT,  MVT::v4i16, MVT::v4f32, 2 },
+    { ISD::FP_TO_UINT,  MVT::v4i16, MVT::v4f32, 2 },
 
     // Vector double <-> i32 conversions.
     { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
     { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
+
+    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
+    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
+    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
+    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
+    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
+    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
+
     { ISD::FP_TO_SINT,  MVT::v2i32, MVT::v2f64, 2 },
-    { ISD::FP_TO_UINT,  MVT::v2i32, MVT::v2f64, 2 }
+    { ISD::FP_TO_UINT,  MVT::v2i32, MVT::v2f64, 2 },
+    { ISD::FP_TO_SINT,  MVT::v8i16, MVT::v8f32, 4 },
+    { ISD::FP_TO_UINT,  MVT::v8i16, MVT::v8f32, 4 },
+    { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v16f32, 8 },
+    { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v16f32, 8 }
   };
 
   if (SrcTy.isVector() && ST->hasNEON()) {
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c897efd0198f..20468af9a27f 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -610,6 +610,13 @@ class ARMOperand : public MCParsedAsmOperand {
     int64_t Value = CE->getValue();
     return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
   }
+  bool isImm0_4() const {
+    if (!isImm()) return false;
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE) return false;
+    int64_t Value = CE->getValue();
+    return Value >= 0 && Value < 5;
+  }
   bool isImm0_1020s4() const {
     if (!isImm()) return false;
     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -4593,20 +4600,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
     Error(Parser.getTok().getLoc(), "unexpected token in operand");
     return true;
   case AsmToken::Identifier: {
-    if (!tryParseRegisterWithWriteBack(Operands))
-      return false;
-    int Res = tryParseShiftRegister(Operands);
-    if (Res == 0) // success
-      return false;
-    else if (Res == -1) // irrecoverable error
-      return true;
-    // If this is VMRS, check for the apsr_nzcv operand.
-    if (Mnemonic == "vmrs" &&
-        Parser.getTok().getString().equals_lower("apsr_nzcv")) {
-      S = Parser.getTok().getLoc();
-      Parser.Lex();
-      Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
-      return false;
+    // If we've seen a branch mnemonic, the next operand must be a label.  This
+    // is true even if the label is a register name.  So "br r1" means branch to
+    // label "r1".
+    bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl";
+    if (!ExpectLabel) {
+      if (!tryParseRegisterWithWriteBack(Operands))
+        return false;
+      int Res = tryParseShiftRegister(Operands);
+      if (Res == 0) // success
+        return false;
+      else if (Res == -1) // irrecoverable error
+        return true;
+      // If this is VMRS, check for the apsr_nzcv operand.
+      if (Mnemonic == "vmrs" &&
+          Parser.getTok().getString().equals_lower("apsr_nzcv")) {
+        S = Parser.getTok().getLoc();
+        Parser.Lex();
+        Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
+        return false;
+      }
     }
 
     // Fall though for the Identifier case that is not a register or a
@@ -4739,6 +4752,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
       Mnemonic == "mls"   || Mnemonic == "smmls"  || Mnemonic == "vcls"  ||
       Mnemonic == "vmls"  || Mnemonic == "vnmls"  || Mnemonic == "vacge" ||
       Mnemonic == "vcge"  || Mnemonic == "vclt"   || Mnemonic == "vacgt" ||
+      Mnemonic == "vaclt" || Mnemonic == "vacle"  ||
       Mnemonic == "vcgt"  || Mnemonic == "vcle"   || Mnemonic == "smlal" ||
       Mnemonic == "umaal" || Mnemonic == "umlal"  || Mnemonic == "vabal" ||
       Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
@@ -5008,8 +5022,8 @@ static bool isDataTypeToken(StringRef Tok) {
 static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
   return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
 }
-
-static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features,
+                                 unsigned VariantID);
 /// Parse an arm instruction mnemonic followed by its operands.
 bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                     SMLoc NameLoc,
@@ -5020,7 +5034,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // MatchInstructionImpl(), but that's too late for aliases that include
   // any sort of suffix.
   unsigned AvailableFeatures = getAvailableFeatures();
-  applyMnemonicAliases(Name, AvailableFeatures);
+  unsigned AssemblerDialect = getParser().getAssemblerDialect();
+  applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect);
 
   // First check for the ARM-specific .req directive.
   if (Parser.getTok().is(AsmToken::Identifier) &&
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 31a3b0b524f3..32b47fba514d 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -308,6 +308,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                 const void *Decoder);
 
 
 static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
@@ -1996,9 +1998,10 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
   imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
 
   if (Inst.getOpcode() == ARM::MOVTi16)
-    if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+    if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
       return MCDisassembler::Fail;
-  if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+
+  if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
 
   if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
@@ -3049,9 +3052,9 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
 
 static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
-  if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4,
+  if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4,
                                 true, 2, Inst, Decoder))
-    Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
+    Inst.addOperand(MCOperand::CreateImm(Val << 1));
   return MCDisassembler::Success;
 }
 
@@ -3278,7 +3281,7 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
       return MCDisassembler::Fail;
   }
 
-  if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+  if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
     return MCDisassembler::Fail;
 
   if (load) {
@@ -3570,7 +3573,7 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
   unsigned Rn = fieldFromInstruction(Insn, 16, 4);
   unsigned pred = fieldFromInstruction(Insn, 28, 4);
 
-  if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+  if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
 
   if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
@@ -4496,6 +4499,15 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
   return S;
 }
 
+static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                 const void *Decoder)
+{
+  unsigned Imm = fieldFromInstruction(Insn, 0, 3);
+  if (Imm > 4) return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::CreateImm(Imm));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
                                 uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 2afb20d6686a..3bcd083a35fb 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -490,7 +490,8 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
 }
 
 void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
-                                                raw_ostream &O) {
+                                                raw_ostream &O,
+                                                bool AlwaysPrintImm0) {
   const MCOperand &MO1 = MI->getOperand(Op);
   const MCOperand &MO2 = MI->getOperand(Op+1);
   const MCOperand &MO3 = MI->getOperand(Op+2);
@@ -509,7 +510,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
   unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
   ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
 
-  if (ImmOffs || (op == ARM_AM::sub)) {
+  if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) {
     O << ", "
       << markup("<imm:")
       << "#"
@@ -520,6 +521,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
   O << ']' << markup(">");
 }
 
+template <bool AlwaysPrintImm0>
 void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
                                            raw_ostream &O) {
   const MCOperand &MO1 = MI->getOperand(Op);
@@ -535,7 +537,7 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
     printAM3PostIndexOp(MI, Op, O);
     return;
   }
-  printAM3PreOrOffsetIndexOp(MI, Op, O);
+  printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0);
 }
 
 void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
@@ -593,6 +595,7 @@ void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
   O << ARM_AM::getAMSubModeStr(Mode);
 }
 
+template <bool AlwaysPrintImm0>
 void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
                                            raw_ostream &O) {
   const MCOperand &MO1 = MI->getOperand(OpNum);
@@ -608,7 +611,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
 
   unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
   unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
-  if (ImmOffs || Op == ARM_AM::sub) {
+  if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
     O << ", "
       << markup("<imm:")
       << "#"
@@ -1022,6 +1025,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
                    ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
 }
 
+template <bool AlwaysPrintImm0>
 void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
                                                raw_ostream &O) {
   const MCOperand &MO1 = MI->getOperand(OpNum);
@@ -1042,13 +1046,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
     OffImm = 0;
   if (isSub) {
     O << ", "
-      << markup("<imm:") 
+      << markup("<imm:")
       << "#-" << -OffImm
       << markup(">");
   }
-  else if (OffImm > 0) {
+  else if (AlwaysPrintImm0 || OffImm > 0) {
     O << ", "
-      << markup("<imm:") 
+      << markup("<imm:")
       << "#" << OffImm
       << markup(">");
   }
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index edff75d886e9..344104e87359 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -47,12 +47,13 @@ class ARMInstPrinter : public MCInstPrinter {
                                   raw_ostream &O);
   void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
                                    raw_ostream &O);
-
+  template <bool AlwaysPrintImm0>
   void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
                                    raw_ostream &O);
   void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O);
-  void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O);
+  void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
+                                  bool AlwaysPrintImm0);
   void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
                                raw_ostream &O);
   void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -60,6 +61,7 @@ class ARMInstPrinter : public MCInstPrinter {
                                raw_ostream &O);
 
   void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  template <bool AlwaysPrintImm0>
   void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -91,6 +93,7 @@ class ARMInstPrinter : public MCInstPrinter {
                                    raw_ostream &O);
 
   void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  template<bool AlwaysPrintImm0>
   void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
                                  raw_ostream &O);
   void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 418971df3292..6c3d2476688c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -13,7 +13,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMRegisterInfo.h"
 #include "ARMUnwindOp.h"
+#include "ARMUnwindOpAsm.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmBackend.h"
@@ -26,6 +28,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
@@ -33,11 +36,15 @@
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
+static std::string GetAEABIUnwindPersonalityName(unsigned Index) {
+  assert(Index < NUM_PERSONALITY_INDEX && "Invalid personality index");
+  return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str();
+}
+
 namespace {
 
 /// Extend the generic ELFStreamer class so that it can emit mapping symbols at
@@ -57,8 +64,9 @@ class ARMELFStreamer : public MCELFStreamer {
   ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
                  MCCodeEmitter *Emitter, bool IsThumb)
       : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
-        IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0),
-        FnStart(0), Personality(0), CantUnwind(false) {}
+        IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) {
+    Reset();
+  }
 
   ~ARMELFStreamer() {}
 
@@ -75,14 +83,15 @@ class ARMELFStreamer : public MCELFStreamer {
   virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
                            bool isVector);
 
-  virtual void ChangeSection(const MCSection *Section) {
+  virtual void ChangeSection(const MCSection *Section,
+                             const MCExpr *Subsection) {
     // We have to keep track of the mapping symbol state of any sections we
     // use. Each one should start off as EMS_None, which is provided as the
     // default constructor by DenseMap::lookup.
-    LastMappingSymbols[getPreviousSection()] = LastEMS;
+    LastMappingSymbols[getPreviousSection().first] = LastEMS;
     LastEMS = LastMappingSymbols.lookup(Section);
 
-    MCELFStreamer::ChangeSection(Section);
+    MCELFStreamer::ChangeSection(Section, Subsection);
   }
 
   /// This function is the one used to emit instruction data into the ELF
@@ -175,7 +184,7 @@ class ARMELFStreamer : public MCELFStreamer {
     MCELF::SetType(SD, ELF::STT_NOTYPE);
     MCELF::SetBinding(SD, ELF::STB_LOCAL);
     SD.setExternal(false);
-    Symbol->setSection(*getCurrentSection());
+    Symbol->setSection(*getCurrentSection().first);
 
     const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
     Symbol->setVariableValue(Value);
@@ -194,6 +203,7 @@ class ARMELFStreamer : public MCELFStreamer {
   void Reset();
 
   void EmitPersonalityFixup(StringRef Name);
+  void CollectUnwindOpcodes();
 
   void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
                          SectionKind Kind, const MCSymbol &Fn);
@@ -210,9 +220,16 @@ class ARMELFStreamer : public MCELFStreamer {
   MCSymbol *ExTab;
   MCSymbol *FnStart;
   const MCSymbol *Personality;
+  uint32_t VFPRegSave; // Register mask for {d31-d0}
+  uint32_t RegSave; // Register mask for {r15-r0}
+  int64_t SPOffset;
+  uint16_t FPReg;
+  int64_t FPOffset;
+  bool UsedFP;
   bool CantUnwind;
+  UnwindOpcodeAssembler UnwindOpAsm;
 };
-}
+} // end anonymous namespace
 
 inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
                                               unsigned Type,
@@ -238,7 +255,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
   } else {
     EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind);
   }
-  assert(EHSection);
+  assert(EHSection && "Failed to get the required EH section");
 
   // Switch to .ARM.extab or .ARM.exidx section
   SwitchSection(EHSection);
@@ -262,10 +279,20 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
 }
 
 void ARMELFStreamer::Reset() {
+  const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
   ExTab = NULL;
   FnStart = NULL;
   Personality = NULL;
+  VFPRegSave = 0;
+  RegSave = 0;
+  FPReg = MRI.getEncodingValue(ARM::SP);
+  FPOffset = 0;
+  SPOffset = 0;
+  UsedFP = false;
   CantUnwind = false;
+
+  UnwindOpAsm.Reset();
 }
 
 // Add the R_ARM_NONE fixup at the same position
@@ -284,6 +311,18 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
                     MCFixup::getKindForSize(4, false)));
 }
 
+void ARMELFStreamer::CollectUnwindOpcodes() {
+  if (UsedFP) {
+    UnwindOpAsm.EmitSetFP(FPReg);
+    UnwindOpAsm.EmitSPOffset(-FPOffset);
+  } else {
+    UnwindOpAsm.EmitSPOffset(SPOffset);
+  }
+  UnwindOpAsm.EmitVFPRegSave(VFPRegSave);
+  UnwindOpAsm.EmitRegSave(RegSave);
+  UnwindOpAsm.Finalize();
+}
+
 void ARMELFStreamer::EmitFnStart() {
   assert(FnStart == 0);
   FnStart = getContext().CreateTempSymbol();
@@ -294,35 +333,29 @@ void ARMELFStreamer::EmitFnEnd() {
   assert(FnStart && ".fnstart must preceeds .fnend");
 
   // Emit unwind opcodes if there is no .handlerdata directive
-  int PersonalityIndex = -1;
   if (!ExTab && !CantUnwind) {
-    // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab.
-    SwitchToExTabSection(*FnStart);
-
-    // Create .ARM.extab label for offset in .ARM.exidx
-    ExTab = getContext().CreateTempSymbol();
-    EmitLabel(ExTab);
-
-    PersonalityIndex = 1;
-
-    uint32_t Entry = 0;
-    uint32_t NumExtraEntryWords = 0;
-    Entry |= NumExtraEntryWords << 24;
-    Entry |= (EHT_COMPACT | PersonalityIndex) << 16;
-
-    // TODO: This should be generated according to .save, .vsave, .setfp
-    // directives.  Currently, we are simply generating FINISH opcode.
-    Entry |= UNWIND_OPCODE_FINISH << 8;
-    Entry |= UNWIND_OPCODE_FINISH;
-
-    EmitIntValue(Entry, 4, 0);
+    CollectUnwindOpcodes();
+
+    unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
+    if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 ||
+        PersonalityIndex == AEABI_UNWIND_CPP_PR2) {
+      // For the __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to
+      // emit the unwind opcodes in the corresponding ".ARM.extab" section, and
+      // then emit a reference to these unwind opcodes in the second word of
+      // the exception index table entry.
+      SwitchToExTabSection(*FnStart);
+      ExTab = getContext().CreateTempSymbol();
+      EmitLabel(ExTab);
+      EmitBytes(UnwindOpAsm.data(), 0);
+    }
   }
 
   // Emit the exception index table entry
   SwitchToExIdxSection(*FnStart);
 
-  if (PersonalityIndex == 1)
-    EmitPersonalityFixup("__aeabi_unwind_cpp_pr1");
+  unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
+  if (PersonalityIndex < NUM_PERSONALITY_INDEX)
+    EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex));
 
   const MCSymbolRefExpr *FnStartRef =
     MCSymbolRefExpr::Create(FnStart,
@@ -333,12 +366,22 @@ void ARMELFStreamer::EmitFnEnd() {
 
   if (CantUnwind) {
     EmitIntValue(EXIDX_CANTUNWIND, 4, 0);
-  } else {
+  } else if (ExTab) {
+    // Emit a reference to the unwind opcodes in the ".ARM.extab" section.
     const MCSymbolRefExpr *ExTabEntryRef =
       MCSymbolRefExpr::Create(ExTab,
                               MCSymbolRefExpr::VK_ARM_PREL31,
                               getContext());
     EmitValue(ExTabEntryRef, 4, 0);
+  } else {
+    // For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in
+    // the second word of exception index table entry.  The size of the unwind
+    // opcodes should always be 4 bytes.
+    assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 &&
+           "Compact model must use __aeabi_cpp_unwind_pr0 as personality");
+    assert(UnwindOpAsm.size() == 4u &&
+           "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4");
+    EmitBytes(UnwindOpAsm.data(), 0);
   }
 
   // Clean exception handling frame information
@@ -368,36 +411,54 @@ void ARMELFStreamer::EmitHandlerData() {
   EmitValue(PersonalityRef, 4, 0);
 
   // Emit unwind opcodes
-  uint32_t Entry = 0;
-  uint32_t NumExtraEntryWords = 0;
-
-  // TODO: This should be generated according to .save, .vsave, .setfp
-  // directives.  Currently, we are simply generating FINISH opcode.
-  Entry |= NumExtraEntryWords << 24;
-  Entry |= UNWIND_OPCODE_FINISH << 16;
-  Entry |= UNWIND_OPCODE_FINISH << 8;
-  Entry |= UNWIND_OPCODE_FINISH;
-
-  EmitIntValue(Entry, 4, 0);
+  CollectUnwindOpcodes();
+  EmitBytes(UnwindOpAsm.data(), 0);
 }
 
 void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
   Personality = Per;
+  UnwindOpAsm.setPersonality(Per);
 }
 
-void ARMELFStreamer::EmitSetFP(unsigned NewFpReg,
-                               unsigned NewSpReg,
+void ARMELFStreamer::EmitSetFP(unsigned NewFPReg,
+                               unsigned NewSPReg,
                                int64_t Offset) {
-  // TODO: Not implemented
+  assert(SPOffset == 0 &&
+         "Current implementation assumes .setfp precedes .pad");
+
+  const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
+  uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg);
+#ifndef NDEBUG
+  uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg);
+#endif
+
+  assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) &&
+         "the operand of .setfp directive should be either $sp or $fp");
+
+  UsedFP = true;
+  FPReg = NewFPRegEncVal;
+  FPOffset = Offset;
 }
 
 void ARMELFStreamer::EmitPad(int64_t Offset) {
-  // TODO: Not implemented
+  SPOffset += Offset;
 }
 
 void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
                                  bool IsVector) {
-  // TODO: Not implemented
+  const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
+#ifndef NDEBUG
+  unsigned Max = IsVector ? 32 : 16;
+#endif
+  uint32_t &RegMask = IsVector ? VFPRegSave : RegSave;
+
+  for (size_t i = 0; i < RegList.size(); ++i) {
+    unsigned Reg = MRI.getEncodingValue(RegList[i]);
+    assert(Reg < Max && "Register encoded value out of range");
+    RegMask |= 1u << Reg;
+  }
 }
 
 namespace llvm {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
index dad5576df4cd..fa4add65a8df 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
@@ -107,6 +107,19 @@ namespace llvm {
     UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0
   };
 
+  /// ARM-defined Personality Routine Index
+  enum ARMPersonalityRoutineIndex {
+    // To make the exception handling table become more compact, ARM defined
+    // several personality routines in EHABI.  There are 3 different
+    // personality routines in ARM EHABI currently.  It is possible to have 16
+    // pre-defined personality routines at most.
+    AEABI_UNWIND_CPP_PR0 = 0,
+    AEABI_UNWIND_CPP_PR1 = 1,
+    AEABI_UNWIND_CPP_PR2 = 2,
+
+    NUM_PERSONALITY_INDEX
+  };
+
 }
 
 #endif // ARM_UNWIND_OP_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
new file mode 100644
index 000000000000..191db69fbc2b
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -0,0 +1,198 @@
+//===-- ARMUnwindOpAsm.cpp - ARM Unwind Opcodes Assembler -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the unwind opcode assmebler for ARM exception handling
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMUnwindOpAsm.h"
+
+#include "ARMUnwindOp.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+
+void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
+  if (RegSave == 0u)
+    return;
+
+  // One byte opcode to save register r14 and r11-r4
+  if (RegSave & (1u << 4)) {
+    // The one byte opcode will always save r4, thus we can't use the one byte
+    // opcode when r4 is not in .save directive.
+
+    // Compute the consecutive registers from r4 to r11.
+    uint32_t Range = 0;
+    uint32_t Mask = (1u << 4);
+    for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) {
+      if ((RegSave & Bit) == 0u)
+        break;
+      ++Range;
+      Mask |= Bit;
+    }
+
+    // Emit this opcode when the mask covers every registers.
+    uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
+    if (UnmaskedReg == 0u) {
+      // Pop r[4 : (4 + n)]
+      Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range);
+      RegSave &= 0x000fu;
+    } else if (UnmaskedReg == (1u << 14)) {
+      // Pop r[14] + r[4 : (4 + n)]
+      Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range);
+      RegSave &= 0x000fu;
+    }
+  }
+
+  // Two bytes opcode to save register r15-r4
+  if ((RegSave & 0xfff0u) != 0) {
+    uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4);
+    Ops.push_back(static_cast<uint8_t>(Op >> 8));
+    Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+  }
+
+  // Opcode to save register r3-r0
+  if ((RegSave & 0x000fu) != 0) {
+    uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu);
+    Ops.push_back(static_cast<uint8_t>(Op >> 8));
+    Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+  }
+}
+
+/// Emit unwind opcodes for .vsave directives
+void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
+  size_t i = 32;
+
+  while (i > 16) {
+    uint32_t Bit = 1u << (i - 1);
+    if ((VFPRegSave & Bit) == 0u) {
+      --i;
+      continue;
+    }
+
+    uint32_t Range = 0;
+
+    --i;
+    Bit >>= 1;
+
+    while (i > 16 && (VFPRegSave & Bit)) {
+      --i;
+      ++Range;
+      Bit >>= 1;
+    }
+
+    uint32_t Op =
+        UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range;
+    Ops.push_back(static_cast<uint8_t>(Op >> 8));
+    Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+  }
+
+  while (i > 0) {
+    uint32_t Bit = 1u << (i - 1);
+    if ((VFPRegSave & Bit) == 0u) {
+      --i;
+      continue;
+    }
+
+    uint32_t Range = 0;
+
+    --i;
+    Bit >>= 1;
+
+    while (i > 0 && (VFPRegSave & Bit)) {
+      --i;
+      ++Range;
+      Bit >>= 1;
+    }
+
+    uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range;
+    Ops.push_back(static_cast<uint8_t>(Op >> 8));
+    Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+  }
+}
+
+/// Emit unwind opcodes for .setfp directives
+void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) {
+  Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg);
+}
+
+/// Emit unwind opcodes to update stack pointer
+void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
+  if (Offset > 0x200) {
+    uint8_t Buff[10];
+    size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff);
+    Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128);
+    Ops.append(Buff, Buff + Size);
+  } else if (Offset > 0) {
+    if (Offset > 0x100) {
+      Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu);
+      Offset -= 0x100;
+    }
+    Ops.push_back(UNWIND_OPCODE_INC_VSP |
+                  static_cast<uint8_t>((Offset - 4) >> 2));
+  } else if (Offset < 0) {
+    while (Offset < -0x100) {
+      Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu);
+      Offset += 0x100;
+    }
+    Ops.push_back(UNWIND_OPCODE_DEC_VSP |
+                  static_cast<uint8_t>(((-Offset) - 4) >> 2));
+  }
+}
+
+void UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) {
+  size_t SizeInWords = (size() + 3) / 4;
+  assert(SizeInWords <= 0x100u &&
+         "Only 256 additional words are allowed for unwind opcodes");
+  Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1);
+}
+
+void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) {
+  assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix");
+  Ops[Pos] = EHT_COMPACT | PI;
+}
+
+void UnwindOpcodeAssembler::EmitFinishOpcodes() {
+  for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i)
+    Ops.push_back(UNWIND_OPCODE_FINISH);
+}
+
+void UnwindOpcodeAssembler::Finalize() {
+  if (HasPersonality) {
+    // Personality specified by .personality directive
+    Offset = 1;
+    AddOpcodeSizePrefix(1);
+  } else {
+    if (getOpcodeSize() <= 3) {
+      // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ]
+      Offset = 1;
+      PersonalityIndex = AEABI_UNWIND_CPP_PR0;
+      AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+    } else {
+      // __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ]
+      Offset = 0;
+      PersonalityIndex = AEABI_UNWIND_CPP_PR1;
+      AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+      AddOpcodeSizePrefix(1);
+    }
+  }
+
+  // Emit the padding finish opcodes if the size() is not multiple of 4.
+  EmitFinishOpcodes();
+
+  // Swap the byte order
+  uint8_t *Ptr = Ops.begin() + Offset;
+  assert(size() % 4 == 0 && "Final unwind opcodes should align to 4");
+  for (size_t i = 0, n = size(); i < n; i += 4) {
+    std::swap(Ptr[i], Ptr[i + 3]);
+    std::swap(Ptr[i + 1], Ptr[i + 2]);
+  }
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
new file mode 100644
index 000000000000..f6ecaeb8b29f
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -0,0 +1,114 @@
+//===-- ARMUnwindOpAsm.h - ARM Unwind Opcodes Assembler ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the unwind opcode assmebler for ARM exception handling
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_UNWIND_OP_ASM_H
+#define ARM_UNWIND_OP_ASM_H
+
+#include "ARMUnwindOp.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+class UnwindOpcodeAssembler {
+private:
+  llvm::SmallVector<uint8_t, 8> Ops;
+
+  unsigned Offset;
+  unsigned PersonalityIndex;
+  bool HasPersonality;
+
+  enum {
+    // The number of bytes to be preserved for the size and personality index
+    // prefix of unwind opcodes.
+    NUM_PRESERVED_PREFIX_BUF = 2
+  };
+
+public:
+  UnwindOpcodeAssembler()
+      : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF),
+        PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(0) {
+  }
+
+  /// Reset the unwind opcode assembler.
+  void Reset() {
+    Ops.resize(NUM_PRESERVED_PREFIX_BUF);
+    Offset = NUM_PRESERVED_PREFIX_BUF;
+    PersonalityIndex = NUM_PERSONALITY_INDEX;
+    HasPersonality = 0;
+  }
+
+  /// Get the size of the payload (including the size byte)
+  size_t size() const {
+    return Ops.size() - Offset;
+  }
+
+  /// Get the beginning of the payload
+  const uint8_t *begin() const {
+    return Ops.begin() + Offset;
+  }
+
+  /// Get the payload
+  StringRef data() const {
+    return StringRef(reinterpret_cast<const char *>(begin()), size());
+  }
+
+  /// Set the personality index
+  void setPersonality(const MCSymbol *Per) {
+    HasPersonality = 1;
+  }
+
+  /// Get the personality index
+  unsigned getPersonalityIndex() const {
+    return PersonalityIndex;
+  }
+
+  /// Emit unwind opcodes for .save directives
+  void EmitRegSave(uint32_t RegSave);
+
+  /// Emit unwind opcodes for .vsave directives
+  void EmitVFPRegSave(uint32_t VFPRegSave);
+
+  /// Emit unwind opcodes for .setfp directives
+  void EmitSetFP(uint16_t FPReg);
+
+  /// Emit unwind opcodes to update stack pointer
+  void EmitSPOffset(int64_t Offset);
+
+  /// Finalize the unwind opcode sequence for EmitBytes()
+  void Finalize();
+
+private:
+  /// Get the size of the opcodes in bytes.
+  size_t getOpcodeSize() const {
+    return Ops.size() - NUM_PRESERVED_PREFIX_BUF;
+  }
+
+  /// Add the length prefix to the payload
+  void AddOpcodeSizePrefix(size_t Pos);
+
+  /// Add personality index prefix in some compact format
+  void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex);
+
+  /// Fill the words with finish opcode if it is not aligned
+  void EmitFinishOpcodes();
+};
+
+} // namespace llvm
+
+#endif // ARM_UNWIND_OP_ASM_H
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index e17eb4d5e987..a7ac5ca061e6 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMARMDesc
   ARMMCTargetDesc.cpp
   ARMMachObjectWriter.cpp
   ARMELFObjectWriter.cpp
+  ARMUnwindOpAsm.cpp
   )
 add_dependencies(LLVMARMDesc ARMCommonTableGen)
 
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index 463c440852f5..a64707e6f34f 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -173,7 +173,6 @@ GCC is doing a couple of clever things here:
         mov r1, #1
         lsl r1, r1, #8
         tst r2, r1
-  
 
 //===---------------------------------------------------------------------===//
 
@@ -196,7 +195,6 @@ This is especially bad when dynamic alloca is used. The all fixed size stack
 objects are referenced off the frame pointer with negative offsets. See
 oggenc for an example.
 
-
 //===---------------------------------------------------------------------===//
 
 Poor codegen test/CodeGen/ARM/select.ll f7:
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index 5a300afd5d36..e5879845a821 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -22,12 +22,14 @@
 #include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
+  class ARMBaseTargetMachine;
   class ARMSubtarget;
 
 class Thumb1FrameLowering : public ARMFrameLowering {
 public:
-  explicit Thumb1FrameLowering(const ARMSubtarget &sti)
-    : ARMFrameLowering(sti) {
+  explicit Thumb1FrameLowering(const ARMBaseTargetMachine &tm,
+                               const ARMSubtarget &sti)
+    : ARMFrameLowering(tm, sti) {
   }
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 609d502aa50e..7452fb776ebd 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -588,7 +588,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // means the stack pointer cannot be used to access the emergency spill slot
   // when !hasReservedCallFrame().
 #ifndef NDEBUG
-  if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+  if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
     assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) &&
            "Cannot use SP to access the emergency spill slot in "
            "functions without a reserved call frame");
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 567bc05272ea..4795aae12fd6 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -15,6 +15,7 @@
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -79,11 +80,8 @@ namespace {
   { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,   0,   1,  0,0, 1,0,1 },
   { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
   { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
-  // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
-  // likely to cause issue in the loop. As a size / performance workaround,
-  // they are not marked as such.
-  { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 0,0,0 },
-  { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 1,0,0 },
+  { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 1,1,0 },
   // FIXME: Do we need the 16-bit 'S' variant?
   { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,   0,   0,  1,0, 0,0,0 },
   { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,   0,   1,  0,0, 1,0,0 },
@@ -149,8 +147,7 @@ namespace {
     /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
     DenseMap<unsigned, unsigned> ReduceOpcodeMap;
 
-    bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
-                             bool IsSelfLoop);
+    bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
 
     bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                          bool is2Addr, ARMCC::CondCodes Pred,
@@ -160,33 +157,46 @@ namespace {
                          const ReduceEntry &Entry);
 
     bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
-                       const ReduceEntry &Entry, bool LiveCPSR,
-                       MachineInstr *CPSRDef, bool IsSelfLoop);
+                       const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
 
     /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
     /// instruction.
     bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
-                       const ReduceEntry &Entry,
-                       bool LiveCPSR, MachineInstr *CPSRDef,
+                       const ReduceEntry &Entry, bool LiveCPSR,
                        bool IsSelfLoop);
 
     /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
     /// non-two-address instruction.
     bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
-                        const ReduceEntry &Entry,
-                        bool LiveCPSR, MachineInstr *CPSRDef,
+                        const ReduceEntry &Entry, bool LiveCPSR,
                         bool IsSelfLoop);
 
     /// ReduceMI - Attempt to reduce MI, return true on success.
     bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
-                  bool LiveCPSR, MachineInstr *CPSRDef,
-                  bool IsSelfLoop);
+                  bool LiveCPSR, bool IsSelfLoop);
 
     /// ReduceMBB - Reduce width of instructions in the specified basic block.
     bool ReduceMBB(MachineBasicBlock &MBB);
 
     bool OptimizeSize;
     bool MinimizeSize;
+
+    // Last instruction to define CPSR in the current block.
+    MachineInstr *CPSRDef;
+    // Was CPSR last defined by a high latency instruction?
+    // When CPSRDef is null, this refers to CPSR defs in predecessors.
+    bool HighLatencyCPSR;
+
+    struct MBBInfo {
+      // The flags leaving this block have high latency.
+      bool HighLatencyCPSR;
+      // Has this block been visited yet?
+      bool Visited;
+
+      MBBInfo() : HighLatencyCPSR(false), Visited(false) {}
+    };
+
+    SmallVector<MBBInfo, 8> BlockInfo;
   };
   char Thumb2SizeReduce::ID = 0;
 }
@@ -207,6 +217,16 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
   return false;
 }
 
+// Check for a likely high-latency flag def.
+static bool isHighLatencyCPSR(MachineInstr *Def) {
+  switch(Def->getOpcode()) {
+  case ARM::FMSTAT:
+  case ARM::tMUL:
+    return true;
+  }
+  return false;
+}
+
 /// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
 /// the 's' 16-bit instruction partially update CPSR. Abort the
 /// transformation to avoid adding false dependency on last CPSR setting
@@ -225,20 +245,19 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
 /// In this case it would have been ok to narrow the mul.w to muls since there
 /// are indirect RAW dependency between the muls and the mul.w
 bool
-Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
-                                      bool FirstInSelfLoop) {
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
   // Disable the check for -Oz (aka OptimizeForSizeHarder).
   if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
     return false;
 
-  if (!Def)
+  if (!CPSRDef)
     // If this BB loops back to itself, conservatively avoid narrowing the
     // first instruction that does partial flag update.
-    return FirstInSelfLoop;
+    return HighLatencyCPSR || FirstInSelfLoop;
 
   SmallSet<unsigned, 2> Defs;
-  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = Def->getOperand(i);
+  for (unsigned i = 0, e = CPSRDef->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = CPSRDef->getOperand(i);
     if (!MO.isReg() || MO.isUndef() || MO.isUse())
       continue;
     unsigned Reg = MO.getReg();
@@ -256,6 +275,16 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
       return false;
   }
 
+  // If the current CPSR has high latency, try to avoid the false dependency.
+  if (HighLatencyCPSR)
+    return true;
+
+  // tMOVi8 usually doesn't start long dependency chains, and there are a lot
+  // of them, so always shrink them when CPSR doesn't have high latency.
+  if (Use->getOpcode() == ARM::t2MOVi ||
+      Use->getOpcode() == ARM::t2MOVi16)
+    return false;
+
   // No read-after-write dependency. The narrowing will add false dependency.
   return true;
 }
@@ -498,16 +527,15 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
-                                bool LiveCPSR, MachineInstr *CPSRDef,
-                                bool IsSelfLoop) {
+                                bool LiveCPSR, bool IsSelfLoop) {
   unsigned Opc = MI->getOpcode();
   if (Opc == ARM::t2ADDri) {
     // If the source register is SP, try to reduce to tADDrSPi, otherwise
     // it's a normal reduce.
     if (MI->getOperand(1).getReg() != ARM::SP) {
-      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
         return true;
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     }
     // Try to reduce to tADDrSPi.
     unsigned Imm = MI->getOperand(2).getImm();
@@ -557,12 +585,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
       switch (Opc) {
       default: break;
       case ARM::t2ADDSri: {
-        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
           return true;
         // fallthrough
       }
       case ARM::t2ADDSrr:
-        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
       }
     }
     break;
@@ -574,13 +602,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
   case ARM::t2UXTB:
   case ARM::t2UXTH:
     if (MI->getOperand(2).getImm() == 0)
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     break;
   case ARM::t2MOVi16:
     // Can convert only 'pure' immediate operands, not immediates obtained as
     // globals' addresses.
     if (MI->getOperand(1).isImm())
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     break;
   case ARM::t2CMPrr: {
     // Try to reduce to the lo-reg only version first. Why there are two
@@ -590,9 +618,9 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
     // source insn opcode. So for now, we hack a local entry record to use.
     static const ReduceEntry NarrowEntry =
       { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
-    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
+    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
       return true;
-    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
   }
   }
   return false;
@@ -601,8 +629,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
-                                bool LiveCPSR, MachineInstr *CPSRDef,
-                                bool IsSelfLoop) {
+                                bool LiveCPSR, bool IsSelfLoop) {
 
   if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
     return false;
@@ -683,7 +710,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
   // Avoid adding a false dependency on partial flag update by some 16-bit
   // instructions which has the 's' bit set.
   if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
-      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
+      canAddPseudoFlagDep(MI, IsSelfLoop))
     return false;
 
   // Add the 16-bit instruction.
@@ -720,8 +747,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry,
-                                 bool LiveCPSR, MachineInstr *CPSRDef,
-                                 bool IsSelfLoop) {
+                                 bool LiveCPSR, bool IsSelfLoop) {
   if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
     return false;
 
@@ -780,7 +806,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
   // Avoid adding a false dependency on partial flag update by some 16-bit
   // instructions which has the 's' bit set.
   if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
-      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
+      canAddPseudoFlagDep(MI, IsSelfLoop))
     return false;
 
   // Add the 16-bit instruction.
@@ -865,8 +891,7 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
 }
 
 bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
-                                bool LiveCPSR, MachineInstr *CPSRDef,
-                                bool IsSelfLoop) {
+                                bool LiveCPSR, bool IsSelfLoop) {
   unsigned Opcode = MI->getOpcode();
   DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
   if (OPI == ReduceOpcodeMap.end())
@@ -875,16 +900,16 @@ bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
 
   // Don't attempt normal reductions on "special" cases for now.
   if (Entry.Special)
-    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
 
   // Try to transform to a 16-bit two-address instruction.
   if (Entry.NarrowOpc2 &&
-      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
     return true;
 
   // Try to transform to a 16-bit non-two-address instruction.
   if (Entry.NarrowOpc1 &&
-      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
     return true;
 
   return false;
@@ -895,9 +920,25 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 
   // Yes, CPSR could be livein.
   bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
-  MachineInstr *CPSRDef = 0;
   MachineInstr *BundleMI = 0;
 
+  CPSRDef = 0;
+  HighLatencyCPSR = false;
+
+  // Check predecessors for the latest CPSRDef.
+  for (MachineBasicBlock::pred_iterator
+       I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
+    const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
+    if (!PInfo.Visited) {
+      // Since blocks are visited in RPO, this must be a back-edge.
+      continue;
+    }
+    if (PInfo.HighLatencyCPSR) {
+      HighLatencyCPSR = true;
+      break;
+    }
+  }
+
   // If this BB loops back to itself, conservatively avoid narrowing the
   // first instruction that does partial flag update.
   bool IsSelfLoop = MBB.isSuccessor(&MBB);
@@ -911,13 +952,15 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
       BundleMI = MI;
       continue;
     }
+    if (MI->isDebugValue())
+      continue;
 
     LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
 
     // Does NextMII belong to the same bundle as MI?
     bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
 
-    if (ReduceMI(MBB, MI, LiveCPSR, CPSRDef, IsSelfLoop)) {
+    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
       Modified = true;
       MachineBasicBlock::instr_iterator I = prior(NextMII);
       MI = &*I;
@@ -944,14 +987,19 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
     if (MI->isCall()) {
       // Calls don't really set CPSR.
       CPSRDef = 0;
+      HighLatencyCPSR = false;
       IsSelfLoop = false;
     } else if (DefCPSR) {
       // This is the last CPSR defining instruction.
       CPSRDef = MI;
+      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
       IsSelfLoop = false;
     }
   }
 
+  MBBInfo &Info = BlockInfo[MBB.getNumber()];
+  Info.HighLatencyCPSR = HighLatencyCPSR;
+  Info.Visited = true;
   return Modified;
 }
 
@@ -967,9 +1015,16 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
   MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
                                       Attribute::MinSize);
 
+  BlockInfo.clear();
+  BlockInfo.resize(MF.getNumBlockIDs());
+
+  // Visit blocks in reverse post-order so LastCPSRDef is known for all
+  // predecessors.
+  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
   bool Modified = false;
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
-    Modified |= ReduceMBB(*I);
+  for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
+       I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+    Modified |= ReduceMBB(**I);
   return Modified;
 }
 
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 604abf93ccfa..3e69098edcc3 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -518,7 +518,6 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
         attrs.removeAttribute(Attribute::StackAlignment);
       }
 
-      assert(!attrs.hasAttributes() && "Unhandled attribute!");
       Out << "PAS = AttributeSet::get(mod->getContext(), ";
       if (index == ~0U)
         Out << "~0U,";
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 62aed1353ca3..178662447a7f 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -701,7 +701,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
 
   // If the induction variable bump is not a power of 2, quit.
   // Othwerise we'd need a general integer division.
-  if (!isPowerOf2_64(abs(IVBump)))
+  if (!isPowerOf2_64(abs64(IVBump)))
     return 0;
 
   MachineBasicBlock *PH = Loop->getLoopPreheader();
@@ -1430,7 +1430,6 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
     return 0;
 
   typedef MachineBasicBlock::instr_iterator instr_iterator;
-  typedef MachineBasicBlock::pred_iterator pred_iterator;
 
   // Verify that all existing predecessors have analyzable branches
   // (or no branches at all).
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 3a1c48bac913..2aecb413a6d7 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -113,6 +113,46 @@ class HexagonDAGToDAGISel : public SelectionDAGISel {
   SDNode *SelectAdd(SDNode *N);
   bool isConstExtProfitable(SDNode *N) const;
 
+// XformMskToBitPosU5Imm - Returns the bit position which
+// the single bit 32 bit mask represents.
+// Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
+  int32_t bitPos;
+  bitPos = Log2_32(Imm);
+  assert(bitPos >= 0 && bitPos < 32 &&
+         "Constant out of range for 32 BitPos Memops");
+  return CurDAG->getTargetConstant(bitPos, MVT::i32);
+}
+
+// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit
+// mask represents. Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
+  return XformMskToBitPosU5Imm(Imm);
+}
+
+// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit
+// mask represents. Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
+  return XformMskToBitPosU5Imm(Imm);
+}
+
+// Return true if there is exactly one bit set in V, i.e., if V is one of the
+// following integers: 2^0, 2^1, ..., 2^31.
+bool ImmIsSingleBit(uint32_t v) const {
+  uint32_t c = CountPopulation_64(v);
+  // Only return true if we counted 1 bit.
+  return c == 1;
+}
+
+// XformM5ToU5Imm - Return a target constant with the specified value, of type
+// i32 where the negative literal is transformed into a positive literal for
+// use in -= memops.
+inline SDValue XformM5ToU5Imm(signed Imm) {
+   assert( (Imm >= -31 && Imm <= -1)  && "Constant out of range for Memops");
+   return CurDAG->getTargetConstant( - Imm, MVT::i32);
+}
+
+
 // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
 // [1..128], used in cmpb.gtu instructions.
 inline SDValue XformU7ToU7M1Imm(signed Imm) {
@@ -657,7 +697,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
 
     // Build post increment store.
     SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
-                                            MVT::Other, Ops, 4);
+                                            MVT::Other, Ops);
     MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
     MemOp[0] = ST->getMemOperand();
     cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
@@ -683,8 +723,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
 
   // Build regular store.
   SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
-  SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops,
-                                            4);
+  SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   // Build splitted incriment instruction.
   SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
                                             Base,
@@ -740,7 +779,7 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
                          Value, Chain};
         // build indexed store
         SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
-                                                MVT::Other, Ops, 4);
+                                                MVT::Other, Ops);
         MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
         MemOp[0] = ST->getMemOperand();
         cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
@@ -1190,8 +1229,7 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
     }
     EVT ReturnValueVT = N->getValueType(0);
     SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
-                                            ReturnValueVT,
-                                            Ops.data(), Ops.size());
+                                            ReturnValueVT, Ops);
     ReplaceUses(N, Result);
     return Result;
   }
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 0a8b1af21b93..15858a9368ae 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1370,10 +1370,12 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
     // Increase jump tables cutover to 5, was 4.
     setMinimumJumpTableEntries(5);
 
+    setOperationAction(ISD::BR_CC, MVT::Other, Expand);
     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
     setOperationAction(ISD::BR_CC, MVT::i1,  Expand);
     setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+    setOperationAction(ISD::BR_CC, MVT::i64, Expand);
 
     setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 96a252e1b8c7..60b12ac01c9c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -537,6 +537,15 @@ MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   return(0);
 }
 
+MachineInstr*
+HexagonInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+                                           int FrameIx, uint64_t Offset,
+                                           const MDNode *MDPtr,
+                                           DebugLoc DL) const {
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Hexagon::DBG_VALUE))
+    .addImm(0).addImm(Offset).addMetadata(MDPtr);
+  return &*MIB;
+}
 
 unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
 
@@ -1881,6 +1890,13 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
   return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
 }
 
+bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+
+  assert(isPredicated(MI));
+  return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
+
 bool
 HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
                                    std::vector<MachineOperand> &Pred) const {
@@ -1991,46 +2007,28 @@ isValidOffset(const int Opcode, const int Offset) const {
     return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
       (Offset <= Hexagon_ADDI_OFFSET_MAX);
 
-  case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
-  case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
-  case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
-  case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
-  case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
-  case Hexagon::MEMw_ORr_indexed_MEM_V4 :
-  case Hexagon::MEMw_ADDi_MEM_V4 :
-  case Hexagon::MEMw_SUBi_MEM_V4 :
-  case Hexagon::MEMw_ADDr_MEM_V4 :
-  case Hexagon::MEMw_SUBr_MEM_V4 :
-  case Hexagon::MEMw_ANDr_MEM_V4 :
-  case Hexagon::MEMw_ORr_MEM_V4 :
+  case Hexagon::MemOPw_ADDi_V4 :
+  case Hexagon::MemOPw_SUBi_V4 :
+  case Hexagon::MemOPw_ADDr_V4 :
+  case Hexagon::MemOPw_SUBr_V4 :
+  case Hexagon::MemOPw_ANDr_V4 :
+  case Hexagon::MemOPw_ORr_V4 :
     return (0 <= Offset && Offset <= 255);
 
-  case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
-  case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
-  case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
-  case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
-  case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
-  case Hexagon::MEMh_ORr_indexed_MEM_V4 :
-  case Hexagon::MEMh_ADDi_MEM_V4 :
-  case Hexagon::MEMh_SUBi_MEM_V4 :
-  case Hexagon::MEMh_ADDr_MEM_V4 :
-  case Hexagon::MEMh_SUBr_MEM_V4 :
-  case Hexagon::MEMh_ANDr_MEM_V4 :
-  case Hexagon::MEMh_ORr_MEM_V4 :
+  case Hexagon::MemOPh_ADDi_V4 :
+  case Hexagon::MemOPh_SUBi_V4 :
+  case Hexagon::MemOPh_ADDr_V4 :
+  case Hexagon::MemOPh_SUBr_V4 :
+  case Hexagon::MemOPh_ANDr_V4 :
+  case Hexagon::MemOPh_ORr_V4 :
     return (0 <= Offset && Offset <= 127);
 
-  case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
-  case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
-  case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
-  case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
-  case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
-  case Hexagon::MEMb_ORr_indexed_MEM_V4 :
-  case Hexagon::MEMb_ADDi_MEM_V4 :
-  case Hexagon::MEMb_SUBi_MEM_V4 :
-  case Hexagon::MEMb_ADDr_MEM_V4 :
-  case Hexagon::MEMb_SUBr_MEM_V4 :
-  case Hexagon::MEMb_ANDr_MEM_V4 :
-  case Hexagon::MEMb_ORr_MEM_V4 :
+  case Hexagon::MemOPb_ADDi_V4 :
+  case Hexagon::MemOPb_SUBi_V4 :
+  case Hexagon::MemOPb_ADDr_V4 :
+  case Hexagon::MemOPb_SUBr_V4 :
+  case Hexagon::MemOPb_ANDr_V4 :
+  case Hexagon::MemOPb_ORr_V4 :
     return (0 <= Offset && Offset <= 63);
 
   // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of
@@ -2086,44 +2084,33 @@ isMemOp(const MachineInstr *MI) const {
   switch (MI->getOpcode())
   {
     default: return false;
-    case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
-    case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
-    case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
-    case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
-    case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
-    case Hexagon::MEMw_ORr_indexed_MEM_V4 :
-    case Hexagon::MEMw_ADDi_MEM_V4 :
-    case Hexagon::MEMw_SUBi_MEM_V4 :
-    case Hexagon::MEMw_ADDr_MEM_V4 :
-    case Hexagon::MEMw_SUBr_MEM_V4 :
-    case Hexagon::MEMw_ANDr_MEM_V4 :
-    case Hexagon::MEMw_ORr_MEM_V4 :
-    case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
-    case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
-    case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
-    case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
-    case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
-    case Hexagon::MEMh_ORr_indexed_MEM_V4 :
-    case Hexagon::MEMh_ADDi_MEM_V4 :
-    case Hexagon::MEMh_SUBi_MEM_V4 :
-    case Hexagon::MEMh_ADDr_MEM_V4 :
-    case Hexagon::MEMh_SUBr_MEM_V4 :
-    case Hexagon::MEMh_ANDr_MEM_V4 :
-    case Hexagon::MEMh_ORr_MEM_V4 :
-    case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
-    case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
-    case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
-    case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
-    case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
-    case Hexagon::MEMb_ORr_indexed_MEM_V4 :
-    case Hexagon::MEMb_ADDi_MEM_V4 :
-    case Hexagon::MEMb_SUBi_MEM_V4 :
-    case Hexagon::MEMb_ADDr_MEM_V4 :
-    case Hexagon::MEMb_SUBr_MEM_V4 :
-    case Hexagon::MEMb_ANDr_MEM_V4 :
-    case Hexagon::MEMb_ORr_MEM_V4 :
-      return true;
+    case Hexagon::MemOPw_ADDi_V4 :
+    case Hexagon::MemOPw_SUBi_V4 :
+    case Hexagon::MemOPw_ADDr_V4 :
+    case Hexagon::MemOPw_SUBr_V4 :
+    case Hexagon::MemOPw_ANDr_V4 :
+    case Hexagon::MemOPw_ORr_V4 :
+    case Hexagon::MemOPh_ADDi_V4 :
+    case Hexagon::MemOPh_SUBi_V4 :
+    case Hexagon::MemOPh_ADDr_V4 :
+    case Hexagon::MemOPh_SUBr_V4 :
+    case Hexagon::MemOPh_ANDr_V4 :
+    case Hexagon::MemOPh_ORr_V4 :
+    case Hexagon::MemOPb_ADDi_V4 :
+    case Hexagon::MemOPb_SUBi_V4 :
+    case Hexagon::MemOPb_ADDr_V4 :
+    case Hexagon::MemOPb_SUBr_V4 :
+    case Hexagon::MemOPb_ANDr_V4 :
+    case Hexagon::MemOPb_ORr_V4 :
+    case Hexagon::MemOPb_SETBITi_V4:
+    case Hexagon::MemOPh_SETBITi_V4:
+    case Hexagon::MemOPw_SETBITi_V4:
+    case Hexagon::MemOPb_CLRBITi_V4:
+    case Hexagon::MemOPh_CLRBITi_V4:
+    case Hexagon::MemOPw_CLRBITi_V4:
+    return true;
   }
+  return false;
 }
 
 
@@ -2382,6 +2369,13 @@ isConditionalStore (const MachineInstr* MI) const {
   }
 }
 
+// Returns true, if any one of the operands is a dot new
+// insn, whether it is predicated dot new or register dot new.
+bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const {
+  return (isNewValueInst(MI) ||
+     (isPredicated(MI) && isPredicatedNew(MI)));
+}
+
 unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
   const uint64_t F = MI->getDesc().TSFlags;
 
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index d2f059aa7947..5df13a88b5d3 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -127,6 +127,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
                                    const BranchProbability &Probability) const;
 
   virtual bool isPredicated(const MachineInstr *MI) const;
+  virtual bool isPredicatedNew(const MachineInstr *MI) const;
   virtual bool DefinesPredicate(MachineInstr *MI,
                                 std::vector<MachineOperand> &Pred) const;
   virtual bool
@@ -140,6 +141,11 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
   isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles,
                             const BranchProbability &Probability) const;
 
+  virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+                                                 int FrameIx,
+                                                 uint64_t Offset,
+                                                 const MDNode *MDPtr,
+                                                 DebugLoc DL) const;
   virtual DFAPacketizer*
   CreateTargetScheduleState(const TargetMachine *TM,
                             const ScheduleDAG *DAG) const;
@@ -170,6 +176,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
   bool isConditionalLoad (const MachineInstr* MI) const;
   bool isConditionalStore(const MachineInstr* MI) const;
   bool isNewValueInst(const MachineInstr* MI) const;
+  bool isDotNewInst(const MachineInstr* MI) const;
   bool isDeallocRet(const MachineInstr *MI) const;
   unsigned getInvertedPredicatedOpcode(const int Opc) const;
   bool isExtendable(const MachineInstr* MI) const;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index d7bab200f9fd..f671fd328923 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -97,8 +97,7 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
 //===----------------------------------------------------------------------===//
 multiclass ALU32_Pbase<string mnemonic, bit isNot,
                        bit isPredNew> {
-
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : ALU32_rr<(outs IntRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
@@ -107,7 +106,7 @@ multiclass ALU32_Pbase<string mnemonic, bit isNot,
 }
 
 multiclass ALU32_Pred<string mnemonic, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ALU32_Pbase<mnemonic, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ALU32_Pbase<mnemonic, PredNot, 1>;
@@ -144,7 +143,7 @@ defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel;
 // ALU32/ALU (ADD with register-immediate form)
 //===----------------------------------------------------------------------===//
 multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : ALU32_ri<(outs IntRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
@@ -153,7 +152,7 @@ multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
 }
 
 multiclass ALU32ri_Pred<string mnemonic, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>;
@@ -222,13 +221,13 @@ def SUB_ri : ALU32_ri<(outs IntRegs:$dst),
 
 
 multiclass TFR_Pred<bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     def _c#NAME : ALU32_rr<(outs IntRegs:$dst),
                            (ins PredRegs:$src1, IntRegs:$src2),
             !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
             []>;
     // Predicate new
-    let PNewValue = "new" in
+    let isPredicatedNew = 1 in
     def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
                              (ins PredRegs:$src1, IntRegs:$src2),
             !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
@@ -274,10 +273,10 @@ class T_TFR64_Pred<bit PredNot, bit isPredNew>
 }
 
 multiclass TFR64_Pred<bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     def _c#NAME : T_TFR64_Pred<PredNot, 0>;
 
-    let PNewValue = "new" in
+    let isPredicatedNew = 1 in
     def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new
   }
 }
@@ -309,14 +308,14 @@ multiclass TFR64_base<string BaseName> {
 }
 
 multiclass TFRI_Pred<bit PredNot> {
-  let isMoveImm = 1, PredSense = !if(PredNot, "false", "true") in {
+  let isMoveImm = 1, isPredicatedFalse = PredNot in {
     def _c#NAME : ALU32_ri<(outs IntRegs:$dst),
                            (ins PredRegs:$src1, s12Ext:$src2),
             !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2",
             []>;
 
     // Predicate new
-    let PNewValue = "new" in
+    let isPredicatedNew = 1 in
     def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
                              (ins PredRegs:$src1, s12Ext:$src2),
             !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2",
@@ -446,38 +445,58 @@ def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
                                                     s8ExtPred:$src2,
                                                     s8ImmPred:$src3)))]>;
 
-// Shift halfword.
-let isPredicable = 1 in
-def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
-           "$dst = aslh($src1)",
-           [(set (i32 IntRegs:$dst), (shl 16, (i32 IntRegs:$src1)))]>;
+// ALU32 - aslh, asrh, sxtb, sxth, zxtb, zxth
+multiclass ALU32_2op_Pbase<string mnemonic, bit isNot, bit isPredNew> {
+  let isPredicatedNew = isPredNew in
+  def NAME : ALU32Inst<(outs IntRegs:$dst),
+                       (ins PredRegs:$src1, IntRegs:$src2),
+            !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
+            ") $dst = ")#mnemonic#"($src2)">,
+            Requires<[HasV4T]>;
+}
 
-let isPredicable = 1 in
-def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
-           "$dst = asrh($src1)",
-           [(set (i32 IntRegs:$dst), (sra 16, (i32 IntRegs:$src1)))]>;
+multiclass ALU32_2op_Pred<string mnemonic, bit PredNot> {
+  let isPredicatedFalse = PredNot in {
+    defm _c#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 0>;
+    // Predicate new
+    defm _cdn#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 1>;
+  }
+}
 
-// Sign extend.
-let isPredicable = 1 in
-def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
-           "$dst = sxtb($src1)",
-           [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i8))]>;
+multiclass ALU32_2op_base<string mnemonic> {
+  let BaseOpcode = mnemonic in {
+    let isPredicable = 1, neverHasSideEffects = 1 in
+    def NAME : ALU32Inst<(outs IntRegs:$dst),
+                         (ins IntRegs:$src1),
+            "$dst = "#mnemonic#"($src1)">;
 
-let isPredicable = 1 in
-def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
-           "$dst = sxth($src1)",
-           [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i16))]>;
-
-// Zero extend.
-let isPredicable = 1, neverHasSideEffects = 1 in
-def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
-           "$dst = zxtb($src1)",
-           []>;
+    let Predicates = [HasV4T], validSubTargets = HasV4SubT, isPredicated = 1,
+    neverHasSideEffects = 1 in {
+      defm Pt_V4    : ALU32_2op_Pred<mnemonic, 0>;
+      defm NotPt_V4 : ALU32_2op_Pred<mnemonic, 1>;
+    }
+  }
+}
+
+defm ASLH : ALU32_2op_base<"aslh">, PredNewRel;
+defm ASRH : ALU32_2op_base<"asrh">, PredNewRel;
+defm SXTB : ALU32_2op_base<"sxtb">, PredNewRel;
+defm SXTH : ALU32_2op_base<"sxth">,  PredNewRel;
+defm ZXTB : ALU32_2op_base<"zxtb">, PredNewRel;
+defm ZXTH : ALU32_2op_base<"zxth">,  PredNewRel;
+
+def : Pat <(shl (i32 IntRegs:$src1), (i32 16)),
+           (ASLH IntRegs:$src1)>;
+
+def : Pat <(sra (i32 IntRegs:$src1), (i32 16)),
+           (ASRH IntRegs:$src1)>;
+
+def : Pat <(sext_inreg (i32 IntRegs:$src1), i8),
+           (SXTB IntRegs:$src1)>;
+
+def : Pat <(sext_inreg (i32 IntRegs:$src1), i16),
+           (SXTH IntRegs:$src1)>;
 
-let isPredicable = 1, neverHasSideEffects = 1 in
-def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
-                    "$dst = zxth($src1)",
-                    []>;
 //===----------------------------------------------------------------------===//
 // ALU32/PERM -
 //===----------------------------------------------------------------------===//
@@ -488,29 +507,30 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
 //===----------------------------------------------------------------------===//
 
 // Conditional combine.
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1 in {
 def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
             "if ($src1) $dst = combine($src2, $src3)",
             []>;
 
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedFalse = 1 in
 def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
             "if (!$src1) $dst = combine($src2, $src3)",
             []>;
 
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedNew = 1 in
 def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
             "if ($src1.new) $dst = combine($src2, $src3)",
             []>;
 
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedNew = 1, isPredicatedFalse = 1 in
 def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
             "if (!$src1.new) $dst = combine($src2, $src3)",
             []>;
+}
 
 // Compare.
 defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel;
@@ -871,7 +891,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
 // Load -- MEMri operand
 multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
                           bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : LDInst2<(outs RC:$dst),
                        (ins PredRegs:$src1, MEMri:$addr),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -880,7 +900,7 @@ multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
 }
 
 multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
@@ -937,7 +957,7 @@ def : Pat < (i64 (load ADDRriS11_3:$addr)),
 // Load - Base with Immediate offset addressing mode
 multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
                         bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : LDInst2<(outs RC:$dst),
                      (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -947,7 +967,7 @@ multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
 
 multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
                         bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
@@ -1009,20 +1029,6 @@ def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
             (LDrid_indexed IntRegs:$src1, s11_3ExtPred:$offset) >;
 }
 
-let neverHasSideEffects = 1 in
-def LDrid_GP : LDInst2<(outs DoubleRegs:$dst),
-            (ins globaladdress:$global, u16Imm:$offset),
-            "$dst = memd(#$global+$offset)",
-            []>,
-            Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDd_GP : LDInst2<(outs DoubleRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst = memd(#$global)",
-            []>,
-            Requires<[NoV4T]>;
-
 //===----------------------------------------------------------------------===//
 // Post increment load
 // Make sure that in post increment load, the first operand is always the post
@@ -1031,7 +1037,7 @@ def LDd_GP : LDInst2<(outs DoubleRegs:$dst),
 
 multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
                             bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
                        (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1042,7 +1048,7 @@ multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
 
 multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC,
                            Operand ImmOp, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
     // Predicate new
     let Predicates = [HasV4T], validSubTargets = HasV4SubT in
@@ -1095,27 +1101,6 @@ let AddedComplexity = 20 in
 def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
             (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >;
 
-let neverHasSideEffects = 1 in
-def LDrib_GP : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global, u16Imm:$offset),
-            "$dst = memb(#$global+$offset)",
-            []>,
-            Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDb_GP : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst = memb(#$global)",
-            []>,
-            Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDub_GP : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst = memub(#$global)",
-            []>,
-            Requires<[NoV4T]>;
-
 def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)),
             (i32 (LDrih ADDRriS11_1:$addr))>;
 
@@ -1123,27 +1108,6 @@ let AddedComplexity = 20 in
 def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))),
             (i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >;
 
-let neverHasSideEffects = 1 in
-def LDrih_GP : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global, u16Imm:$offset),
-            "$dst = memh(#$global+$offset)",
-            []>,
-            Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDh_GP : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst = memh(#$global)",
-            []>,
-            Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDuh_GP : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst = memuh(#$global)",
-            []>,
-            Requires<[NoV4T]>;
-
 let AddedComplexity = 10 in
 def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
             (i32 (LDriub ADDRriS11_0:$addr))>;
@@ -1152,21 +1116,6 @@ let AddedComplexity = 20 in
 def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))),
             (i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>;
 
-let neverHasSideEffects = 1 in
-def LDriub_GP : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global, u16Imm:$offset),
-            "$dst = memub(#$global+$offset)",
-            []>,
-            Requires<[NoV4T]>;
-
-// Load unsigned halfword.
-let neverHasSideEffects = 1 in
-def LDriuh_GP : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global, u16Imm:$offset),
-            "$dst = memuh(#$global+$offset)",
-            []>,
-            Requires<[NoV4T]>;
-
 // Load predicate.
 let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
 isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in
@@ -1175,21 +1124,6 @@ def LDriw_pred : LDInst2<(outs PredRegs:$dst),
             "Error; should not emit",
             []>;
 
-// Indexed load.
-let neverHasSideEffects = 1 in
-def LDriw_GP : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global, u16Imm:$offset),
-            "$dst = memw(#$global+$offset)",
-            []>,
-            Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDw_GP : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst = memw(#$global)",
-            []>,
-            Requires<[NoV4T]>;
-
 // Deallocate stack frame.
 let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
   def DEALLOCFRAME : LDInst2<(outs), (ins),
@@ -1423,35 +1357,15 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
 // ST +
 //===----------------------------------------------------------------------===//
 ///
-/// Assumptions::: ****** DO NOT IGNORE ********
-/// 1. Make sure that in post increment store, the zero'th operand is always the
-///    post increment operand.
-/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the
-///    last operand.
-///
 // Store doubleword.
 
-let neverHasSideEffects = 1 in
-def STrid_GP : STInst2<(outs),
-            (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
-            "memd(#$global+$offset) = $src",
-            []>,
-            Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def STd_GP : STInst2<(outs),
-            (ins globaladdress:$global, DoubleRegs:$src),
-            "memd(#$global) = $src",
-            []>,
-            Requires<[NoV4T]>;
-
 //===----------------------------------------------------------------------===//
 // Post increment store
 //===----------------------------------------------------------------------===//
 
 multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
                             bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : STInst2PI<(outs IntRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1462,7 +1376,7 @@ multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
 
 multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC,
                            Operand ImmOp, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME# : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
     // Predicate new
     let Predicates = [HasV4T], validSubTargets = HasV4SubT in
@@ -1516,7 +1430,7 @@ def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2,
 //===----------------------------------------------------------------------===//
 multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
                           bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : STInst2<(outs),
             (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1525,7 +1439,7 @@ multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
 
     // Predicate new
@@ -1582,7 +1496,7 @@ def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr),
 //===----------------------------------------------------------------------===//
 multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
                         bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : STInst2<(outs),
             (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1592,7 +1506,7 @@ multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
 
 multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
                         bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true"), isPredicated = 1 in {
+  let isPredicatedFalse = PredNot, isPredicated = 1 in {
     defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
 
     // Predicate new
@@ -1655,36 +1569,6 @@ def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2,
                          (i64 DoubleRegs:$src1))>;
 }
 
-// memb(gp+#u16:0)=Rt
-let neverHasSideEffects = 1 in
-def STrib_GP : STInst2<(outs),
-            (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
-            "memb(#$global+$offset) = $src",
-            []>,
-            Requires<[NoV4T]>;
-
-// memb(#global)=Rt
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def STb_GP : STInst2<(outs),
-            (ins globaladdress:$global, IntRegs:$src),
-            "memb(#$global) = $src",
-            []>,
-            Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1 in
-def STrih_GP : STInst2<(outs),
-            (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
-            "memh(#$global+$offset) = $src",
-            []>,
-            Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def STh_GP   : STInst2<(outs),
-            (ins globaladdress:$global, IntRegs:$src),
-            "memh(#$global) = $src",
-            []>,
-            Requires<[NoV4T]>;
-
 // memh(Rx++#s4:1)=Rt.H
 
 // Store word.
@@ -1695,20 +1579,6 @@ def STriw_pred : STInst2<(outs),
             "Error; should not emit",
             []>;
 
-let neverHasSideEffects = 1 in
-def STriw_GP : STInst2<(outs),
-            (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
-            "memw(#$global+$offset) = $src",
-            []>,
-            Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def STw_GP : STInst2<(outs),
-            (ins globaladdress:$global, IntRegs:$src),
-            "memw(#$global) = $src",
-            []>,
-            Requires<[NoV4T]>;
-
 // Allocate stack frame.
 let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
   def ALLOCFRAME : STInst2<(outs),
@@ -2183,68 +2053,26 @@ def : Pat<(HexagonTCRet (i32 IntRegs:$dst)),
 
 // Atomic load and store support
 // 8 bit atomic load
-def : Pat<(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
-          (i32 (LDub_GP tglobaladdr:$global))>,
-            Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global),
-                              u16ImmPred:$offset)),
-          (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>,
-            Requires<[NoV4T]>;
-
 def : Pat<(atomic_load_8 ADDRriS11_0:$src1),
           (i32 (LDriub ADDRriS11_0:$src1))>;
 
 def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)),
           (i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
 
-
-
 // 16 bit atomic load
-def : Pat<(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
-          (i32 (LDuh_GP tglobaladdr:$global))>,
-            Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global),
-                               u16ImmPred:$offset)),
-          (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>,
-            Requires<[NoV4T]>;
-
 def : Pat<(atomic_load_16 ADDRriS11_1:$src1),
           (i32 (LDriuh ADDRriS11_1:$src1))>;
 
 def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)),
           (i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
 
-
-
-// 32 bit atomic load
-def : Pat<(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
-          (i32 (LDw_GP tglobaladdr:$global))>,
-            Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global),
-                               u16ImmPred:$offset)),
-          (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>,
-            Requires<[NoV4T]>;
-
 def : Pat<(atomic_load_32 ADDRriS11_2:$src1),
           (i32 (LDriw ADDRriS11_2:$src1))>;
 
 def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)),
           (i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
 
-
 // 64 bit atomic load
-def : Pat<(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
-          (i64 (LDd_GP tglobaladdr:$global))>,
-            Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
-                               u16ImmPred:$offset)),
-          (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>,
-          Requires<[NoV4T]>;
-
 def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
           (i64 (LDrid ADDRriS11_3:$src1))>;
 
@@ -2252,30 +2080,6 @@ def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
           (i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
 
 
-// 64 bit atomic store
-def : Pat<(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
-                           (i64 DoubleRegs:$src1)),
-          (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
-          Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
-                                u16ImmPred:$offset),
-                           (i64 DoubleRegs:$src1)),
-          (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
-                    (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
-
-// 8 bit atomic store
-def : Pat<(atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
-                          (i32 IntRegs:$src1)),
-          (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
-          Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
-                               u16ImmPred:$offset),
-                          (i32 IntRegs:$src1)),
-          (STrib_GP tglobaladdr:$global, u16ImmPred:$offset,
-                    (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
-
 def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
           (STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>;
 
@@ -2285,18 +2089,6 @@ def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
                          (i32 IntRegs:$src1))>;
 
 
-// 16 bit atomic store
-def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
-                           (i32 IntRegs:$src1)),
-          (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
-          Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
-                                u16ImmPred:$offset),
-                           (i32 IntRegs:$src1)),
-          (STrih_GP tglobaladdr:$global, u16ImmPred:$offset,
-                    (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
-
 def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
           (STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>;
 
@@ -2305,20 +2097,6 @@ def : Pat<(atomic_store_16 (i32 IntRegs:$src1),
           (STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset,
                          (i32 IntRegs:$src1))>;
 
-
-// 32 bit atomic store
-def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
-                           (i32 IntRegs:$src1)),
-          (STw_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
-          Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
-                                u16ImmPred:$offset),
-                           (i32 IntRegs:$src1)),
-          (STriw_GP tglobaladdr:$global, u16ImmPred:$offset,
-                                         (i32 IntRegs:$src1))>,
-            Requires<[NoV4T]>;
-
 def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
           (STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>;
 
@@ -2387,198 +2165,8 @@ def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
 def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
       (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
 
-// Map from store(globaladdress + x) -> memd(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
-                  (add (HexagonCONST32_GP tglobaladdr:$global),
-                       u16ImmPred:$offset)),
-      (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
-                (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memd(#foo).
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
-                  (HexagonCONST32_GP tglobaladdr:$global)),
-      (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
-      Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memw(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1),
-              (add (HexagonCONST32_GP tglobaladdr:$global),
-                                      u16ImmPred:$offset)),
-      (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
-      Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memw(#foo + 0).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
-      (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>;
-
-// Map from store(globaladdress) -> memw(#foo).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
-      (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>,
-      Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei16 (i32 IntRegs:$src1),
-                          (add (HexagonCONST32_GP tglobaladdr:$global),
-                               u16ImmPred:$offset)),
-      (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
-      Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei16 (i32 IntRegs:$src1),
-                          (HexagonCONST32_GP tglobaladdr:$global)),
-      (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
-      Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei8 (i32 IntRegs:$src1),
-                         (add (HexagonCONST32_GP tglobaladdr:$global),
-                              u16ImmPred:$offset)),
-      (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
-      Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei8 (i32 IntRegs:$src1),
-                         (HexagonCONST32_GP tglobaladdr:$global)),
-      (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memw(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
-                      u16ImmPred:$offset))),
-      (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
-      (i32 (LDw_GP tglobaladdr:$global))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memd(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
-                           u16ImmPred:$offset))),
-      (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo + 0).
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
-      (i64 (LDd_GP tglobaladdr:$global))>,
-      Requires<[NoV4T]>;
-
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd.
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
-      (i1 (TFR_PdRs (i32 (LDb_GP tglobaladdr:$global))))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
-                            u16ImmPred:$offset))),
-      (i32 (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
-      (i32 (LDrih_GP tglobaladdr:$global, 0))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memuh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
-                             u16ImmPred:$offset))),
-      (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memuh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
-      (i32 (LDriuh_GP tglobaladdr:$global, 0))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
-      (i32 (LDh_GP tglobaladdr:$global))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memuh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
-      (i32 (LDuh_GP tglobaladdr:$global))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
-                           u16ImmPred:$offset))),
-      (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
-                            u16ImmPred:$offset))),
-      (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memub(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
-                            u16ImmPred:$offset))),
-      (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
-      (i32 (LDb_GP tglobaladdr:$global))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
-      (i32 (LDb_GP tglobaladdr:$global))>,
-      Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memub(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
-      (i32 (LDub_GP tglobaladdr:$global))>,
-      Requires<[NoV4T]>;
-
-// When the Interprocedural Global Variable optimizer realizes that a
-// certain global variable takes only two constant values, it shrinks the
-// global to a boolean. Catch those loads here in the following 3 patterns.
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
-      (i32 (LDb_GP tglobaladdr:$global))>,
-      Requires<[NoV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
-      (i32 (LDb_GP tglobaladdr:$global))>,
-      Requires<[NoV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
-      (i32 (LDub_GP tglobaladdr:$global))>,
-      Requires<[NoV4T]>;
-
 // Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+let AddedComplexity = 10 in
 def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
       (i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>;
 
@@ -2694,12 +2282,6 @@ def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
 def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
       (STrib ADDRriS11_2:$addr, (TFRI 1))>;
 
-let AddedComplexity = 100 in
-// Map from i1 = constant<-1>; memw(CONST32(#foo)) = i1 -> r0 = 1;
-// memw(#foo) = r0
-def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
-      (STb_GP tglobaladdr:$global, (TFRI 1))>,
-      Requires<[NoV4T]>;
 
 // Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
 def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 1d0643d03b29..bbfb363691b0 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -95,164 +95,6 @@ def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr),
 //===----------------------------------------------------------------------===//
 // ALU32 +
 //===----------------------------------------------------------------------===//
-
-// Shift halfword.
-
-let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
-def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1) $dst = aslh($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1) $dst = aslh($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1.new) $dst = aslh($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1.new) $dst = aslh($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1) $dst = asrh($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1) $dst = asrh($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1.new) $dst = asrh($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1.new) $dst = asrh($src2)",
-            []>,
-            Requires<[HasV4T]>;
-}
-
-// Sign extend.
-
-let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
-def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1) $dst = sxtb($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1) $dst = sxtb($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1.new) $dst = sxtb($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1.new) $dst = sxtb($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-
-def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1) $dst = sxth($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1) $dst = sxth($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1.new) $dst = sxth($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1.new) $dst = sxth($src2)",
-            []>,
-            Requires<[HasV4T]>;
-}
-
-// Zero exten.
-
-let neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in {
-def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1) $dst = zxtb($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1) $dst = zxtb($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1.new) $dst = zxtb($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1.new) $dst = zxtb($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1) $dst = zxth($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1) $dst = zxth($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if ($src1.new) $dst = zxth($src2)",
-            []>,
-            Requires<[HasV4T]>;
-
-def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2),
-            "if (!$src1.new) $dst = zxth($src2)",
-            []>,
-            Requires<[HasV4T]>;
-}
-
 // Generate frame index addresses.
 let neverHasSideEffects = 1, isReMaterializable = 1,
 isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in
@@ -465,7 +307,7 @@ def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
 // addressing mode
 multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
                              bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : LDInst2<(outs RC:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -474,7 +316,7 @@ multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>;
@@ -596,329 +438,6 @@ def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))),
             Requires<[HasV4T]>;
 }
 
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst=memd(#$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1) $dst=memd(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-
-// if (!Pv) Rtt=memd(##global)
-def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1) $dst=memd(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rtt=memd(##global)
-def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1.new) $dst=memd(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-
-// if (!Pv) Rtt=memd(##global)
-def LDd_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1.new) $dst=memd(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-}
-
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDb_GP_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst=memb(#$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1) $dst=memb(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memb(##global)
-def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1) $dst=memb(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rt=memb(##global)
-def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1.new) $dst=memb(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memb(##global)
-def LDb_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1.new) $dst=memb(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-}
-
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst=memub(#$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1) $dst=memub(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-
-// if (!Pv) Rt=memub(##global)
-def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1) $dst=memub(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rt=memub(##global)
-def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1.new) $dst=memub(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-
-// if (!Pv) Rt=memub(##global)
-def LDub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1.new) $dst=memub(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-}
-
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst=memh(#$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1) $dst=memh(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memh(##global)
-def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1) $dst=memh(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rt=memh(##global)
-def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1.new) $dst=memh(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memh(##global)
-def LDh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1.new) $dst=memh(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-}
-
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst=memuh(#$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1) $dst=memuh(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memuh(##global)
-def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1) $dst=memuh(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rt=memuh(##global)
-def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1.new) $dst=memuh(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memuh(##global)
-def LDuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1.new) $dst=memuh(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-}
-
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins globaladdress:$global),
-            "$dst=memw(#$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1) $dst=memw(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-
-// if (!Pv) Rt=memw(##global)
-def LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1) $dst=memw(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) Rt=memw(##global)
-def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if ($src1.new) $dst=memw(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-
-
-// if (!Pv) Rt=memw(##global)
-def LDw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, globaladdress:$global),
-            "if (!$src1.new) $dst=memw(##$global)",
-            []>,
-            Requires<[HasV4T]>;
-}
-
-
-def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
-           (i64 (LDd_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
-           (i32 (LDw_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
-           (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
-           (i32 (LDub_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo + 0)
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
-           (i64 (LDd_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
-           (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>,
-           Requires<[HasV4T]>;
-
-// When the Interprocedural Global Variable optimizer realizes that a certain
-// global variable takes only two constant values, it shrinks the global to
-// a boolean. Catch those loads here in the following 3 patterns.
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
-           (i32 (LDb_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
-           (i32 (LDb_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
-           (i32 (LDb_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
-           (i32 (LDb_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
-           (i32 (LDub_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memub(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
-           (i32 (LDub_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
-           (i32 (LDh_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
-           (i32 (LDh_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memuh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
-           (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
-           (i32 (LDw_GP_V4 tglobaladdr:$global))>,
-            Requires<[HasV4T]>;
-
 // zext i1->i64
 def : Pat <(i64 (zext (i1 PredRegs:$src1))),
       (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
@@ -1079,7 +598,7 @@ def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
 // mode
 multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
                              bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : STInst2<(outs),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
                  RC:$src5),
@@ -1090,7 +609,7 @@ multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>;
@@ -1118,7 +637,7 @@ multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
 // addressing mode.
 multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
                              bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_nv_V4 : NVInst_V4<(outs),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
                  RC:$src5),
@@ -1129,7 +648,7 @@ multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>;
@@ -1222,7 +741,7 @@ def STrid_shl_V4 : STInst<(outs),
 //===----------------------------------------------------------------------===//
 multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
                         bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : STInst2<(outs),
             (ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1232,7 +751,7 @@ multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
 }
 
 multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>;
@@ -1366,233 +885,14 @@ def STriw_shl_V4 : STInst<(outs),
             "memw($src1<<#$src2+#$src3) = $src4",
             [(store (i32 IntRegs:$src4),
                     (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
-                              u0AlwaysExtPred:$src3))]>,
-            Requires<[HasV4T]>;
-
-// memw(Rx++#s4:2)=Rt
-// memw(Rx++#s4:2:circ(Mu))=Rt
-// memw(Rx++I:circ(Mu))=Rt
-// memw(Rx++Mu)=Rt
-// memw(Rx++Mu:brev)=Rt
-// memw(gp+#u16:2)=Rt
-
-
-// memd(#global)=Rtt
-let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1,
-validSubTargets = HasV4SubT in
-def STd_GP_V4 : STInst2<(outs),
-            (ins globaladdress:$global, DoubleRegs:$src),
-            "memd(#$global) = $src",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memd(##global) = Rtt
-let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1,
-isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
-def STd_GP_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
-            "if ($src1) memd(##$global) = $src2",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memd(##global) = Rtt
-def STd_GP_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
-            "if (!$src1) memd(##$global) = $src2",
-            []>,
-              Requires<[HasV4T]>;
-
-// if (Pv) memd(##global) = Rtt
-def STd_GP_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
-            "if ($src1.new) memd(##$global) = $src2",
-            []>,
-              Requires<[HasV4T]>;
-
-// if (!Pv) memd(##global) = Rtt
-def STd_GP_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
-            "if (!$src1.new) memd(##$global) = $src2",
-            []>,
-            Requires<[HasV4T]>;
-}
-
-// memb(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STb_GP_V4 : STInst2<(outs),
-            (ins globaladdress:$global, IntRegs:$src),
-            "memb(#$global) = $src",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1,
-isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
-def STb_GP_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1) memb(##$global) = $src2",
-              []>,
-              Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-def STb_GP_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1) memb(##$global) = $src2",
-              []>,
-              Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-def STb_GP_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1.new) memb(##$global) = $src2",
-              []>,
-              Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-def STb_GP_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1.new) memb(##$global) = $src2",
-              []>,
-              Requires<[HasV4T]>;
-}
-
-// memh(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STh_GP_V4 : STInst2<(outs),
-            (ins globaladdress:$global, IntRegs:$src),
-            "memh(#$global) = $src",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1,
-isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
-def STh_GP_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1) memh(##$global) = $src2",
-              []>,
-              Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-def STh_GP_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1) memh(##$global) = $src2",
-              []>,
-              Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-def STh_GP_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1.new) memh(##$global) = $src2",
-              []>,
-              Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-def STh_GP_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1.new) memh(##$global) = $src2",
-              []>,
-              Requires<[HasV4T]>;
-}
-
-// memw(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STw_GP_V4 : STInst2<(outs),
-            (ins globaladdress:$global, IntRegs:$src),
-            "memw(#$global) = $src",
-              []>,
-              Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1,
-isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
-def STw_GP_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1) memw(##$global) = $src2",
-              []>,
-              Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-def STw_GP_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1) memw(##$global) = $src2",
-              []>,
-              Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-def STw_GP_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1.new) memw(##$global) = $src2",
-              []>,
-              Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-def STw_GP_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1.new) memw(##$global) = $src2",
-            []>,
-              Requires<[HasV4T]>;
-}
-
-// 64 bit atomic store
-def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
-                            (i64 DoubleRegs:$src1)),
-           (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
-           Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memd(#foo)
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
-                  (HexagonCONST32_GP tglobaladdr:$global)),
-           (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
-           Requires<[HasV4T]>;
-
-// 8 bit atomic store
-def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
-                            (i32 IntRegs:$src1)),
-            (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
-              Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1),
-          (HexagonCONST32_GP tglobaladdr:$global)),
-          (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
-          Requires<[HasV4T]>;
-
-// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
-//       to "r0 = 1; memw(#foo) = r0"
-let AddedComplexity = 100 in
-def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
-          (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>,
-          Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
-                           (i32 IntRegs:$src1)),
-          (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
-          Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1),
-                         (HexagonCONST32_GP tglobaladdr:$global)),
-          (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
-          Requires<[HasV4T]>;
-
-// 32 bit atomic store
-def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
-                           (i32 IntRegs:$src1)),
-          (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
-          Requires<[HasV4T]>;
+                              u0AlwaysExtPred:$src3))]>,
+            Requires<[HasV4T]>;
 
-// Map from store(globaladdress) -> memw(#foo)
-let AddedComplexity = 100 in
-def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
-          (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
-          Requires<[HasV4T]>;
+// memw(Rx++#s4:2)=Rt
+// memw(Rx++#s4:2:circ(Mu))=Rt
+// memw(Rx++I:circ(Mu))=Rt
+// memw(Rx++Mu)=Rt
+// memw(Rx++Mu:brev)=Rt
 
 //===----------------------------------------------------------------------===
 // ST -
@@ -1607,7 +907,7 @@ def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
 //
 multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
                             Operand predImmOp, bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_nv_V4 : NVInst_V4<(outs),
             (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1618,7 +918,7 @@ multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
 
 multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp,
                            bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>;
@@ -1660,7 +960,7 @@ let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in {
 // and MEMri operand.
 multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
                           bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_nv_V4 : NVInst_V4<(outs),
             (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1670,7 +970,7 @@ multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>;
 
     // Predicate new
@@ -1722,7 +1022,7 @@ def STrib_shl_nv_V4 : NVInst_V4<(outs),
 
 multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp,
                             bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1734,7 +1034,7 @@ multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp,
 
 multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC,
                            Operand ImmOp, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>;
     // Predicate new
     let Predicates = [HasV4T], validSubTargets = HasV4SubT in
@@ -1772,15 +1072,6 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
 // memb(Rx++I:circ(Mu))=Nt.new
 // memb(Rx++Mu)=Nt.new
 // memb(Rx++Mu:brev)=Nt.new
-
-// memb(#global)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_nv_V4 : NVInst_V4<(outs),
-            (ins globaladdress:$global, IntRegs:$src),
-            "memb(#$global) = $src.new",
-            []>,
-            Requires<[HasV4T]>;
-
 // memh(Ru<<#u2+#U6)=Nt.new
 let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
 isNVStore = 1, validSubTargets = HasV4SubT in
@@ -1795,14 +1086,6 @@ def STrih_shl_nv_V4 : NVInst_V4<(outs),
 // memh(Rx++Mu)=Nt.new
 // memh(Rx++Mu:brev)=Nt.new
 
-// memh(#global)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_nv_V4 : NVInst_V4<(outs),
-            (ins globaladdress:$global, IntRegs:$src),
-            "memh(#$global) = $src.new",
-            []>,
-            Requires<[HasV4T]>;
-
 // memw(Ru<<#u2+#U6)=Nt.new
 let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
 isNVStore = 1, validSubTargets = HasV4SubT in
@@ -1816,102 +1099,6 @@ def STriw_shl_nv_V4 : NVInst_V4<(outs),
 // memw(Rx++I:circ(Mu))=Nt.new
 // memw(Rx++Mu)=Nt.new
 // memw(Rx++Mu:brev)=Nt.new
-// memw(gp+#u16:2)=Nt.new
-
-let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1,
-validSubTargets = HasV4SubT in
-def STw_GP_nv_V4 : NVInst_V4<(outs),
-            (ins globaladdress:$global, IntRegs:$src),
-            "memw(#$global) = $src.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1,
-isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
-def STb_GP_cPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1) memb(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1) memb(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1.new) memb(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1.new) memb(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-def STh_GP_cPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1) memh(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1) memh(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1.new) memh(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1.new) memh(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-def STw_GP_cPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1) memw(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1) memw(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if ($src1.new) memw(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
-            "if (!$src1.new) memw(##$global) = $src2.new",
-            []>,
-            Requires<[HasV4T]>;
-}
 
 //===----------------------------------------------------------------------===//
 // NV/ST -
@@ -2658,414 +1845,367 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
 // MEMOP: Word, Half, Byte
 //===----------------------------------------------------------------------===//
 
+def MEMOPIMM : SDNodeXForm<imm, [{
+  // Call the transformation function XformM5ToU5Imm to get the negative
+  // immediate's positive counterpart.
+  int32_t imm = N->getSExtValue();
+  return XformM5ToU5Imm(imm);
+}]>;
+
+def MEMOPIMM_HALF : SDNodeXForm<imm, [{
+  // -1 .. -31 represented as 65535..65515
+  // assigning to a short restores our desired signed value.
+  // Call the transformation function XformM5ToU5Imm to get the negative
+  // immediate's positive counterpart.
+  int16_t imm = N->getSExtValue();
+  return XformM5ToU5Imm(imm);
+}]>;
+
+def MEMOPIMM_BYTE : SDNodeXForm<imm, [{
+  // -1 .. -31 represented as 255..235
+  // assigning to a char restores our desired signed value.
+  // Call the transformation function XformM5ToU5Imm to get the negative
+  // immediate's positive counterpart.
+  int8_t imm = N->getSExtValue();
+  return XformM5ToU5Imm(imm);
+}]>;
+
+def SETMEMIMM : SDNodeXForm<imm, [{
+   // Return the bit position we will set [0-31].
+   // As an SDNode.
+   int32_t imm = N->getSExtValue();
+   return XformMskToBitPosU5Imm(imm);
+}]>;
+
+def CLRMEMIMM : SDNodeXForm<imm, [{
+   // Return the bit position we will clear [0-31].
+   // As an SDNode.
+   // we bit negate the value first
+   int32_t imm = ~(N->getSExtValue());
+   return XformMskToBitPosU5Imm(imm);
+}]>;
+
+def SETMEMIMM_SHORT : SDNodeXForm<imm, [{
+   // Return the bit position we will set [0-15].
+   // As an SDNode.
+   int16_t imm = N->getSExtValue();
+   return XformMskToBitPosU4Imm(imm);
+}]>;
+
+def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{
+   // Return the bit position we will clear [0-15].
+   // As an SDNode.
+   // we bit negate the value first
+   int16_t imm = ~(N->getSExtValue());
+   return XformMskToBitPosU4Imm(imm);
+}]>;
+
+def SETMEMIMM_BYTE : SDNodeXForm<imm, [{
+   // Return the bit position we will set [0-7].
+   // As an SDNode.
+   int8_t imm =  N->getSExtValue();
+   return XformMskToBitPosU3Imm(imm);
+}]>;
+
+def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{
+   // Return the bit position we will clear [0-7].
+   // As an SDNode.
+   // we bit negate the value first
+   int8_t imm = ~(N->getSExtValue());
+   return XformMskToBitPosU3Imm(imm);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Template class for MemOp instructions with the register value.
+//===----------------------------------------------------------------------===//
+class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
+                     string memOp, bits<2> memOpBits> :
+      MEMInst_V4<(outs),
+                 (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta),
+                 opc#"($base+#$offset)"#memOp#"$delta",
+                 []>,
+                 Requires<[HasV4T, UseMEMOP]> {
+
+    bits<5> base;
+    bits<5> delta;
+    bits<32> offset;
+    bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
+
+    let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+                     !if (!eq(opcBits, 0b01), offset{6-1},
+                     !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+    let IClass = 0b0011;
+    let Inst{27-24} = 0b1110;
+    let Inst{22-21} = opcBits;
+    let Inst{20-16} = base;
+    let Inst{13} = 0b0;
+    let Inst{12-7} = offsetBits;
+    let Inst{6-5} = memOpBits;
+    let Inst{4-0} = delta;
+}
+
 //===----------------------------------------------------------------------===//
-// MEMOP: Word
-//
-//  Implemented:
-//     MEMw_ADDi_indexed_V4  : memw(Rs+#u6:2)+=#U5
-//     MEMw_SUBi_indexed_V4  : memw(Rs+#u6:2)-=#U5
-//     MEMw_ADDr_indexed_V4  : memw(Rs+#u6:2)+=Rt
-//     MEMw_SUBr_indexed_V4  : memw(Rs+#u6:2)-=Rt
-//     MEMw_CLRr_indexed_V4  : memw(Rs+#u6:2)&=Rt
-//     MEMw_SETr_indexed_V4  : memw(Rs+#u6:2)|=Rt
-//     MEMw_ADDi_V4          : memw(Rs+#u6:2)+=#U5
-//     MEMw_SUBi_V4          : memw(Rs+#u6:2)-=#U5
-//     MEMw_ADDr_V4          : memw(Rs+#u6:2)+=Rt
-//     MEMw_SUBr_V4          : memw(Rs+#u6:2)-=Rt
-//     MEMw_CLRr_V4          : memw(Rs+#u6:2)&=Rt
-//     MEMw_SETr_V4          : memw(Rs+#u6:2)|=Rt
-//
-//   Not implemented:
-//     MEMw_CLRi_indexed_V4  : memw(Rs+#u6:2)=clrbit(#U5)
-//     MEMw_SETi_indexed_V4  : memw(Rs+#u6:2)=setbit(#U5)
-//     MEMw_CLRi_V4          : memw(Rs+#u6:2)=clrbit(#U5)
-//     MEMw_SETi_V4          : memw(Rs+#u6:2)=setbit(#U5)
+// Template class for MemOp instructions with the immediate value.
 //===----------------------------------------------------------------------===//
+class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
+                     string memOp, bits<2> memOpBits> :
+      MEMInst_V4 <(outs),
+                  (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta),
+                  opc#"($base+#$offset)"#memOp#"#$delta"
+                  #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')'
+                  []>,
+                  Requires<[HasV4T, UseMEMOP]> {
 
+    bits<5> base;
+    bits<5> delta;
+    bits<32> offset;
+    bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
 
+    let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+                     !if (!eq(opcBits, 0b01), offset{6-1},
+                     !if (!eq(opcBits, 0b10), offset{7-2},0)));
 
-// memw(Rs+#u6:2) += #U5
-let AddedComplexity = 30 in
-def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
-            "memw($base+#$offset) += #$addend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
+    let IClass = 0b0011;
+    let Inst{27-24} = 0b1111;
+    let Inst{22-21} = opcBits;
+    let Inst{20-16} = base;
+    let Inst{13} = 0b0;
+    let Inst{12-7} = offsetBits;
+    let Inst{6-5} = memOpBits;
+    let Inst{4-0} = delta;
+}
 
-// memw(Rs+#u6:2) -= #U5
-let AddedComplexity = 30 in
-def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
-            "memw($base+#$offset) -= #$subend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += Rt
-let AddedComplexity = 30 in
-def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
-            "memw($base+#$offset) += $addend",
-            [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
-                         (i32 IntRegs:$addend)),
-                    (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) -= Rt
-let AddedComplexity = 30 in
-def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
-            "memw($base+#$offset) -= $subend",
-            [(store (sub (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
-                         (i32 IntRegs:$subend)),
-                    (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) &= Rt
-let AddedComplexity = 30 in
-def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
-            "memw($base+#$offset) &= $andend",
-            [(store (and (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
-                         (i32 IntRegs:$andend)),
-                    (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) |= Rt
-let AddedComplexity = 30 in
-def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
-            "memw($base+#$offset) |= $orend",
-            [(store (or (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
-                        (i32 IntRegs:$orend)),
-                    (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += #U5
-let AddedComplexity = 30 in
-def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, u5Imm:$addend),
-            "memw($addr) += $addend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
+// multiclass to define MemOp instructions with register operand.
+multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> {
+  def _ADD#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add
+  def _SUB#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub
+  def _AND#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and
+  def _OR#NAME#_V4  : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or
+}
 
-// memw(Rs+#u6:2) -= #U5
-let AddedComplexity = 30 in
-def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, u5Imm:$subend),
-            "memw($addr) -= $subend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += Rt
-let AddedComplexity = 30 in
-def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$addend),
-            "memw($addr) += $addend",
-            [(store (add (load ADDRriU6_2:$addr), (i32 IntRegs:$addend)),
-                    ADDRriU6_2:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) -= Rt
-let AddedComplexity = 30 in
-def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$subend),
-            "memw($addr) -= $subend",
-            [(store (sub (load ADDRriU6_2:$addr), (i32 IntRegs:$subend)),
-                    ADDRriU6_2:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) &= Rt
-let AddedComplexity = 30 in
-def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$andend),
-            "memw($addr) &= $andend",
-            [(store (and (load ADDRriU6_2:$addr), (i32 IntRegs:$andend)),
-                    ADDRriU6_2:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) |= Rt
-let AddedComplexity = 30 in
-def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$orend),
-            "memw($addr) |= $orend",
-            [(store (or (load ADDRriU6_2:$addr), (i32 IntRegs:$orend)),
-                    ADDRriU6_2:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
+// multiclass to define MemOp instructions with immediate Operand.
+multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> {
+  def _ADD#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >;
+  def _SUB#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >;
+  def _CLRBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =clrbit(", 0b10>;
+  def _SETBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =setbit(", 0b11>;
+}
+
+multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
+  defm r : MemOp_rr <opc, opcBits, ImmOp>;
+  defm i : MemOp_ri <opc, opcBits, ImmOp>;
+}
+
+// Define MemOp instructions.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0,
+validSubTargets =HasV4SubT in {
+  let opExtentBits = 6, accessSize = ByteAccess in
+  defm MemOPb : MemOp_base <"memb", 0b00, u6_0Ext>;
+
+  let opExtentBits = 7, accessSize = HalfWordAccess in
+  defm MemOPh : MemOp_base <"memh", 0b01, u6_1Ext>;
+
+  let opExtentBits = 8, accessSize = WordAccess in
+  defm MemOPw : MemOp_base <"memw", 0b10, u6_2Ext>;
+}
 
 //===----------------------------------------------------------------------===//
-// MEMOP: Halfword
-//
-//  Implemented:
-//     MEMh_ADDi_indexed_V4  : memw(Rs+#u6:2)+=#U5
-//     MEMh_SUBi_indexed_V4  : memw(Rs+#u6:2)-=#U5
-//     MEMh_ADDr_indexed_V4  : memw(Rs+#u6:2)+=Rt
-//     MEMh_SUBr_indexed_V4  : memw(Rs+#u6:2)-=Rt
-//     MEMh_CLRr_indexed_V4  : memw(Rs+#u6:2)&=Rt
-//     MEMh_SETr_indexed_V4  : memw(Rs+#u6:2)|=Rt
-//     MEMh_ADDi_V4          : memw(Rs+#u6:2)+=#U5
-//     MEMh_SUBi_V4          : memw(Rs+#u6:2)-=#U5
-//     MEMh_ADDr_V4          : memw(Rs+#u6:2)+=Rt
-//     MEMh_SUBr_V4          : memw(Rs+#u6:2)-=Rt
-//     MEMh_CLRr_V4          : memw(Rs+#u6:2)&=Rt
-//     MEMh_SETr_V4          : memw(Rs+#u6:2)|=Rt
-//
-//   Not implemented:
-//     MEMh_CLRi_indexed_V4  : memw(Rs+#u6:2)=clrbit(#U5)
-//     MEMh_SETi_indexed_V4  : memw(Rs+#u6:2)=setbit(#U5)
-//     MEMh_CLRi_V4          : memw(Rs+#u6:2)=clrbit(#U5)
-//     MEMh_SETi_V4          : memw(Rs+#u6:2)=setbit(#U5)
+// Multiclass to define 'Def Pats' for ALU operations on the memory
+// Here value used for the ALU operation is an immediate value.
+// mem[bh](Rs+#0) += #U5
+// mem[bh](Rs+#u6) += #U5
 //===----------------------------------------------------------------------===//
 
+multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+                          InstHexagon MI, SDNode OpNode> {
+  let AddedComplexity = 180 in
+  def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
+                    IntRegs:$addr),
+              (MI IntRegs:$addr, #0, u5ImmPred:$addend )>;
 
-// memh(Rs+#u6:1) += #U5
-let AddedComplexity = 30 in
-def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend),
-            "memh($base+#$offset) += $addend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
+  let AddedComplexity = 190 in
+  def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)),
+                     u5ImmPred:$addend),
+             (add IntRegs:$base, ExtPred:$offset)),
+       (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>;
+}
 
-// memh(Rs+#u6:1) -= #U5
-let AddedComplexity = 30 in
-def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend),
-            "memh($base+#$offset) -= $subend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += Rt
-let AddedComplexity = 30 in
-def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
-            "memh($base+#$offset) += $addend",
-            [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
-                                                   u6_1ImmPred:$offset)),
-                                 (i32 IntRegs:$addend)),
-                            (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) -= Rt
-let AddedComplexity = 30 in
-def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
-            "memh($base+#$offset) -= $subend",
-            [(truncstorei16 (sub (sextloadi16 (add (i32 IntRegs:$base),
-                                                   u6_1ImmPred:$offset)),
-                                 (i32 IntRegs:$subend)),
-                            (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) &= Rt
-let AddedComplexity = 30 in
-def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
-            "memh($base+#$offset) += $andend",
-            [(truncstorei16 (and (sextloadi16 (add (i32 IntRegs:$base),
-                                                   u6_1ImmPred:$offset)),
-                                 (i32 IntRegs:$andend)),
-                            (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) |= Rt
-let AddedComplexity = 30 in
-def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
-            "memh($base+#$offset) |= $orend",
-            [(truncstorei16 (or (sextloadi16 (add (i32 IntRegs:$base),
-                                              u6_1ImmPred:$offset)),
-                             (i32 IntRegs:$orend)),
-                            (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += #U5
-let AddedComplexity = 30 in
-def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, u5Imm:$addend),
-            "memh($addr) += $addend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+                          InstHexagon addMI, InstHexagon subMI> {
+  defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>;
+  defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>;
+}
 
-// memh(Rs+#u6:1) -= #U5
-let AddedComplexity = 30 in
-def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, u5Imm:$subend),
-            "memh($addr) -= $subend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += Rt
-let AddedComplexity = 30 in
-def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$addend),
-            "memh($addr) += $addend",
-            [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
-                                 (i32 IntRegs:$addend)), ADDRriU6_1:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) -= Rt
-let AddedComplexity = 30 in
-def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$subend),
-            "memh($addr) -= $subend",
-            [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr),
-                                 (i32 IntRegs:$subend)), ADDRriU6_1:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) &= Rt
-let AddedComplexity = 30 in
-def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$andend),
-            "memh($addr) &= $andend",
-            [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr),
-                                 (i32 IntRegs:$andend)), ADDRriU6_1:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) |= Rt
-let AddedComplexity = 30 in
-def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$orend),
-            "memh($addr) |= $orend",
-            [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr),
-                                (i32 IntRegs:$orend)), ADDRriU6_1:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+  // Half Word
+  defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
+                         MemOPh_ADDi_V4, MemOPh_SUBi_V4>;
+  // Byte
+  defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred,
+                         MemOPb_ADDi_V4, MemOPb_SUBi_V4>;
+}
 
+let Predicates = [HasV4T, UseMEMOP] in {
+  defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend
+  defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend
+  defm : MemOpi_u5ExtType<extloadi8,  extloadi16>;  // any extend
+
+  // Word
+  defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, MemOPw_ADDi_V4,
+                         MemOPw_SUBi_V4>;
+}
 
 //===----------------------------------------------------------------------===//
-// MEMOP: Byte
-//
-//  Implemented:
-//     MEMb_ADDi_indexed_V4  : memb(Rs+#u6:0)+=#U5
-//     MEMb_SUBi_indexed_V4  : memb(Rs+#u6:0)-=#U5
-//     MEMb_ADDr_indexed_V4  : memb(Rs+#u6:0)+=Rt
-//     MEMb_SUBr_indexed_V4  : memb(Rs+#u6:0)-=Rt
-//     MEMb_CLRr_indexed_V4  : memb(Rs+#u6:0)&=Rt
-//     MEMb_SETr_indexed_V4  : memb(Rs+#u6:0)|=Rt
-//     MEMb_ADDi_V4          : memb(Rs+#u6:0)+=#U5
-//     MEMb_SUBi_V4          : memb(Rs+#u6:0)-=#U5
-//     MEMb_ADDr_V4          : memb(Rs+#u6:0)+=Rt
-//     MEMb_SUBr_V4          : memb(Rs+#u6:0)-=Rt
-//     MEMb_CLRr_V4          : memb(Rs+#u6:0)&=Rt
-//     MEMb_SETr_V4          : memb(Rs+#u6:0)|=Rt
-//
-//   Not implemented:
-//     MEMb_CLRi_indexed_V4  : memb(Rs+#u6:0)=clrbit(#U5)
-//     MEMb_SETi_indexed_V4  : memb(Rs+#u6:0)=setbit(#U5)
-//     MEMb_CLRi_V4          : memb(Rs+#u6:0)=clrbit(#U5)
-//     MEMb_SETi_V4          : memb(Rs+#u6:0)=setbit(#U5)
+// multiclass to define 'Def Pats' for ALU operations on the memory.
+// Here value used for the ALU operation is a negative value.
+// mem[bh](Rs+#0) += #m5
+// mem[bh](Rs+#u6) += #m5
 //===----------------------------------------------------------------------===//
 
-// memb(Rs+#u6:0) += #U5
-let AddedComplexity = 30 in
-def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend),
-            "memb($base+#$offset) += $addend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
+                          PatLeaf immPred, ComplexPattern addrPred,
+                          SDNodeXForm xformFunc, InstHexagon MI> {
+  let AddedComplexity = 190 in
+  def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend),
+                   IntRegs:$addr),
+             (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>;
 
-// memb(Rs+#u6:0) -= #U5
-let AddedComplexity = 30 in
-def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend),
-            "memb($base+#$offset) -= $subend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += Rt
-let AddedComplexity = 30 in
-def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend),
-            "memb($base+#$offset) += $addend",
-            [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
-                                                 u6_0ImmPred:$offset)),
-                                (i32 IntRegs:$addend)),
-                           (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) -= Rt
-let AddedComplexity = 30 in
-def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend),
-            "memb($base+#$offset) -= $subend",
-            [(truncstorei8 (sub (sextloadi8 (add (i32 IntRegs:$base),
-                                                 u6_0ImmPred:$offset)),
-                                (i32 IntRegs:$subend)),
-                           (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) &= Rt
-let AddedComplexity = 30 in
-def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend),
-            "memb($base+#$offset) += $andend",
-            [(truncstorei8 (and (sextloadi8 (add (i32 IntRegs:$base),
-                                                 u6_0ImmPred:$offset)),
-                                (i32 IntRegs:$andend)),
-                           (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) |= Rt
-let AddedComplexity = 30 in
-def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
-            (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend),
-            "memb($base+#$offset) |= $orend",
-            [(truncstorei8 (or (sextloadi8 (add (i32 IntRegs:$base),
-                                                u6_0ImmPred:$offset)),
-                               (i32 IntRegs:$orend)),
-                           (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += #U5
-let AddedComplexity = 30 in
-def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, u5Imm:$addend),
-            "memb($addr) += $addend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
+  let AddedComplexity = 195 in
+  def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)),
+                       immPred:$subend),
+                  (add IntRegs:$base, extPred:$offset)),
+            (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>;
+}
 
-// memb(Rs+#u6:0) -= #U5
-let AddedComplexity = 30 in
-def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, u5Imm:$subend),
-            "memb($addr) -= $subend",
-            []>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += Rt
-let AddedComplexity = 30 in
-def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$addend),
-            "memb($addr) += $addend",
-            [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
-                                (i32 IntRegs:$addend)), ADDRriU6_0:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) -= Rt
-let AddedComplexity = 30 in
-def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$subend),
-            "memb($addr) -= $subend",
-            [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr),
-                                (i32 IntRegs:$subend)), ADDRriU6_0:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) &= Rt
-let AddedComplexity = 30 in
-def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$andend),
-            "memb($addr) &= $andend",
-            [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr),
-                                (i32 IntRegs:$andend)), ADDRriU6_0:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) |= Rt
-let AddedComplexity = 30 in
-def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
-            (ins MEMri:$addr, IntRegs:$orend),
-            "memb($addr) |= $orend",
-            [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr),
-                               (i32 IntRegs:$orend)), ADDRriU6_0:$addr)]>,
-            Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+  // Half Word
+  defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred,
+                        ADDRriU6_1, MEMOPIMM_HALF, MemOPh_SUBi_V4>;
+  // Byte
+  defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred,
+                        ADDRriU6_0, MEMOPIMM_BYTE, MemOPb_SUBi_V4>;
+}
+
+let Predicates = [HasV4T, UseMEMOP] in {
+  defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend
+  defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend
+  defm : MemOpi_m5ExtType<extloadi8,  extloadi16>;  // any extend
+
+  // Word
+  defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred,
+                          ADDRriU6_2, MEMOPIMM, MemOPw_SUBi_V4>;
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for bit operations on the memory.
+// mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5)
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
+                     PatLeaf extPred, ComplexPattern addrPred,
+                     SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> {
+
+  // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5)
+  let AddedComplexity = 250 in
+  def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+                          immPred:$bitend),
+                  (add IntRegs:$base, extPred:$offset)),
+            (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>;
+
+  // mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+  let AddedComplexity = 225 in
+  def : Pat <(stOp (OpNode (ldOp addrPred:$addr), immPred:$bitend),
+                   addrPred:$addr),
+             (MI IntRegs:$addr, #0, (xformFunc immPred:$bitend))>;
+}
+
+multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+  // Byte - clrbit
+  defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred,
+                       ADDRriU6_0, CLRMEMIMM_BYTE, MemOPb_CLRBITi_V4, and>;
+  // Byte - setbit
+  defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred,  u6ExtPred,
+                       ADDRriU6_0, SETMEMIMM_BYTE, MemOPb_SETBITi_V4, or>;
+  // Half Word - clrbit
+  defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred,
+                       ADDRriU6_1, CLRMEMIMM_SHORT, MemOPh_CLRBITi_V4, and>;
+  // Half Word - setbit
+  defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred,
+                       ADDRriU6_1, SETMEMIMM_SHORT, MemOPh_SETBITi_V4, or>;
+}
+
+let Predicates = [HasV4T, UseMEMOP] in {
+  // mem[bh](Rs+#0) = [clrbit|setbit](#U5)
+  // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5)
+  defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend
+  defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend
+  defm : MemOpi_bitExtType<extloadi8,  extloadi16>;  // any extend
+
+  // memw(Rs+#0) = [clrbit|setbit](#U5)
+  // memw(Rs+#u6:2) = [clrbit|setbit](#U5)
+  defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2,
+                       CLRMEMIMM, MemOPw_CLRBITi_V4, and>;
+  defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2,
+                       SETMEMIMM, MemOPw_SETBITi_V4, or>;
+}
 
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for ALU operations on the memory
+// where addend is a register.
+// mem[bhw](Rs+#0) [+-&|]= Rt
+// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred,
+                     PatLeaf extPred, InstHexagon MI, SDNode OpNode> {
+  let AddedComplexity = 141 in
+  // mem[bhw](Rs+#0) [+-&|]= Rt
+  def : Pat <(stOp (OpNode (ldOp addrPred:$addr), (i32 IntRegs:$addend)),
+                   addrPred:$addr),
+             (MI IntRegs:$addr, #0, (i32 IntRegs:$addend) )>;
+
+  // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+  let AddedComplexity = 150 in
+  def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+                           (i32 IntRegs:$orend)),
+                   (add IntRegs:$base, extPred:$offset)),
+             (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>;
+}
+
+multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp,
+                        ComplexPattern addrPred, PatLeaf extPred,
+                        InstHexagon addMI, InstHexagon subMI,
+                        InstHexagon andMI, InstHexagon orMI > {
+
+  defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>;
+  defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>;
+  defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>;
+  defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI,  or>;
+}
+
+multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+  // Half Word
+  defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred,
+                       MemOPh_ADDr_V4, MemOPh_SUBr_V4,
+                       MemOPh_ANDr_V4, MemOPh_ORr_V4>;
+  // Byte
+  defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred,
+                       MemOPb_ADDr_V4, MemOPb_SUBr_V4,
+                       MemOPb_ANDr_V4, MemOPb_ORr_V4>;
+}
+
+// Define 'def Pats' for MemOps with register addend.
+let Predicates = [HasV4T, UseMEMOP] in {
+  // Byte, Half Word
+  defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend
+  defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
+  defm : MemOPr_ExtType<extloadi8,  extloadi16>;  // any extend
+  // Word
+  defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, MemOPw_ADDr_V4,
+                       MemOPw_SUBr_V4, MemOPw_ANDr_V4, MemOPw_ORr_V4 >;
+}
 
 //===----------------------------------------------------------------------===//
 // XTYPE/PRED +
@@ -3629,7 +2769,7 @@ let isReturn = 1, isTerminator = 1,
 
 multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
                            bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_V4 : STInst2<(outs),
             (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -3639,7 +2779,7 @@ multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>;
@@ -3665,7 +2805,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> {
 
 multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot,
                            bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_nv_V4 : NVInst_V4<(outs),
             (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -3675,7 +2815,7 @@ multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>;
@@ -3730,9 +2870,111 @@ def : Pat<(store (i64 DoubleRegs:$src1),
           (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>;
 }
 
+//===----------------------------------------------------------------------===//
+// multiclass for store instructions with GP-relative addressing mode.
+// mem[bhwd](#global)=Rt
+// if ([!]Pv[.new]) mem[bhwd](##global) = Rt
+//===----------------------------------------------------------------------===//
+multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> {
+  let BaseOpcode = BaseOp, isPredicable = 1 in
+  def NAME#_V4 : STInst2<(outs),
+          (ins globaladdress:$global, RC:$src),
+          mnemonic#"(#$global) = $src",
+          []>;
+
+  // When GP-relative instructions are predicated, their addressing mode is
+  // changed to absolute and they are always constant extended.
+  let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
+  isPredicated = 1 in {
+    defm Pt : ST_Abs_Pred <mnemonic, RC, 0>;
+    defm NotPt : ST_Abs_Pred <mnemonic, RC, 1>;
+  }
+}
+
+let mayStore = 1, isNVStore = 1 in
+multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> {
+  let BaseOpcode = BaseOp, isPredicable = 1 in
+  def NAME#_nv_V4 : NVInst_V4<(outs),
+          (ins u0AlwaysExt:$global, RC:$src),
+          mnemonic#"(#$global) = $src.new",
+          []>,
+          Requires<[HasV4T]>;
+
+  // When GP-relative instructions are predicated, their addressing mode is
+  // changed to absolute and they are always constant extended.
+  let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
+  isPredicated = 1 in {
+    defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>;
+    defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>;
+  }
+}
+
+let validSubTargets = HasV4SubT,  validSubTargets = HasV4SubT in {
+defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>,
+              ST_GP_nv<"memd", "STd_GP", DoubleRegs>, NewValueRel ;
+defm STb_GP : ST_GP<"memb",  "STb_GP", IntRegs>,
+              ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel ;
+defm STh_GP : ST_GP<"memh",  "STh_GP", IntRegs>,
+              ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel ;
+defm STw_GP : ST_GP<"memw",  "STw_GP", IntRegs>,
+              ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel ;
+}
+
+// 64 bit atomic store
+def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
+                            (i64 DoubleRegs:$src1)),
+           (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+           Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(store (i64 DoubleRegs:$src1),
+                  (HexagonCONST32_GP tglobaladdr:$global)),
+           (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>;
+
+// 8 bit atomic store
+def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
+                            (i32 IntRegs:$src1)),
+            (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+          (HexagonCONST32_GP tglobaladdr:$global)),
+          (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+//       to "r0 = 1; memw(#foo) = r0"
+let AddedComplexity = 100 in
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+          (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>;
+
+def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
+                           (i32 IntRegs:$src1)),
+          (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+                         (HexagonCONST32_GP tglobaladdr:$global)),
+          (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// 32 bit atomic store
+def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
+                           (i32 IntRegs:$src1)),
+          (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+          (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass for the load instructions with absolute addressing mode.
+//===----------------------------------------------------------------------===//
 multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
                            bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : LDInst2<(outs RC:$dst),
             (ins PredRegs:$src1, globaladdressExt:$absaddr),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -3742,7 +2984,7 @@ multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>;
@@ -3795,6 +3037,107 @@ let Predicates = [HasV4T], AddedComplexity=30 in
 def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
           (LDriuh_abs_V4 tglobaladdr:$absaddr)>;
 
+//===----------------------------------------------------------------------===//
+// multiclass for load instructions with GP-relative addressing mode.
+// Rx=mem[bhwd](##global)
+// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
+//===----------------------------------------------------------------------===//
+let neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> {
+  let BaseOpcode = BaseOp in {
+    let isPredicable = 1 in
+    def NAME#_V4 : LDInst2<(outs RC:$dst),
+            (ins globaladdress:$global),
+            "$dst = "#mnemonic#"(#$global)",
+            []>;
+
+    let isExtended = 1, opExtendable = 2, isPredicated = 1 in {
+      defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>;
+      defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>;
+    }
+  }
+}
+
+defm LDd_GP  : LD_GP<"memd",  "LDd_GP",  DoubleRegs>;
+defm LDb_GP  : LD_GP<"memb",  "LDb_GP",  IntRegs>;
+defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>;
+defm LDh_GP  : LD_GP<"memh",  "LDh_GP",  IntRegs>;
+defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>;
+defm LDw_GP  : LD_GP<"memw",  "LDw_GP",  IntRegs>;
+
+def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
+           (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
+           (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
+           (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
+           (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memw(#foo + 0)
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+           (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+           (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>;
+
+// When the Interprocedural Global Variable optimizer realizes that a certain
+// global variable takes only two constant values, it shrinks the global to
+// a boolean. Catch those loads here in the following 3 patterns.
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+           (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+           (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+           (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+           (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+           (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memub(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+           (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+           (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+           (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memuh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+           (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
+           (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+
+
 // Transfer global address into a register
 let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in
 def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
diff --git a/lib/Target/Hexagon/HexagonMCInst.h b/lib/Target/Hexagon/HexagonMCInst.h
deleted file mode 100644
index e16636ea488c..000000000000
--- a/lib/Target/Hexagon/HexagonMCInst.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class extends MCInst to allow some VLIW annotation.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef HEXAGONMCINST_H
-#define HEXAGONMCINST_H
-
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCInst.h"
-
-namespace llvm {
-  class HexagonMCInst: public MCInst {
-    // Packet start and end markers
-    unsigned startPacket: 1, endPacket: 1;
-    const MachineInstr *MachineI;
-  public:
-    explicit HexagonMCInst(): MCInst(),
-                              startPacket(0), endPacket(0) {}
-
-    const MachineInstr* getMI() const { return MachineI; }
-
-    void setMI(const MachineInstr *MI) { MachineI = MI; }
-
-    bool isStartPacket() const { return (startPacket); }
-    bool isEndPacket() const { return (endPacket); }
-
-    void setStartPacket(bool yes) { startPacket = yes; }
-    void setEndPacket(bool yes) { endPacket = yes; }
-  };
-}
-
-#endif
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index cd3d2898d02a..5e80e48b01d5 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -220,7 +220,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
       return false;
   }
 
-  unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
+  unsigned cmpReg1, cmpOp2;
   cmpReg1 = MI->getOperand(1).getReg();
 
   if (secondReg) {
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index f947dfcdf9a9..d8b4e2fcb368 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -14,25 +14,26 @@
 
 #include "HexagonRegisterInfo.h"
 #include "Hexagon.h"
-#include "HexagonMachineFunctionInfo.h"
 #include "HexagonSubtarget.h"
 #include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Type.h"
 #include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
 
@@ -215,28 +216,41 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
         MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true);
         MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
       } else if (TII.isMemOp(&MI)) {
-        unsigned resReg = HEXAGON_RESERVED_REG_1;
-        if (!MFI.hasVarSizedObjects() &&
-            TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
-          MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
-                                                       false, false, true);
-          MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
-        } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
-          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                  TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
-          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                  TII.get(Hexagon::ADD_rr),
-                  resReg).addReg(FrameReg).addReg(resReg);
-          MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,
-                                                       true);
-          MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+        // use the constant extender if the instruction provides it
+        // and we are V4TOps.
+        if (Subtarget.hasV4TOps()) {
+          if (TII.isConstExtended(&MI)) {
+            MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+            MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+            TII.immediateExtend(&MI);
+          } else {
+            llvm_unreachable("Need to implement for memops");
+          }
         } else {
-          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                  TII.get(Hexagon::ADD_ri),
-                  resReg).addReg(FrameReg).addImm(Offset);
-          MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,
-                                                       true);
-          MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+          // Only V3 and older instructions here.
+          unsigned ResReg = HEXAGON_RESERVED_REG_1;
+          if (!MFI.hasVarSizedObjects() &&
+              TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
+            MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
+                                                         false, false, false);
+            MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
+          } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+            BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                    TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset);
+            BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                    TII.get(Hexagon::ADD_rr), ResReg).addReg(FrameReg).
+              addReg(ResReg);
+            MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+                                                         true);
+            MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+          } else {
+            BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                    TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg).
+              addImm(Offset);
+            MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+                                                         true);
+            MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+          }
         }
       } else {
         unsigned dstReg = MI.getOperand(0).getReg();
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 4bacb8fa670d..07d5ce1d8ab0 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -29,8 +29,16 @@ EnableV3("enable-hexagon-v3", cl::Hidden,
 static cl::opt<bool>
 EnableMemOps(
     "enable-hexagon-memops",
-    cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed,
-    cl::desc("Generate V4 memop instructions."));
+    cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true),
+    cl::desc(
+      "Generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool>
+DisableMemOps(
+    "disable-hexagon-memops",
+    cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false),
+    cl::desc(
+      "Do not generate V4 MEMOP in code generation for Hexagon target"));
 
 static cl::opt<bool>
 EnableIEEERndNear(
@@ -64,7 +72,10 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
   // Initialize scheduling itinerary for the specified CPU.
   InstrItins = getInstrItineraryForCPU(CPUString);
 
-  if (EnableMemOps)
+  // UseMemOps on by default unless disabled explicitly
+  if (DisableMemOps)
+    UseMemOps = false;
+  else if (EnableMemOps)
     UseMemOps = true;
   else
     UseMemOps = false;
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index d9fef3e45502..ce45c626f799 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -35,6 +35,10 @@ opt<bool> DisableHexagonMISched("disable-hexagon-misched",
                                 cl::Hidden, cl::ZeroOrMore, cl::init(false),
                                 cl::desc("Disable Hexagon MI Scheduling"));
 
+static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
+    cl::Hidden, cl::ZeroOrMore, cl::init(false),
+    cl::desc("Disable Hexagon CFG Optimization"));
+
 /// HexagonTargetMachineModule - Note that this is used on hosts that
 /// cannot link in a library unless there are references into the
 /// library.  In particular, it seems that it is not possible to get
@@ -75,19 +79,20 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
     TSInfo(*this),
     FrameLowering(Subtarget),
     InstrItins(&Subtarget.getInstrItineraryData()) {
-  setMCUseCFI(false);
+    setMCUseCFI(false);
 }
 
 // addPassesForOptimizations - Allow the backend (target) to add Target
 // Independent Optimization passes to the Pass Manager.
 bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
-
-  PM.add(createConstantPropagationPass());
-  PM.add(createLoopSimplifyPass());
-  PM.add(createDeadCodeEliminationPass());
-  PM.add(createConstantPropagationPass());
-  PM.add(createLoopUnrollPass());
-  PM.add(createLoopStrengthReducePass());
+  if (getOptLevel() != CodeGenOpt::None) {
+    PM.add(createConstantPropagationPass());
+    PM.add(createLoopSimplifyPass());
+    PM.add(createDeadCodeEliminationPass());
+    PM.add(createConstantPropagationPass());
+    PM.add(createLoopUnrollPass());
+    PM.add(createLoopStrengthReducePass());
+  }
   return true;
 }
 
@@ -121,38 +126,45 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
 }
 
 bool HexagonPassConfig::addInstSelector() {
-  addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+
   addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel()));
-  addPass(createHexagonPeephole());
+
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createHexagonPeephole());
+
   return false;
 }
 
 
 bool HexagonPassConfig::addPreRegAlloc() {
-  if (!DisableHardwareLoops) {
+  if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
     addPass(createHexagonHardwareLoops());
-  }
   return false;
 }
 
 bool HexagonPassConfig::addPostRegAlloc() {
-  addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
+  if (!DisableHexagonCFGOpt && getOptLevel() != CodeGenOpt::None)
+    addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
   return true;
 }
 
 
 bool HexagonPassConfig::addPreSched2() {
-  addPass(&IfConverterID);
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(&IfConverterID);
   return true;
 }
 
 bool HexagonPassConfig::addPreEmitPass() {
 
-  if (!DisableHardwareLoops) {
+  if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
     addPass(createHexagonFixupHwLoops());
-  }
 
-  addPass(createHexagonNewValueJump());
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createHexagonNewValueJump());
 
   // Expand Spill code for predicate registers.
   addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
@@ -161,7 +173,8 @@ bool HexagonPassConfig::addPreEmitPass() {
   addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
 
   // Create Packets.
-  addPass(createHexagonPacketizer());
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createHexagonPacketizer());
 
   return false;
 }
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 866beb16882f..c0d86da1c05e 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -149,7 +149,6 @@ namespace {
     bool canReserveResourcesForConstExt(MachineInstr *MI);
     void reserveResourcesForConstExt(MachineInstr* MI);
     bool isNewValueInst(MachineInstr* MI);
-    bool isDotNewInst(MachineInstr* MI);
   };
 }
 
@@ -2154,172 +2153,6 @@ static bool GetPredicateSense(MachineInstr* MI,
   return false;
 }
 
-bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) {
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  if (QII->isNewValueInst(MI))
-    return true;
-
-  switch (MI->getOpcode()) {
-  case Hexagon::TFR_cdnNotPt:
-  case Hexagon::TFR_cdnPt:
-  case Hexagon::TFRI_cdnNotPt:
-  case Hexagon::TFRI_cdnPt:
-  case Hexagon::LDrid_cdnPt :
-  case Hexagon::LDrid_cdnNotPt :
-  case Hexagon::LDrid_indexed_cdnPt :
-  case Hexagon::LDrid_indexed_cdnNotPt :
-  case Hexagon::POST_LDrid_cdnPt_V4 :
-  case Hexagon::POST_LDrid_cdnNotPt_V4 :
-  case Hexagon::LDriw_cdnPt :
-  case Hexagon::LDriw_cdnNotPt :
-  case Hexagon::LDriw_indexed_cdnPt :
-  case Hexagon::LDriw_indexed_cdnNotPt :
-  case Hexagon::POST_LDriw_cdnPt_V4 :
-  case Hexagon::POST_LDriw_cdnNotPt_V4 :
-  case Hexagon::LDrih_cdnPt :
-  case Hexagon::LDrih_cdnNotPt :
-  case Hexagon::LDrih_indexed_cdnPt :
-  case Hexagon::LDrih_indexed_cdnNotPt :
-  case Hexagon::POST_LDrih_cdnPt_V4 :
-  case Hexagon::POST_LDrih_cdnNotPt_V4 :
-  case Hexagon::LDrib_cdnPt :
-  case Hexagon::LDrib_cdnNotPt :
-  case Hexagon::LDrib_indexed_cdnPt :
-  case Hexagon::LDrib_indexed_cdnNotPt :
-  case Hexagon::POST_LDrib_cdnPt_V4 :
-  case Hexagon::POST_LDrib_cdnNotPt_V4 :
-  case Hexagon::LDriuh_cdnPt :
-  case Hexagon::LDriuh_cdnNotPt :
-  case Hexagon::LDriuh_indexed_cdnPt :
-  case Hexagon::LDriuh_indexed_cdnNotPt :
-  case Hexagon::POST_LDriuh_cdnPt_V4 :
-  case Hexagon::POST_LDriuh_cdnNotPt_V4 :
-  case Hexagon::LDriub_cdnPt :
-  case Hexagon::LDriub_cdnNotPt :
-  case Hexagon::LDriub_indexed_cdnPt :
-  case Hexagon::LDriub_indexed_cdnNotPt :
-  case Hexagon::POST_LDriub_cdnPt_V4 :
-  case Hexagon::POST_LDriub_cdnNotPt_V4 :
-
-  case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
-  case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
-  case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
-  case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
-  case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
-  case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
-  case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
-  case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
-  case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
-  case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
-  case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
-  case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
-
-// Coditional add
-  case Hexagon::ADD_ri_cdnPt:
-  case Hexagon::ADD_ri_cdnNotPt:
-  case Hexagon::ADD_rr_cdnPt:
-  case Hexagon::ADD_rr_cdnNotPt:
-
-  // Conditional logical operations
-  case Hexagon::XOR_rr_cdnPt :
-  case Hexagon::XOR_rr_cdnNotPt :
-  case Hexagon::AND_rr_cdnPt :
-  case Hexagon::AND_rr_cdnNotPt :
-  case Hexagon::OR_rr_cdnPt :
-  case Hexagon::OR_rr_cdnNotPt :
-
-  // Conditonal subtract
-  case Hexagon::SUB_rr_cdnPt :
-  case Hexagon::SUB_rr_cdnNotPt :
-
-  // Conditional combine
-  case Hexagon::COMBINE_rr_cdnPt :
-  case Hexagon::COMBINE_rr_cdnNotPt :
-
-  // Conditional shift operations
-  case Hexagon::ASLH_cdnPt_V4:
-  case Hexagon::ASLH_cdnNotPt_V4:
-  case Hexagon::ASRH_cdnPt_V4:
-  case Hexagon::ASRH_cdnNotPt_V4:
-  case Hexagon::SXTB_cdnPt_V4:
-  case Hexagon::SXTB_cdnNotPt_V4:
-  case Hexagon::SXTH_cdnPt_V4:
-  case Hexagon::SXTH_cdnNotPt_V4:
-  case Hexagon::ZXTB_cdnPt_V4:
-  case Hexagon::ZXTB_cdnNotPt_V4:
-  case Hexagon::ZXTH_cdnPt_V4:
-  case Hexagon::ZXTH_cdnNotPt_V4:
-
-  // Conditional stores
-  case Hexagon::STrib_imm_cdnPt_V4 :
-  case Hexagon::STrib_imm_cdnNotPt_V4 :
-  case Hexagon::STrib_cdnPt_V4 :
-  case Hexagon::STrib_cdnNotPt_V4 :
-  case Hexagon::STrib_indexed_cdnPt_V4 :
-  case Hexagon::STrib_indexed_cdnNotPt_V4 :
-  case Hexagon::POST_STbri_cdnPt_V4 :
-  case Hexagon::POST_STbri_cdnNotPt_V4 :
-  case Hexagon::STrib_indexed_shl_cdnPt_V4 :
-  case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
-
-  // Store doubleword conditionally
-  case Hexagon::STrid_indexed_cdnPt_V4 :
-  case Hexagon::STrid_indexed_cdnNotPt_V4 :
-  case Hexagon::STrid_indexed_shl_cdnPt_V4 :
-  case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
-  case Hexagon::POST_STdri_cdnPt_V4 :
-  case Hexagon::POST_STdri_cdnNotPt_V4 :
-
-  // Store halfword conditionally
-  case Hexagon::STrih_cdnPt_V4 :
-  case Hexagon::STrih_cdnNotPt_V4 :
-  case Hexagon::STrih_indexed_cdnPt_V4 :
-  case Hexagon::STrih_indexed_cdnNotPt_V4 :
-  case Hexagon::STrih_imm_cdnPt_V4 :
-  case Hexagon::STrih_imm_cdnNotPt_V4 :
-  case Hexagon::STrih_indexed_shl_cdnPt_V4 :
-  case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
-  case Hexagon::POST_SThri_cdnPt_V4 :
-  case Hexagon::POST_SThri_cdnNotPt_V4 :
-
-  // Store word conditionally
-  case Hexagon::STriw_cdnPt_V4 :
-  case Hexagon::STriw_cdnNotPt_V4 :
-  case Hexagon::STriw_indexed_cdnPt_V4 :
-  case Hexagon::STriw_indexed_cdnNotPt_V4 :
-  case Hexagon::STriw_imm_cdnPt_V4 :
-  case Hexagon::STriw_imm_cdnNotPt_V4 :
-  case Hexagon::STriw_indexed_shl_cdnPt_V4 :
-  case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
-  case Hexagon::POST_STwri_cdnPt_V4 :
-  case Hexagon::POST_STwri_cdnNotPt_V4 :
-
-  case Hexagon::LDd_GP_cdnPt_V4:
-  case Hexagon::LDd_GP_cdnNotPt_V4:
-  case Hexagon::LDb_GP_cdnPt_V4:
-  case Hexagon::LDb_GP_cdnNotPt_V4:
-  case Hexagon::LDub_GP_cdnPt_V4:
-  case Hexagon::LDub_GP_cdnNotPt_V4:
-  case Hexagon::LDh_GP_cdnPt_V4:
-  case Hexagon::LDh_GP_cdnNotPt_V4:
-  case Hexagon::LDuh_GP_cdnPt_V4:
-  case Hexagon::LDuh_GP_cdnNotPt_V4:
-  case Hexagon::LDw_GP_cdnPt_V4:
-  case Hexagon::LDw_GP_cdnNotPt_V4:
-
-  case Hexagon::STd_GP_cdnPt_V4:
-  case Hexagon::STd_GP_cdnNotPt_V4:
-  case Hexagon::STb_GP_cdnPt_V4:
-  case Hexagon::STb_GP_cdnNotPt_V4:
-  case Hexagon::STh_GP_cdnPt_V4:
-  case Hexagon::STh_GP_cdnNotPt_V4:
-  case Hexagon::STw_GP_cdnPt_V4:
-  case Hexagon::STw_GP_cdnNotPt_V4:
-    return true;
-  }
-  return false;
-}
-
 static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
                                                const HexagonInstrInfo *QII) {
   assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
@@ -2490,7 +2323,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
     // sense, i.e, either both should be negated or both should be none negated.
 
     if (( predRegNumDst != predRegNumSrc) ||
-          isDotNewInst(PacketMI) != isDotNewInst(MI)  ||
+          QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI)  ||
           GetPredicateSense(MI, QII) != GetPredicateSense(PacketMI, QII)) {
       return false;
     }
@@ -2600,8 +2433,9 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
                               MachineBasicBlock::iterator &MII,
                               const TargetRegisterClass* RC )
 {
-  // already a dot new instruction
-  if (isDotNewInst(MI) && !IsNewifyStore(MI))
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  // Already a dot new instruction.
+  if (QII->isDotNewInst(MI) && !IsNewifyStore(MI))
     return false;
 
   if (!isNewifiable(MI))
@@ -2616,7 +2450,6 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
   else {
     // Create a dot new machine instruction to see if resources can be
     // allocated. If not, bail out now.
-    const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
     int NewOpcode = GetDotNewOp(MI->getOpcode());
     const MCInstrDesc &desc = QII->get(NewOpcode);
     DebugLoc dl;
@@ -2759,7 +2592,7 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
   // !p0 is not complimentary to p0.new
   return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) &&
           (GetPredicateSense(MI1, QII) != GetPredicateSense(MI2, QII)) &&
-          (isDotNewInst(MI1) == isDotNewInst(MI2)));
+          (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2)));
 }
 
 // initPacketizerState - Initialize packetizer flags
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index 86f75d1c2d7a..3deb8d1deb42 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -31,6 +31,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
   AscizDirective = "\t.string\t";
   WeakRefDirective = "\t.weak\t";
 
+  SupportsDebugInformation = true;
   UsesELFSectionDirectiveForBSS  = true;
   ExceptionsType = ExceptionHandling::DwarfCFI;
 }
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index 78ad24debb1b..34e33fdcfcc0 100644
--- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -237,7 +237,7 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
           // Use load to get GOT target
           SDValue Ops[] = { Callee, GPReg, Chain };
           SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl,
-                                 MVT::i32, MVT::Other, Ops, 3), 0);
+                                 MVT::i32, MVT::Other, Ops), 0);
           Chain = Load.getValue(1);
 
           // Call target must be on T9
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index edfd421d8532..d31efa86b34d 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -188,7 +188,12 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
   
   // If this global has a name, handle it simply.
   if (GV->hasName()) {
-    getNameWithPrefix(OutName, GV->getName(), PrefixTy);
+    StringRef Name = GV->getName();
+    getNameWithPrefix(OutName, Name, PrefixTy);
+    // No need to do anything else if the global has the special "do not mangle"
+    // flag in the name.
+    if (Name[0] == 1)
+      return;
   } else {
     // Get the ID for the global, assigning a new one if we haven't got one
     // already.
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index ebe12c97662f..befc573826e0 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -63,7 +63,6 @@ class MipsAsmParser : public MCTargetAsmParser {
   MCAsmParser &Parser;
   MipsAssemblerOptions Options;
 
-
 #define GET_ASSEMBLER_HEADER
 #include "MipsGenAsmMatcher.inc"
 
@@ -101,6 +100,9 @@ class MipsAsmParser : public MCTargetAsmParser {
   MipsAsmParser::OperandMatchResultTy
   parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
 
+  bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                         unsigned RegisterClass);
+
   bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
                     StringRef Mnemonic);
 
@@ -119,11 +121,17 @@ class MipsAsmParser : public MCTargetAsmParser {
                             SmallVectorImpl<MCInst> &Instructions);
   void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
                             SmallVectorImpl<MCInst> &Instructions);
+  void expandMemInst(MCInst &Inst, SMLoc IDLoc,
+                     SmallVectorImpl<MCInst> &Instructions,
+                     bool isLoad,bool isImmOpnd);
   bool reportParseError(StringRef ErrorMsg);
 
-  bool parseMemOffset(const MCExpr *&Res);
+  bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
   bool parseRelocOperand(const MCExpr *&Res);
 
+  const MCExpr* evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr);
+
+  bool isEvaluated(const MCExpr *Expr);
   bool parseDirectiveSet();
 
   bool parseSetAtDirective();
@@ -133,6 +141,8 @@ class MipsAsmParser : public MCTargetAsmParser {
   bool parseSetReorderDirective();
   bool parseSetNoReorderDirective();
 
+  bool parseSetAssignment();
+
   bool parseDirectiveWord(unsigned Size, SMLoc L);
 
   MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
@@ -163,9 +173,12 @@ class MipsAsmParser : public MCTargetAsmParser {
 
   bool requestsDoubleOperand(StringRef Mnemonic);
 
-  unsigned getReg(int RC,int RegNo);
+  unsigned getReg(int RC, int RegNo);
 
   int getATReg();
+
+  bool processInstruction(MCInst &Inst, SMLoc IDLoc,
+                        SmallVectorImpl<MCInst> &Instructions);
 public:
   MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
     : MCTargetAsmParser(), STI(sti), Parser(parser) {
@@ -258,7 +271,7 @@ class MipsOperand : public MCParsedAsmOperand {
   void addImmOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     const MCExpr *Expr = getImm();
-    addExpr(Inst,Expr);
+    addExpr(Inst, Expr);
   }
 
   void addMemOperands(MCInst &Inst, unsigned N) const {
@@ -267,7 +280,7 @@ class MipsOperand : public MCParsedAsmOperand {
     Inst.addOperand(MCOperand::CreateReg(getMemBase()));
 
     const MCExpr *Expr = getMemOff();
-    addExpr(Inst,Expr);
+    addExpr(Inst, Expr);
   }
 
   bool isReg() const { return Kind == k_Register; }
@@ -380,42 +393,99 @@ class MipsOperand : public MCParsedAsmOperand {
   }
 
   /// getStartLoc - Get the location of the first token of this operand.
-  SMLoc getStartLoc() const { return StartLoc; }
+  SMLoc getStartLoc() const {
+    return StartLoc;
+  }
   /// getEndLoc - Get the location of the last token of this operand.
-  SMLoc getEndLoc() const { return EndLoc; }
+  SMLoc getEndLoc() const {
+    return EndLoc;
+  }
 
   virtual void print(raw_ostream &OS) const {
     llvm_unreachable("unimplemented!");
   }
-};
+}; // class MipsOperand
+}  // namespace
+
+namespace llvm {
+extern const MCInstrDesc MipsInsts[];
+}
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
+  return MipsInsts[Opcode];
+}
+
+bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+                                       SmallVectorImpl<MCInst> &Instructions) {
+  const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+  Inst.setLoc(IDLoc);
+  if (MCID.mayLoad() || MCID.mayStore()) {
+    // Check the offset of memory operand, if it is a symbol
+    // reference or immediate we may have to expand instructions.
+    for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
+      const MCOperandInfo &OpInfo = MCID.OpInfo[i];
+      if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY)
+          || (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+        MCOperand &Op = Inst.getOperand(i);
+        if (Op.isImm()) {
+          int MemOffset = Op.getImm();
+          if (MemOffset < -32768 || MemOffset > 32767) {
+            // Offset can't exceed 16bit value.
+            expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), true);
+            return false;
+          }
+        } else if (Op.isExpr()) {
+          const MCExpr *Expr = Op.getExpr();
+          if (Expr->getKind() == MCExpr::SymbolRef) {
+            const MCSymbolRefExpr *SR =
+                static_cast<const MCSymbolRefExpr*>(Expr);
+            if (SR->getKind() == MCSymbolRefExpr::VK_None) {
+              // Expand symbol.
+              expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false);
+              return false;
+            }
+          } else if (!isEvaluated(Expr)) {
+            expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false);
+            return false;
+          }
+        }
+      }
+    } // for
+  } // if load/store
+
+  if (needsExpansion(Inst))
+    expandInstruction(Inst, IDLoc, Instructions);
+  else
+    Instructions.push_back(Inst);
+
+  return false;
 }
 
 bool MipsAsmParser::needsExpansion(MCInst &Inst) {
 
-  switch(Inst.getOpcode()) {
-    case Mips::LoadImm32Reg:
-    case Mips::LoadAddr32Imm:
-    case Mips::LoadAddr32Reg:
-      return true;
-    default:
-      return false;
+  switch (Inst.getOpcode()) {
+  case Mips::LoadImm32Reg:
+  case Mips::LoadAddr32Imm:
+  case Mips::LoadAddr32Reg:
+    return true;
+  default:
+    return false;
   }
 }
 
 void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
-                        SmallVectorImpl<MCInst> &Instructions){
-  switch(Inst.getOpcode()) {
-    case Mips::LoadImm32Reg:
-      return expandLoadImm(Inst, IDLoc, Instructions);
-    case Mips::LoadAddr32Imm:
-      return expandLoadAddressImm(Inst,IDLoc,Instructions);
-    case Mips::LoadAddr32Reg:
-      return expandLoadAddressReg(Inst,IDLoc,Instructions);
-    }
+                                       SmallVectorImpl<MCInst> &Instructions) {
+  switch (Inst.getOpcode()) {
+  case Mips::LoadImm32Reg:
+    return expandLoadImm(Inst, IDLoc, Instructions);
+  case Mips::LoadAddr32Imm:
+    return expandLoadAddressImm(Inst, IDLoc, Instructions);
+  case Mips::LoadAddr32Reg:
+    return expandLoadAddressReg(Inst, IDLoc, Instructions);
+  }
 }
 
 void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
-                                  SmallVectorImpl<MCInst> &Instructions){
+                                  SmallVectorImpl<MCInst> &Instructions) {
   MCInst tmpInst;
   const MCOperand &ImmOp = Inst.getOperand(1);
   assert(ImmOp.isImm() && "expected immediate operand kind");
@@ -424,26 +494,24 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 
   int ImmValue = ImmOp.getImm();
   tmpInst.setLoc(IDLoc);
-  if ( 0 <= ImmValue && ImmValue <= 65535) {
-    // for 0 <= j <= 65535.
+  if (0 <= ImmValue && ImmValue <= 65535) {
+    // For 0 <= j <= 65535.
     // li d,j => ori d,$zero,j
     tmpInst.setOpcode(Mips::ORi);
     tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
-    tmpInst.addOperand(
-              MCOperand::CreateReg(Mips::ZERO));
+    tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
     tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
     Instructions.push_back(tmpInst);
-  } else if ( ImmValue < 0 && ImmValue >= -32768) {
-    // for -32768 <= j < 0.
+  } else if (ImmValue < 0 && ImmValue >= -32768) {
+    // For -32768 <= j < 0.
     // li d,j => addiu d,$zero,j
     tmpInst.setOpcode(Mips::ADDiu);
     tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
-    tmpInst.addOperand(
-              MCOperand::CreateReg(Mips::ZERO));
+    tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
     tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
     Instructions.push_back(tmpInst);
   } else {
-    // for any other value of j that is representable as a 32-bit integer.
+    // For any other value of j that is representable as a 32-bit integer.
     // li d,j => lui d,hi16(j)
     //           ori d,d,lo16(j)
     tmpInst.setOpcode(Mips::LUi);
@@ -461,7 +529,7 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 }
 
 void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
-                                         SmallVectorImpl<MCInst> &Instructions){
+                                       SmallVectorImpl<MCInst> &Instructions) {
   MCInst tmpInst;
   const MCOperand &ImmOp = Inst.getOperand(2);
   assert(ImmOp.isImm() && "expected immediate operand kind");
@@ -470,19 +538,19 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
   const MCOperand &DstRegOp = Inst.getOperand(0);
   assert(DstRegOp.isReg() && "expected register operand kind");
   int ImmValue = ImmOp.getImm();
-  if ( -32768 <= ImmValue && ImmValue <= 65535) {
-    //for -32768 <= j <= 65535.
-    //la d,j(s) => addiu d,s,j
+  if (-32768 <= ImmValue && ImmValue <= 65535) {
+    // For -32768 <= j <= 65535.
+    // la d,j(s) => addiu d,s,j
     tmpInst.setOpcode(Mips::ADDiu);
     tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
     tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
     tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
     Instructions.push_back(tmpInst);
   } else {
-    //for any other value of j that is representable as a 32-bit integer.
-    //la d,j(s) => lui d,hi16(j)
-    //             ori d,d,lo16(j)
-    //             addu d,d,s
+    // For any other value of j that is representable as a 32-bit integer.
+    // la d,j(s) => lui d,hi16(j)
+    //              ori d,d,lo16(j)
+    //              addu d,d,s
     tmpInst.setOpcode(Mips::LUi);
     tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
     tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
@@ -503,26 +571,25 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
 }
 
 void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
-                                         SmallVectorImpl<MCInst> &Instructions){
+                                       SmallVectorImpl<MCInst> &Instructions) {
   MCInst tmpInst;
   const MCOperand &ImmOp = Inst.getOperand(1);
   assert(ImmOp.isImm() && "expected immediate operand kind");
   const MCOperand &RegOp = Inst.getOperand(0);
   assert(RegOp.isReg() && "expected register operand kind");
   int ImmValue = ImmOp.getImm();
-  if ( -32768 <= ImmValue && ImmValue <= 65535) {
-    //for -32768 <= j <= 65535.
-    //la d,j => addiu d,$zero,j
+  if (-32768 <= ImmValue && ImmValue <= 65535) {
+    // For -32768 <= j <= 65535.
+    // la d,j => addiu d,$zero,j
     tmpInst.setOpcode(Mips::ADDiu);
     tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
-    tmpInst.addOperand(
-              MCOperand::CreateReg(Mips::ZERO));
+    tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
     tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
     Instructions.push_back(tmpInst);
   } else {
-    //for any other value of j that is representable as a 32-bit integer.
-    //la d,j => lui d,hi16(j)
-    //          ori d,d,lo16(j)
+    // For any other value of j that is representable as a 32-bit integer.
+    // la d,j => lui d,hi16(j)
+    //           ori d,d,lo16(j)
     tmpInst.setOpcode(Mips::LUi);
     tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
     tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
@@ -536,28 +603,105 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
   }
 }
 
+void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
+          SmallVectorImpl<MCInst> &Instructions, bool isLoad, bool isImmOpnd) {
+  const MCSymbolRefExpr *SR;
+  MCInst TempInst;
+  unsigned ImmOffset, HiOffset, LoOffset;
+  const MCExpr *ExprOffset;
+  unsigned TmpRegNum;
+  unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID
+                             : Mips::CPURegsRegClassID, getATReg());
+  // 1st operand is either the source or destination register.
+  assert(Inst.getOperand(0).isReg() && "expected register operand kind");
+  unsigned RegOpNum = Inst.getOperand(0).getReg();
+  // 2nd operand is the base register.
+  assert(Inst.getOperand(1).isReg() && "expected register operand kind");
+  unsigned BaseRegNum = Inst.getOperand(1).getReg();
+  // 3rd operand is either an immediate or expression.
+  if (isImmOpnd) {
+    assert(Inst.getOperand(2).isImm() && "expected immediate operand kind");
+    ImmOffset = Inst.getOperand(2).getImm();
+    LoOffset = ImmOffset & 0x0000ffff;
+    HiOffset = (ImmOffset & 0xffff0000) >> 16;
+    // If msb of LoOffset is 1(negative number) we must increment HiOffset.
+    if (LoOffset & 0x8000)
+      HiOffset++;
+  } else
+    ExprOffset = Inst.getOperand(2).getExpr();
+  // All instructions will have the same location.
+  TempInst.setLoc(IDLoc);
+  // 1st instruction in expansion is LUi. For load instruction we can use
+  // the dst register as a temporary if base and dst are different,
+  // but for stores we must use $at.
+  TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum;
+  TempInst.setOpcode(Mips::LUi);
+  TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+  if (isImmOpnd)
+    TempInst.addOperand(MCOperand::CreateImm(HiOffset));
+  else {
+    if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+      SR = static_cast<const MCSymbolRefExpr*>(ExprOffset);
+      const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create(
+          SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI,
+          getContext());
+      TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+    } else {
+      const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi");
+      TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+    }
+  }
+  // Add the instruction to the list.
+  Instructions.push_back(TempInst);
+  // Prepare TempInst for next instruction.
+  TempInst.clear();
+  // Add temp register to base.
+  TempInst.setOpcode(Mips::ADDu);
+  TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+  TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+  TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
+  Instructions.push_back(TempInst);
+  TempInst.clear();
+  // And finaly, create original instruction with low part
+  // of offset and new base.
+  TempInst.setOpcode(Inst.getOpcode());
+  TempInst.addOperand(MCOperand::CreateReg(RegOpNum));
+  TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+  if (isImmOpnd)
+    TempInst.addOperand(MCOperand::CreateImm(LoOffset));
+  else {
+    if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+      const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create(
+          SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO,
+          getContext());
+      TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+    } else {
+      const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo");
+      TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+    }
+  }
+  Instructions.push_back(TempInst);
+  TempInst.clear();
+}
+
 bool MipsAsmParser::
 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                         SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                         MCStreamer &Out, unsigned &ErrorInfo,
                         bool MatchingInlineAsm) {
   MCInst Inst;
+  SmallVector<MCInst, 8> Instructions;
   unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
                                               MatchingInlineAsm);
 
   switch (MatchResult) {
-  default: break;
+  default:
+    break;
   case Match_Success: {
-    if (needsExpansion(Inst)) {
-      SmallVector<MCInst, 4> Instructions;
-      expandInstruction(Inst, IDLoc, Instructions);
-      for(unsigned i =0; i < Instructions.size(); i++){
-        Out.EmitInstruction(Instructions[i]);
-      }
-    } else {
-        Inst.setLoc(IDLoc);
-        Out.EmitInstruction(Inst);
-      }
+    if (processInstruction(Inst, IDLoc, Instructions))
+      return true;
+    for (unsigned i = 0; i < Instructions.size(); i++)
+      Out.EmitInstruction(Instructions[i]);
     return false;
   }
   case Match_MissingFeature:
@@ -569,8 +713,9 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
       if (ErrorInfo >= Operands.size())
         return Error(IDLoc, "too few operands for instruction");
 
-      ErrorLoc = ((MipsOperand*)Operands[ErrorInfo])->getStartLoc();
-      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+      ErrorLoc = ((MipsOperand*) Operands[ErrorInfo])->getStartLoc();
+      if (ErrorLoc == SMLoc())
+        ErrorLoc = IDLoc;
     }
 
     return Error(ErrorLoc, "invalid operand for instruction");
@@ -621,10 +766,10 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
     .Case("t9",  25)
     .Default(-1);
 
-  // Although SGI documentation just cut out t0-t3 for n32/n64,
+  // Although SGI documentation just cuts out t0-t3 for n32/n64,
   // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7
   // We are supporting both cases, so for t0-t3 we'll just push them to t4-t7.
-  if (isMips64() && 8 <= CC  && CC <= 11)
+  if (isMips64() && 8 <= CC && CC <= 11)
     CC += 4;
 
   if (CC == -1 && isMips64())
@@ -640,19 +785,23 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
 
   return CC;
 }
+
 int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
 
+  if (Name.equals("fcc0"))
+    return Mips::FCC0;
+
   int CC;
   CC = matchCPURegisterName(Name);
   if (CC != -1)
-    return matchRegisterByNumber(CC,is64BitReg?Mips::CPU64RegsRegClassID:
-                               Mips::CPURegsRegClassID);
+    return matchRegisterByNumber(CC, is64BitReg ? Mips::CPU64RegsRegClassID
+                                                : Mips::CPURegsRegClassID);
 
   if (Name[0] == 'f') {
     StringRef NumString = Name.substr(1);
     unsigned IntVal;
-    if( NumString.getAsInteger(10, IntVal))
-      return -1; // not integer
+    if (NumString.getAsInteger(10, IntVal))
+      return -1; // This is not an integer.
     if (IntVal > 31)
       return -1;
 
@@ -661,18 +810,19 @@ int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
     if (Format == FP_FORMAT_S || Format == FP_FORMAT_W)
       return getReg(Mips::FGR32RegClassID, IntVal);
     if (Format == FP_FORMAT_D) {
-      if(isFP64()) {
+      if (isFP64()) {
         return getReg(Mips::FGR64RegClassID, IntVal);
       }
-      // only even numbers available as register pairs
-      if (( IntVal > 31) || (IntVal%2 !=  0))
+      // Only even numbers available as register pairs.
+      if ((IntVal > 31) || (IntVal % 2 != 0))
         return -1;
-      return getReg(Mips::AFGR64RegClassID, IntVal/2);
+      return getReg(Mips::AFGR64RegClassID, IntVal / 2);
     }
   }
 
   return -1;
 }
+
 void MipsAsmParser::setDefaultFpFormat() {
 
   if (isMips64() || isFP64())
@@ -692,6 +842,7 @@ bool MipsAsmParser::requestsDoubleOperand(StringRef Mnemonic){
 
   return IsDouble;
 }
+
 void MipsAsmParser::setFpFormat(StringRef Format) {
 
   FpFormat = StringSwitch<FpFormatTy>(Format.lower())
@@ -714,7 +865,7 @@ int MipsAsmParser::getATReg() {
   return Options.getATRegNum();
 }
 
-unsigned MipsAsmParser::getReg(int RC,int RegNo) {
+unsigned MipsAsmParser::getReg(int RC, int RegNo) {
   return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo);
 }
 
@@ -735,14 +886,12 @@ int MipsAsmParser::tryParseRegister(bool is64BitReg) {
     RegNum = matchRegisterName(lowerCase, is64BitReg);
   } else if (Tok.is(AsmToken::Integer))
     RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()),
-                                   is64BitReg ? Mips::CPU64RegsRegClassID
-                                              : Mips::CPURegsRegClassID);
+        is64BitReg ? Mips::CPU64RegsRegClassID : Mips::CPURegsRegClassID);
   return RegNum;
 }
 
-bool MipsAsmParser::
-  tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                          bool is64BitReg){
+bool MipsAsmParser::tryParseRegisterOperand(
+             SmallVectorImpl<MCParsedAsmOperand*> &Operands, bool is64BitReg) {
 
   SMLoc S = Parser.getTok().getLoc();
   int RegNo = -1;
@@ -752,7 +901,7 @@ bool MipsAsmParser::
     return true;
 
   Operands.push_back(MipsOperand::CreateReg(RegNo, S,
-      Parser.getTok().getLoc()));
+                                            Parser.getTok().getLoc()));
   Parser.Lex(); // Eat register token.
   return false;
 }
@@ -775,19 +924,19 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
     Error(Parser.getTok().getLoc(), "unexpected token in operand");
     return true;
   case AsmToken::Dollar: {
-    // parse register
+    // Parse the register.
     SMLoc S = Parser.getTok().getLoc();
     Parser.Lex(); // Eat dollar token.
-    // parse register operand
+    // Parse the register operand.
     if (!tryParseRegisterOperand(Operands, isMips64())) {
       if (getLexer().is(AsmToken::LParen)) {
-        // check if it is indexed addressing operand
+        // Check if it is indexed addressing operand.
         Operands.push_back(MipsOperand::CreateToken("(", S));
-        Parser.Lex(); // eat parenthesis
+        Parser.Lex(); // Eat the parenthesis.
         if (getLexer().isNot(AsmToken::Dollar))
           return true;
 
-        Parser.Lex(); // eat dollar
+        Parser.Lex(); // Eat the dollar
         if (tryParseRegisterOperand(Operands, isMips64()))
           return true;
 
@@ -800,7 +949,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
       }
       return false;
     }
-    // maybe it is a symbol reference
+    // Maybe it is a symbol reference.
     StringRef Identifier;
     if (Parser.parseIdentifier(Identifier))
       return true;
@@ -809,7 +958,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
 
     MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier);
 
-    // Otherwise create a symbol ref.
+    // Otherwise create a symbol reference.
     const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
                                                 getContext());
 
@@ -817,12 +966,17 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
     return false;
   }
   case AsmToken::Identifier:
+    // Look for the existing symbol, we should check if
+    // we need to assigne the propper RegisterKind.
+    if (searchSymbolAlias(Operands, MipsOperand::Kind_None))
+      return false;
+    // Else drop to expression parsing.
   case AsmToken::LParen:
   case AsmToken::Minus:
   case AsmToken::Plus:
   case AsmToken::Integer:
   case AsmToken::String: {
-     // quoted label names
+    // Quoted label names.
     const MCExpr *IdVal;
     SMLoc S = Parser.getTok().getLoc();
     if (getParser().parseExpression(IdVal))
@@ -832,9 +986,9 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
     return false;
   }
   case AsmToken::Percent: {
-    // it is a symbol reference or constant expression
+    // It is a symbol reference or constant expression.
     const MCExpr *IdVal;
-    SMLoc S = Parser.getTok().getLoc(); // start location of the operand
+    SMLoc S = Parser.getTok().getLoc(); // Start location of the operand.
     if (parseRelocOperand(IdVal))
       return true;
 
@@ -847,129 +1001,200 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
   return true;
 }
 
-bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
+const MCExpr* MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
+                                               StringRef RelocStr) {
+  const MCExpr *Res;
+  // Check the type of the expression.
+  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Expr)) {
+    // It's a constant, evaluate lo or hi value.
+    if (RelocStr == "lo") {
+      short Val = MCE->getValue();
+      Res = MCConstantExpr::Create(Val, getContext());
+    } else if (RelocStr == "hi") {
+      int Val = MCE->getValue();
+      int LoSign = Val & 0x8000;
+      Val = (Val & 0xffff0000) >> 16;
+      // Lower part is treated as a signed int, so if it is negative
+      // we must add 1 to the hi part to compensate.
+      if (LoSign)
+        Val++;
+      Res = MCConstantExpr::Create(Val, getContext());
+    } else {
+      llvm_unreachable("Invalid RelocStr value");
+    }
+    return Res;
+  }
+
+  if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(Expr)) {
+    // It's a symbol, create a symbolic expression from the symbol.
+    StringRef Symbol = MSRE->getSymbol().getName();
+    MCSymbolRefExpr::VariantKind VK = getVariantKind(RelocStr);
+    Res = MCSymbolRefExpr::Create(Symbol, VK, getContext());
+    return Res;
+  }
+
+  if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+    const MCExpr *LExp = evaluateRelocExpr(BE->getLHS(), RelocStr);
+    const MCExpr *RExp = evaluateRelocExpr(BE->getRHS(), RelocStr);
+    Res = MCBinaryExpr::Create(BE->getOpcode(), LExp, RExp, getContext());
+    return Res;
+  }
 
-  Parser.Lex(); // eat % token
-  const AsmToken &Tok = Parser.getTok(); // get next token, operation
+  if (const MCUnaryExpr *UN = dyn_cast<MCUnaryExpr>(Expr)) {
+    const MCExpr *UnExp = evaluateRelocExpr(UN->getSubExpr(), RelocStr);
+    Res = MCUnaryExpr::Create(UN->getOpcode(), UnExp, getContext());
+    return Res;
+  }
+  // Just return the original expression.
+  return Expr;
+}
+
+bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
+
+  switch (Expr->getKind()) {
+  case MCExpr::Constant:
+    return true;
+  case MCExpr::SymbolRef:
+    return (cast<MCSymbolRefExpr>(Expr)->getKind() != MCSymbolRefExpr::VK_None);
+  case MCExpr::Binary:
+    if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+      if (!isEvaluated(BE->getLHS()))
+        return false;
+      return isEvaluated(BE->getRHS());
+    }
+  case MCExpr::Unary:
+    return isEvaluated(cast<MCUnaryExpr>(Expr)->getSubExpr());
+  default:
+    return false;
+  }
+  return false;
+}
+
+bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
+  Parser.Lex(); // Eat the % token.
+  const AsmToken &Tok = Parser.getTok(); // Get next token, operation.
   if (Tok.isNot(AsmToken::Identifier))
     return true;
 
   std::string Str = Tok.getIdentifier().str();
 
-  Parser.Lex(); // eat identifier
-  // now make expression from the rest of the operand
+  Parser.Lex(); // Eat the identifier.
+  // Now make an expression from the rest of the operand.
   const MCExpr *IdVal;
   SMLoc EndLoc;
 
   if (getLexer().getKind() == AsmToken::LParen) {
     while (1) {
-      Parser.Lex(); // eat '(' token
+      Parser.Lex(); // Eat the '(' token.
       if (getLexer().getKind() == AsmToken::Percent) {
-        Parser.Lex(); // eat % token
+        Parser.Lex(); // Eat the % token.
         const AsmToken &nextTok = Parser.getTok();
         if (nextTok.isNot(AsmToken::Identifier))
           return true;
         Str += "(%";
         Str += nextTok.getIdentifier();
-        Parser.Lex(); // eat identifier
+        Parser.Lex(); // Eat the identifier.
         if (getLexer().getKind() != AsmToken::LParen)
           return true;
       } else
         break;
     }
-    if (getParser().parseParenExpression(IdVal,EndLoc))
+    if (getParser().parseParenExpression(IdVal, EndLoc))
       return true;
 
     while (getLexer().getKind() == AsmToken::RParen)
-      Parser.Lex(); // eat ')' token
+      Parser.Lex(); // Eat the ')' token.
 
   } else
-    return true; // parenthesis must follow reloc operand
-
-  // Check the type of the expression
-  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) {
-    // it's a constant, evaluate lo or hi value
-    int Val = MCE->getValue();
-    if (Str == "lo") {
-      Val = Val & 0xffff;
-    } else if (Str == "hi") {
-      int LoSign = Val & 0x8000;
-      Val = (Val & 0xffff0000) >> 16;
-      //lower part is treated as signed int, so if it is negative
-      //we must add 1 to hi part to compensate
-      if (LoSign)
-        Val++;
-    }
-    Res = MCConstantExpr::Create(Val, getContext());
-    return false;
-  }
+    return true; // Parenthesis must follow the relocation operand.
 
-  if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) {
-    // it's a symbol, create symbolic expression from symbol
-    StringRef Symbol = MSRE->getSymbol().getName();
-    MCSymbolRefExpr::VariantKind VK = getVariantKind(Str);
-    Res = MCSymbolRefExpr::Create(Symbol,VK,getContext());
-    return false;
-  }
-  return true;
+  Res = evaluateRelocExpr(IdVal, Str);
+  return false;
 }
 
 bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                   SMLoc &EndLoc) {
-
   StartLoc = Parser.getTok().getLoc();
   RegNo = tryParseRegister(isMips64());
   EndLoc = Parser.getTok().getLoc();
-  return (RegNo == (unsigned)-1);
+  return (RegNo == (unsigned) -1);
 }
 
-bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) {
-
+bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
   SMLoc S;
+  bool Result = true;
+
+  while (getLexer().getKind() == AsmToken::LParen)
+    Parser.Lex();
 
-  switch(getLexer().getKind()) {
+  switch (getLexer().getKind()) {
   default:
     return true;
+  case AsmToken::Identifier:
+  case AsmToken::LParen:
   case AsmToken::Integer:
   case AsmToken::Minus:
   case AsmToken::Plus:
-    return (getParser().parseExpression(Res));
+    if (isParenExpr)
+      Result = getParser().parseParenExpression(Res, S);
+    else
+      Result = (getParser().parseExpression(Res));
+    while (getLexer().getKind() == AsmToken::RParen)
+      Parser.Lex();
+    break;
   case AsmToken::Percent:
-    return parseRelocOperand(Res);
-  case AsmToken::LParen:
-    return false;  // it's probably assuming 0
+    Result = parseRelocOperand(Res);
   }
-  return true;
+  return Result;
 }
 
 MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
-               SmallVectorImpl<MCParsedAsmOperand*>&Operands) {
+                               SmallVectorImpl<MCParsedAsmOperand*>&Operands) {
 
   const MCExpr *IdVal = 0;
   SMLoc S;
-  // first operand is the offset
+  bool isParenExpr = false;
+  // First operand is the offset.
   S = Parser.getTok().getLoc();
 
-  if (parseMemOffset(IdVal))
-    return MatchOperand_ParseFail;
+  if (getLexer().getKind() == AsmToken::LParen) {
+    Parser.Lex();
+    isParenExpr = true;
+  }
 
-  const AsmToken &Tok = Parser.getTok(); // get next token
-  if (Tok.isNot(AsmToken::LParen)) {
-    MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
-    if (Mnemonic->getToken() == "la") {
-      SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1);
-      Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
-      return MatchOperand_Success;
+  if (getLexer().getKind() != AsmToken::Dollar) {
+    if (parseMemOffset(IdVal, isParenExpr))
+      return MatchOperand_ParseFail;
+
+    const AsmToken &Tok = Parser.getTok(); // Get the next token.
+    if (Tok.isNot(AsmToken::LParen)) {
+      MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
+      if (Mnemonic->getToken() == "la") {
+        SMLoc E = SMLoc::getFromPointer(
+            Parser.getTok().getLoc().getPointer() - 1);
+        Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
+        return MatchOperand_Success;
+      }
+      if (Tok.is(AsmToken::EndOfStatement)) {
+        SMLoc E = SMLoc::getFromPointer(
+            Parser.getTok().getLoc().getPointer() - 1);
+
+        // Zero register assumed, add a memory operand with ZERO as its base.
+        Operands.push_back(MipsOperand::CreateMem(isMips64() ? Mips::ZERO_64
+                                                             : Mips::ZERO,
+                           IdVal, S, E));
+        return MatchOperand_Success;
+      }
+      Error(Parser.getTok().getLoc(), "'(' expected");
+      return MatchOperand_ParseFail;
     }
-    Error(Parser.getTok().getLoc(), "'(' expected");
-    return MatchOperand_ParseFail;
-  }
 
-  Parser.Lex(); // Eat '(' token.
+    Parser.Lex(); // Eat the '(' token.
+  }
 
-  const AsmToken &Tok1 = Parser.getTok(); // get next token
+  const AsmToken &Tok1 = Parser.getTok(); // Get next token
   if (Tok1.is(AsmToken::Dollar)) {
-    Parser.Lex(); // Eat '$' token.
+    Parser.Lex(); // Eat the '$' token.
     if (tryParseRegisterOperand(Operands, isMips64())) {
       Error(Parser.getTok().getLoc(), "unexpected token in operand");
       return MatchOperand_ParseFail;
@@ -980,7 +1205,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
     return MatchOperand_ParseFail;
   }
 
-  const AsmToken &Tok2 = Parser.getTok(); // get next token
+  const AsmToken &Tok2 = Parser.getTok(); // Get next token.
   if (Tok2.isNot(AsmToken::RParen)) {
     Error(Parser.getTok().getLoc(), "')' expected");
     return MatchOperand_ParseFail;
@@ -988,17 +1213,26 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
 
   SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
 
-  Parser.Lex(); // Eat ')' token.
+  Parser.Lex(); // Eat the ')' token.
 
   if (IdVal == 0)
     IdVal = MCConstantExpr::Create(0, getContext());
 
-  // now replace register operand with the mem operand
+  // Replace the register operand with the memory operand.
   MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
   int RegNo = op->getReg();
-  // remove register from operands
+  // Remove the register from the operands.
   Operands.pop_back();
-  // and add memory operand
+  // Add the memory operand.
+  if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(IdVal)) {
+    int64_t Imm;
+    if (IdVal->EvaluateAsAbsolute(Imm))
+      IdVal = MCConstantExpr::Create(Imm, getContext());
+    else if (BE->getLHS()->getKind() != MCExpr::SymbolRef)
+      IdVal = MCBinaryExpr::Create(BE->getOpcode(), BE->getRHS(), BE->getLHS(),
+                                   getContext());
+  }
+
   Operands.push_back(MipsOperand::CreateMem(RegNo, IdVal, S, E));
   delete op;
   return MatchOperand_Success;
@@ -1009,13 +1243,18 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
   if (!isMips64())
     return MatchOperand_NoMatch;
-  // if the first token is not '$' we have an error
+  if (getLexer().getKind() == AsmToken::Identifier) {
+    if (searchSymbolAlias(Operands, MipsOperand::Kind_CPU64Regs))
+      return MatchOperand_Success;
+    return MatchOperand_NoMatch;
+  }
+  // If the first token is not '$', we have an error.
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
 
   Parser.Lex(); // Eat $
-  if(!tryParseRegisterOperand(Operands, true)) {
-    // set the proper register kind
+  if (!tryParseRegisterOperand(Operands, true)) {
+    // Set the proper register kind.
     MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
     op->setRegKind(MipsOperand::Kind_CPU64Regs);
     return MatchOperand_Success;
@@ -1023,16 +1262,59 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   return MatchOperand_NoMatch;
 }
 
+bool MipsAsmParser::searchSymbolAlias(
+    SmallVectorImpl<MCParsedAsmOperand*> &Operands, unsigned RegisterKind) {
+
+  MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
+  if (Sym) {
+    SMLoc S = Parser.getTok().getLoc();
+    const MCExpr *Expr;
+    if (Sym->isVariable())
+      Expr = Sym->getVariableValue();
+    else
+      return false;
+    if (Expr->getKind() == MCExpr::SymbolRef) {
+      const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+      const StringRef DefSymbol = Ref->getSymbol().getName();
+      if (DefSymbol.startswith("$")) {
+        // Lookup for the register with the corresponding name.
+        int RegNum = matchRegisterName(DefSymbol.substr(1), isMips64());
+        if (RegNum > -1) {
+          Parser.Lex();
+          MipsOperand *op = MipsOperand::CreateReg(RegNum, S,
+                                                   Parser.getTok().getLoc());
+          op->setRegKind((MipsOperand::RegisterKind) RegisterKind);
+          Operands.push_back(op);
+          return true;
+        }
+      }
+    } else if (Expr->getKind() == MCExpr::Constant) {
+      Parser.Lex();
+      const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr);
+      MipsOperand *op = MipsOperand::CreateImm(Const, S,
+          Parser.getTok().getLoc());
+      Operands.push_back(op);
+      return true;
+    }
+  }
+  return false;
+}
+
 MipsAsmParser::OperandMatchResultTy
 MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
-  // if the first token is not '$' we have an error
+  if (getLexer().getKind() == AsmToken::Identifier) {
+    if (searchSymbolAlias(Operands, MipsOperand::Kind_CPURegs))
+      return MatchOperand_Success;
+    return MatchOperand_NoMatch;
+  }
+  // If the first token is not '$' we have an error.
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
 
   Parser.Lex(); // Eat $
-  if(!tryParseRegisterOperand(Operands, false)) {
-    // set the propper register kind
+  if (!tryParseRegisterOperand(Operands, false)) {
+    // Set the proper register kind.
     MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
     op->setRegKind(MipsOperand::Kind_CPURegs);
     return MatchOperand_Success;
@@ -1046,87 +1328,88 @@ MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   if (isMips64())
     return MatchOperand_NoMatch;
 
-  // if the first token is not '$' we have error
+  // If the first token is not '$' we have error.
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
   SMLoc S = Parser.getTok().getLoc();
-  Parser.Lex(); // Eat $
+  Parser.Lex(); // Eat the '$'.
 
-  const AsmToken &Tok = Parser.getTok(); // get next token
+  const AsmToken &Tok = Parser.getTok(); // Get the next token.
   if (Tok.isNot(AsmToken::Integer))
     return MatchOperand_NoMatch;
 
   unsigned RegNum = Tok.getIntVal();
-  // at the moment only hwreg29 is supported
+  // At the moment only hwreg29 is supported.
   if (RegNum != 29)
     return MatchOperand_ParseFail;
 
   MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S,
-        Parser.getTok().getLoc());
+      Parser.getTok().getLoc());
   op->setRegKind(MipsOperand::Kind_HWRegs);
   Operands.push_back(op);
 
-  Parser.Lex(); // Eat reg number
+  Parser.Lex(); // Eat the register number.
   return MatchOperand_Success;
 }
 
 MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseHW64Regs(
+    SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
   if (!isMips64())
     return MatchOperand_NoMatch;
-    //if the first token is not '$' we have error
+  // If the first token is not '$' we have an error.
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
   SMLoc S = Parser.getTok().getLoc();
   Parser.Lex(); // Eat $
 
-  const AsmToken &Tok = Parser.getTok(); // get next token
+  const AsmToken &Tok = Parser.getTok(); // Get the next token.
   if (Tok.isNot(AsmToken::Integer))
     return MatchOperand_NoMatch;
 
   unsigned RegNum = Tok.getIntVal();
-  // at the moment only hwreg29 is supported
+  // At the moment only hwreg29 is supported.
   if (RegNum != 29)
     return MatchOperand_ParseFail;
 
   MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S,
-        Parser.getTok().getLoc());
+                                           Parser.getTok().getLoc());
   op->setRegKind(MipsOperand::Kind_HW64Regs);
   Operands.push_back(op);
 
-  Parser.Lex(); // Eat reg number
+  Parser.Lex(); // Eat the register number.
   return MatchOperand_Success;
 }
 
 MipsAsmParser::OperandMatchResultTy
 MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   unsigned RegNum;
-  //if the first token is not '$' we have error
+  // If the first token is not '$' we have an error.
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
   SMLoc S = Parser.getTok().getLoc();
-  Parser.Lex(); // Eat $
+  Parser.Lex(); // Eat the '$'
 
-  const AsmToken &Tok = Parser.getTok(); // get next token
+  const AsmToken &Tok = Parser.getTok(); // Get next token.
   if (Tok.is(AsmToken::Integer)) {
     RegNum = Tok.getIntVal();
-    // at the moment only fcc0 is supported
+    // At the moment only fcc0 is supported.
     if (RegNum != 0)
       return MatchOperand_ParseFail;
   } else if (Tok.is(AsmToken::Identifier)) {
-    // at the moment only fcc0 is supported
+    // At the moment only fcc0 is supported.
     if (Tok.getIdentifier() != "fcc0")
       return MatchOperand_ParseFail;
   } else
     return MatchOperand_NoMatch;
 
   MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S,
-        Parser.getTok().getLoc());
+                                           Parser.getTok().getLoc());
   op->setRegKind(MipsOperand::Kind_CCRRegs);
   Operands.push_back(op);
 
-  Parser.Lex(); // Eat reg number
+  Parser.Lex(); // Eat the register number.
   return MatchOperand_Success;
 }
 
@@ -1158,23 +1441,23 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
 
 static int ConvertCcString(StringRef CondString) {
   int CC = StringSwitch<unsigned>(CondString)
-      .Case(".f",    0)
-      .Case(".un",   1)
-      .Case(".eq",   2)
-      .Case(".ueq",  3)
-      .Case(".olt",  4)
-      .Case(".ult",  5)
-      .Case(".ole",  6)
-      .Case(".ule",  7)
-      .Case(".sf",   8)
-      .Case(".ngle", 9)
-      .Case(".seq",  10)
-      .Case(".ngl",  11)
-      .Case(".lt",   12)
-      .Case(".nge",  13)
-      .Case(".le",   14)
-      .Case(".ngt",  15)
-      .Default(-1);
+    .Case(".f",    0)
+    .Case(".un",   1)
+    .Case(".eq",   2)
+    .Case(".ueq",  3)
+    .Case(".olt",  4)
+    .Case(".ult",  5)
+    .Case(".ole",  6)
+    .Case(".ule",  7)
+    .Case(".sf",   8)
+    .Case(".ngle", 9)
+    .Case(".seq",  10)
+    .Case(".ngl",  11)
+    .Case(".lt",   12)
+    .Case(".nge",  13)
+    .Case(".le",   14)
+    .Case(".ngt",  15)
+    .Default(-1);
 
   return CC;
 }
@@ -1182,16 +1465,16 @@ static int ConvertCcString(StringRef CondString) {
 bool MipsAsmParser::
 parseMathOperation(StringRef Name, SMLoc NameLoc,
                    SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-  // split the format
+  // Split the format.
   size_t Start = Name.find('.'), Next = Name.rfind('.');
   StringRef Format1 = Name.slice(Start, Next);
-  // and add the first format to the operands
+  // Add the first format to the operands.
   Operands.push_back(MipsOperand::CreateToken(Format1, NameLoc));
-  // now for the second format
+  // Now for the second format.
   StringRef Format2 = Name.slice(Next, StringRef::npos);
   Operands.push_back(MipsOperand::CreateToken(Format2, NameLoc));
 
-  // set the format for the first register
+  // Set the format for the first register.
   setFpFormat(Format1);
 
   // Read the remaining operands.
@@ -1207,11 +1490,10 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
       SMLoc Loc = getLexer().getLoc();
       Parser.eatToEndOfStatement();
       return Error(Loc, "unexpected token in argument list");
-
     }
-    Parser.Lex();  // Eat the comma.
+    Parser.Lex(); // Eat the comma.
 
-    //set the format for the first register
+    // Set the format for the first register
     setFpFormat(Format2);
 
     // Parse and remember the operand.
@@ -1228,7 +1510,7 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
     return Error(Loc, "unexpected token in argument list");
   }
 
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
 
@@ -1236,13 +1518,12 @@ bool MipsAsmParser::
 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   StringRef Mnemonic;
-  // floating point instructions: should register be treated as double?
+  // Floating point instructions: Should the register be treated as a double?
   if (requestsDoubleOperand(Name)) {
     setFpFormat(FP_FORMAT_D);
-  Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
-  Mnemonic = Name;
-  }
-  else {
+    Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
+    Mnemonic = Name;
+  } else {
     setDefaultFpFormat();
     // Create the leading tokens for the mnemonic, split by '.' characters.
     size_t Start = 0, Next = Name.find('.');
@@ -1251,30 +1532,30 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
     Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc));
 
     if (Next != StringRef::npos) {
-      // there is a format token in mnemonic
-      // StringRef Rest = Name.slice(Next, StringRef::npos);
-      size_t Dot = Name.find('.', Next+1);
+      // There is a format token in mnemonic.
+      size_t Dot = Name.find('.', Next + 1);
       StringRef Format = Name.slice(Next, Dot);
-      if (Dot == StringRef::npos) //only one '.' in a string, it's a format
+      if (Dot == StringRef::npos) // Only one '.' in a string, it's a format.
         Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
       else {
-        if (Name.startswith("c.")){
-          // floating point compare, add '.' and immediate represent for cc
+        if (Name.startswith("c.")) {
+          // Floating point compare, add '.' and immediate represent for cc.
           Operands.push_back(MipsOperand::CreateToken(".", NameLoc));
           int Cc = ConvertCcString(Format);
           if (Cc == -1) {
             return Error(NameLoc, "Invalid conditional code");
           }
           SMLoc E = SMLoc::getFromPointer(
-              Parser.getTok().getLoc().getPointer() -1 );
-          Operands.push_back(MipsOperand::CreateImm(
-              MCConstantExpr::Create(Cc, getContext()), NameLoc, E));
+              Parser.getTok().getLoc().getPointer() - 1);
+          Operands.push_back(
+              MipsOperand::CreateImm(MCConstantExpr::Create(Cc, getContext()),
+                                     NameLoc, E));
         } else {
           // trunc, ceil, floor ...
           return parseMathOperation(Name, NameLoc, Operands);
         }
 
-        // the rest is a format
+        // The rest is a format.
         Format = Name.slice(Dot, StringRef::npos);
         Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
       }
@@ -1292,8 +1573,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
       return Error(Loc, "unexpected token in argument list");
     }
 
-    while (getLexer().is(AsmToken::Comma) ) {
-      Parser.Lex();  // Eat the comma.
+    while (getLexer().is(AsmToken::Comma)) {
+      Parser.Lex(); // Eat the comma.
 
       // Parse and remember the operand.
       if (ParseOperand(Operands, Name)) {
@@ -1310,48 +1591,47 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
     return Error(Loc, "unexpected token in argument list");
   }
 
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
 
 bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
-   SMLoc Loc = getLexer().getLoc();
-   Parser.eatToEndOfStatement();
-   return Error(Loc, ErrorMsg);
+  SMLoc Loc = getLexer().getLoc();
+  Parser.eatToEndOfStatement();
+  return Error(Loc, ErrorMsg);
 }
 
 bool MipsAsmParser::parseSetNoAtDirective() {
-  // line should look like:
-  //  .set noat
-  // set at reg to 0
+  // Line should look like: ".set noat".
+  // set at reg to 0.
   Options.setATReg(0);
   // eat noat
   Parser.Lex();
-  // if this is not the end of the statement, report error
+  // If this is not the end of the statement, report an error.
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     reportParseError("unexpected token in statement");
     return false;
   }
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
+
 bool MipsAsmParser::parseSetAtDirective() {
-  // line can be
-  //  .set at - defaults to $1
+  // Line can be .set at - defaults to $1
   // or .set at=$reg
   int AtRegNo;
   getParser().Lex();
   if (getLexer().is(AsmToken::EndOfStatement)) {
     Options.setATReg(1);
-    Parser.Lex(); // Consume the EndOfStatement
+    Parser.Lex(); // Consume the EndOfStatement.
     return false;
   } else if (getLexer().is(AsmToken::Equal)) {
-    getParser().Lex(); //eat '='
+    getParser().Lex(); // Eat the '='.
     if (getLexer().isNot(AsmToken::Dollar)) {
       reportParseError("unexpected token in statement");
       return false;
     }
-    Parser.Lex(); // eat '$'
+    Parser.Lex(); // Eat the '$'.
     const AsmToken &Reg = Parser.getTok();
     if (Reg.is(AsmToken::Identifier)) {
       AtRegNo = matchCPURegisterName(Reg.getIdentifier());
@@ -1362,7 +1642,7 @@ bool MipsAsmParser::parseSetAtDirective() {
       return false;
     }
 
-    if ( AtRegNo < 1 || AtRegNo > 31) {
+    if (AtRegNo < 1 || AtRegNo > 31) {
       reportParseError("unexpected token in statement");
       return false;
     }
@@ -1371,13 +1651,13 @@ bool MipsAsmParser::parseSetAtDirective() {
       reportParseError("unexpected token in statement");
       return false;
     }
-    getParser().Lex(); //eat reg
+    getParser().Lex(); // Eat the register.
 
     if (getLexer().isNot(AsmToken::EndOfStatement)) {
       reportParseError("unexpected token in statement");
       return false;
-     }
-    Parser.Lex(); // Consume the EndOfStatement
+    }
+    Parser.Lex(); // Consume the EndOfStatement.
     return false;
   } else {
     reportParseError("unexpected token in statement");
@@ -1387,43 +1667,43 @@ bool MipsAsmParser::parseSetAtDirective() {
 
 bool MipsAsmParser::parseSetReorderDirective() {
   Parser.Lex();
-  // if this is not the end of the statement, report error
+  // If this is not the end of the statement, report an error.
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     reportParseError("unexpected token in statement");
     return false;
   }
   Options.setReorder();
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
 
 bool MipsAsmParser::parseSetNoReorderDirective() {
-    Parser.Lex();
-    // if this is not the end of the statement, report error
-    if (getLexer().isNot(AsmToken::EndOfStatement)) {
-      reportParseError("unexpected token in statement");
-      return false;
-    }
-    Options.setNoreorder();
-    Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex();
+  // If this is not the end of the statement, report an error.
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    reportParseError("unexpected token in statement");
     return false;
+  }
+  Options.setNoreorder();
+  Parser.Lex(); // Consume the EndOfStatement.
+  return false;
 }
 
 bool MipsAsmParser::parseSetMacroDirective() {
   Parser.Lex();
-  // if this is not the end of the statement, report error
+  // If this is not the end of the statement, report an error.
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     reportParseError("unexpected token in statement");
     return false;
   }
   Options.setMacro();
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
 
 bool MipsAsmParser::parseSetNoMacroDirective() {
   Parser.Lex();
-  // if this is not the end of the statement, report error
+  // If this is not the end of the statement, report an error.
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     reportParseError("`noreorder' must be set before `nomacro'");
     return false;
@@ -1433,12 +1713,37 @@ bool MipsAsmParser::parseSetNoMacroDirective() {
     return false;
   }
   Options.setNomacro();
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
+  return false;
+}
+
+bool MipsAsmParser::parseSetAssignment() {
+  StringRef Name;
+  const MCExpr *Value;
+
+  if (Parser.parseIdentifier(Name))
+    reportParseError("expected identifier after .set");
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return reportParseError("unexpected token in .set directive");
+  Lex(); // Eat comma
+
+  if (Parser.parseExpression(Value))
+    reportParseError("expected valid expression after comma");
+
+  // Check if the Name already exists as a symbol.
+  MCSymbol *Sym = getContext().LookupSymbol(Name);
+  if (Sym)
+    return reportParseError("symbol already defined");
+  Sym = getContext().GetOrCreateSymbol(Name);
+  Sym->setVariableValue(Value);
+
   return false;
 }
+
 bool MipsAsmParser::parseDirectiveSet() {
 
-  // get next token
+  // Get the next token.
   const AsmToken &Tok = Parser.getTok();
 
   if (Tok.getString() == "noat") {
@@ -1454,13 +1759,17 @@ bool MipsAsmParser::parseDirectiveSet() {
   } else if (Tok.getString() == "nomacro") {
     return parseSetNoMacroDirective();
   } else if (Tok.getString() == "nomips16") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
   } else if (Tok.getString() == "nomicromips") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
+  } else {
+    // It is just an identifier, look for an assignment.
+    parseSetAssignment();
+    return false;
   }
 
   return true;
@@ -1495,20 +1804,20 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
 
   StringRef IDVal = DirectiveID.getString();
 
-  if ( IDVal == ".ent") {
-    // ignore this directive for now
+  if (IDVal == ".ent") {
+    // Ignore this directive for now.
     Parser.Lex();
     return false;
   }
 
   if (IDVal == ".end") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.Lex();
     return false;
   }
 
   if (IDVal == ".frame") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
   }
@@ -1518,19 +1827,19 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
   }
 
   if (IDVal == ".fmask") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
   }
 
   if (IDVal == ".mask") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
   }
 
   if (IDVal == ".gpword") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
   }
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index cf8bb189e475..78a9f70c6681 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -32,6 +32,8 @@ add_llvm_target(MipsCodeGen
   MipsLongBranch.cpp
   MipsMCInstLower.cpp
   MipsMachineFunction.cpp
+  MipsModuleISelDAGToDAG.cpp
+  MipsOs16.cpp
   MipsRegisterInfo.cpp
   MipsSEFrameLowering.cpp
   MipsSEInstrInfo.cpp
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 025a783f93df..0dba33a2767a 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -138,10 +138,20 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
                                                 uint64_t Address,
                                                 const void *Decoder);
 
-static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
-                                              unsigned RegNo,
-                                              uint64_t Address,
-                                              const void *Decoder);
+static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
+                                                 unsigned RegNo,
+                                                 uint64_t Address,
+                                                 const void *Decoder);
+
+static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst,
+                                                 unsigned RegNo,
+                                                 uint64_t Address,
+                                                 const void *Decoder);
+
+static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst,
+                                                 unsigned RegNo,
+                                                 uint64_t Address,
+                                                 const void *Decoder);
 
 static DecodeStatus DecodeBranchTarget(MCInst &Inst,
                                        unsigned Offset,
@@ -484,14 +494,38 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
   return MCDisassembler::Success;
 }
 
-static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
-                                              unsigned RegNo,
-                                              uint64_t Address,
-                                              const void *Decoder) {
+static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
+                                                 unsigned RegNo,
+                                                 uint64_t Address,
+                                                 const void *Decoder) {
+  if (RegNo >= 4)
+    return MCDisassembler::Fail;
+
+  unsigned Reg = getReg(Decoder, Mips::ACRegsDSPRegClassID, RegNo);
+  Inst.addOperand(MCOperand::CreateReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst,
+                                                 unsigned RegNo,
+                                                 uint64_t Address,
+                                                 const void *Decoder) {
+  if (RegNo >= 4)
+    return MCDisassembler::Fail;
+
+  unsigned Reg = getReg(Decoder, Mips::HIRegsDSPRegClassID, RegNo);
+  Inst.addOperand(MCOperand::CreateReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst,
+                                                 unsigned RegNo,
+                                                 uint64_t Address,
+                                                 const void *Decoder) {
   if (RegNo >= 4)
     return MCDisassembler::Fail;
 
-  unsigned Reg = getReg(Decoder, Mips::ACRegsRegClassID, RegNo);
+  unsigned Reg = getReg(Decoder, Mips::LORegsDSPRegClassID, RegNo);
   Inst.addOperand(MCOperand::CreateReg(Reg));
   return MCDisassembler::Success;
 }
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 96f93a078987..9460731c1914 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -27,6 +27,9 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/raw_ostream.h"
 
+#define GET_INSTRMAP_INFO
+#include "MipsGenInstrInfo.inc"
+
 using namespace llvm;
 
 namespace {
@@ -35,12 +38,13 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
   void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
   const MCInstrInfo &MCII;
   MCContext &Ctx;
+  const MCSubtargetInfo &STI;
   bool IsLittleEndian;
 
 public:
   MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_,
                     const MCSubtargetInfo &sti, bool IsLittle) :
-    MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
+    MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {}
 
   ~MipsMCCodeEmitter() {}
 
@@ -88,6 +92,9 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
   unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
                               SmallVectorImpl<MCFixup> &Fixups) const;
 
+  unsigned
+  getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const;
+
 }; // class MipsMCCodeEmitter
 }  // namespace
 
@@ -141,6 +148,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary)
     llvm_unreachable("unimplemented opcode in EncodeInstruction()");
 
+  if (STI.getFeatureBits() & Mips::FeatureMicroMips) {
+    int NewOpcode = Mips::Std2MicroMips (Opcode, Mips::Arch_micromips);
+    if (NewOpcode != -1) {
+      Opcode = NewOpcode;
+      TmpInst.setOpcode (NewOpcode);
+      Binary = getBinaryCodeForInstr(TmpInst, Fixups);
+    }
+  }
+
   const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
 
   // Get byte count of instruction
@@ -160,8 +176,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
 
   const MCOperand &MO = MI.getOperand(OpNo);
 
-  // If the destination is an immediate, we have nothing to do.
-  if (MO.isImm()) return MO.getImm();
+  // If the destination is an immediate, divide by 4.
+  if (MO.isImm()) return MO.getImm() >> 2;
+
   assert(MO.isExpr() &&
          "getBranchTargetOpValue expects only expressions or immediates");
 
@@ -179,8 +196,9 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
                      SmallVectorImpl<MCFixup> &Fixups) const {
 
   const MCOperand &MO = MI.getOperand(OpNo);
-  // If the destination is an immediate, we have nothing to do.
-  if (MO.isImm()) return MO.getImm();
+  // If the destination is an immediate, divide by 4.
+  if (MO.isImm()) return MO.getImm()>>2;
+
   assert(MO.isExpr() &&
          "getJumpTargetOpValue expects only expressions or an immediate");
 
@@ -190,35 +208,24 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
   return 0;
 }
 
-/// getMachineOpValue - Return binary encoding of operand. If the machine
-/// operand requires relocation, record the relocation and return zero.
 unsigned MipsMCCodeEmitter::
-getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-                  SmallVectorImpl<MCFixup> &Fixups) const {
-  if (MO.isReg()) {
-    unsigned Reg = MO.getReg();
-    unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg);
-    return RegNo;
-  } else if (MO.isImm()) {
-    return static_cast<unsigned>(MO.getImm());
-  } else if (MO.isFPImm()) {
-    return static_cast<unsigned>(APFloat(MO.getFPImm())
-        .bitcastToAPInt().getHiBits(32).getLimitedValue());
-  }
+getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const {
+  int64_t Res;
 
-  // MO must be an Expr.
-  assert(MO.isExpr());
+  if (Expr->EvaluateAsAbsolute(Res))
+    return Res;
 
-  const MCExpr *Expr = MO.getExpr();
   MCExpr::ExprKind Kind = Expr->getKind();
+  if (Kind == MCExpr::Constant) {
+    return cast<MCConstantExpr>(Expr)->getValue();
+  }
 
   if (Kind == MCExpr::Binary) {
-    Expr = static_cast<const MCBinaryExpr*>(Expr)->getLHS();
-    Kind = Expr->getKind();
+    unsigned Res = getExprOpValue(cast<MCBinaryExpr>(Expr)->getLHS(), Fixups);
+    Res += getExprOpValue(cast<MCBinaryExpr>(Expr)->getRHS(), Fixups);
+    return Res;
   }
-
-  assert (Kind == MCExpr::SymbolRef);
-
+  if (Kind == MCExpr::SymbolRef) {
   Mips::Fixups FixupKind = Mips::Fixups(0);
 
   switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
@@ -298,12 +305,32 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
     break;
   } // switch
 
-  Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind)));
-
-  // All of the information is in the fixup.
+    Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind)));
+    return 0;
+  }
   return 0;
 }
 
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  if (MO.isReg()) {
+    unsigned Reg = MO.getReg();
+    unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg);
+    return RegNo;
+  } else if (MO.isImm()) {
+    return static_cast<unsigned>(MO.getImm());
+  } else if (MO.isFPImm()) {
+    return static_cast<unsigned>(APFloat(MO.getFPImm())
+        .bitcastToAPInt().getHiBits(32).getLimitedValue());
+  }
+  // MO must be an Expr.
+  assert(MO.isExpr());
+  return getExprOpValue(MO.getExpr(),Fixups);
+}
+
 /// getMemEncoding - Return binary encoding of memory related operand.
 /// If the offset operand requires relocation, record the relocation.
 unsigned
diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td
new file mode 100644
index 000000000000..adbcef516542
--- /dev/null
+++ b/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -0,0 +1,70 @@
+class MMArch {
+  string Arch = "micromips";
+  list<dag> Pattern = [];
+}
+
+class ADD_FM_MM<bits<6> op, bits<10> funct> : MMArch {
+  bits<5> rt;
+  bits<5> rs;
+  bits<5> rd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-11} = rd;
+  let Inst{10}    = 0;
+  let Inst{9-0}   = funct;
+}
+
+class ADDI_FM_MM<bits<6> op> : MMArch {
+  bits<5>  rs;
+  bits<5>  rt;
+  bits<16> imm16;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-0}  = imm16;
+}
+
+class SLTI_FM_MM<bits<6> op> : MMArch {
+  bits<5> rt;
+  bits<5> rs;
+  bits<16> imm16;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-21} = rs;
+  let Inst{20-16} = rt;
+  let Inst{15-0}  = imm16;
+}
+
+class LUI_FM_MM : MMArch {
+  bits<5> rt;
+  bits<16> imm16;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x10;
+  let Inst{25-21} = 0xd;
+  let Inst{20-16} = rt;
+  let Inst{15-0}  = imm16;
+}
+
+class MULT_FM_MM<bits<10> funct> : MMArch {
+  bits<5>  rs;
+  bits<5>  rt;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x00;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-6}  = funct;
+  let Inst{5-0}   = 0x3c;
+}
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
new file mode 100644
index 000000000000..2f70da7ccd02
--- /dev/null
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -0,0 +1,39 @@
+let isCodeGenOnly = 1 in {
+  /// Arithmetic Instructions (ALU Immediate)
+  def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, CPURegsOpnd>,
+                 ADDI_FM_MM<0xc>;
+  def ADDi_MM  : MMRel, ArithLogicI<"addi", simm16, CPURegsOpnd>,
+                 ADDI_FM_MM<0x4>;
+  def SLTi_MM  : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>,
+                 SLTI_FM_MM<0x24>;
+  def SLTiu_MM : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>,
+                 SLTI_FM_MM<0x2c>;
+  def ANDi_MM  : MMRel, ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
+                 ADDI_FM_MM<0x34>;
+  def ORi_MM   : MMRel, ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
+                 ADDI_FM_MM<0x14>;
+  def XORi_MM  : MMRel, ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
+                 ADDI_FM_MM<0x1c>;
+  def LUi_MM   : MMRel, LoadUpper<"lui", CPURegs, uimm16>, LUI_FM_MM;
+
+  /// Arithmetic Instructions (3-Operand, R-Type)
+  def ADDu_MM  : MMRel, ArithLogicR<"addu", CPURegsOpnd>, ADD_FM_MM<0, 0x150>;
+  def SUBu_MM  : MMRel, ArithLogicR<"subu", CPURegsOpnd>, ADD_FM_MM<0, 0x1d0>;
+  def MUL_MM   : MMRel, ArithLogicR<"mul", CPURegsOpnd>, ADD_FM_MM<0, 0x210>;
+  def ADD_MM   : MMRel, ArithLogicR<"add", CPURegsOpnd>, ADD_FM_MM<0, 0x110>;
+  def SUB_MM   : MMRel, ArithLogicR<"sub", CPURegsOpnd>, ADD_FM_MM<0, 0x190>;
+  def SLT_MM   : MMRel, SetCC_R<"slt", setlt, CPURegs>, ADD_FM_MM<0, 0x350>;
+  def SLTu_MM  : MMRel, SetCC_R<"sltu", setult, CPURegs>,
+                 ADD_FM_MM<0, 0x390>;
+  def AND_MM   : MMRel, ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>,
+                 ADD_FM_MM<0, 0x250>;
+  def OR_MM    : MMRel, ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>,
+                 ADD_FM_MM<0, 0x290>;
+  def XOR_MM   : MMRel, ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>,
+                 ADD_FM_MM<0, 0x310>;
+  def NOR_MM   : MMRel, LogicNOR<"nor", CPURegsOpnd>, ADD_FM_MM<0, 0x2d0>;
+  def MULT_MM  : MMRel, Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>,
+                 MULT_FM_MM<0x22c>;
+  def MULTu_MM : MMRel, Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>,
+                 MULT_FM_MM<0x26c>;
+}
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 25f4ffb9292a..54fdb7871466 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -20,7 +20,7 @@ namespace llvm {
 class Mips16FrameLowering : public MipsFrameLowering {
 public:
   explicit Mips16FrameLowering(const MipsSubtarget &STI)
-    : MipsFrameLowering(STI) {}
+    : MipsFrameLowering(STI, 8) {}
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
   /// the function.
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 00b3449300c5..c1c635cb9f4f 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -35,6 +35,11 @@
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
+bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  if (!Subtarget.inMips16Mode())
+    return false;
+  return MipsDAGToDAGISel::runOnMachineFunction(MF);
+}
 /// Select multiply instructions.
 std::pair<SDNode*, SDNode*>
 Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
@@ -267,7 +272,7 @@ std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
     EVT VT = LHS.getValueType();
 
     unsigned Sltu_op = Mips::SltuRxRyRz16;
-    SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2);
+    SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops);
     unsigned Addu_op = Mips::AdduRxRyRz16;
     SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT,
                                               SDValue(Carry,0), RHS);
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
index baa85877d957..f05f9b766df8 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -28,6 +28,8 @@ class Mips16DAGToDAGISel : public MipsDAGToDAGISel {
 
   SDValue getMips16SPAliasReg();
 
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
   void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg);
 
   virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index fd3cc8f1902d..17dd2c07967a 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -98,10 +98,10 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 }
 
 void Mips16InstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                    unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC,
-                    const TargetRegisterInfo *TRI) const {
+storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                unsigned SrcReg, bool isKill, int FI,
+                const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+                int64_t Offset) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
@@ -110,14 +110,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     Opc = Mips::SwRxSpImmX16;
   assert(Opc && "Register class not handled!");
   BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
-    .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+    .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
 }
 
 void Mips16InstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                     unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC,
-                     const TargetRegisterInfo *TRI) const {
+loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                 unsigned DestReg, int FI, const TargetRegisterClass *RC,
+                 const TargetRegisterInfo *TRI, int64_t Offset) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
@@ -126,7 +125,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (Mips::CPU16RegsRegClass.hasSubClassEq(RC))
     Opc = Mips::LwRxSpImmX16;
   assert(Opc && "Register class not handled!");
-  BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+  BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
     .addMemOperand(MMO);
 }
 
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index 1cb1dfe1966f..a77a9043bb17 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -48,17 +48,19 @@ class Mips16InstrInfo : public MipsInstrInfo {
                            unsigned DestReg, unsigned SrcReg,
                            bool KillSrc) const;
 
-  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC,
-                                   const TargetRegisterInfo *TRI) const;
-
-  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                    MachineBasicBlock::iterator MBBI,
-                                    unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC,
-                                    const TargetRegisterInfo *TRI) const;
+  virtual void storeRegToStack(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MBBI,
+                               unsigned SrcReg, bool isKill, int FrameIndex,
+                               const TargetRegisterClass *RC,
+                               const TargetRegisterInfo *TRI,
+                               int64_t Offset) const;
+
+  virtual void loadRegFromStack(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                unsigned DestReg, int FrameIndex,
+                                const TargetRegisterClass *RC,
+                                const TargetRegisterInfo *TRI,
+                                int64_t Offset) const;
 
   virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
 
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 629382965b48..aa51aaf46565 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -1466,14 +1466,14 @@ def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
 // MipsDivRem
 //
 def: Mips16Pat
-  <(MipsDivRem CPU16Regs:$rx, CPU16Regs:$ry),
+  <(MipsDivRem16 CPU16Regs:$rx, CPU16Regs:$ry),
    (DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
 
 //
 // MipsDivRemU
 //
 def: Mips16Pat
-  <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry),
+  <(MipsDivRemU16 CPU16Regs:$rx, CPU16Regs:$ry),
    (DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
 
 //  signed a,b
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index 0ea93689490d..6cca2276856d 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -72,6 +72,12 @@ bool Mips16RegisterInfo::saveScavengerRegister
   return true;
 }
 
+const TargetRegisterClass *
+Mips16RegisterInfo::intRegClass(unsigned Size) const {
+  assert(Size == 4);
+  return &Mips::CPU16RegsRegClass;
+}
+
 void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
                                      unsigned OpNo, int FrameIndex,
                                      uint64_t StackSize,
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index b8f818a478d5..2b3d2b1a4ecb 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -37,6 +37,8 @@ class Mips16RegisterInfo : public MipsRegisterInfo {
                                      const TargetRegisterClass *RC,
                                      unsigned Reg) const;
 
+  virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
+
 private:
   virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
                            int FrameIndex, uint64_t StackSize,
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 5903b9e6235e..846a8224af35 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -66,6 +66,14 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc],
   defm ATOMIC_CMP_SWAP_I64  : AtomicCmpSwap64<atomic_cmp_swap_64>;
 }
 
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+  defm LOAD_AC128  : LoadM<"load_ac128", ACRegs128>;
+  defm STORE_AC128 : StoreM<"store_ac128", ACRegs128>;
+}
+
+def COPY_AC128 : PseudoSE<(outs ACRegs128:$dst), (ins ACRegs128:$src), []>;
+
 //===----------------------------------------------------------------------===//
 // Instruction definition
 //===----------------------------------------------------------------------===//
@@ -179,10 +187,16 @@ def DMULT  : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
              MULT_FM<0, 0x1c>;
 def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
              MULT_FM<0, 0x1d>;
-def DSDIV  : Div<MipsDivRem, "ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>,
-             MULT_FM<0, 0x1e>;
-def DUDIV  : Div<MipsDivRemU, "ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>,
-             MULT_FM<0, 0x1f>;
+def PseudoDMULT  : MultDivPseudo<DMULT, ACRegs128, CPU64RegsOpnd, MipsMult,
+                                 IIImul>;
+def PseudoDMULTu : MultDivPseudo<DMULTu, ACRegs128, CPU64RegsOpnd, MipsMultu,
+                                 IIImul>;
+def DSDIV : Div<"ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1e>;
+def DUDIV : Div<"ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1f>;
+def PseudoDSDIV : MultDivPseudo<DSDIV, ACRegs128, CPU64RegsOpnd, MipsDivRem,
+                                IIIdiv, 0>;
+def PseudoDUDIV : MultDivPseudo<DUDIV, ACRegs128, CPU64RegsOpnd, MipsDivRemU,
+                                IIIdiv, 0>;
 
 def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>;
 def MTLO64 : MoveToLOHI<"mtlo", CPU64Regs, [LO64]>, MTLO_FM<0x13>;
@@ -306,6 +320,10 @@ def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)),
 // bswap MipsPattern
 def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
 
+// mflo/hi patterns.
+def : MipsPat<(i64 (ExtractLOHI ACRegs128:$ac, imm:$lohi_idx)),
+              (EXTRACT_SUBREG ACRegs128:$ac, imm:$lohi_idx)>;
+
 //===----------------------------------------------------------------------===//
 // Instruction aliases
 //===----------------------------------------------------------------------===//
@@ -332,13 +350,19 @@ def : InstAlias<"not $rt, $rs",
 def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs), 0>, Requires<[HasMips64]>;
 def : InstAlias<"jalr $rs", (JALR64 RA_64, CPU64Regs:$rs)>,
       Requires<[HasMips64]>;
+def : InstAlias<"jal $rs", (JALR64 RA_64, CPU64Regs:$rs), 0>,
+                 Requires<[HasMips64]>;
+def : InstAlias<"jal $rd,$rs", (JALR64 CPU64Regs:$rd, CPU64Regs:$rs), 0>,
+                 Requires<[HasMips64]>;
 def : InstAlias<"daddu $rs, $rt, $imm",
                 (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
                 1>;
 def : InstAlias<"dadd $rs, $rt, $imm",
                 (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
                 1>;
-
+def : InstAlias<"or $rs, $rt, $imm",
+                (ORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+                1>, Requires<[HasMips64]>;
 /// Move between CPU and coprocessor registers
 
 let DecoderNamespace = "Mips64" in {
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 1876cb6ffae4..f4f71cbccb2f 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -46,6 +46,10 @@
 using namespace llvm;
 
 bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  // Initialize TargetLoweringObjectFile.
+  if (Subtarget->allowMixed16_32())
+    const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+      .Initialize(OutContext, TM);
   MipsFI = MF.getInfo<MipsFunctionInfo>();
   AsmPrinter::runOnMachineFunction(MF);
   return true;
@@ -419,12 +423,18 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                            unsigned OpNum, unsigned AsmVariant,
                                            const char *ExtraCode,
                                            raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
+  int Offset = 0;
+  // Currently we are expecting either no ExtraCode or 'D'
+  if (ExtraCode) {
+    if (ExtraCode[0] == 'D')
+      Offset = 4;
+    else
+      return true; // Unknown modifier.
+  }
 
   const MachineOperand &MO = MI->getOperand(OpNum);
   assert(MO.isReg() && "unexpected inline asm memory operand");
-  O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+  O << Offset << "($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
 
   return false;
 }
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index df877b654848..3fc402ba6423 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -115,6 +115,10 @@ class MipsCodeEmitter : public MachineFunctionPass {
   void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
                                   int Offset) const;
 
+  /// Expand pseudo instructions with accumulator register operands.
+  void expandACCInstr(MachineBasicBlock::instr_iterator MI,
+                      MachineBasicBlock &MBB, unsigned Opc) const;
+
   /// \brief Expand pseudo instruction. Return true if MI was expanded.
   bool expandPseudos(MachineBasicBlock::instr_iterator &MI,
                      MachineBasicBlock &MBB) const;
@@ -298,6 +302,14 @@ void MipsCodeEmitter::emitWord(unsigned Word) {
     MCE.emitWordBE(Word);
 }
 
+void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator MI,
+                                     MachineBasicBlock &MBB,
+                                     unsigned Opc) const {
+  // Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1".
+  BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Opc))
+    .addReg(MI->getOperand(1).getReg()).addReg(MI->getOperand(2).getReg());
+}
+
 bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI,
                                     MachineBasicBlock &MBB) const {
   switch (MI->getOpcode()) {
@@ -309,6 +321,30 @@ bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI,
     BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA)
       .addReg(MI->getOperand(0).getReg());
     break;
+  case Mips::PseudoMULT:
+    expandACCInstr(MI, MBB, Mips::MULT);
+    break;
+  case Mips::PseudoMULTu:
+    expandACCInstr(MI, MBB, Mips::MULTu);
+    break;
+  case Mips::PseudoSDIV:
+    expandACCInstr(MI, MBB, Mips::SDIV);
+    break;
+  case Mips::PseudoUDIV:
+    expandACCInstr(MI, MBB, Mips::UDIV);
+    break;
+  case Mips::PseudoMADD:
+    expandACCInstr(MI, MBB, Mips::MADD);
+    break;
+  case Mips::PseudoMADDU:
+    expandACCInstr(MI, MBB, Mips::MADDU);
+    break;
+  case Mips::PseudoMSUB:
+    expandACCInstr(MI, MBB, Mips::MSUB);
+    break;
+  case Mips::PseudoMSUBU:
+    expandACCInstr(MI, MBB, Mips::MSUBU);
+    break;
   default:
     return false;
   }
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index b5de1ebad22b..1951324cf1a1 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -80,6 +80,10 @@ FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) {
 }
 
 bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) {
-  return true;
+  // The intention is for this to be a mips16 only pass for now
+  // FIXME:
+  // if (!TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+  //  return false;
+  return false;
 }
 
diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td
index a72a763fde06..cf09113cd8ad 100644
--- a/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -219,6 +219,33 @@ class MULT_FMT<bits<6> opcode, bits<6> funct> : DSPInst {
   let Inst{5-0} = funct;
 }
 
+// MFHI sub-class format.
+class MFHI_FMT<bits<6> funct> : DSPInst {
+  bits<5> rd;
+  bits<2> ac;
+
+  let Inst{31-26} = 0;
+  let Inst{25-23} = 0;
+  let Inst{22-21} = ac;
+  let Inst{20-16} = 0;
+  let Inst{15-11} = rd;
+  let Inst{10-6} = 0;
+  let Inst{5-0} = funct;
+}
+
+// MTHI sub-class format.
+class MTHI_FMT<bits<6> funct> : DSPInst {
+  bits<5> rs;
+  bits<2> ac;
+
+  let Inst{31-26} = 0;
+  let Inst{25-21} = rs;
+  let Inst{20-13} = 0;
+  let Inst{12-11} = ac;
+  let Inst{10-6} = 0;
+  let Inst{5-0} = funct;
+}
+
 // EXTR.W sub-class format (type 1).
 class EXTR_W_TY1_FMT<bits<5> op> : DSPInst {
   bits<5> rt;
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index 9531b9148765..6790a2798399 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -20,17 +20,20 @@ def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
 def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
 
 // Mips-specific dsp nodes
-def SDT_MipsExtr : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
-def SDT_MipsShilo : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-def SDT_MipsDPA : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
+def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+                                        SDTCisVT<2, untyped>]>;
+def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+                                         SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>;
+def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
+                                       SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsSHIFT_DSP : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                             SDTCisVT<2, i32>]>;
 
 class MipsDSPBase<string Opc, SDTypeProfile Prof> :
-  SDNode<!strconcat("MipsISD::", Opc), Prof,
-         [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+  SDNode<!strconcat("MipsISD::", Opc), Prof>;
 
 class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> :
-  SDNode<!strconcat("MipsISD::", Opc), Prof,
-         [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPSideEffect]>;
+  SDNode<!strconcat("MipsISD::", Opc), Prof, [SDNPHasChain, SDNPSideEffect]>;
 
 def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>;
 def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>;
@@ -40,7 +43,7 @@ def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>;
 def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>;
 
 def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>;
-def MipsMTHLIP : MipsDSPBase<"MTHLIP", SDT_MipsShilo>;
+def MipsMTHLIP : MipsDSPSideEffectBase<"MTHLIP", SDT_MipsShilo>;
 
 def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>;
 def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>;
@@ -73,6 +76,9 @@ def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>;
 def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>;
 def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>;
 def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>;
+def MipsSHLL_DSP : MipsDSPBase<"SHLL_DSP", SDT_MipsSHIFT_DSP>;
+def MipsSHRA_DSP : MipsDSPBase<"SHRA_DSP", SDT_MipsSHIFT_DSP>;
+def MipsSHRL_DSP : MipsDSPBase<"SHRL_DSP", SDT_MipsSHIFT_DSP>;
 
 // Flags.
 class UseAC {
@@ -144,6 +150,10 @@ class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>;
 class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>;
 class MAQ_SA_W_PHL_ENC : DPA_W_PH_FMT<0b10000>;
 class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>;
+class MFHI_ENC : MFHI_FMT<0b010000>;
+class MFLO_ENC : MFHI_FMT<0b010010>;
+class MTHI_ENC : MTHI_FMT<0b010001>;
+class MTLO_ENC : MTHI_FMT<0b010011>;
 class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>;
 class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>;
 class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>;
@@ -343,6 +353,7 @@ class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))];
   InstrItinClass Itinerary = itin;
   list<Register> Defs = [DSPCtrl];
+  bit hasSideEffects = 1;
 }
 
 class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -383,7 +394,7 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
 class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
                               InstrItinClass itin> {
   dag OutOperandList = (outs CPURegs:$rt);
-  dag InOperandList = (ins ACRegs:$ac, CPURegs:$shift_rs);
+  dag InOperandList = (ins ACRegsDSP:$ac, CPURegs:$shift_rs);
   string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
   InstrItinClass Itinerary = itin;
   list<Register> Defs = [DSPCtrl];
@@ -392,46 +403,40 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
 class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
                               InstrItinClass itin> {
   dag OutOperandList = (outs CPURegs:$rt);
-  dag InOperandList = (ins ACRegs:$ac, uimm16:$shift_rs);
+  dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs);
   string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
   InstrItinClass Itinerary = itin;
   list<Register> Defs = [DSPCtrl];
 }
 
-class SHILO_R1_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
-                           Instruction realinst> :
-  PseudoDSP<(outs), (ins simm16:$shift), [(OpNode immSExt6:$shift)]>,
-  PseudoInstExpansion<(realinst AC0, simm16:$shift)> {
-  list<Register> Defs = [DSPCtrl, AC0];
-  list<Register> Uses = [AC0];
-  InstrItinClass Itinerary = itin;
-}
-
-class SHILO_R1_DESC_BASE<string instr_asm> {
-  dag OutOperandList = (outs ACRegs:$ac);
-  dag InOperandList = (ins simm16:$shift);
+class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+  dag OutOperandList = (outs ACRegsDSP:$ac);
+  dag InOperandList = (ins simm16:$shift, ACRegsDSP:$acin);
   string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
+  list<dag> Pattern = [(set ACRegsDSP:$ac,
+                        (OpNode immSExt6:$shift, ACRegsDSP:$acin))];
+  list<Register> Defs = [DSPCtrl];
+  string Constraints = "$acin = $ac";
 }
 
-class SHILO_R2_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
-                           Instruction realinst> :
-  PseudoDSP<(outs), (ins CPURegs:$rs), [(OpNode CPURegs:$rs)]>,
-  PseudoInstExpansion<(realinst AC0, CPURegs:$rs)> {
-  list<Register> Defs = [DSPCtrl, AC0];
-  list<Register> Uses = [AC0];
-  InstrItinClass Itinerary = itin;
-}
-
-class SHILO_R2_DESC_BASE<string instr_asm> {
-  dag OutOperandList = (outs ACRegs:$ac);
-  dag InOperandList = (ins CPURegs:$rs);
+class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+  dag OutOperandList = (outs ACRegsDSP:$ac);
+  dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin);
   string AsmString = !strconcat(instr_asm, "\t$ac, $rs");
+  list<dag> Pattern = [(set ACRegsDSP:$ac,
+                        (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
+  list<Register> Defs = [DSPCtrl];
+  string Constraints = "$acin = $ac";
 }
 
-class MTHLIP_DESC_BASE<string instr_asm> {
-  dag OutOperandList = (outs ACRegs:$ac);
-  dag InOperandList = (ins CPURegs:$rs);
+class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+  dag OutOperandList = (outs ACRegsDSP:$ac);
+  dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin);
   string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+  list<dag> Pattern = [(set ACRegsDSP:$ac,
+                        (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
+  list<Register> Uses = [DSPCtrl];
+  string Constraints = "$acin = $ac";
 }
 
 class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -454,35 +459,51 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   list<Register> Defs = [DSPCtrl];
 }
 
-class DPA_W_PH_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
-                           Instruction realinst> :
-  PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
-            [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
-  PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
-  list<Register> Defs = [DSPCtrl, AC0];
-  list<Register> Uses = [AC0];
-  InstrItinClass Itinerary = itin;
+class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+  dag OutOperandList = (outs ACRegsDSP:$ac);
+  dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin);
+  string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+  list<dag> Pattern = [(set ACRegsDSP:$ac,
+                        (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
+  list<Register> Defs = [DSPCtrl];
+  string Constraints = "$acin = $ac";
 }
 
-class DPA_W_PH_DESC_BASE<string instr_asm> {
-  dag OutOperandList = (outs ACRegs:$ac);
+class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                     InstrItinClass itin> {
+  dag OutOperandList = (outs ACRegsDSP:$ac);
   dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
   string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+  list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, CPURegs:$rt))];
+  InstrItinClass Itinerary = itin;
+  int AddedComplexity = 20;
+  bit isCommutable = 1;
 }
 
-class MULT_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
-                       Instruction realinst> :
-  PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
-            [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
-  PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
-  list<Register> Defs = [DSPCtrl, AC0];
+class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                     InstrItinClass itin> {
+  dag OutOperandList = (outs ACRegsDSP:$ac);
+  dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin);
+  string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+  list<dag> Pattern = [(set ACRegsDSP:$ac,
+                        (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
   InstrItinClass Itinerary = itin;
+  int AddedComplexity = 20;
+  string Constraints = "$acin = $ac";
 }
 
-class MULT_DESC_BASE<string instr_asm> {
-  dag OutOperandList = (outs ACRegs:$ac);
-  dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
-  string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+class MFHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> {
+  dag OutOperandList = (outs CPURegs:$rd);
+  dag InOperandList = (ins RC:$ac);
+  string AsmString = !strconcat(instr_asm, "\t$rd, $ac");
+  InstrItinClass Itinerary = itin;
+}
+
+class MTHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> {
+  dag OutOperandList = (outs RC:$ac);
+  dag InOperandList = (ins CPURegs:$rs);
+  string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+  InstrItinClass Itinerary = itin;
 }
 
 class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
@@ -518,27 +539,27 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
 //===----------------------------------------------------------------------===//
 
 // Addition/subtraction
-class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", int_mips_addu_qb, NoItinerary,
+class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", null_frag, NoItinerary,
                                        DSPRegs, DSPRegs>, IsCommutable;
 
 class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb,
                                          NoItinerary, DSPRegs, DSPRegs>,
                        IsCommutable;
 
-class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", int_mips_subu_qb, NoItinerary,
+class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", null_frag, NoItinerary,
                                        DSPRegs, DSPRegs>;
 
 class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb,
                                          NoItinerary, DSPRegs, DSPRegs>;
 
-class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", int_mips_addq_ph, NoItinerary,
+class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", null_frag, NoItinerary,
                                        DSPRegs, DSPRegs>, IsCommutable;
 
 class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph,
                                          NoItinerary, DSPRegs, DSPRegs>,
                        IsCommutable;
 
-class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", int_mips_subq_ph, NoItinerary,
+class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", null_frag, NoItinerary,
                                        DSPRegs, DSPRegs>;
 
 class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph,
@@ -551,10 +572,10 @@ class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w,
 class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w,
                                         NoItinerary, CPURegs, CPURegs>;
 
-class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", int_mips_addsc, NoItinerary,
+class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", null_frag, NoItinerary,
                                      CPURegs, CPURegs>, IsCommutable;
 
-class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", int_mips_addwc, NoItinerary,
+class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", null_frag, NoItinerary,
                                      CPURegs, CPURegs>,
                    IsCommutable, UseDSPCtrl;
 
@@ -644,19 +665,19 @@ class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra",
                             ClearDefs;
 
 // Shift
-class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", int_mips_shll_qb, immZExt3,
+class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3,
                                           NoItinerary, DSPRegs>;
 
 class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
                                            NoItinerary, DSPRegs>;
 
-class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3,
+class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3,
                                           NoItinerary, DSPRegs>, ClearDefs;
 
 class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb,
                                            NoItinerary, DSPRegs>, ClearDefs;
 
-class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4,
+class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4,
                                           NoItinerary, DSPRegs>;
 
 class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
@@ -668,7 +689,7 @@ class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph,
 class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
                                              NoItinerary, DSPRegs>;
 
-class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4,
+class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4,
                                           NoItinerary, DSPRegs>, ClearDefs;
 
 class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph,
@@ -717,44 +738,46 @@ class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph,
                                           NoItinerary, DSPRegs, DSPRegs>,
                         IsCommutable;
 
-class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph">;
+class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph",
+                                              MipsMULSAQ_S_W_PH>;
 
-class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl">;
+class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>;
 
-class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr">;
+class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>;
 
-class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl">;
+class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>;
 
-class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr">;
+class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>;
 
-// Dot product with accumulate/subtract
-class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl">;
-
-class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr">;
-
-class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl">;
+// Move from/to hi/lo.
+class MFHI_DESC : MFHI_DESC_BASE<"mfhi", HIRegsDSP, NoItinerary>;
+class MFLO_DESC : MFHI_DESC_BASE<"mflo", LORegsDSP, NoItinerary>;
+class MTHI_DESC : MTHI_DESC_BASE<"mthi", HIRegsDSP, NoItinerary>;
+class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LORegsDSP, NoItinerary>;
 
-class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr">;
-
-class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph">;
-
-class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph">;
+// Dot product with accumulate/subtract
+class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>;
 
-class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w">;
+class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr", MipsDPAU_H_QBR>;
 
-class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w">;
+class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>;
 
-class MULT_DSP_DESC : MULT_DESC_BASE<"mult">;
+class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>;
 
-class MULTU_DSP_DESC : MULT_DESC_BASE<"multu">;
+class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>;
 
-class MADD_DSP_DESC : MULT_DESC_BASE<"madd">;
+class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>;
 
-class MADDU_DSP_DESC : MULT_DESC_BASE<"maddu">;
+class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>;
 
-class MSUB_DSP_DESC : MULT_DESC_BASE<"msub">;
+class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>;
 
-class MSUBU_DSP_DESC : MULT_DESC_BASE<"msubu">;
+class MULT_DSP_DESC  : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>;
+class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>;
+class MADD_DSP_DESC  : MADD_DESC_BASE<"madd", MipsMAdd, NoItinerary>;
+class MADDU_DSP_DESC : MADD_DESC_BASE<"maddu", MipsMAddu, NoItinerary>;
+class MSUB_DSP_DESC  : MADD_DESC_BASE<"msub", MipsMSub, NoItinerary>;
+class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>;
 
 // Comparison
 class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb",
@@ -867,11 +890,11 @@ class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
 class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
                                                NoItinerary>;
 
-class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo">;
+class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>;
 
-class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov">;
+class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>;
 
-class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip">;
+class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>;
 
 class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>;
 
@@ -958,7 +981,7 @@ class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb,
                                               NoItinerary, DSPRegs>;
 
 // Multiplication
-class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", int_mips_mul_ph, NoItinerary,
+class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", null_frag, NoItinerary,
                                        DSPRegs>, IsCommutable;
 
 class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph,
@@ -975,23 +998,25 @@ class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
                        IsCommutable;
 
 // Dot product with accumulate/subtract
-class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph">;
+class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>;
 
-class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph">;
+class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>;
 
-class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph">;
+class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>;
 
-class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph">;
+class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph",
+                                              MipsDPAQX_SA_W_PH>;
 
-class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph">;
+class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>;
 
-class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph">;
+class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>;
 
-class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph">;
+class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>;
 
-class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph">;
+class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph",
+                                              MipsDPSQX_SA_W_PH>;
 
-class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph">;
+class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>;
 
 // Precision reduce/expand
 class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
@@ -1009,7 +1034,7 @@ class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w",
                                                        CPURegs>, ClearDefs;
 
 // Shift
-class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3,
+class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3,
                                           NoItinerary, DSPRegs>, ClearDefs;
 
 class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb,
@@ -1022,7 +1047,7 @@ class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb,
 class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb,
                                              NoItinerary, DSPRegs>, ClearDefs;
 
-class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4,
+class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4,
                                           NoItinerary, DSPRegs>, ClearDefs;
 
 class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
@@ -1099,6 +1124,10 @@ def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC;
 def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC;
 def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC;
 def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC;
+def MFHI_DSP : MFHI_ENC, MFHI_DESC;
+def MFLO_DSP : MFLO_ENC, MFLO_DESC;
+def MTHI_DSP : MTHI_ENC, MTHI_DESC;
+def MTLO_DSP : MTLO_ENC, MTLO_DESC;
 def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC;
 def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC;
 def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC;
@@ -1206,71 +1235,14 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC;
 }
 
 // Pseudos.
-def MULSAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSAQ_S_W_PH, NoItinerary,
-                                                MULSAQ_S_W_PH>;
-def MAQ_S_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHL, NoItinerary,
-                                              MAQ_S_W_PHL>;
-def MAQ_S_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHR, NoItinerary,
-                                              MAQ_S_W_PHR>;
-def MAQ_SA_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHL, NoItinerary,
-                                               MAQ_SA_W_PHL>;
-def MAQ_SA_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHR, NoItinerary,
-                                               MAQ_SA_W_PHR>;
-def DPAU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBL, NoItinerary,
-                                             DPAU_H_QBL>;
-def DPAU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBR, NoItinerary,
-                                             DPAU_H_QBR>;
-def DPSU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBL, NoItinerary,
-                                             DPSU_H_QBL>;
-def DPSU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBR, NoItinerary,
-                                             DPSU_H_QBR>;
-def DPAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_S_W_PH, NoItinerary,
-                                              DPAQ_S_W_PH>;
-def DPSQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_S_W_PH, NoItinerary,
-                                              DPSQ_S_W_PH>;
-def DPAQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_SA_L_W, NoItinerary,
-                                              DPAQ_SA_L_W>;
-def DPSQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_SA_L_W, NoItinerary,
-                                              DPSQ_SA_L_W>;
-
-def MULT_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULT, NoItinerary, MULT_DSP>,
-                      IsCommutable;
-def MULTU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULTU, NoItinerary, MULTU_DSP>,
-                       IsCommutable;
-def MADD_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADD_DSP, NoItinerary, MADD_DSP>,
-                      IsCommutable, UseAC;
-def MADDU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADDU_DSP, NoItinerary, MADDU_DSP>,
-                       IsCommutable, UseAC;
-def MSUB_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUB_DSP, NoItinerary, MSUB_DSP>,
-                      UseAC;
-def MSUBU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUBU_DSP, NoItinerary, MSUBU_DSP>,
-                       UseAC;
-
-def SHILO_PSEUDO : SHILO_R1_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILO>;
-def SHILOV_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILOV>;
-def MTHLIP_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsMTHLIP, NoItinerary, MTHLIP>;
-
-let Predicates = [HasDSPR2] in {
-
-def DPA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPA_W_PH, NoItinerary, DPA_W_PH>;
-def DPS_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPS_W_PH, NoItinerary, DPS_W_PH>;
-def DPAQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_S_W_PH, NoItinerary,
-                                               DPAQX_S_W_PH>;
-def DPAQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_SA_W_PH, NoItinerary,
-                                                DPAQX_SA_W_PH>;
-def DPAX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAX_W_PH, NoItinerary,
-                                            DPAX_W_PH>;
-def DPSX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSX_W_PH, NoItinerary,
-                                            DPSX_W_PH>;
-def DPSQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_S_W_PH, NoItinerary,
-                                               DPSQX_S_W_PH>;
-def DPSQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_SA_W_PH, NoItinerary,
-                                                DPSQX_SA_W_PH>;
-def MULSA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSA_W_PH, NoItinerary,
-                                             MULSA_W_PH>;
-
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+  defm LOAD_AC_DSP  : LoadM<"load_ac_dsp", ACRegsDSP>;
+  defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSP>;
 }
 
+def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>;
+
 // Patterns.
 class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
   Pat<pattern, result>, Requires<[pred]>;
@@ -1294,12 +1266,52 @@ def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a),
 def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
              (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>;
 
+// Binary operations.
+class DSPBinPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node,
+                Predicate Pred = HasDSP> :
+  DSPPat<(Node ValTy:$a, ValTy:$b), (Inst ValTy:$a, ValTy:$b), Pred>;
+
+def : DSPBinPat<ADDQ_PH, v2i16, int_mips_addq_ph>;
+def : DSPBinPat<ADDQ_PH, v2i16, add>;
+def : DSPBinPat<SUBQ_PH, v2i16, int_mips_subq_ph>;
+def : DSPBinPat<SUBQ_PH, v2i16, sub>;
+def : DSPBinPat<MUL_PH, v2i16, int_mips_mul_ph, HasDSPR2>;
+def : DSPBinPat<MUL_PH, v2i16, mul, HasDSPR2>;
+def : DSPBinPat<ADDU_QB, v4i8, int_mips_addu_qb>;
+def : DSPBinPat<ADDU_QB, v4i8, add>;
+def : DSPBinPat<SUBU_QB, v4i8, int_mips_subu_qb>;
+def : DSPBinPat<SUBU_QB, v4i8, sub>;
+def : DSPBinPat<ADDSC, i32, int_mips_addsc>;
+def : DSPBinPat<ADDSC, i32, addc>;
+def : DSPBinPat<ADDWC, i32, int_mips_addwc>;
+def : DSPBinPat<ADDWC, i32, adde>;
+
+// Shift immediate patterns.
+class DSPShiftPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node,
+                  ImmLeaf Imm, Predicate Pred = HasDSP> :
+  DSPPat<(Node ValTy:$a, Imm:$shamt), (Inst ValTy:$a, Imm:$shamt), Pred>;
+
+def : DSPShiftPat<SHLL_PH, v2i16, MipsSHLL_DSP, immZExt4>;
+def : DSPShiftPat<SHRA_PH, v2i16, MipsSHRA_DSP, immZExt4>;
+def : DSPShiftPat<SHRL_PH, v2i16, MipsSHRL_DSP, immZExt4, HasDSPR2>;
+def : DSPShiftPat<SHLL_PH, v2i16, int_mips_shll_ph, immZExt4>;
+def : DSPShiftPat<SHRA_PH, v2i16, int_mips_shra_ph, immZExt4>;
+def : DSPShiftPat<SHRL_PH, v2i16, int_mips_shrl_ph, immZExt4, HasDSPR2>;
+def : DSPShiftPat<SHLL_QB, v4i8, MipsSHLL_DSP, immZExt3>;
+def : DSPShiftPat<SHRA_QB, v4i8, MipsSHRA_DSP, immZExt3, HasDSPR2>;
+def : DSPShiftPat<SHRL_QB, v4i8, MipsSHRL_DSP, immZExt3>;
+def : DSPShiftPat<SHLL_QB, v4i8, int_mips_shll_qb, immZExt3>;
+def : DSPShiftPat<SHRA_QB, v4i8, int_mips_shra_qb, immZExt3, HasDSPR2>;
+def : DSPShiftPat<SHRL_QB, v4i8, int_mips_shrl_qb, immZExt3>;
+
 // Extr patterns.
 class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
-  DSPPat<(i32 (OpNode CPURegs:$rs)), (Instr AC0, CPURegs:$rs)>;
+  DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)),
+         (Instr ACRegsDSP:$ac, CPURegs:$rs)>;
 
 class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> :
-  DSPPat<(i32 (OpNode immZExt5:$shift)), (Instr AC0, immZExt5:$shift)>;
+  DSPPat<(i32 (OpNode immZExt5:$shift, ACRegsDSP:$ac)),
+         (Instr ACRegsDSP:$ac, immZExt5:$shift)>;
 
 def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>;
 def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>;
@@ -1313,3 +1325,19 @@ def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>;
 def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>;
 def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>;
 def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>;
+
+// mflo/hi patterns.
+let AddedComplexity = 20 in
+def : DSPPat<(i32 (ExtractLOHI ACRegsDSP:$ac, imm:$lohi_idx)),
+             (EXTRACT_SUBREG ACRegsDSP:$ac, imm:$lohi_idx)>;
+
+// Indexed load patterns.
+class IndexedLoadPat<SDPatternOperator LoadNode, Instruction Instr> :
+  DSPPat<(i32 (LoadNode (add i32:$base, i32:$index))),
+         (Instr i32:$base, i32:$index)>;
+
+let AddedComplexity = 20 in {
+  def : IndexedLoadPat<zextloadi8, LBUX>;
+  def : IndexedLoadPat<sextloadi16, LHX>;
+  def : IndexedLoadPat<load, LWX>;
+}
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index e26559014173..d07a595af38a 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -220,9 +220,9 @@ namespace {
     /// that can be moved to the delay slot. Returns true on success.
     bool searchForward(MachineBasicBlock &MBB, Iter Slot) const;
 
-    /// This function searches MBB's successor blocks for an instruction that
-    /// can be moved to the delay slot and inserts clones of the instruction
-    /// into the successor blocks.
+    /// This function searches one of MBB's successor blocks for an instruction
+    /// that can be moved to the delay slot and inserts clones of the
+    /// instruction into the successor's predecessor blocks.
     bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const;
 
     /// Pick a successor block of MBB. Return NULL if MBB doesn't have a
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 14268d2130ee..6a5f79d0dfc4 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -26,9 +26,8 @@ class MipsFrameLowering : public TargetFrameLowering {
   const MipsSubtarget &STI;
 
 public:
-  explicit MipsFrameLowering(const MipsSubtarget &sti)
-    : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0,
-                          sti.hasMips64() ? 16 : 8), STI(sti) {}
+  explicit MipsFrameLowering(const MipsSubtarget &sti, unsigned Alignment)
+    : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {}
 
   static const MipsFrameLowering *create(MipsTargetMachine &TM,
                                          const MipsSubtarget &ST);
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 77b08cb11e0c..968e5364842f 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -17,7 +17,6 @@
 #include "MipsSEISelDAGToDAG.h"
 #include "Mips.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
-#include "MipsAnalyzeImmediate.h"
 #include "MipsMachineFunction.h"
 #include "MipsRegisterInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 4bf43f42e3a7..9f136f43d24b 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -30,7 +30,6 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -158,12 +157,18 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::CMovFP_T:          return "MipsISD::CMovFP_T";
   case MipsISD::CMovFP_F:          return "MipsISD::CMovFP_F";
   case MipsISD::FPRound:           return "MipsISD::FPRound";
+  case MipsISD::ExtractLOHI:       return "MipsISD::ExtractLOHI";
+  case MipsISD::InsertLOHI:        return "MipsISD::InsertLOHI";
+  case MipsISD::Mult:              return "MipsISD::Mult";
+  case MipsISD::Multu:             return "MipsISD::Multu";
   case MipsISD::MAdd:              return "MipsISD::MAdd";
   case MipsISD::MAddu:             return "MipsISD::MAddu";
   case MipsISD::MSub:              return "MipsISD::MSub";
   case MipsISD::MSubu:             return "MipsISD::MSubu";
   case MipsISD::DivRem:            return "MipsISD::DivRem";
   case MipsISD::DivRemU:           return "MipsISD::DivRemU";
+  case MipsISD::DivRem16:          return "MipsISD::DivRem16";
+  case MipsISD::DivRemU16:         return "MipsISD::DivRemU16";
   case MipsISD::BuildPairF64:      return "MipsISD::BuildPairF64";
   case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
   case MipsISD::Wrapper:           return "MipsISD::Wrapper";
@@ -192,6 +197,9 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::MADDU_DSP:         return "MipsISD::MADDU_DSP";
   case MipsISD::MSUB_DSP:          return "MipsISD::MSUB_DSP";
   case MipsISD::MSUBU_DSP:         return "MipsISD::MSUBU_DSP";
+  case MipsISD::SHLL_DSP:          return "MipsISD::SHLL_DSP";
+  case MipsISD::SHRA_DSP:          return "MipsISD::SHRA_DSP";
+  case MipsISD::SHRL_DSP:          return "MipsISD::SHRL_DSP";
   default:                         return NULL;
   }
 }
@@ -340,9 +348,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::VACOPY,            MVT::Other, Expand);
   setOperationAction(ISD::VAEND,             MVT::Other, Expand);
 
-  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
-  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
-
   // Use the default for now
   setOperationAction(ISD::STACKSAVE,         MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE,      MVT::Other, Expand);
@@ -376,8 +381,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
     setTruncStoreAction(MVT::i64, MVT::i32, Custom);
   }
 
-  setTargetDAGCombine(ISD::ADDE);
-  setTargetDAGCombine(ISD::SUBE);
   setTargetDAGCombine(ISD::SDIVREM);
   setTargetDAGCombine(ISD::UDIVREM);
   setTargetDAGCombine(ISD::SELECT);
@@ -408,178 +411,6 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
   return VT.changeVectorElementTypeToInteger();
 }
 
-// selectMADD -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-//  (addc multLo, Lo0), (adde multHi, Hi0),
-// where,
-//  multHi/Lo: product of multiplication
-//  Lo0: initial value of Lo register
-//  Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
-  // ADDENode's second operand must be a flag output of an ADDC node in order
-  // for the matching to be successful.
-  SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
-
-  if (ADDCNode->getOpcode() != ISD::ADDC)
-    return false;
-
-  SDValue MultHi = ADDENode->getOperand(0);
-  SDValue MultLo = ADDCNode->getOperand(0);
-  SDNode *MultNode = MultHi.getNode();
-  unsigned MultOpc = MultHi.getOpcode();
-
-  // MultHi and MultLo must be generated by the same node,
-  if (MultLo.getNode() != MultNode)
-    return false;
-
-  // and it must be a multiplication.
-  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
-    return false;
-
-  // MultLo amd MultHi must be the first and second output of MultNode
-  // respectively.
-  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
-    return false;
-
-  // Transform this to a MADD only if ADDENode and ADDCNode are the only users
-  // of the values of MultNode, in which case MultNode will be removed in later
-  // phases.
-  // If there exist users other than ADDENode or ADDCNode, this function returns
-  // here, which will result in MultNode being mapped to a single MULT
-  // instruction node rather than a pair of MULT and MADD instructions being
-  // produced.
-  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
-    return false;
-
-  SDValue Chain = CurDAG->getEntryNode();
-  DebugLoc DL = ADDENode->getDebugLoc();
-
-  // create MipsMAdd(u) node
-  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
-
-  SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Glue,
-                                 MultNode->getOperand(0),// Factor 0
-                                 MultNode->getOperand(1),// Factor 1
-                                 ADDCNode->getOperand(1),// Lo0
-                                 ADDENode->getOperand(1));// Hi0
-
-  // create CopyFromReg nodes
-  SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32,
-                                              MAdd);
-  SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL,
-                                              Mips::HI, MVT::i32,
-                                              CopyFromLo.getValue(2));
-
-  // replace uses of adde and addc here
-  if (!SDValue(ADDCNode, 0).use_empty())
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo);
-
-  if (!SDValue(ADDENode, 0).use_empty())
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi);
-
-  return true;
-}
-
-// selectMSUB -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-//  (addc Lo0, multLo), (sube Hi0, multHi),
-// where,
-//  multHi/Lo: product of multiplication
-//  Lo0: initial value of Lo register
-//  Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
-  // SUBENode's second operand must be a flag output of an SUBC node in order
-  // for the matching to be successful.
-  SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
-
-  if (SUBCNode->getOpcode() != ISD::SUBC)
-    return false;
-
-  SDValue MultHi = SUBENode->getOperand(1);
-  SDValue MultLo = SUBCNode->getOperand(1);
-  SDNode *MultNode = MultHi.getNode();
-  unsigned MultOpc = MultHi.getOpcode();
-
-  // MultHi and MultLo must be generated by the same node,
-  if (MultLo.getNode() != MultNode)
-    return false;
-
-  // and it must be a multiplication.
-  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
-    return false;
-
-  // MultLo amd MultHi must be the first and second output of MultNode
-  // respectively.
-  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
-    return false;
-
-  // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
-  // of the values of MultNode, in which case MultNode will be removed in later
-  // phases.
-  // If there exist users other than SUBENode or SUBCNode, this function returns
-  // here, which will result in MultNode being mapped to a single MULT
-  // instruction node rather than a pair of MULT and MSUB instructions being
-  // produced.
-  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
-    return false;
-
-  SDValue Chain = CurDAG->getEntryNode();
-  DebugLoc DL = SUBENode->getDebugLoc();
-
-  // create MipsSub(u) node
-  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
-
-  SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue,
-                                 MultNode->getOperand(0),// Factor 0
-                                 MultNode->getOperand(1),// Factor 1
-                                 SUBCNode->getOperand(0),// Lo0
-                                 SUBENode->getOperand(0));// Hi0
-
-  // create CopyFromReg nodes
-  SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32,
-                                              MSub);
-  SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL,
-                                              Mips::HI, MVT::i32,
-                                              CopyFromLo.getValue(2));
-
-  // replace uses of sube and subc here
-  if (!SDValue(SUBCNode, 0).use_empty())
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo);
-
-  if (!SDValue(SUBENode, 0).use_empty())
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi);
-
-  return true;
-}
-
-static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
-                                  TargetLowering::DAGCombinerInfo &DCI,
-                                  const MipsSubtarget *Subtarget) {
-  if (DCI.isBeforeLegalize())
-    return SDValue();
-
-  if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
-      selectMADD(N, &DAG))
-    return SDValue(N, 0);
-
-  return SDValue();
-}
-
-static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
-                                  TargetLowering::DAGCombinerInfo &DCI,
-                                  const MipsSubtarget *Subtarget) {
-  if (DCI.isBeforeLegalize())
-    return SDValue();
-
-  if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
-      selectMSUB(N, &DAG))
-    return SDValue(N, 0);
-
-  return SDValue();
-}
-
 static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const MipsSubtarget *Subtarget) {
@@ -589,8 +420,8 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
   EVT Ty = N->getValueType(0);
   unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
   unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
-  unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
-                                                  MipsISD::DivRemU;
+  unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem16 :
+                                                  MipsISD::DivRemU16;
   DebugLoc DL = N->getDebugLoc();
 
   SDValue DivRem = DAG.getNode(Opc, DL, MVT::Glue,
@@ -617,7 +448,7 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+static Mips::CondCode condCodeToFCC(ISD::CondCode CC) {
   switch (CC) {
   default: llvm_unreachable("Unknown fp condition code!");
   case ISD::SETEQ:
@@ -644,8 +475,9 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
 }
 
 
-// Returns true if condition code has to be inverted.
-static bool invertFPCondCode(Mips::CondCode CC) {
+/// This function returns true if the floating point conditional branches and
+/// conditional moves which use condition code CC should be inverted.
+static bool invertFPCondCodeUser(Mips::CondCode CC) {
   if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
     return false;
 
@@ -675,15 +507,14 @@ static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) {
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
 
   return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS,
-                     DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+                     DAG.getConstant(condCodeToFCC(CC), MVT::i32));
 }
 
 // Creates and returns a CMovFPT/F node.
 static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
                             SDValue False, DebugLoc DL) {
-  bool invert = invertFPCondCode((Mips::CondCode)
-                                 cast<ConstantSDNode>(Cond.getOperand(2))
-                                 ->getSExtValue());
+  ConstantSDNode *CC = cast<ConstantSDNode>(Cond.getOperand(2));
+  bool invert = invertFPCondCodeUser((Mips::CondCode)CC->getSExtValue());
 
   return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL,
                      True.getValueType(), True, False, Cond);
@@ -850,10 +681,6 @@ SDValue  MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
 
   switch (Opc) {
   default: break;
-  case ISD::ADDE:
-    return performADDECombine(N, DAG, DCI, Subtarget);
-  case ISD::SUBE:
-    return performSUBECombine(N, DAG, DCI, Subtarget);
   case ISD::SDIVREM:
   case ISD::UDIVREM:
     return performDivRemCombine(N, DAG, DCI, Subtarget);
@@ -895,32 +722,30 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
 {
   switch (Op.getOpcode())
   {
-    case ISD::BR_JT:              return lowerBR_JT(Op, DAG);
-    case ISD::BRCOND:             return lowerBRCOND(Op, DAG);
-    case ISD::ConstantPool:       return lowerConstantPool(Op, DAG);
-    case ISD::GlobalAddress:      return lowerGlobalAddress(Op, DAG);
-    case ISD::BlockAddress:       return lowerBlockAddress(Op, DAG);
-    case ISD::GlobalTLSAddress:   return lowerGlobalTLSAddress(Op, DAG);
-    case ISD::JumpTable:          return lowerJumpTable(Op, DAG);
-    case ISD::SELECT:             return lowerSELECT(Op, DAG);
-    case ISD::SELECT_CC:          return lowerSELECT_CC(Op, DAG);
-    case ISD::SETCC:              return lowerSETCC(Op, DAG);
-    case ISD::VASTART:            return lowerVASTART(Op, DAG);
-    case ISD::FCOPYSIGN:          return lowerFCOPYSIGN(Op, DAG);
-    case ISD::FABS:               return lowerFABS(Op, DAG);
-    case ISD::FRAMEADDR:          return lowerFRAMEADDR(Op, DAG);
-    case ISD::RETURNADDR:         return lowerRETURNADDR(Op, DAG);
-    case ISD::EH_RETURN:          return lowerEH_RETURN(Op, DAG);
-    case ISD::MEMBARRIER:         return lowerMEMBARRIER(Op, DAG);
-    case ISD::ATOMIC_FENCE:       return lowerATOMIC_FENCE(Op, DAG);
-    case ISD::SHL_PARTS:          return lowerShiftLeftParts(Op, DAG);
-    case ISD::SRA_PARTS:          return lowerShiftRightParts(Op, DAG, true);
-    case ISD::SRL_PARTS:          return lowerShiftRightParts(Op, DAG, false);
-    case ISD::LOAD:               return lowerLOAD(Op, DAG);
-    case ISD::STORE:              return lowerSTORE(Op, DAG);
-    case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
-    case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
-    case ISD::ADD:                return lowerADD(Op, DAG);
+  case ISD::BR_JT:              return lowerBR_JT(Op, DAG);
+  case ISD::BRCOND:             return lowerBRCOND(Op, DAG);
+  case ISD::ConstantPool:       return lowerConstantPool(Op, DAG);
+  case ISD::GlobalAddress:      return lowerGlobalAddress(Op, DAG);
+  case ISD::BlockAddress:       return lowerBlockAddress(Op, DAG);
+  case ISD::GlobalTLSAddress:   return lowerGlobalTLSAddress(Op, DAG);
+  case ISD::JumpTable:          return lowerJumpTable(Op, DAG);
+  case ISD::SELECT:             return lowerSELECT(Op, DAG);
+  case ISD::SELECT_CC:          return lowerSELECT_CC(Op, DAG);
+  case ISD::SETCC:              return lowerSETCC(Op, DAG);
+  case ISD::VASTART:            return lowerVASTART(Op, DAG);
+  case ISD::FCOPYSIGN:          return lowerFCOPYSIGN(Op, DAG);
+  case ISD::FABS:               return lowerFABS(Op, DAG);
+  case ISD::FRAMEADDR:          return lowerFRAMEADDR(Op, DAG);
+  case ISD::RETURNADDR:         return lowerRETURNADDR(Op, DAG);
+  case ISD::EH_RETURN:          return lowerEH_RETURN(Op, DAG);
+  case ISD::MEMBARRIER:         return lowerMEMBARRIER(Op, DAG);
+  case ISD::ATOMIC_FENCE:       return lowerATOMIC_FENCE(Op, DAG);
+  case ISD::SHL_PARTS:          return lowerShiftLeftParts(Op, DAG);
+  case ISD::SRA_PARTS:          return lowerShiftRightParts(Op, DAG, true);
+  case ISD::SRL_PARTS:          return lowerShiftRightParts(Op, DAG, false);
+  case ISD::LOAD:               return lowerLOAD(Op, DAG);
+  case ISD::STORE:              return lowerSTORE(Op, DAG);
+  case ISD::ADD:                return lowerADD(Op, DAG);
   }
   return SDValue();
 }
@@ -940,17 +765,6 @@ addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
   return VReg;
 }
 
-// Get fp branch code (not opcode) from condition code.
-static Mips::FPBranchCode getFPBranchCodeFromCond(Mips::CondCode CC) {
-  if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
-    return Mips::BRANCH_T;
-
-  assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) &&
-         "Invalid CondCode.");
-
-  return Mips::BRANCH_F;
-}
-
 MachineBasicBlock *
 MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                 MachineBasicBlock *BB) const {
@@ -1581,8 +1395,8 @@ lowerBRCOND(SDValue Op, SelectionDAG &DAG) const
   SDValue CCNode  = CondRes.getOperand(2);
   Mips::CondCode CC =
     (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
-  SDValue BrCode = DAG.getConstant(getFPBranchCodeFromCond(CC), MVT::i32);
-
+  unsigned Opc = invertFPCondCodeUser(CC) ? Mips::BRANCH_F : Mips::BRANCH_T;
+  SDValue BrCode = DAG.getConstant(Opc, MVT::i32);
   return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode,
                      Dest, CondRes);
 }
@@ -2101,7 +1915,7 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
   return DAG.getMergeValues(Ops, 2, DL);
 }
 
-static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
+static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
                             SDValue Chain, SDValue Src, unsigned Offset) {
   SDValue Ptr = LD->getBasePtr();
   EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT();
@@ -2141,15 +1955,15 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   //  (set tmp, (ldl (add baseptr, 7), undef))
   //  (set dst, (ldr baseptr, tmp))
   if ((VT == MVT::i64) && (ExtType == ISD::NON_EXTLOAD)) {
-    SDValue LDL = CreateLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef,
+    SDValue LDL = createLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef,
                                IsLittle ? 7 : 0);
-    return CreateLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL,
+    return createLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL,
                         IsLittle ? 0 : 7);
   }
 
-  SDValue LWL = CreateLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef,
+  SDValue LWL = createLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef,
                              IsLittle ? 3 : 0);
-  SDValue LWR = CreateLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL,
+  SDValue LWR = createLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL,
                              IsLittle ? 0 : 3);
 
   // Expand
@@ -2180,7 +1994,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getMergeValues(Ops, 2, DL);
 }
 
-static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
+static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
                              SDValue Chain, unsigned Offset) {
   SDValue Ptr = SD->getBasePtr(), Value = SD->getValue();
   EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType();
@@ -2217,9 +2031,9 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   //  (swl val, (add baseptr, 3))
   //  (swr val, baseptr)
   if ((VT == MVT::i32) || SD->isTruncatingStore()) {
-    SDValue SWL = CreateStoreLR(MipsISD::SWL, DAG, SD, Chain,
+    SDValue SWL = createStoreLR(MipsISD::SWL, DAG, SD, Chain,
                                 IsLittle ? 3 : 0);
-    return CreateStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3);
+    return createStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3);
   }
 
   assert(VT == MVT::i64);
@@ -2229,153 +2043,8 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   // to
   //  (sdl val, (add baseptr, 7))
   //  (sdr val, baseptr)
-  SDValue SDL = CreateStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0);
-  return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
-}
-
-// This function expands mips intrinsic nodes which have 64-bit input operands
-// or output values.
-//
-// out64 = intrinsic-node in64
-// =>
-// lo = copy (extract-element (in64, 0))
-// hi = copy (extract-element (in64, 1))
-// mips-specific-node
-// v0 = copy lo
-// v1 = copy hi
-// out64 = merge-values (v0, v1)
-//
-static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG,
-                            unsigned Opc, bool HasI64In, bool HasI64Out) {
-  DebugLoc DL = Op.getDebugLoc();
-  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
-  SDValue Chain = HasChainIn ? Op->getOperand(0) : DAG.getEntryNode();
-  SmallVector<SDValue, 3> Ops;
-
-  if (HasI64In) {
-    SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
-                               Op->getOperand(1 + HasChainIn),
-                               DAG.getConstant(0, MVT::i32));
-    SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
-                               Op->getOperand(1 + HasChainIn),
-                               DAG.getConstant(1, MVT::i32));
-
-    Chain = DAG.getCopyToReg(Chain, DL, Mips::LO, InLo, SDValue());
-    Chain = DAG.getCopyToReg(Chain, DL, Mips::HI, InHi, Chain.getValue(1));
-
-    Ops.push_back(Chain);
-    Ops.append(Op->op_begin() + HasChainIn + 2, Op->op_end());
-    Ops.push_back(Chain.getValue(1));
-  } else {
-    Ops.push_back(Chain);
-    Ops.append(Op->op_begin() + HasChainIn + 1, Op->op_end());
-  }
-
-  if (!HasI64Out)
-    return DAG.getNode(Opc, DL, Op->value_begin(), Op->getNumValues(),
-                       Ops.begin(), Ops.size());
-
-  SDValue Intr = DAG.getNode(Opc, DL, DAG.getVTList(MVT::Other, MVT::Glue),
-                             Ops.begin(), Ops.size());
-  SDValue OutLo = DAG.getCopyFromReg(Intr.getValue(0), DL, Mips::LO, MVT::i32,
-                                     Intr.getValue(1));
-  SDValue OutHi = DAG.getCopyFromReg(OutLo.getValue(1), DL, Mips::HI, MVT::i32,
-                                     OutLo.getValue(2));
-  SDValue Out = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, OutLo, OutHi);
-
-  if (!HasChainIn)
-    return Out;
-
-  SDValue Vals[] = { Out, OutHi.getValue(1) };
-  return DAG.getMergeValues(Vals, 2, DL);
-}
-
-SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
-                                                    SelectionDAG &DAG) const {
-  switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
-  default:
-    return SDValue();
-  case Intrinsic::mips_shilo:
-    return lowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true);
-  case Intrinsic::mips_dpau_h_qbl:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true);
-  case Intrinsic::mips_dpau_h_qbr:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true);
-  case Intrinsic::mips_dpsu_h_qbl:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true);
-  case Intrinsic::mips_dpsu_h_qbr:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true);
-  case Intrinsic::mips_dpa_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true);
-  case Intrinsic::mips_dps_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true);
-  case Intrinsic::mips_dpax_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true);
-  case Intrinsic::mips_dpsx_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true);
-  case Intrinsic::mips_mulsa_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true);
-  case Intrinsic::mips_mult:
-    return lowerDSPIntr(Op, DAG, MipsISD::MULT, false, true);
-  case Intrinsic::mips_multu:
-    return lowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true);
-  case Intrinsic::mips_madd:
-    return lowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true);
-  case Intrinsic::mips_maddu:
-    return lowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true);
-  case Intrinsic::mips_msub:
-    return lowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true);
-  case Intrinsic::mips_msubu:
-    return lowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true);
-  }
-}
-
-SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
-                                                   SelectionDAG &DAG) const {
-  switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
-  default:
-    return SDValue();
-  case Intrinsic::mips_extp:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false);
-  case Intrinsic::mips_extpdp:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false);
-  case Intrinsic::mips_extr_w:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false);
-  case Intrinsic::mips_extr_r_w:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false);
-  case Intrinsic::mips_extr_rs_w:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false);
-  case Intrinsic::mips_extr_s_h:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false);
-  case Intrinsic::mips_mthlip:
-    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true);
-  case Intrinsic::mips_mulsaq_s_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true);
-  case Intrinsic::mips_maq_s_w_phl:
-    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true);
-  case Intrinsic::mips_maq_s_w_phr:
-    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true);
-  case Intrinsic::mips_maq_sa_w_phl:
-    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true);
-  case Intrinsic::mips_maq_sa_w_phr:
-    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true);
-  case Intrinsic::mips_dpaq_s_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true);
-  case Intrinsic::mips_dpsq_s_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true);
-  case Intrinsic::mips_dpaq_sa_l_w:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true);
-  case Intrinsic::mips_dpsq_sa_l_w:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true);
-  case Intrinsic::mips_dpaqx_s_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true);
-  case Intrinsic::mips_dpaqx_sa_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true);
-  case Intrinsic::mips_dpsqx_s_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true);
-  case Intrinsic::mips_dpsqx_sa_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true);
-  }
+  SDValue SDL = createStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0);
+  return createStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
 }
 
 SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const {
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 71977d7f1c6f..e1e6055b397d 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -68,6 +68,16 @@ namespace llvm {
 
       EH_RETURN,
 
+      // Node used to extract integer from accumulator.
+      ExtractLOHI,
+
+      // Node used to insert integers to accumulator.
+      InsertLOHI,
+
+      // Mult nodes.
+      Mult,
+      Multu,
+
       // MAdd/Sub nodes
       MAdd,
       MAddu,
@@ -77,6 +87,8 @@ namespace llvm {
       // DivRem(u)
       DivRem,
       DivRemU,
+      DivRem16,
+      DivRemU16,
 
       BuildPairF64,
       ExtractElementF64,
@@ -131,6 +143,11 @@ namespace llvm {
       MSUB_DSP,
       MSUBU_DSP,
 
+      // DSP shift nodes.
+      SHLL_DSP,
+      SHRA_DSP,
+      SHRL_DSP,
+
       // Load/Store Left/Right nodes.
       LWL = ISD::FIRST_TARGET_MEMORY_OPCODE,
       LWR,
@@ -333,8 +350,6 @@ namespace llvm {
                                  bool IsSRA) const;
     SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
-    SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
-    SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
 
     /// isEligibleForTailCallOptimization - Check whether the call is eligible
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 891bdc12243e..6b23057c9cdb 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -503,32 +503,27 @@ let Predicates = [IsFP64bit, HasStdEnc] in {
   def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
 }
 
-// Load/Store patterns.
+// Patterns for loads/stores with a reg+imm operand.
 let AddedComplexity = 40 in {
   let Predicates = [IsN64, HasStdEnc] in {
-    def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1_P8 addrRegImm:$a)>;
-    def : MipsPat<(store FGR32:$v, addrRegImm:$a),
-                  (SWC1_P8 FGR32:$v, addrRegImm:$a)>;
-    def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164_P8 addrRegImm:$a)>;
-    def : MipsPat<(store FGR64:$v, addrRegImm:$a),
-                  (SDC164_P8 FGR64:$v, addrRegImm:$a)>;
+    def : LoadRegImmPat<LWC1_P8, f32, load>;
+    def : StoreRegImmPat<SWC1_P8, f32>;
+    def : LoadRegImmPat<LDC164_P8, f64, load>;
+    def : StoreRegImmPat<SDC164_P8, f64>;
   }
 
   let Predicates = [NotN64, HasStdEnc] in {
-    def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1 addrRegImm:$a)>;
-    def : MipsPat<(store FGR32:$v, addrRegImm:$a),
-                  (SWC1 FGR32:$v, addrRegImm:$a)>;
+    def : LoadRegImmPat<LWC1, f32, load>;
+    def : StoreRegImmPat<SWC1, f32>;
   }
 
   let Predicates = [NotN64, HasMips64, HasStdEnc] in {
-    def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164 addrRegImm:$a)>;
-    def : MipsPat<(store FGR64:$v, addrRegImm:$a),
-                  (SDC164 FGR64:$v, addrRegImm:$a)>;
+    def : LoadRegImmPat<LDC164, f64, load>;
+    def : StoreRegImmPat<SDC164, f64>;
   }
 
   let Predicates = [NotN64, NotMips64, HasStdEnc] in {
-    def : MipsPat<(f64 (load addrRegImm:$a)), (LDC1 addrRegImm:$a)>;
-    def : MipsPat<(store AFGR64:$v, addrRegImm:$a),
-                  (SDC1 AFGR64:$v, addrRegImm:$a)>;
+    def : LoadRegImmPat<LDC1, f64, load>;
+    def : StoreRegImmPat<SDC1, f64>;
   }
 }
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index ee432c875355..03375db68e03 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -36,6 +36,24 @@ def FrmFR     : Format<4>;
 def FrmFI     : Format<5>;
 def FrmOther  : Format<6>; // Instruction w/ a custom format
 
+class MMRel;
+
+def Std2MicroMips : InstrMapping {
+  let FilterClass = "MMRel";
+  // Instructions with the same BaseOpcode and isNVStore values form a row.
+  let RowFields = ["BaseOpcode"];
+  // Instructions with the same predicate sense form a column.
+  let ColFields = ["Arch"];
+  // The key column is the unpredicated instructions.
+  let KeyCol = ["se"];
+  // Value columns are PredSense=true and PredSense=false
+  let ValueCols = [["se"], ["micromips"]];
+}
+
+class StdArch {
+  string Arch = "se";
+}
+
 // Generic Mips Format
 class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
                InstrItinClass itin, Format f>: Instruction
@@ -74,9 +92,11 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
 
 // Mips32/64 Instruction Format
 class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
-             InstrItinClass itin, Format f>:
+             InstrItinClass itin, Format f, string opstr = ""> :
   MipsInst<outs, ins, asmstr, pattern, itin, f> {
   let Predicates = [HasStdEnc];
+  string BaseOpcode = opstr;
+  string Arch;
 }
 
 // Mips Pseudo Instructions Format
@@ -192,7 +212,7 @@ class MFC3OP_FM<bits<6> op, bits<5> mfmt>
   let Inst{2-0}   = sel;
 }
 
-class ADD_FM<bits<6> op, bits<6> funct> {
+class ADD_FM<bits<6> op, bits<6> funct> : StdArch {
   bits<5> rd;
   bits<5> rs;
   bits<5> rt;
@@ -207,7 +227,7 @@ class ADD_FM<bits<6> op, bits<6> funct> {
   let Inst{5-0}   = funct;
 }
 
-class ADDI_FM<bits<6> op> {
+class ADDI_FM<bits<6> op> : StdArch {
   bits<5>  rs;
   bits<5>  rt;
   bits<16> imm16;
@@ -288,7 +308,7 @@ class B_FM {
   let Inst{15-0}  = offset;
 }
 
-class SLTI_FM<bits<6> op> {
+class SLTI_FM<bits<6> op> : StdArch {
   bits<5> rt;
   bits<5> rs;
   bits<16> imm16;
@@ -413,7 +433,7 @@ class SYNC_FM {
   let Inst{5-0}   = 0xf;
 }
 
-class MULT_FM<bits<6> op, bits<6> funct> {
+class MULT_FM<bits<6> op, bits<6> funct> : StdArch {
   bits<5>  rs;
   bits<5>  rt;
 
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 3cd9088140b9..8c05d97beac2 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -86,6 +86,36 @@ class MipsInstrInfo : public MipsGenInstrInfo {
   /// Return the number of bytes of code the specified instruction may be.
   unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
 
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const {
+    storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0);
+  }
+
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const {
+    loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0);
+  }
+
+  virtual void storeRegToStack(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MI,
+                               unsigned SrcReg, bool isKill, int FrameIndex,
+                               const TargetRegisterClass *RC,
+                               const TargetRegisterInfo *TRI,
+                               int64_t Offset) const = 0;
+
+  virtual void loadRegFromStack(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI,
+                                unsigned DestReg, int FrameIndex,
+                                const TargetRegisterClass *RC,
+                                const TargetRegisterInfo *TRI,
+                                int64_t Offset) const = 0;
+
 protected:
   bool isZeroImm(const MachineOperand &op) const;
 
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 25b5d240be06..1be7792807ce 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -23,13 +23,16 @@ def SDT_MipsCMov         : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
                                                 SDTCisInt<4>]>;
 def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
 def SDT_MipsCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDT_MipsMAddMSub     : SDTypeProfile<0, 4,
-                                         [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
-                                          SDTCisSameAs<1, 2>,
-                                          SDTCisSameAs<2, 3>]>;
-def SDT_MipsDivRem       : SDTypeProfile<0, 2,
-                                         [SDTCisInt<0>,
-                                          SDTCisSameAs<0, 1>]>;
+def SDT_ExtractLOHI : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, untyped>,
+                                           SDTCisVT<2, i32>]>;
+def SDT_InsertLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+                                          SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsMultDiv : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>,
+                                    SDTCisSameAs<1, 2>]>;
+def SDT_MipsMAddMSub : SDTypeProfile<1, 3,
+                                     [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
+                                      SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsDivRem16 : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
 
 def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
 
@@ -82,20 +85,27 @@ def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
                            [SDNPHasChain, SDNPSideEffect,
                             SDNPOptInGlue, SDNPOutGlue]>;
 
+// Node used to extract integer from LO/HI register.
+def ExtractLOHI : SDNode<"MipsISD::ExtractLOHI", SDT_ExtractLOHI>;
+
+// Node used to insert 32-bit integers to LOHI register pair.
+def InsertLOHI : SDNode<"MipsISD::InsertLOHI", SDT_InsertLOHI>;
+
+// Mult nodes.
+def MipsMult  : SDNode<"MipsISD::Mult", SDT_MipsMultDiv>;
+def MipsMultu : SDNode<"MipsISD::Multu", SDT_MipsMultDiv>;
+
 // MAdd*/MSub* nodes
-def MipsMAdd      : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
-                           [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMAddu     : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub,
-                           [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMSub      : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub,
-                           [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMSubu     : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub,
-                           [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMAdd  : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub>;
+def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub>;
+def MipsMSub  : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub>;
+def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub>;
 
 // DivRem(u) nodes
-def MipsDivRem    : SDNode<"MipsISD::DivRem", SDT_MipsDivRem,
-                           [SDNPOutGlue]>;
-def MipsDivRemU   : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
+def MipsDivRem    : SDNode<"MipsISD::DivRem", SDT_MipsMultDiv>;
+def MipsDivRemU   : SDNode<"MipsISD::DivRemU", SDT_MipsMultDiv>;
+def MipsDivRem16  : SDNode<"MipsISD::DivRem16", SDT_MipsDivRem16, [SDNPOutGlue]>;
+def MipsDivRemU16 : SDNode<"MipsISD::DivRemU16", SDT_MipsDivRem16,
                            [SDNPOutGlue]>;
 
 // Target constant nodes that are not part of any isel patterns and remain
@@ -169,6 +179,7 @@ def NoNaNsFPMath :    Predicate<"TM.Options.NoNaNsFPMath">,
                       AssemblerPredicate<"FeatureMips32">;
 def HasStdEnc :       Predicate<"Subtarget.hasStandardEncoding()">,
                       AssemblerPredicate<"!FeatureMips16">;
+def NotDSP :          Predicate<"!Subtarget.hasDSP()">;
 
 class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
   let Predicates = [HasStdEnc];
@@ -256,6 +267,7 @@ def mem : Operand<i32> {
   let MIOperandInfo = (ops CPURegs, simm16);
   let EncoderMethod = "getMemEncoding";
   let ParserMatchClass = MipsMemAsmOperand;
+  let OperandType = "OPERAND_MEMORY";
 }
 
 def mem64 : Operand<i64> {
@@ -263,18 +275,21 @@ def mem64 : Operand<i64> {
   let MIOperandInfo = (ops CPU64Regs, simm16_64);
   let EncoderMethod = "getMemEncoding";
   let ParserMatchClass = MipsMemAsmOperand;
+  let OperandType = "OPERAND_MEMORY";
 }
 
 def mem_ea : Operand<i32> {
   let PrintMethod = "printMemOperandEA";
   let MIOperandInfo = (ops CPURegs, simm16);
   let EncoderMethod = "getMemEncoding";
+  let OperandType = "OPERAND_MEMORY";
 }
 
 def mem_ea_64 : Operand<i64> {
   let PrintMethod = "printMemOperandEA";
   let MIOperandInfo = (ops CPU64Regs, simm16_64);
   let EncoderMethod = "getMemEncoding";
+  let OperandType = "OPERAND_MEMORY";
 }
 
 // size operand of ext instruction
@@ -360,11 +375,9 @@ class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0,
                   SDPatternOperator OpNode = null_frag>:
   InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt),
          !strconcat(opstr, "\t$rd, $rs, $rt"),
-         [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
+         [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR, opstr> {
   let isCommutable = isComm;
   let isReMaterializable = 1;
-  string BaseOpcode;
-  string Arch;
 }
 
 // Arithmetic and logical instructions with 2 register operands.
@@ -373,15 +386,15 @@ class ArithLogicI<string opstr, Operand Od, RegisterOperand RO,
                   SDPatternOperator OpNode = null_frag> :
   InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16),
          !strconcat(opstr, "\t$rt, $rs, $imm16"),
-         [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> {
+         [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))],
+         IIAlu, FrmI, opstr> {
   let isReMaterializable = 1;
 }
 
 // Arithmetic Multiply ADD/SUB
-class MArithR<string opstr, SDPatternOperator op = null_frag, bit isComm = 0> :
+class MArithR<string opstr, bit isComm = 0> :
   InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt),
-         !strconcat(opstr, "\t$rs, $rt"),
-         [(op CPURegsOpnd:$rs, CPURegsOpnd:$rt, LO, HI)], IIImul, FrmR> {
+         !strconcat(opstr, "\t$rs, $rt"), [], IIImul, FrmR> {
   let Defs = [HI, LO];
   let Uses = [HI, LO];
   let isCommutable = isComm;
@@ -391,7 +404,7 @@ class MArithR<string opstr, SDPatternOperator op = null_frag, bit isComm = 0> :
 class LogicNOR<string opstr, RegisterOperand RC>:
   InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt),
          !strconcat(opstr, "\t$rd, $rs, $rt"),
-         [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> {
+         [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR, opstr> {
   let isCommutable = 1;
 }
 
@@ -427,33 +440,39 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
 
 // Memory Load/Store
 class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC,
-           Operand MemOpnd> :
+           Operand MemOpnd, ComplexPattern Addr> :
   InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
-         [(set RC:$rt, (OpNode addr:$addr))], NoItinerary, FrmI> {
+         [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI> {
   let DecoderMethod = "DecodeMem";
   let canFoldAsLoad = 1;
+  let mayLoad = 1;
 }
 
 class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC,
-            Operand MemOpnd> :
+            Operand MemOpnd, ComplexPattern Addr> :
   InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
-         [(OpNode RC:$rt, addr:$addr)], NoItinerary, FrmI> {
+         [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI> {
   let DecoderMethod = "DecodeMem";
+  let mayStore = 1;
 }
 
 multiclass LoadM<string opstr, RegisterClass RC,
-                 SDPatternOperator OpNode = null_frag> {
-  def NAME : Load<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>;
-  def _P8  : Load<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> {
+                 SDPatternOperator OpNode = null_frag,
+                 ComplexPattern Addr = addr> {
+  def NAME : Load<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
+  def _P8  : Load<opstr, OpNode, RC, mem64, Addr>,
+             Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
   }
 }
 
 multiclass StoreM<string opstr, RegisterClass RC,
-                  SDPatternOperator OpNode = null_frag> {
-  def NAME : Store<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>;
-  def _P8  : Store<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> {
+                  SDPatternOperator OpNode = null_frag,
+                  ComplexPattern Addr = addr> {
+  def NAME : Store<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
+  def _P8  : Store<opstr, OpNode, RC, mem64, Addr>,
+             Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
   }
@@ -523,14 +542,15 @@ class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> :
 class SetCC_R<string opstr, PatFrag cond_op, RegisterClass RC> :
   InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt),
          !strconcat(opstr, "\t$rd, $rs, $rt"),
-         [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>;
+         [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))],
+         IIAlu, FrmR, opstr>;
 
 class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
               RegisterClass RC>:
   InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16),
          !strconcat(opstr, "\t$rt, $rs, $imm16"),
          [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))],
-         IIAlu, FrmI>;
+         IIAlu, FrmI, opstr>;
 
 // Jump
 class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
@@ -617,17 +637,40 @@ class SYNC_FT :
 class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
            list<Register> DefRegs> :
   InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [],
-         itin, FrmR> {
+         itin, FrmR, opstr> {
   let isCommutable = 1;
   let Defs = DefRegs;
   let neverHasSideEffects = 1;
 }
 
-class Div<SDNode op, string opstr, InstrItinClass itin, RegisterOperand RO,
+// Pseudo multiply/divide instruction with explicit accumulator register
+// operands.
+class MultDivPseudo<Instruction RealInst, RegisterClass R0, RegisterOperand R1,
+                    SDPatternOperator OpNode, InstrItinClass Itin,
+                    bit IsComm = 1, bit HasSideEffects = 0> :
+  PseudoSE<(outs R0:$ac), (ins R1:$rs, R1:$rt),
+           [(set R0:$ac, (OpNode R1:$rs, R1:$rt))], Itin>,
+  PseudoInstExpansion<(RealInst R1:$rs, R1:$rt)> {
+  let isCommutable = IsComm;
+  let hasSideEffects = HasSideEffects;
+}
+
+// Pseudo multiply add/sub instruction with explicit accumulator register
+// operands.
+class MAddSubPseudo<Instruction RealInst, SDPatternOperator OpNode>
+  : PseudoSE<(outs ACRegs:$ac),
+             (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin),
+             [(set ACRegs:$ac,
+              (OpNode CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin))],
+             IIImul>,
+    PseudoInstExpansion<(RealInst CPURegsOpnd:$rs, CPURegsOpnd:$rt)> {
+  string Constraints = "$acin = $ac";
+}
+
+class Div<string opstr, InstrItinClass itin, RegisterOperand RO,
           list<Register> DefRegs> :
-  InstSE<(outs), (ins RO:$rs, RO:$rt),
-         !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RO:$rs, RO:$rt)], itin,
-         FrmR> {
+  InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$$zero, $rs, $rt"),
+         [], itin, FrmR> {
   let Defs = DefRegs;
 }
 
@@ -790,6 +833,14 @@ let usesCustomInserter = 1 in {
   defm ATOMIC_CMP_SWAP_I32  : AtomicCmpSwap32<atomic_cmp_swap_32>;
 }
 
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+  defm LOAD_AC64  : LoadM<"load_ac64", ACRegs>;
+  defm STORE_AC64 : StoreM<"store_ac64", ACRegs>;
+}
+
+def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>;
+
 //===----------------------------------------------------------------------===//
 // Instruction definition
 //===----------------------------------------------------------------------===//
@@ -798,31 +849,39 @@ let usesCustomInserter = 1 in {
 //===----------------------------------------------------------------------===//
 
 /// Arithmetic Instructions (ALU Immediate)
-def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>,
+def ADDiu : MMRel, ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>,
             ADDI_FM<0x9>, IsAsCheapAsAMove;
-def ADDi  : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>;
-def SLTi  : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>;
-def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>;
-def ANDi  : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
+def ADDi  : MMRel, ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>;
+def SLTi  : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>,
+            SLTI_FM<0xa>;
+def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>,
+            SLTI_FM<0xb>;
+def ANDi  : MMRel, ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
             ADDI_FM<0xc>;
-def ORi   : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
+def ORi   : MMRel, ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
             ADDI_FM<0xd>;
-def XORi  : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
+def XORi  : MMRel, ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
             ADDI_FM<0xe>;
-def LUi   : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM;
+def LUi   : MMRel, LoadUpper<"lui", CPURegs, uimm16>, LUI_FM;
 
 /// Arithmetic Instructions (3-Operand, R-Type)
-def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>;
-def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>;
-def MUL  : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, ADD_FM<0x1c, 2>;
-def ADD  : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>;
-def SUB  : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>;
-def SLT  : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>;
-def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>;
-def AND  : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
-def OR   : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
-def XOR  : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
-def NOR  : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>;
+def ADDu  : MMRel, ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>,
+            ADD_FM<0, 0x21>;
+def SUBu  : MMRel, ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>,
+            ADD_FM<0, 0x23>;
+def MUL   : MMRel, ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>,
+            ADD_FM<0x1c, 2>;
+def ADD   : MMRel, ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>;
+def SUB   : MMRel, ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>;
+def SLT   : MMRel, SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>;
+def SLTu  : MMRel, SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>;
+def AND   : MMRel, ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>,
+            ADD_FM<0, 0x24>;
+def OR    : MMRel, ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>,
+            ADD_FM<0, 0x25>;
+def XOR   : MMRel, ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>,
+            ADD_FM<0, 0x26>;
+def NOR   : MMRel, LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>;
 
 /// Shift Instructions
 def SLL  : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>,
@@ -845,10 +904,10 @@ let Predicates = [HasMips32r2, HasStdEnc] in {
 /// Load and Store Instructions
 ///  aligned
 defm LB  : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>;
-defm LBu : LoadM<"lbu", CPURegs, zextloadi8>, LW_FM<0x24>;
-defm LH  : LoadM<"lh", CPURegs, sextloadi16>, LW_FM<0x21>;
+defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, LW_FM<0x24>;
+defm LH  : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, LW_FM<0x21>;
 defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>;
-defm LW  : LoadM<"lw", CPURegs, load>, LW_FM<0x23>;
+defm LW  : LoadM<"lw", CPURegs, load, addrDefault>, LW_FM<0x23>;
 defm SB  : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>;
 defm SH  : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>;
 defm SW  : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>;
@@ -918,12 +977,17 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
 }
 
 /// Multiply and Divide Instructions.
-def MULT  : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>;
-def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>;
-def SDIV  : Div<MipsDivRem, "div", IIIdiv, CPURegsOpnd, [HI, LO]>,
-            MULT_FM<0, 0x1a>;
-def UDIV  : Div<MipsDivRemU, "divu", IIIdiv, CPURegsOpnd, [HI, LO]>,
-            MULT_FM<0, 0x1b>;
+def MULT  : MMRel, Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>,
+            MULT_FM<0, 0x18>;
+def MULTu : MMRel, Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>,
+            MULT_FM<0, 0x19>;
+def PseudoMULT  : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult, IIImul>;
+def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, CPURegsOpnd, MipsMultu, IIImul>;
+def SDIV  : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>;
+def UDIV  : Div<"divu", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1b>;
+def PseudoSDIV : MultDivPseudo<SDIV, ACRegs, CPURegsOpnd, MipsDivRem, IIIdiv, 0>;
+def PseudoUDIV : MultDivPseudo<UDIV, ACRegs, CPURegsOpnd, MipsDivRemU, IIIdiv,
+                               0>;
 
 def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>;
 def MTLO : MoveToLOHI<"mtlo", CPURegs, [LO]>, MTLO_FM<0x13>;
@@ -951,10 +1015,14 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
 def LEA_ADDiu : EffectiveAddress<"addiu", CPURegs, mem_ea>, LW_FM<9>;
 
 // MADD*/MSUB*
-def MADD  : MArithR<"madd", MipsMAdd, 1>, MULT_FM<0x1c, 0>;
-def MADDU : MArithR<"maddu", MipsMAddu, 1>, MULT_FM<0x1c, 1>;
-def MSUB  : MArithR<"msub", MipsMSub>, MULT_FM<0x1c, 4>;
-def MSUBU : MArithR<"msubu", MipsMSubu>, MULT_FM<0x1c, 5>;
+def MADD  : MArithR<"madd", 1>, MULT_FM<0x1c, 0>;
+def MADDU : MArithR<"maddu", 1>, MULT_FM<0x1c, 1>;
+def MSUB  : MArithR<"msub">, MULT_FM<0x1c, 4>;
+def MSUBU : MArithR<"msubu">, MULT_FM<0x1c, 5>;
+def PseudoMADD  : MAddSubPseudo<MADD, MipsMAdd>;
+def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu>;
+def PseudoMSUB  : MAddSubPseudo<MSUB, MipsMSub>;
+def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu>;
 
 def RDHWR : ReadHardware<CPURegs, HWRegsOpnd>, RDHWR_FM;
 
@@ -997,6 +1065,9 @@ def : InstAlias<"and $rs, $rt, $imm",
 def : InstAlias<"j $rs", (JR CPURegs:$rs), 0>,
       Requires<[NotMips64]>;
 def : InstAlias<"jalr $rs", (JALR RA, CPURegs:$rs)>, Requires<[NotMips64]>;
+def : InstAlias<"jal $rs", (JALR RA, CPURegs:$rs), 0>, Requires<[NotMips64]>;
+def : InstAlias<"jal $rd,$rs", (JALR CPURegs:$rd, CPURegs:$rs), 0>,
+                 Requires<[NotMips64]>;
 def : InstAlias<"not $rt, $rs",
                 (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO), 1>;
 def : InstAlias<"neg $rt, $rs",
@@ -1006,8 +1077,11 @@ def : InstAlias<"negu $rt, $rs",
 def : InstAlias<"slt $rs, $rt, $imm",
                 (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm), 0>;
 def : InstAlias<"xor $rs, $rt, $imm",
-                (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+                (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, uimm16:$imm), 1>,
       Requires<[NotMips64]>;
+def : InstAlias<"or $rs, $rt, $imm",
+                (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, uimm16:$imm), 1>,
+                 Requires<[NotMips64]>;
 def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
 def : InstAlias<"mfc0 $rt, $rd",
                 (MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>;
@@ -1043,6 +1117,13 @@ def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegsOpnd>;
 //  Arbitrary patterns that map to one or more instructions
 //===----------------------------------------------------------------------===//
 
+// Load/store pattern templates.
+class LoadRegImmPat<Instruction LoadInst, ValueType ValTy, PatFrag Node> :
+  MipsPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>;
+
+class StoreRegImmPat<Instruction StoreInst, ValueType ValTy> :
+  MipsPat<(store ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>;
+
 // Small immediates
 def : MipsPat<(i32 immSExt16:$in),
               (ADDiu ZERO, imm:$in)>;
@@ -1058,10 +1139,12 @@ def : MipsPat<(i32 imm:$imm),
 // Carry MipsPatterns
 def : MipsPat<(subc CPURegs:$lhs, CPURegs:$rhs),
               (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
-def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs),
-              (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
-def : MipsPat<(addc  CPURegs:$src, immSExt16:$imm),
-              (ADDiu CPURegs:$src, imm:$imm)>;
+let Predicates = [HasStdEnc, NotDSP] in {
+  def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs),
+                (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+  def : MipsPat<(addc  CPURegs:$src, immSExt16:$imm),
+                (ADDiu CPURegs:$src, imm:$imm)>;
+}
 
 // Call
 def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)),
@@ -1220,6 +1303,24 @@ defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
 // bswap pattern
 def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
 
+// mflo/hi patterns.
+def : MipsPat<(i32 (ExtractLOHI ACRegs:$ac, imm:$lohi_idx)),
+              (EXTRACT_SUBREG ACRegs:$ac, imm:$lohi_idx)>;
+
+// Load halfword/word patterns.
+let AddedComplexity = 40 in {
+  let Predicates = [NotN64, HasStdEnc] in {
+    def : LoadRegImmPat<LBu, i32, zextloadi8>;
+    def : LoadRegImmPat<LH, i32, sextloadi16>;
+    def : LoadRegImmPat<LW, i32, load>;
+  }
+  let Predicates = [IsN64, HasStdEnc] in {
+    def : LoadRegImmPat<LBu_P8, i32, zextloadi8>;
+    def : LoadRegImmPat<LH_P8, i32, sextloadi16>;
+    def : LoadRegImmPat<LW_P8, i32, load>;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Floating Point Support
 //===----------------------------------------------------------------------===//
@@ -1238,3 +1339,6 @@ include "Mips16InstrInfo.td"
 include "MipsDSPInstrFormats.td"
 include "MipsDSPInstrInfo.td"
 
+// Micromips
+include "MicroMipsInstrFormats.td"
+include "MicroMipsInstrInfo.td"
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 2efe534053a2..bf5ad3703119 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -399,6 +399,8 @@ static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
 }
 
 bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
+  if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+    return false;
   if ((TM.getRelocationModel() == Reloc::PIC_) &&
       TM.getSubtarget<MipsSubtarget>().isABI_O32() &&
       F.getInfo<MipsFunctionInfo>()->globalBaseRegSet())
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
new file mode 100644
index 000000000000..c6abf17df353
--- /dev/null
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -0,0 +1,34 @@
+//===----------------------------------------------------------------------===//
+// Instruction Selector Subtarget Control
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// This file defines a pass used to change the subtarget for the
+// Mips Instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsISelDAGToDAG.h"
+#include "MipsModuleISelDAGToDAG.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n");
+  const_cast<MipsSubtarget&>(Subtarget).resetSubtarget(&MF);
+  return false;
+}
+
+char MipsModuleDAGToDAGISel::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createMipsModuleISelDag(MipsTargetMachine &TM) {
+  return new MipsModuleDAGToDAGISel(TM);
+}
+
+
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
new file mode 100644
index 000000000000..fda35ae288bb
--- /dev/null
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
@@ -0,0 +1,66 @@
+//===---- MipsModuleISelDAGToDAG.h -  Change Subtarget             --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass used to change the subtarget for the
+// Mips Instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMODULEISELDAGTODAG_H
+#define MIPSMODULEISELDAGTODAG_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsModuleDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class MipsModuleDAGToDAGISel : public MachineFunctionPass {
+public:
+
+  static char ID;
+
+  explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_)
+    : MachineFunctionPass(ID),
+      TM(TM_), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+
+  // Pass Name
+  virtual const char *getPassName() const {
+    return "MIPS DAG->DAG Pattern Instruction Selection";
+  }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  virtual SDNode *Select(SDNode *N) {
+    llvm_unreachable("unexpected");
+  }
+
+protected:
+  /// Keep a pointer to the MipsSubtarget around so that we can make the right
+  /// decision when generating code for different targets.
+  const TargetMachine &TM;
+  const MipsSubtarget &Subtarget;
+};
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *createMipsModuleISelDag(MipsTargetMachine &TM);
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
new file mode 100644
index 000000000000..aabc4663698a
--- /dev/null
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -0,0 +1,113 @@
+//===---- MipsOs16.cpp for Mips Option -Os16                         --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an optimization phase for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-os16"
+#include "MipsOs16.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace {
+
+  // Figure out if we need float point based on the function signature.
+  // We need to move variables in and/or out of floating point
+  // registers because of the ABI
+  //
+  bool needsFPFromSig(Function &F) {
+    Type* RetType = F.getReturnType();
+    switch (RetType->getTypeID()) {
+    case Type::FloatTyID:
+    case Type::DoubleTyID:
+      return true;
+    default:
+      ;
+    }
+    if (F.arg_size() >=1) {
+      Argument &Arg = F.getArgumentList().front();
+      switch (Arg.getType()->getTypeID()) {
+        case Type::FloatTyID:
+        case Type::DoubleTyID:
+          return true;
+        default:
+          ;
+      }
+    }
+    return false;
+  }
+
+  // Figure out if the function will need floating point operations
+  //
+  bool needsFP(Function &F) {
+    if (needsFPFromSig(F))
+      return true;
+    for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+      for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+        const Instruction &Inst = *I;
+        switch (Inst.getOpcode()) {
+        case Instruction::FAdd:
+        case Instruction::FSub:
+        case Instruction::FMul:
+        case Instruction::FDiv:
+        case Instruction::FRem:
+        case Instruction::FPToUI:
+        case Instruction::FPToSI:
+        case Instruction::UIToFP:
+        case Instruction::SIToFP:
+        case Instruction::FPTrunc:
+        case Instruction::FPExt:
+        case Instruction::FCmp:
+          return true;
+        default:
+          ;
+        }
+        if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+          DEBUG(dbgs() << "Working on call" << "\n");
+          Function &F_ =  *CI->getCalledFunction();
+          if (needsFPFromSig(F_))
+            return true;
+        }
+      }
+    return false;
+  }
+}
+namespace llvm {
+
+
+bool MipsOs16::runOnModule(Module &M) {
+  DEBUG(errs() << "Run on Module MipsOs16\n");
+  bool modified = false;
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    DEBUG(dbgs() << "Working on " << F->getName() << "\n");
+    if (needsFP(*F)) {
+      DEBUG(dbgs() << " need to compile as nomips16 \n");
+      F->addFnAttr("nomips16");
+    }
+    else {
+      F->addFnAttr("mips16");
+      DEBUG(dbgs() << " no need to compile as nomips16 \n");
+    }
+  }
+  return modified;
+}
+
+char MipsOs16::ID = 0;
+
+}
+
+ModulePass *llvm::createMipsOs16(MipsTargetMachine &TM) {
+  return new MipsOs16;
+}
+
+
diff --git a/lib/Target/Mips/MipsOs16.h b/lib/Target/Mips/MipsOs16.h
new file mode 100644
index 000000000000..21beef8549cd
--- /dev/null
+++ b/lib/Target/Mips/MipsOs16.h
@@ -0,0 +1,49 @@
+//===---- MipsOs16.h for Mips Option -Os16                         --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an optimization phase for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "MipsTargetMachine.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+
+
+
+#ifndef MIPSOS16_H
+#define MIPSOS16_H
+
+using namespace llvm;
+
+namespace llvm {
+
+class MipsOs16 : public ModulePass {
+
+public:
+  static char ID;
+
+  MipsOs16() : ModulePass(ID) {
+
+  }
+
+  virtual const char *getPassName() const {
+    return "MIPS Os16 Optimization";
+  }
+
+  virtual bool runOnModule(Module &M);
+
+};
+
+ModulePass *createMipsOs16(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 3c210e71bb7b..5ed51241391f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -68,6 +68,9 @@ class MipsRegisterInfo : public MipsGenRegisterInfo {
   unsigned getEHExceptionRegister() const;
   unsigned getEHHandlerRegister() const;
 
+  /// \brief Return GPR register class.
+  virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
+
 private:
   virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
                            int FrameIndex, uint64_t StackSize,
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 6d76e8a54a15..30699ac42105 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -58,6 +58,13 @@ class AFPR64<bits<16> Enc, string n, list<Register> subregs>
   let SubRegIndices = [sub_32];
 }
 
+// Accumulator Registers
+class ACC<bits<16> Enc, string n, list<Register> subregs>
+  : MipsRegWithSubRegs<Enc, n, subregs> {
+  let SubRegIndices = [sub_lo, sub_hi];
+  let CoveredBySubRegs = 1;
+}
+
 // Mips Hardware Registers
 class HWR<bits<16> Enc, string n> : MipsReg<Enc, n>;
 
@@ -222,8 +229,14 @@ let Namespace = "Mips" in {
   def D31_64  : AFPR64<31, "f31", [F31]>, DwarfRegNum<[63]>;
 
   // Hi/Lo registers
-  def HI  : Register<"hi">, DwarfRegNum<[64]>;
-  def LO  : Register<"lo">, DwarfRegNum<[65]>;
+  def HI  : Register<"ac0">, DwarfRegNum<[64]>;
+  def HI1 : Register<"ac1">, DwarfRegNum<[176]>;
+  def HI2 : Register<"ac2">, DwarfRegNum<[178]>;
+  def HI3 : Register<"ac3">, DwarfRegNum<[180]>;
+  def LO  : Register<"ac0">, DwarfRegNum<[65]>;
+  def LO1 : Register<"ac1">, DwarfRegNum<[177]>;
+  def LO2 : Register<"ac2">, DwarfRegNum<[179]>;
+  def LO3 : Register<"ac3">, DwarfRegNum<[181]>;
 
   let SubRegIndices = [sub_32] in {
   def HI64  : RegisterWithSubRegs<"hi", [HI]>;
@@ -244,11 +257,12 @@ let Namespace = "Mips" in {
   def HWR29_64 : MipsReg<29, "29">;
 
   // Accum registers
-  let SubRegIndices = [sub_lo, sub_hi] in
-  def AC0 : MipsRegWithSubRegs<0, "ac0", [LO, HI]>;
-  def AC1 : MipsReg<1, "ac1">;
-  def AC2 : MipsReg<2, "ac2">;
-  def AC3 : MipsReg<3, "ac3">;
+  def AC0 : ACC<0, "ac0", [LO, HI]>;
+  def AC1 : ACC<1, "ac1", [LO1, HI1]>;
+  def AC2 : ACC<2, "ac2", [LO2, HI2]>;
+  def AC3 : ACC<3, "ac3", [LO3, HI3]>;
+
+  def AC0_64 : ACC<0, "ac0", [LO64, HI64]>;
 
   def DSPCtrl : Register<"dspctrl">;
 }
@@ -328,14 +342,25 @@ def CCR  : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable;
 // Hi/Lo Registers
 def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable;
 def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable;
+def LORegsDSP : RegisterClass<"Mips", [i32], 32, (add LO, LO1, LO2, LO3)>;
+def HIRegsDSP : RegisterClass<"Mips", [i32], 32, (add HI, HI1, HI2, HI3)>;
 
 // Hardware registers
 def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
 def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable;
 
 // Accumulator Registers
-def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>,
-             Unallocatable;
+def ACRegs : RegisterClass<"Mips", [untyped], 64, (add AC0)> {
+  let Size = 64;
+}
+
+def ACRegs128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> {
+  let Size = 128;
+}
+
+def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
+  let Size = 64;
+}
 
 def CPURegsAsmOperand : AsmOperandClass {
   let Name = "CPURegsAsm";
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 0dd671376f8e..68ec92188802 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -29,6 +29,155 @@
 
 using namespace llvm;
 
+namespace {
+typedef MachineBasicBlock::iterator Iter;
+
+/// Helper class to expand accumulator pseudos.
+class ExpandACCPseudo {
+public:
+  ExpandACCPseudo(MachineFunction &MF);
+  bool expand();
+
+private:
+  bool expandInstr(MachineBasicBlock &MBB, Iter I);
+  void expandLoad(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+  void expandStore(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+  void expandCopy(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+
+  MachineFunction &MF;
+  const MipsSEInstrInfo &TII;
+  const MipsRegisterInfo &RegInfo;
+  MachineRegisterInfo &MRI;
+};
+}
+
+ExpandACCPseudo::ExpandACCPseudo(MachineFunction &MF_)
+  : MF(MF_),
+    TII(*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo())),
+    RegInfo(TII.getRegisterInfo()), MRI(MF.getRegInfo()) {}
+
+bool ExpandACCPseudo::expand() {
+  bool Expanded = false;
+
+  for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end();
+       BB != BBEnd; ++BB)
+    for (Iter I = BB->begin(), End = BB->end(); I != End;)
+      Expanded |= expandInstr(*BB, I++);
+
+  return Expanded;
+}
+
+bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
+  switch(I->getOpcode()) {
+  case Mips::LOAD_AC64:
+  case Mips::LOAD_AC64_P8:
+  case Mips::LOAD_AC_DSP:
+  case Mips::LOAD_AC_DSP_P8:
+    expandLoad(MBB, I, 4);
+    break;
+  case Mips::LOAD_AC128:
+  case Mips::LOAD_AC128_P8:
+    expandLoad(MBB, I, 8);
+    break;
+  case Mips::STORE_AC64:
+  case Mips::STORE_AC64_P8:
+  case Mips::STORE_AC_DSP:
+  case Mips::STORE_AC_DSP_P8:
+    expandStore(MBB, I, 4);
+    break;
+  case Mips::STORE_AC128:
+  case Mips::STORE_AC128_P8:
+    expandStore(MBB, I, 8);
+    break;
+  case Mips::COPY_AC64:
+  case Mips::COPY_AC_DSP:
+    expandCopy(MBB, I, 4);
+    break;
+  case Mips::COPY_AC128:
+    expandCopy(MBB, I, 8);
+    break;
+  default:
+    return false;
+  }
+
+  MBB.erase(I);
+  return true;
+}
+
+void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I,
+                                 unsigned RegSize) {
+  //  load $vr0, FI
+  //  copy lo, $vr0
+  //  load $vr1, FI + 4
+  //  copy hi, $vr1
+
+  assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+  const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+  unsigned VR0 = MRI.createVirtualRegister(RC);
+  unsigned VR1 = MRI.createVirtualRegister(RC);
+  unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+  unsigned Lo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+  unsigned Hi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+  DebugLoc DL = I->getDebugLoc();
+  const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+
+  TII.loadRegFromStack(MBB, I, VR0, FI, RC, &RegInfo, 0);
+  BuildMI(MBB, I, DL, Desc, Lo).addReg(VR0, RegState::Kill);
+  TII.loadRegFromStack(MBB, I, VR1, FI, RC, &RegInfo, RegSize);
+  BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill);
+}
+
+void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I,
+                                  unsigned RegSize) {
+  //  copy $vr0, lo
+  //  store $vr0, FI
+  //  copy $vr1, hi
+  //  store $vr1, FI + 4
+
+  assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+  const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+  unsigned VR0 = MRI.createVirtualRegister(RC);
+  unsigned VR1 = MRI.createVirtualRegister(RC);
+  unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+  unsigned SrcKill = getKillRegState(I->getOperand(0).isKill());
+  unsigned Lo = RegInfo.getSubReg(Src, Mips::sub_lo);
+  unsigned Hi = RegInfo.getSubReg(Src, Mips::sub_hi);
+  DebugLoc DL = I->getDebugLoc();
+
+  BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(Lo, SrcKill);
+  TII.storeRegToStack(MBB, I, VR0, true, FI, RC, &RegInfo, 0);
+  BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(Hi, SrcKill);
+  TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize);
+}
+
+void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I,
+                                 unsigned RegSize) {
+  //  copy $vr0, src_lo
+  //  copy dst_lo, $vr0
+  //  copy $vr1, src_hi
+  //  copy dst_hi, $vr1
+
+  const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+  unsigned VR0 = MRI.createVirtualRegister(RC);
+  unsigned VR1 = MRI.createVirtualRegister(RC);
+  unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
+  unsigned SrcKill = getKillRegState(I->getOperand(1).isKill());
+  unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+  unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+  unsigned SrcLo = RegInfo.getSubReg(Src, Mips::sub_lo);
+  unsigned SrcHi = RegInfo.getSubReg(Src, Mips::sub_hi);
+  DebugLoc DL = I->getDebugLoc();
+
+  BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(SrcLo, SrcKill);
+  BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstLo)
+    .addReg(VR0, RegState::Kill);
+  BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill);
+  BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi)
+    .addReg(VR1, RegState::Kill);
+}
+
 unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const {
   static const unsigned EhDataReg[] = {
     Mips::A0, Mips::A1, Mips::A2, Mips::A3
@@ -246,7 +395,10 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
 
   // Reserve call frame if the size of the maximum call frame fits into 16-bit
   // immediate field and there are no variable sized objects on the stack.
-  return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
+  // Make sure the second register scavenger spill slot can be accessed with one
+  // instruction.
+  return isInt<16>(MFI->getMaxCallFrameSize() + getStackAlignment()) &&
+    !MFI->hasVarSizedObjects();
 }
 
 // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
@@ -284,6 +436,18 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
   if (MipsFI->callsEhReturn())
     MipsFI->createEhDataRegsFI();
 
+  // Expand pseudo instructions which load, store or copy accumulators.
+  // Add an emergency spill slot if a pseudo was expanded.
+  if (ExpandACCPseudo(MF).expand()) {
+    // The spill slot should be half the size of the accumulator. If target is
+    // mips64, it should be 64-bit, otherwise it should be 32-bt.
+    const TargetRegisterClass *RC = STI.hasMips64() ?
+      &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+    int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                  RC->getAlignment(), false);
+    RS->addScavengingFrameIndex(FI);
+  }
+
   // Set scavenging frame index if necessary.
   uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() +
     estimateStackSize(MF);
@@ -295,7 +459,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
     &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
   int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
                                                 RC->getAlignment(), false);
-  RS->setScavengingFrameIndex(FI);
+  RS->addScavengingFrameIndex(FI);
 }
 
 const MipsFrameLowering *
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 7becd25455f5..193a66cc65a7 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -21,7 +21,7 @@ namespace llvm {
 class MipsSEFrameLowering : public MipsFrameLowering {
 public:
   explicit MipsSEFrameLowering(const MipsSubtarget &STI)
-    : MipsFrameLowering(STI) {}
+    : MipsFrameLowering(STI, STI.hasMips64() ? 16 : 8) {}
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
   /// the function.
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index e22c3c8b3fda..b54f1f490aa8 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -35,6 +35,11 @@
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
+bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  if (Subtarget.inMips16Mode())
+    return false;
+  return MipsDAGToDAGISel::runOnMachineFunction(MF);
+}
 
 bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
                                                 const MachineInstr& MI) {
@@ -177,27 +182,6 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
       replaceUsesWithZeroReg(MRI, *I);
 }
 
-/// Select multiply instructions.
-std::pair<SDNode*, SDNode*>
-MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
-                               bool HasLo, bool HasHi) {
-  SDNode *Lo = 0, *Hi = 0;
-  SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
-                                       N->getOperand(1));
-  SDValue InFlag = SDValue(Mul, 0);
-
-  if (HasLo) {
-    unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
-    Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
-    InFlag = SDValue(Lo, 1);
-  }
-  if (HasHi) {
-    unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
-    Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
-  }
-  return std::make_pair(Lo, Hi);
-}
-
 SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
                                            SDValue CmpLHS, DebugLoc DL,
                                            SDNode *Node) const {
@@ -211,7 +195,7 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
   SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
   EVT VT = LHS.getValueType();
 
-  SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2);
+  SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops);
   SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
                                             SDValue(Carry, 0), RHS);
   return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
@@ -307,9 +291,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
   // Instruction Selection not handled by the auto-generated
   // tablegen selection should be handled here.
   ///
-  EVT NodeTy = Node->getValueType(0);
   SDNode *Result;
-  unsigned MultOpc;
 
   switch(Opcode) {
   default: break;
@@ -321,51 +303,13 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
   }
 
   case ISD::ADDE: {
+    if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
+      break;
     SDValue InFlag = Node->getOperand(2);
     Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
     return std::make_pair(true, Result);
   }
 
-  /// Mul with two results
-  case ISD::SMUL_LOHI:
-  case ISD::UMUL_LOHI: {
-    if (NodeTy == MVT::i32)
-      MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
-    else
-      MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
-
-    std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
-                                                  true, true);
-
-    if (!SDValue(Node, 0).use_empty())
-      ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
-
-    if (!SDValue(Node, 1).use_empty())
-      ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
-
-    return std::make_pair(true, (SDNode*)NULL);
-  }
-
-  /// Special Muls
-  case ISD::MUL: {
-    // Mips32 has a 32-bit three operand mul instruction.
-    if (Subtarget.hasMips32() && NodeTy == MVT::i32)
-      break;
-    MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT;
-    Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first;
-    return std::make_pair(true, Result);
-  }
-  case ISD::MULHS:
-  case ISD::MULHU: {
-    if (NodeTy == MVT::i32)
-      MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
-    else
-      MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
-
-    Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
-    return std::make_pair(true, Result);
-  }
-
   case ISD::ConstantFP: {
     ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
     if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
@@ -450,6 +394,19 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
     ReplaceUses(SDValue(Node, 0), ResNode);
     return std::make_pair(true, ResNode.getNode());
   }
+
+  case MipsISD::InsertLOHI: {
+    unsigned RCID = Subtarget.hasDSP() ? Mips::ACRegsDSPRegClassID :
+                                         Mips::ACRegsRegClassID;
+    SDValue RegClass = CurDAG->getTargetConstant(RCID, MVT::i32);
+    SDValue LoIdx = CurDAG->getTargetConstant(Mips::sub_lo, MVT::i32);
+    SDValue HiIdx = CurDAG->getTargetConstant(Mips::sub_hi, MVT::i32);
+    const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx,
+                            Node->getOperand(1), HiIdx };
+    SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+                                         MVT::Untyped, Ops);
+    return std::make_pair(true, Res);
+  }
   }
 
   return std::make_pair(false, (SDNode*)NULL);
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 6137ab040bbc..0dae73dd5ed7 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -24,6 +24,9 @@ class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
   explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
 
 private:
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
   bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
 
   std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 287e2ede0ec0..efadaaa1f729 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "MipsTargetMachine.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
@@ -27,6 +28,9 @@ EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
 MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
   : MipsTargetLowering(TM) {
   // Set up the register classes
+
+  clearRegisterClasses();
+
   addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
 
   if (HasMips64)
@@ -42,12 +46,21 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
       for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
         setOperationAction(Opc, VecTys[i], Expand);
 
+      setOperationAction(ISD::ADD, VecTys[i], Legal);
+      setOperationAction(ISD::SUB, VecTys[i], Legal);
       setOperationAction(ISD::LOAD, VecTys[i], Legal);
       setOperationAction(ISD::STORE, VecTys[i], Legal);
       setOperationAction(ISD::BITCAST, VecTys[i], Legal);
     }
+
+    setTargetDAGCombine(ISD::SHL);
+    setTargetDAGCombine(ISD::SRA);
+    setTargetDAGCombine(ISD::SRL);
   }
 
+  if (Subtarget->hasDSPR2())
+    setOperationAction(ISD::MUL, MVT::v2i16, Legal);
+
   if (!TM.Options.UseSoftFloat) {
     addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
 
@@ -60,11 +73,32 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
     }
   }
 
+  setOperationAction(ISD::SMUL_LOHI,          MVT::i32, Custom);
+  setOperationAction(ISD::UMUL_LOHI,          MVT::i32, Custom);
+  setOperationAction(ISD::MULHS,              MVT::i32, Custom);
+  setOperationAction(ISD::MULHU,              MVT::i32, Custom);
+
+  if (HasMips64) {
+    setOperationAction(ISD::MULHS,            MVT::i64, Custom);
+    setOperationAction(ISD::MULHU,            MVT::i64, Custom);
+    setOperationAction(ISD::MUL,              MVT::i64, Custom);
+  }
+
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN,  MVT::i64, Custom);
+
+  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
+  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+  setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
+  setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
   setOperationAction(ISD::MEMBARRIER,         MVT::Other, Custom);
   setOperationAction(ISD::ATOMIC_FENCE,       MVT::Other, Custom);
   setOperationAction(ISD::LOAD,               MVT::i32, Custom);
   setOperationAction(ISD::STORE,              MVT::i32, Custom);
 
+  setTargetDAGCombine(ISD::ADDE);
+  setTargetDAGCombine(ISD::SUBE);
+
   computeRegisterProperties();
 }
 
@@ -89,6 +123,274 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
   }
 }
 
+SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  switch(Op.getOpcode()) {
+  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
+  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
+  case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
+  case ISD::MULHU:     return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
+  case ISD::MUL:       return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
+  case ISD::SDIVREM:   return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
+  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true, DAG);
+  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
+  }
+
+  return MipsTargetLowering::LowerOperation(Op, DAG);
+}
+
+// selectMADD -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+//  (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+//  multHi/Lo: product of multiplication
+//  Lo0: initial value of Lo register
+//  Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
+  // ADDENode's second operand must be a flag output of an ADDC node in order
+  // for the matching to be successful.
+  SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
+
+  if (ADDCNode->getOpcode() != ISD::ADDC)
+    return false;
+
+  SDValue MultHi = ADDENode->getOperand(0);
+  SDValue MultLo = ADDCNode->getOperand(0);
+  SDNode *MultNode = MultHi.getNode();
+  unsigned MultOpc = MultHi.getOpcode();
+
+  // MultHi and MultLo must be generated by the same node,
+  if (MultLo.getNode() != MultNode)
+    return false;
+
+  // and it must be a multiplication.
+  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+    return false;
+
+  // MultLo amd MultHi must be the first and second output of MultNode
+  // respectively.
+  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+    return false;
+
+  // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+  // of the values of MultNode, in which case MultNode will be removed in later
+  // phases.
+  // If there exist users other than ADDENode or ADDCNode, this function returns
+  // here, which will result in MultNode being mapped to a single MULT
+  // instruction node rather than a pair of MULT and MADD instructions being
+  // produced.
+  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+    return false;
+
+  DebugLoc DL = ADDENode->getDebugLoc();
+
+  // Initialize accumulator.
+  SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+                                  ADDCNode->getOperand(1),
+                                  ADDENode->getOperand(1));
+
+  // create MipsMAdd(u) node
+  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+  SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
+                                 MultNode->getOperand(0),// Factor 0
+                                 MultNode->getOperand(1),// Factor 1
+                                 ACCIn);
+
+  // replace uses of adde and addc here
+  if (!SDValue(ADDCNode, 0).use_empty()) {
+    SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
+    SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
+                                    LoIdx);
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
+  }
+  if (!SDValue(ADDENode, 0).use_empty()) {
+    SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
+    SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
+                                    HiIdx);
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
+  }
+
+  return true;
+}
+
+// selectMSUB -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+//  (addc Lo0, multLo), (sube Hi0, multHi),
+// where,
+//  multHi/Lo: product of multiplication
+//  Lo0: initial value of Lo register
+//  Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
+  // SUBENode's second operand must be a flag output of an SUBC node in order
+  // for the matching to be successful.
+  SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
+
+  if (SUBCNode->getOpcode() != ISD::SUBC)
+    return false;
+
+  SDValue MultHi = SUBENode->getOperand(1);
+  SDValue MultLo = SUBCNode->getOperand(1);
+  SDNode *MultNode = MultHi.getNode();
+  unsigned MultOpc = MultHi.getOpcode();
+
+  // MultHi and MultLo must be generated by the same node,
+  if (MultLo.getNode() != MultNode)
+    return false;
+
+  // and it must be a multiplication.
+  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+    return false;
+
+  // MultLo amd MultHi must be the first and second output of MultNode
+  // respectively.
+  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+    return false;
+
+  // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+  // of the values of MultNode, in which case MultNode will be removed in later
+  // phases.
+  // If there exist users other than SUBENode or SUBCNode, this function returns
+  // here, which will result in MultNode being mapped to a single MULT
+  // instruction node rather than a pair of MULT and MSUB instructions being
+  // produced.
+  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+    return false;
+
+  DebugLoc DL = SUBENode->getDebugLoc();
+
+  // Initialize accumulator.
+  SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+                                  SUBCNode->getOperand(0),
+                                  SUBENode->getOperand(0));
+
+  // create MipsSub(u) node
+  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+  SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue,
+                                 MultNode->getOperand(0),// Factor 0
+                                 MultNode->getOperand(1),// Factor 1
+                                 ACCIn);
+
+  // replace uses of sube and subc here
+  if (!SDValue(SUBCNode, 0).use_empty()) {
+    SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
+    SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
+                                    LoIdx);
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
+  }
+  if (!SDValue(SUBENode, 0).use_empty()) {
+    SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
+    SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
+                                    HiIdx);
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
+  }
+
+  return true;
+}
+
+static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const MipsSubtarget *Subtarget) {
+  if (DCI.isBeforeLegalize())
+    return SDValue();
+
+  if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+      selectMADD(N, &DAG))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const MipsSubtarget *Subtarget) {
+  if (DCI.isBeforeLegalize())
+    return SDValue();
+
+  if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+      selectMSUB(N, &DAG))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
+                                      SelectionDAG &DAG,
+                                      const MipsSubtarget *Subtarget) {
+  // See if this is a vector splat immediate node.
+  APInt SplatValue, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  unsigned EltSize = Ty.getVectorElementType().getSizeInBits();
+  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+
+  if (!BV || !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                                  HasAnyUndefs, EltSize,
+                                  !Subtarget->isLittle()))
+    return SDValue();
+
+  return DAG.getNode(Opc, N->getDebugLoc(), Ty, N->getOperand(0),
+                     DAG.getConstant(SplatValue.getZExtValue(), MVT::i32));
+}
+
+static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const MipsSubtarget *Subtarget) {
+  EVT Ty = N->getValueType(0);
+
+  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
+    return SDValue();
+
+  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
+}
+
+static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const MipsSubtarget *Subtarget) {
+  EVT Ty = N->getValueType(0);
+
+  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2()))
+    return SDValue();
+
+  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
+}
+
+
+static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const MipsSubtarget *Subtarget) {
+  EVT Ty = N->getValueType(0);
+
+  if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8))
+    return SDValue();
+
+  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
+}
+
+SDValue
+MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+
+  switch (N->getOpcode()) {
+  case ISD::ADDE:
+    return performADDECombine(N, DAG, DCI, Subtarget);
+  case ISD::SUBE:
+    return performSUBECombine(N, DAG, DCI, Subtarget);
+  case ISD::SHL:
+    return performSHLCombine(N, DAG, DCI, Subtarget);
+  case ISD::SRA:
+    return performSRACombine(N, DAG, DCI, Subtarget);
+  case ISD::SRL:
+    return performSRLCombine(N, DAG, DCI, Subtarget);
+  default:
+    return MipsTargetLowering::PerformDAGCombine(N, DCI);
+  }
+}
+
 MachineBasicBlock *
 MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                   MachineBasicBlock *BB) const {
@@ -133,6 +435,194 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
                                   InternalLinkage, CLI, Callee, Chain);
 }
 
+SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
+                                          bool HasLo, bool HasHi,
+                                          SelectionDAG &DAG) const {
+  EVT Ty = Op.getOperand(0).getValueType();
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
+                             Op.getOperand(0), Op.getOperand(1));
+  SDValue Lo, Hi;
+
+  if (HasLo)
+    Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
+                     DAG.getConstant(Mips::sub_lo, MVT::i32));
+  if (HasHi)
+    Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
+                     DAG.getConstant(Mips::sub_hi, MVT::i32));
+
+  if (!HasLo || !HasHi)
+    return HasLo ? Lo : Hi;
+
+  SDValue Vals[] = { Lo, Hi };
+  return DAG.getMergeValues(Vals, 2, DL);
+}
+
+
+static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) {
+  SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+                             DAG.getConstant(0, MVT::i32));
+  SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+                             DAG.getConstant(1, MVT::i32));
+  return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi);
+}
+
+static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) {
+  SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+                           DAG.getConstant(Mips::sub_lo, MVT::i32));
+  SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+                           DAG.getConstant(Mips::sub_hi, MVT::i32));
+  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+}
+
+// This function expands mips intrinsic nodes which have 64-bit input operands
+// or output values.
+//
+// out64 = intrinsic-node in64
+// =>
+// lo = copy (extract-element (in64, 0))
+// hi = copy (extract-element (in64, 1))
+// mips-specific-node
+// v0 = copy lo
+// v1 = copy hi
+// out64 = merge-values (v0, v1)
+//
+static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
+  DebugLoc DL = Op.getDebugLoc();
+  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
+  SmallVector<SDValue, 3> Ops;
+  unsigned OpNo = 0;
+
+  // See if Op has a chain input.
+  if (HasChainIn)
+    Ops.push_back(Op->getOperand(OpNo++));
+
+  // The next operand is the intrinsic opcode.
+  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
+
+  // See if the next operand has type i64.
+  SDValue Opnd = Op->getOperand(++OpNo), In64;
+
+  if (Opnd.getValueType() == MVT::i64)
+    In64 = initAccumulator(Opnd, DL, DAG);
+  else
+    Ops.push_back(Opnd);
+
+  // Push the remaining operands.
+  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
+    Ops.push_back(Op->getOperand(OpNo));
+
+  // Add In64 to the end of the list.
+  if (In64.getNode())
+    Ops.push_back(In64);
+
+  // Scan output.
+  SmallVector<EVT, 2> ResTys;
+
+  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
+       I != E; ++I)
+    ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
+
+  // Create node.
+  SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
+  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
+
+  if (!HasChainIn)
+    return Out;
+
+  assert(Val->getValueType(1) == MVT::Other);
+  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
+  return DAG.getMergeValues(Vals, 2, DL);
+}
+
+SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
+  default:
+    return SDValue();
+  case Intrinsic::mips_shilo:
+    return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
+  case Intrinsic::mips_dpau_h_qbl:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
+  case Intrinsic::mips_dpau_h_qbr:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
+  case Intrinsic::mips_dpsu_h_qbl:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
+  case Intrinsic::mips_dpsu_h_qbr:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
+  case Intrinsic::mips_dpa_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
+  case Intrinsic::mips_dps_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
+  case Intrinsic::mips_dpax_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
+  case Intrinsic::mips_dpsx_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
+  case Intrinsic::mips_mulsa_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
+  case Intrinsic::mips_mult:
+    return lowerDSPIntr(Op, DAG, MipsISD::Mult);
+  case Intrinsic::mips_multu:
+    return lowerDSPIntr(Op, DAG, MipsISD::Multu);
+  case Intrinsic::mips_madd:
+    return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
+  case Intrinsic::mips_maddu:
+    return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
+  case Intrinsic::mips_msub:
+    return lowerDSPIntr(Op, DAG, MipsISD::MSub);
+  case Intrinsic::mips_msubu:
+    return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
+  }
+}
+
+SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
+  default:
+    return SDValue();
+  case Intrinsic::mips_extp:
+    return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
+  case Intrinsic::mips_extpdp:
+    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
+  case Intrinsic::mips_extr_w:
+    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
+  case Intrinsic::mips_extr_r_w:
+    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
+  case Intrinsic::mips_extr_rs_w:
+    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
+  case Intrinsic::mips_extr_s_h:
+    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
+  case Intrinsic::mips_mthlip:
+    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
+  case Intrinsic::mips_mulsaq_s_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
+  case Intrinsic::mips_maq_s_w_phl:
+    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
+  case Intrinsic::mips_maq_s_w_phr:
+    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
+  case Intrinsic::mips_maq_sa_w_phl:
+    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
+  case Intrinsic::mips_maq_sa_w_phr:
+    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
+  case Intrinsic::mips_dpaq_s_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
+  case Intrinsic::mips_dpsq_s_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
+  case Intrinsic::mips_dpaq_sa_l_w:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
+  case Intrinsic::mips_dpsq_sa_l_w:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
+  case Intrinsic::mips_dpaqx_s_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
+  case Intrinsic::mips_dpaqx_sa_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
+  case Intrinsic::mips_dpsqx_s_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
+  case Intrinsic::mips_dpsqx_sa_w_ph:
+    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
+  }
+}
+
 MachineBasicBlock * MipsSETargetLowering::
 emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
   // $bb:
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 04a28ce54c13..ec8a5c73f1a4 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -15,6 +15,7 @@
 #define MipsSEISELLOWERING_H
 
 #include "MipsISelLowering.h"
+#include "MipsRegisterInfo.h"
 
 namespace llvm {
   class MipsSETargetLowering : public MipsTargetLowering  {
@@ -23,9 +24,26 @@ namespace llvm {
 
     virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
 
+    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
     virtual MachineBasicBlock *
     EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
 
+    virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+                                    EVT VT) const {
+      return false;
+    }
+
+    virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
+      if (VT == MVT::Untyped)
+        return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass :
+                                     &Mips::ACRegsRegClass;
+
+      return TargetLowering::getRepRegClassFor(VT);
+    }
+
   private:
     virtual bool
     isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
@@ -38,6 +56,12 @@ namespace llvm {
                 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
                 CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
 
+    SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
+                        SelectionDAG &DAG) const;
+
+    SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+
     MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
                                     MachineBasicBlock *BB) const;
   };
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index a9809ef7126d..ca0315ed9f6e 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -136,6 +136,12 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     else if (Mips::FGR64RegClass.contains(DestReg))
       Opc = Mips::DMTC1;
   }
+  else if (Mips::ACRegsRegClass.contains(DestReg, SrcReg))
+    Opc = Mips::COPY_AC64;
+  else if (Mips::ACRegsDSPRegClass.contains(DestReg, SrcReg))
+    Opc = Mips::COPY_AC_DSP;
+  else if (Mips::ACRegs128RegClass.contains(DestReg, SrcReg))
+    Opc = Mips::COPY_AC128;
 
   assert(Opc && "Cannot copy registers");
 
@@ -152,10 +158,10 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 }
 
 void MipsSEInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                    unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC,
-                    const TargetRegisterInfo *TRI) const {
+storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                unsigned SrcReg, bool isKill, int FI,
+                const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+                int64_t Offset) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
@@ -166,6 +172,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
   else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
     Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
+  else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
+    Opc = IsN64 ? Mips::STORE_AC64_P8 : Mips::STORE_AC64;
+  else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
+    Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP;
+  else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
+    Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128;
   else if (Mips::FGR32RegClass.hasSubClassEq(RC))
     Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
   else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
@@ -175,15 +187,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 
   assert(Opc && "Register class not handled!");
   BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
-    .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+    .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
 }
 
 void MipsSEInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                     unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC,
-                     const TargetRegisterInfo *TRI) const
-{
+loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                 unsigned DestReg, int FI, const TargetRegisterClass *RC,
+                 const TargetRegisterInfo *TRI, int64_t Offset) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
@@ -193,6 +203,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
   else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
     Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
+  else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
+    Opc = IsN64 ? Mips::LOAD_AC64_P8 : Mips::LOAD_AC64;
+  else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
+    Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP;
+  else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
+    Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128;
   else if (Mips::FGR32RegClass.hasSubClassEq(RC))
     Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
   else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
@@ -201,7 +217,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
 
   assert(Opc && "Register class not handled!");
-  BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+  BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
     .addMemOperand(MMO);
 }
 
@@ -371,6 +387,7 @@ void MipsSEInstrInfo::ExpandEhReturn(MachineBasicBlock &MBB,
   unsigned JR = STI.isABI_N64() ? Mips::JR64 : Mips::JR;
   unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
   unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA;
+  unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9;
   unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
   unsigned OffsetReg = I->getOperand(0).getReg();
   unsigned TargetReg = I->getOperand(1).getReg();
@@ -378,6 +395,9 @@ void MipsSEInstrInfo::ExpandEhReturn(MachineBasicBlock &MBB,
   // or   $ra, $v0, $zero
   // addu $sp, $sp, $v1
   // jr   $ra
+  if (TM.getRelocationModel() == Reloc::PIC_)
+    BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(OR), T9)
+        .addReg(TargetReg).addReg(ZERO);
   BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(OR), RA)
       .addReg(TargetReg).addReg(ZERO);
   BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP)
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index 3e22b33ed745..0bf7876f0fe0 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -49,17 +49,19 @@ class MipsSEInstrInfo : public MipsInstrInfo {
                            unsigned DestReg, unsigned SrcReg,
                            bool KillSrc) const;
 
-  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC,
-                                   const TargetRegisterInfo *TRI) const;
-
-  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                    MachineBasicBlock::iterator MBBI,
-                                    unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC,
-                                    const TargetRegisterInfo *TRI) const;
+  virtual void storeRegToStack(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MI,
+                               unsigned SrcReg, bool isKill, int FrameIndex,
+                               const TargetRegisterClass *RC,
+                               const TargetRegisterInfo *TRI,
+                               int64_t Offset) const;
+
+  virtual void loadRegFromStack(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI,
+                                unsigned DestReg, int FrameIndex,
+                                const TargetRegisterClass *RC,
+                                const TargetRegisterInfo *TRI,
+                                int64_t Offset) const;
 
   virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
 
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index a39b393e4e09..96967380b29d 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -54,6 +54,15 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
   return true;
 }
 
+const TargetRegisterClass *
+MipsSERegisterInfo::intRegClass(unsigned Size) const {
+  if (Size == 4)
+    return &Mips::CPURegsRegClass;
+
+  assert(Size == 8);
+  return &Mips::CPU64RegsRegClass;
+}
+
 void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
                                      unsigned OpNo, int FrameIndex,
                                      uint64_t StackSize,
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index f6827e966310..2f7c37bb460d 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -31,6 +31,8 @@ class MipsSERegisterInfo : public MipsRegisterInfo {
 
   bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
 
+  virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
+
 private:
   virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
                            int FrameIndex, uint64_t StackSize,
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index e11e5d142b74..14a2b2779512 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -11,29 +11,56 @@
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "mips-subtarget"
+
+#include "MipsMachineFunction.h"
 #include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
 #include "Mips.h"
 #include "MipsRegisterInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 
 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
 #include "MipsGenSubtargetInfo.inc"
 
+
 using namespace llvm;
 
+// FIXME: Maybe this should be on by default when Mips16 is specified
+//
+static cl::opt<bool> Mixed16_32(
+  "mips-mixed-16-32",
+  cl::init(false),
+  cl::desc("Allow for a mixture of Mips16 "
+           "and Mips32 code in a single source file"),
+  cl::Hidden);
+
+static cl::opt<bool> Mips_Os16(
+  "mips-os16",
+  cl::init(false),
+  cl::desc("Compile all functions that don' use "
+           "floating point as Mips 16"),
+  cl::Hidden);
+
 void MipsSubtarget::anchor() { }
 
 MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
                              const std::string &FS, bool little,
-                             Reloc::Model _RM) :
+                             Reloc::Model _RM, MipsTargetMachine *_TM) :
   MipsGenSubtargetInfo(TT, CPU, FS),
   MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
   IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
   IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false),
   HasBitCount(false), HasFPIdx(false),
   InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
-  RM(_RM)
+  AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+  RM(_RM), OverrideMode(NoOverride), TM(_TM)
 {
   std::string CPUName = CPU;
   if (CPUName.empty())
@@ -42,6 +69,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
   // Parse features string.
   ParseSubtargetFeatures(CPUName, FS);
 
+  PreviousInMips16Mode = InMips16Mode;
+
   // Initialize scheduling itinerary for the specified CPU.
   InstrItins = getInstrItineraryForCPU(CPUName);
 
@@ -72,3 +101,48 @@ MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                             &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass);
   return OptLevel >= CodeGenOpt::Aggressive;
 }
+
+//FIXME: This logic for reseting the subtarget along with
+// the helper classes can probably be simplified but there are a lot of
+// cases so we will defer rewriting this to later.
+//
+void MipsSubtarget::resetSubtarget(MachineFunction *MF) {
+  bool ChangeToMips16 = false, ChangeToNoMips16 = false;
+  DEBUG(dbgs() << "resetSubtargetFeatures" << "\n");
+  AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+  ChangeToMips16 = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+                                        "mips16");
+  ChangeToNoMips16 = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+                                        "nomips16");
+  assert (!(ChangeToMips16 & ChangeToNoMips16) &&
+          "mips16 and nomips16 specified on the same function");
+  if (ChangeToMips16) {
+    if (PreviousInMips16Mode)
+      return;
+    OverrideMode = Mips16Override;
+    PreviousInMips16Mode = true;
+    TM->setHelperClassesMips16();
+    return;
+  } else if (ChangeToNoMips16) {
+    if (!PreviousInMips16Mode)
+      return;
+    OverrideMode = NoMips16Override;
+    PreviousInMips16Mode = false;
+    TM->setHelperClassesMipsSE();
+    return;
+  } else {
+    if (OverrideMode == NoOverride)
+      return;
+    OverrideMode = NoOverride;
+    DEBUG(dbgs() << "back to default" << "\n");
+    if (inMips16Mode() && !PreviousInMips16Mode) {
+      TM->setHelperClassesMips16();
+      PreviousInMips16Mode = true;
+    } else if (!inMips16Mode() && PreviousInMips16Mode) {
+      TM->setHelperClassesMipsSE();
+      PreviousInMips16Mode = false;
+    }
+    return;
+  }
+}
+
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 7a2e47ce5a9d..f2f0e15887e4 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -16,7 +16,9 @@
 
 #include "MCTargetDesc/MipsReginfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+
 #include <string>
 
 #define GET_SUBTARGETINFO_HEADER
@@ -25,6 +27,8 @@
 namespace llvm {
 class StringRef;
 
+class MipsTargetMachine;
+
 class MipsSubtarget : public MipsGenSubtargetInfo {
   virtual void anchor();
 
@@ -89,12 +93,23 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
   // InMips16 -- can process Mips16 instructions
   bool InMips16Mode;
 
+  // PreviousInMips16 -- the function we just processed was in Mips 16 Mode
+  bool PreviousInMips16Mode;
+
   // InMicroMips -- can process MicroMips instructions
   bool InMicroMipsMode;
 
   // HasDSP, HasDSPR2 -- supports DSP ASE.
   bool HasDSP, HasDSPR2;
 
+  // Allow mixed Mips16 and Mips32 in one source file
+  bool AllowMixed16_32;
+
+  // Optimize for space by compiling all functions as Mips 16 unless
+  // it needs floating point. Functions needing floating point are
+  // compiled as Mips32
+  bool Os16;
+
   InstrItineraryData InstrItins;
 
   // The instance to the register info section object
@@ -103,6 +118,12 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
   // Relocation Model
   Reloc::Model RM;
 
+  // We can override the determination of whether we are in mips16 mode
+  // as from the command line
+  enum {NoOverride, Mips16Override, NoMips16Override} OverrideMode;
+
+  MipsTargetMachine *TM;
+
 public:
   virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                                      AntiDepBreakMode& Mode,
@@ -118,7 +139,8 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   MipsSubtarget(const std::string &TT, const std::string &CPU,
-                const std::string &FS, bool little, Reloc::Model RM);
+                const std::string &FS, bool little, Reloc::Model RM,
+                MipsTargetMachine *TM);
 
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
@@ -137,7 +159,20 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
   bool isSingleFloat() const { return IsSingleFloat; }
   bool isNotSingleFloat() const { return !IsSingleFloat; }
   bool hasVFPU() const { return HasVFPU; }
-  bool inMips16Mode() const { return InMips16Mode; }
+  bool inMips16Mode() const {
+    switch (OverrideMode) {
+    case NoOverride:
+      return InMips16Mode;
+    case Mips16Override:
+      return true;
+    case NoMips16Override:
+      return false;
+    }
+    llvm_unreachable("Unexpected mode");
+  }
+  bool inMips16ModeDefault() {
+    return InMips16Mode;
+  }
   bool inMicroMipsMode() const { return InMicroMipsMode; }
   bool hasDSP() const { return HasDSP; }
   bool hasDSPR2() const { return HasDSPR2; }
@@ -153,11 +188,20 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
   bool hasBitCount()  const { return HasBitCount; }
   bool hasFPIdx()     const { return HasFPIdx; }
 
+  bool allowMixed16_32() const { return AllowMixed16_32;};
+
+  bool os16() const { return Os16;};
+
   // Grab MipsRegInfo object
   const MipsReginfo &getMReginfo() const { return MRI; }
 
   // Grab relocation model
   Reloc::Model getRelocationModel() const {return RM;}
+
+  /// \brief Reset the subtarget for the Mips target.
+  void resetSubtarget(MachineFunction *MF);
+
+
 };
 } // End llvm namespace
 
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 33363580aba7..ee28e2a122dd 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -15,11 +15,26 @@
 #include "Mips.h"
 #include "MipsFrameLowering.h"
 #include "MipsInstrInfo.h"
+#include "MipsModuleISelDAGToDAG.h"
+#include "MipsOs16.h"
+#include "MipsSEFrameLowering.h"
+#include "MipsSEInstrInfo.h"
+#include "MipsSEISelLowering.h"
+#include "MipsSEISelDAGToDAG.h"
+#include "Mips16FrameLowering.h"
+#include "Mips16InstrInfo.h"
+#include "Mips16ISelDAGToDAG.h"
+#include "Mips16ISelLowering.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/PassManager.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
+
+
 extern "C" void LLVMInitializeMipsTarget() {
   // Register the target.
   RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
@@ -42,7 +57,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
                   CodeGenOpt::Level OL,
                   bool isLittle)
   : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-    Subtarget(TT, CPU, FS, isLittle, RM),
+    Subtarget(TT, CPU, FS, isLittle, RM, this),
     DL(isLittle ?
                (Subtarget.isABI_N64() ?
                 "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-"
@@ -54,9 +69,46 @@ MipsTargetMachine(const Target &T, StringRef TT,
                 "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")),
     InstrInfo(MipsInstrInfo::create(*this)),
     FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
-    TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() {
+    TLInfo(MipsTargetLowering::create(*this)),
+    TSInfo(*this), JITInfo() {
+}
+
+
+void MipsTargetMachine::setHelperClassesMips16() {
+  InstrInfoSE.swap(InstrInfo);
+  FrameLoweringSE.swap(FrameLowering);
+  TLInfoSE.swap(TLInfo);
+  if (!InstrInfo16) {
+    InstrInfo.reset(MipsInstrInfo::create(*this));
+    FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget));
+    TLInfo.reset(MipsTargetLowering::create(*this));
+  } else {
+    InstrInfo16.swap(InstrInfo);
+    FrameLowering16.swap(FrameLowering);
+    TLInfo16.swap(TLInfo);
+  }
+  assert(TLInfo && "null target lowering 16");
+  assert(InstrInfo && "null instr info 16");
+  assert(FrameLowering && "null frame lowering 16");
 }
 
+void MipsTargetMachine::setHelperClassesMipsSE() {
+  InstrInfo16.swap(InstrInfo);
+  FrameLowering16.swap(FrameLowering);
+  TLInfo16.swap(TLInfo);
+  if (!InstrInfoSE) {
+    InstrInfo.reset(MipsInstrInfo::create(*this));
+    FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget));
+    TLInfo.reset(MipsTargetLowering::create(*this));
+  } else {
+    InstrInfoSE.swap(InstrInfo);
+    FrameLoweringSE.swap(FrameLowering);
+    TLInfoSE.swap(TLInfo);
+  }
+  assert(TLInfo && "null target lowering in SE");
+  assert(InstrInfo && "null instr info SE");
+  assert(FrameLowering && "null frame lowering SE");
+}
 void MipsebTargetMachine::anchor() { }
 
 MipsebTargetMachine::
@@ -90,6 +142,7 @@ class MipsPassConfig : public TargetPassConfig {
     return *getMipsTargetMachine().getSubtargetImpl();
   }
 
+  virtual void addIRPasses();
   virtual bool addInstSelector();
   virtual bool addPreEmitPass();
 };
@@ -99,24 +152,50 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new MipsPassConfig(this, PM);
 }
 
+void MipsPassConfig::addIRPasses() {
+  TargetPassConfig::addIRPasses();
+  if (getMipsSubtarget().os16())
+    addPass(createMipsOs16(getMipsTargetMachine()));
+}
 // Install an instruction selector pass using
 // the ISelDag to gen Mips code.
 bool MipsPassConfig::addInstSelector() {
-  addPass(createMipsISelDag(getMipsTargetMachine()));
+  if (getMipsSubtarget().allowMixed16_32()) {
+    addPass(createMipsModuleISelDag(getMipsTargetMachine()));
+    addPass(createMips16ISelDag(getMipsTargetMachine()));
+    addPass(createMipsSEISelDag(getMipsTargetMachine()));
+  } else {
+    addPass(createMipsISelDag(getMipsTargetMachine()));
+  }
   return false;
 }
 
+void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+  if (Subtarget.allowMixed16_32()) {
+    DEBUG(errs() << "No ");
+    //FIXME: The Basic Target Transform Info
+    // pass needs to become a function pass instead of
+    // being an immutable pass and then this method as it exists now
+    // would be unnecessary.
+    PM.add(createNoTargetTransformInfoPass());
+  } else
+    LLVMTargetMachine::addAnalysisPasses(PM);
+  DEBUG(errs() << "Target Transform Info Pass Added\n");
+}
+
 // Implemented by targets that want to run passes immediately before
 // machine code is emitted. return true if -print-machineinstrs should
 // print out the code after the passes.
 bool MipsPassConfig::addPreEmitPass() {
   MipsTargetMachine &TM = getMipsTargetMachine();
+  const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
   addPass(createMipsDelaySlotFillerPass(TM));
 
-  // NOTE: long branch has not been implemented for mips16.
-  if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding())
+  if (Subtarget.hasStandardEncoding() ||
+      Subtarget.allowMixed16_32())
     addPass(createMipsLongBranchPass(TM));
-  if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+  if (Subtarget.inMips16Mode() ||
+      Subtarget.allowMixed16_32())
     addPass(createMipsConstantIslandPass(TM));
 
   return true;
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 7e5f19226433..ee557084fbbf 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -21,6 +21,8 @@
 #include "MipsSelectionDAGInfo.h"
 #include "MipsSubtarget.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
@@ -35,6 +37,12 @@ class MipsTargetMachine : public LLVMTargetMachine {
   OwningPtr<const MipsInstrInfo> InstrInfo;
   OwningPtr<const MipsFrameLowering> FrameLowering;
   OwningPtr<const MipsTargetLowering> TLInfo;
+  OwningPtr<const MipsInstrInfo> InstrInfo16;
+  OwningPtr<const MipsFrameLowering> FrameLowering16;
+  OwningPtr<const MipsTargetLowering> TLInfo16;
+  OwningPtr<const MipsInstrInfo> InstrInfoSE;
+  OwningPtr<const MipsFrameLowering> FrameLoweringSE;
+  OwningPtr<const MipsTargetLowering> TLInfoSE;
   MipsSelectionDAGInfo TSInfo;
   MipsJITInfo JITInfo;
 
@@ -47,6 +55,8 @@ class MipsTargetMachine : public LLVMTargetMachine {
 
   virtual ~MipsTargetMachine() {}
 
+  virtual void addAnalysisPasses(PassManagerBase &PM);
+
   virtual const MipsInstrInfo *getInstrInfo() const
   { return InstrInfo.get(); }
   virtual const TargetFrameLowering *getFrameLowering() const
@@ -73,6 +83,13 @@ class MipsTargetMachine : public LLVMTargetMachine {
   // Pass Pipeline Configuration
   virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
   virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
+
+  // Set helper classes
+  void setHelperClassesMips16();
+
+  void setHelperClassesMipsSE();
+
+
 };
 
 /// MipsebTargetMachine - Mips32/64 big endian target machine.
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 47baef669611..7da2fed4cd57 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -22,6 +22,7 @@ set(NVPTXCodeGen_sources
   NVPTXAllocaHoisting.cpp
   NVPTXAsmPrinter.cpp
   NVPTXUtilities.cpp
+  NVVMReflect.cpp
   )
 
 add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index 454583850b71..b3e8b5d2622d 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -52,25 +52,24 @@ enum PropertyAnnotation {
 };
 
 const unsigned AnnotationNameLen = 8; // length of each annotation name
-const char
-PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
-  "maxntidx",               // PROPERTY_MAXNTID_X
-  "maxntidy",               // PROPERTY_MAXNTID_Y
-  "maxntidz",               // PROPERTY_MAXNTID_Z
-  "reqntidx",               // PROPERTY_REQNTID_X
-  "reqntidy",               // PROPERTY_REQNTID_Y
-  "reqntidz",               // PROPERTY_REQNTID_Z
-  "minctasm",               // PROPERTY_MINNCTAPERSM
-  "texture",                // PROPERTY_ISTEXTURE
-  "surface",                // PROPERTY_ISSURFACE
-  "sampler",                // PROPERTY_ISSAMPLER
-  "rdoimage",               // PROPERTY_ISREADONLY_IMAGE_PARAM
-  "wroimage",               // PROPERTY_ISWRITEONLY_IMAGE_PARAM
-  "kernel",                 // PROPERTY_ISKERNEL_FUNCTION
-  "align",                  // PROPERTY_ALIGN
+const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
+  "maxntidx",                         // PROPERTY_MAXNTID_X
+  "maxntidy",                         // PROPERTY_MAXNTID_Y
+  "maxntidz",                         // PROPERTY_MAXNTID_Z
+  "reqntidx",                         // PROPERTY_REQNTID_X
+  "reqntidy",                         // PROPERTY_REQNTID_Y
+  "reqntidz",                         // PROPERTY_REQNTID_Z
+  "minctasm",                         // PROPERTY_MINNCTAPERSM
+  "texture",                          // PROPERTY_ISTEXTURE
+  "surface",                          // PROPERTY_ISSURFACE
+  "sampler",                          // PROPERTY_ISSAMPLER
+  "rdoimage",                         // PROPERTY_ISREADONLY_IMAGE_PARAM
+  "wroimage",                         // PROPERTY_ISWRITEONLY_IMAGE_PARAM
+  "kernel",                           // PROPERTY_ISKERNEL_FUNCTION
+  "align",                            // PROPERTY_ALIGN
 
-  // last property
-  "proplast",               // PROPERTY_LAST
+              // last property
+  "proplast", // PROPERTY_LAST
 };
 
 // name of named metadata used for global annotations
@@ -80,9 +79,8 @@ PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
 // compiling those .cpp files, hence __attribute__((unused)).
 __attribute__((unused))
 #endif
-static const char* NamedMDForAnnotations = "nvvm.annotations";
+    static const char *NamedMDForAnnotations = "nvvm.annotations";
 
 }
 
-
 #endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 619181994ae8..459cd96cb0cd 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -23,10 +23,9 @@ bool CompileForDebugging;
 // compile for debugging
 static cl::opt<bool, true>
 Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
-      cl::location(CompileForDebugging),
-      cl::init(false));
+      cl::location(CompileForDebugging), cl::init(false));
 
-void NVPTXMCAsmInfo::anchor() { }
+void NVPTXMCAsmInfo::anchor() {}
 
 NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
   Triple TheTriple(TT);
@@ -55,7 +54,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
   Data32bitsDirective = " .b32 ";
   Data64bitsDirective = " .b64 ";
   PrivateGlobalPrefix = "";
-  ZeroDirective =  " .b8";
+  ZeroDirective = " .b8";
   AsciiDirective = " .b8";
   AscizDirective = " .b8";
 
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 44aa01ca6e30..ccd29705df72 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -28,7 +28,6 @@
 #define GET_REGINFO_MC_DESC
 #include "NVPTXGenRegisterInfo.inc"
 
-
 using namespace llvm;
 
 static MCInstrInfo *createNVPTXMCInstrInfo() {
@@ -44,22 +43,20 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
   return X;
 }
 
-static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
-                                                   StringRef FS) {
+static MCSubtargetInfo *
+createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
   MCSubtargetInfo *X = new MCSubtargetInfo();
   InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
   return X;
 }
 
-static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
-                                               CodeModel::Model CM,
-                                               CodeGenOpt::Level OL) {
+static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
+    StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
   X->InitMCCodeGenInfo(RM, CM, OL);
   return X;
 }
 
-
 // Force static initialization.
 extern "C" void LLVMInitializeNVPTXTargetMC() {
   // Register the MC asm info.
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index b5684883fc95..d6c79b5110cc 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #ifndef LLVM_SUPPORT_MANAGED_STRING_H
 #define LLVM_SUPPORT_MANAGED_STRING_H
 
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index b46ea881c4b2..6a53a443bfb6 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -41,18 +41,24 @@ enum CondCodes {
 
 inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
   switch (CC) {
-  case NVPTXCC::NE:  return "ne";
-  case NVPTXCC::EQ:   return "eq";
-  case NVPTXCC::LT:   return "lt";
-  case NVPTXCC::LE:  return "le";
-  case NVPTXCC::GT:  return "gt";
-  case NVPTXCC::GE:   return "ge";
+  case NVPTXCC::NE:
+    return "ne";
+  case NVPTXCC::EQ:
+    return "eq";
+  case NVPTXCC::LT:
+    return "lt";
+  case NVPTXCC::LE:
+    return "le";
+  case NVPTXCC::GT:
+    return "gt";
+  case NVPTXCC::GE:
+    return "ge";
   }
   llvm_unreachable("Unknown condition code");
 }
 
-FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
-                                 llvm::CodeGenOpt::Level OptLevel);
+FunctionPass *
+createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
 FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
 FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
 FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
@@ -62,8 +68,7 @@ bool isImageOrSamplerVal(const Value *, const Module *);
 extern Target TheNVPTXTarget32;
 extern Target TheNVPTXTarget64;
 
-namespace NVPTX
-{
+namespace NVPTX {
 enum DrvInterface {
   NVCL,
   CUDA,
@@ -102,7 +107,7 @@ enum LoadStore {
 };
 
 namespace PTXLdStInstCode {
-enum AddressSpace{
+enum AddressSpace {
   GENERIC = 0,
   GLOBAL = 1,
   CONSTANT = 2,
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
index 7aee3595c625..d78b4e81a3e5 100644
--- a/lib/Target/NVPTX/NVPTX.td
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -26,14 +26,6 @@ include "NVPTXInstrInfo.td"
 //===----------------------------------------------------------------------===//
 
 // SM Versions
-def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10",
-                            "Target SM 1.0">;
-def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11",
-                            "Target SM 1.1">;
-def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12",
-                            "Target SM 1.2">;
-def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13",
-                            "Target SM 1.3">;
 def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20",
                             "Target SM 2.0">;
 def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
@@ -56,10 +48,6 @@ def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
 class Proc<string Name, list<SubtargetFeature> Features>
  : Processor<Name, NoItineraries, Features>;
 
-def : Proc<"sm_10", [SM10]>;
-def : Proc<"sm_11", [SM11]>;
-def : Proc<"sm_12", [SM12]>;
-def : Proc<"sm_13", [SM13]>;
 def : Proc<"sm_20", [SM20]>;
 def : Proc<"sm_21", [SM21]>;
 def : Proc<"sm_30", [SM30]>;
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 60f52a46daaa..0f792ec6826e 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -19,9 +19,9 @@
 namespace llvm {
 
 bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
-  bool               functionModified    = false;
-  Function::iterator I                   = function.begin();
-  TerminatorInst    *firstTerminatorInst = (I++)->getTerminator();
+  bool functionModified = false;
+  Function::iterator I = function.begin();
+  TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
 
   for (Function::iterator E = function.end(); I != E; ++I) {
     for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
@@ -37,12 +37,10 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
 }
 
 char NVPTXAllocaHoisting::ID = 1;
-RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
-                                    "Hoisting alloca instructions in non-entry "
-                                    "blocks to the entry block");
+RegisterPass<NVPTXAllocaHoisting>
+X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
+                     "blocks to the entry block");
 
-FunctionPass *createAllocaHoisting() {
-  return new NVPTXAllocaHoisting();
-}
+FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
 
 } // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 0115e1f5d3a8..ce5d78afa332 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -47,7 +47,6 @@
 #include <sstream>
 using namespace llvm;
 
-
 #include "NVPTXGenAsmWriter.inc"
 
 bool RegAllocNilUsed = true;
@@ -59,21 +58,17 @@ EmitLineNumbers("nvptx-emit-line-numbers",
                 cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
                 cl::init(true));
 
-namespace llvm  {
-bool InterleaveSrcInPtx = false;
-}
-
-static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src",
-                                        cl::ZeroOrMore,
-                       cl::desc("NVPTX Specific: Emit source line in ptx file"),
-                                        cl::location(llvm::InterleaveSrcInPtx));
+namespace llvm { bool InterleaveSrcInPtx = false; }
 
+static cl::opt<bool, true>
+InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
+              cl::desc("NVPTX Specific: Emit source line in ptx file"),
+              cl::location(llvm::InterleaveSrcInPtx));
 
 namespace {
 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
 /// depends.
-void DiscoverDependentGlobals(Value *V,
-                              DenseSet<GlobalVariable*> &Globals) {
+void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
     Globals.insert(GV);
   else {
@@ -88,12 +83,12 @@ void DiscoverDependentGlobals(Value *V,
 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
 /// instances to be emitted, but only after any dependents have been added
 /// first.
-void VisitGlobalVariableForEmission(GlobalVariable *GV,
-                                    SmallVectorImpl<GlobalVariable*> &Order,
-                                    DenseSet<GlobalVariable*> &Visited,
-                                    DenseSet<GlobalVariable*> &Visiting) {
+void VisitGlobalVariableForEmission(
+    GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order,
+    DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) {
   // Have we already visited this one?
-  if (Visited.count(GV)) return;
+  if (Visited.count(GV))
+    return;
 
   // Do we have a circular dependency?
   if (Visiting.count(GV))
@@ -103,12 +98,13 @@ void VisitGlobalVariableForEmission(GlobalVariable *GV,
   Visiting.insert(GV);
 
   // Make sure we visit all dependents first
-  DenseSet<GlobalVariable*> Others;
+  DenseSet<GlobalVariable *> Others;
   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
     DiscoverDependentGlobals(GV->getOperand(i), Others);
-  
-  for (DenseSet<GlobalVariable*>::iterator I = Others.begin(),
-       E = Others.end(); I != E; ++I)
+
+  for (DenseSet<GlobalVariable *>::iterator I = Others.begin(),
+                                            E = Others.end();
+       I != E; ++I)
     VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
 
   // Now we can visit ourself
@@ -142,25 +138,23 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
   if (CE == 0)
     llvm_unreachable("Unknown constant value to lower!");
 
-
   switch (CE->getOpcode()) {
   default:
     // If the code isn't optimized, there may be outstanding folding
     // opportunities. Attempt to fold the expression using DataLayout as a
     // last resort before giving up.
-    if (Constant *C =
-        ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+    if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
       if (C != CE)
         return LowerConstant(C, AP);
 
     // Otherwise report the problem to the user.
     {
-        std::string S;
-        raw_string_ostream OS(S);
-        OS << "Unsupported expression in static initializer: ";
-        WriteAsOperand(OS, CE, /*PrintType=*/false,
-                       !AP.MF ? 0 : AP.MF->getFunction()->getParent());
-        report_fatal_error(OS.str());
+      std::string S;
+      raw_string_ostream OS(S);
+      OS << "Unsupported expression in static initializer: ";
+      WriteAsOperand(OS, CE, /*PrintType=*/ false,
+                     !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+      report_fatal_error(OS.str());
     }
   case Instruction::GetElementPtr: {
     const DataLayout &TD = *AP.TM.getDataLayout();
@@ -182,7 +176,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
     // expression properly.  This is important for differences between
     // blockaddress labels.  Since the two labels are in the same function, it
     // is reasonable to treat their delta as a 32-bit value.
-    // FALL THROUGH.
+  // FALL THROUGH.
   case Instruction::BitCast:
     return LowerConstant(CE->getOperand(0), AP);
 
@@ -192,7 +186,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
     // integer type.  This promotes constant folding and simplifies this code.
     Constant *Op = CE->getOperand(0);
     Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
-                                      false/*ZExt*/);
+                                      false /*ZExt*/);
     return LowerConstant(Op, AP);
   }
 
@@ -214,11 +208,12 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
     // the high bits so we are sure to get a proper truncation if the input is
     // a constant expr.
     unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
-    const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+    const MCExpr *MaskExpr =
+        MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
     return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
   }
 
-  // The MC library also has a right-shift operator, but it isn't consistently
+    // The MC library also has a right-shift operator, but it isn't consistently
   // signed or unsigned between different targets.
   case Instruction::Add:
   case Instruction::Sub:
@@ -232,24 +227,32 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
     const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
     const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
     switch (CE->getOpcode()) {
-    default: llvm_unreachable("Unknown binary operator constant cast expr");
-    case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
-    case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
-    case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
-    case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
-    case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
-    case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
-    case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
-    case Instruction::Or:  return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
-    case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+    default:
+      llvm_unreachable("Unknown binary operator constant cast expr");
+    case Instruction::Add:
+      return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+    case Instruction::Sub:
+      return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+    case Instruction::Mul:
+      return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+    case Instruction::SDiv:
+      return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+    case Instruction::SRem:
+      return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+    case Instruction::Shl:
+      return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+    case Instruction::And:
+      return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+    case Instruction::Or:
+      return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
+    case Instruction::Xor:
+      return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
     }
   }
   }
 }
 
-
-void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
-{
+void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
   if (!EmitLineNumbers)
     return;
   if (ignoreLoc(MI))
@@ -268,7 +271,6 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
   if (curLoc.isUnknown())
     return;
 
-
   const MachineFunction *MF = MI.getParent()->getParent();
   //const TargetMachine &TM = MF->getTarget();
 
@@ -289,14 +291,13 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
   if (filenameMap.find(fileName.str()) == filenameMap.end())
     return;
 
-
   // Emit the line from the source file.
   if (llvm::InterleaveSrcInPtx)
     this->emitSrcInText(fileName.str(), curLoc.getLine());
 
   std::stringstream temp;
-  temp << "\t.loc " << filenameMap[fileName.str()]
-       << " " << curLoc.getLine() << " " << curLoc.getCol();
+  temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
+       << " " << curLoc.getCol();
   OutStreamer.EmitRawText(Twine(temp.str().c_str()));
 }
 
@@ -309,9 +310,7 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   OutStreamer.EmitRawText(OS.str());
 }
 
-void NVPTXAsmPrinter::printReturnValStr(const Function *F,
-                                        raw_ostream &O)
-{
+void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
   const DataLayout *TD = TM.getDataLayout();
   const TargetLowering *TLI = TM.getTargetLowering();
 
@@ -329,53 +328,49 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F,
       unsigned size = 0;
       if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
         size = ITy->getBitWidth();
-        if (size < 32) size = 32;
+        if (size < 32)
+          size = 32;
       } else {
-        assert(Ty->isFloatingPointTy() &&
-               "Floating point type expected here");
+        assert(Ty->isFloatingPointTy() && "Floating point type expected here");
         size = Ty->getPrimitiveSizeInBits();
       }
 
       O << ".param .b" << size << " func_retval0";
-    }
-    else if (isa<PointerType>(Ty)) {
+    } else if (isa<PointerType>(Ty)) {
       O << ".param .b" << TLI->getPointerTy().getSizeInBits()
-            << " func_retval0";
+        << " func_retval0";
     } else {
-      if ((Ty->getTypeID() == Type::StructTyID) ||
-          isa<VectorType>(Ty)) {
+      if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
         SmallVector<EVT, 16> vtparts;
         ComputeValueVTs(*TLI, Ty, vtparts);
         unsigned totalsz = 0;
-        for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+        for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
           unsigned elems = 1;
           EVT elemtype = vtparts[i];
           if (vtparts[i].isVector()) {
             elems = vtparts[i].getVectorNumElements();
             elemtype = vtparts[i].getVectorElementType();
           }
-          for (unsigned j=0, je=elems; j!=je; ++j) {
+          for (unsigned j = 0, je = elems; j != je; ++j) {
             unsigned sz = elemtype.getSizeInBits();
-            if (elemtype.isInteger() && (sz < 8)) sz = 8;
-            totalsz += sz/8;
+            if (elemtype.isInteger() && (sz < 8))
+              sz = 8;
+            totalsz += sz / 8;
           }
         }
         unsigned retAlignment = 0;
         if (!llvm::getAlign(*F, 0, retAlignment))
           retAlignment = TD->getABITypeAlignment(Ty);
-        O << ".param .align "
-            << retAlignment
-            << " .b8 func_retval0["
-            << totalsz << "]";
+        O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
+          << "]";
       } else
-        assert(false &&
-               "Unknown return type");
+        assert(false && "Unknown return type");
     }
   } else {
     SmallVector<EVT, 16> vtparts;
     ComputeValueVTs(*TLI, Ty, vtparts);
     unsigned idx = 0;
-    for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+    for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
       unsigned elems = 1;
       EVT elemtype = vtparts[i];
       if (vtparts[i].isVector()) {
@@ -383,14 +378,16 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F,
         elemtype = vtparts[i].getVectorElementType();
       }
 
-      for (unsigned j=0, je=elems; j!=je; ++j) {
+      for (unsigned j = 0, je = elems; j != je; ++j) {
         unsigned sz = elemtype.getSizeInBits();
-        if (elemtype.isInteger() && (sz < 32)) sz = 32;
+        if (elemtype.isInteger() && (sz < 32))
+          sz = 32;
         O << ".reg .b" << sz << " func_retval" << idx;
-        if (j<je-1) O << ", ";
+        if (j < je - 1)
+          O << ", ";
         ++idx;
       }
-      if (i < e-1)
+      if (i < e - 1)
         O << ", ";
     }
   }
@@ -411,7 +408,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
   // Set up
   MRI = &MF->getRegInfo();
   F = MF->getFunction();
-  emitLinkageDirective(F,O);
+  emitLinkageDirective(F, O);
   if (llvm::isKernelFunction(*F))
     O << ".entry ";
   else {
@@ -434,7 +431,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
   const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
   unsigned numRegClasses = TRI.getNumRegClasses();
-  VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1];
+  VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1];
   OutStreamer.EmitRawText(StringRef("{\n"));
   setAndEmitFunctionVirtualRegisters(*MF);
 
@@ -446,54 +443,63 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() {
 
 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
   OutStreamer.EmitRawText(StringRef("}\n"));
-  delete []VRidGlobal2LocalMap;
+  delete[] VRidGlobal2LocalMap;
 }
 
-
-void
-NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F,
-                                              raw_ostream &O) const {
+void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
+                                                   raw_ostream &O) const {
   // If the NVVM IR has some of reqntid* specified, then output
   // the reqntid directive, and set the unspecified ones to 1.
   // If none of reqntid* is specified, don't output reqntid directive.
   unsigned reqntidx, reqntidy, reqntidz;
   bool specified = false;
-  if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1;
-  else specified = true;
-  if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1;
-  else specified = true;
-  if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1;
-  else specified = true;
+  if (llvm::getReqNTIDx(F, reqntidx) == false)
+    reqntidx = 1;
+  else
+    specified = true;
+  if (llvm::getReqNTIDy(F, reqntidy) == false)
+    reqntidy = 1;
+  else
+    specified = true;
+  if (llvm::getReqNTIDz(F, reqntidz) == false)
+    reqntidz = 1;
+  else
+    specified = true;
 
   if (specified)
-    O << ".reqntid " << reqntidx << ", "
-    << reqntidy << ", " << reqntidz << "\n";
+    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
+      << "\n";
 
   // If the NVVM IR has some of maxntid* specified, then output
   // the maxntid directive, and set the unspecified ones to 1.
   // If none of maxntid* is specified, don't output maxntid directive.
   unsigned maxntidx, maxntidy, maxntidz;
   specified = false;
-  if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1;
-  else specified = true;
-  if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1;
-  else specified = true;
-  if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1;
-  else specified = true;
+  if (llvm::getMaxNTIDx(F, maxntidx) == false)
+    maxntidx = 1;
+  else
+    specified = true;
+  if (llvm::getMaxNTIDy(F, maxntidy) == false)
+    maxntidy = 1;
+  else
+    specified = true;
+  if (llvm::getMaxNTIDz(F, maxntidz) == false)
+    maxntidz = 1;
+  else
+    specified = true;
 
   if (specified)
-    O << ".maxntid " << maxntidx << ", "
-    << maxntidy << ", " << maxntidz << "\n";
+    O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
+      << "\n";
 
   unsigned mincta;
   if (llvm::getMinCTASm(F, mincta))
     O << ".minnctapersm " << mincta << "\n";
 }
 
-void
-NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
-                                        raw_ostream &O) {
-  const TargetRegisterClass * RC = MRI->getRegClass(vr);
+void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
+                                             raw_ostream &O) {
+  const TargetRegisterClass *RC = MRI->getRegClass(vr);
   unsigned id = RC->getID();
 
   std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
@@ -506,44 +512,38 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
   report_fatal_error("Bad register!");
 }
 
-void
-NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
-                                     raw_ostream &O) {
+void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
+                                          raw_ostream &O) {
   getVirtualRegisterName(vr, isVec, O);
 }
 
-void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO,
-                                                const char *Modifier,
-                                                raw_ostream &O) {
-  static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'};
-  int Imm = (int)MO.getImm();
-  if(0 == strcmp(Modifier, "vecelem"))
+void NVPTXAsmPrinter::printVecModifiedImmediate(
+    const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
+  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
+  int Imm = (int) MO.getImm();
+  if (0 == strcmp(Modifier, "vecelem"))
     O << "_" << vecelem[Imm];
-  else if(0 == strcmp(Modifier, "vecv4comm1")) {
-    if((Imm < 0) || (Imm > 3))
+  else if (0 == strcmp(Modifier, "vecv4comm1")) {
+    if ((Imm < 0) || (Imm > 3))
       O << "//";
-  }
-  else if(0 == strcmp(Modifier, "vecv4comm2")) {
-    if((Imm < 4) || (Imm > 7))
+  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
+    if ((Imm < 4) || (Imm > 7))
       O << "//";
-  }
-  else if(0 == strcmp(Modifier, "vecv4pos")) {
-    if(Imm < 0) Imm = 0;
-    O << "_" << vecelem[Imm%4];
-  }
-  else if(0 == strcmp(Modifier, "vecv2comm1")) {
-    if((Imm < 0) || (Imm > 1))
+  } else if (0 == strcmp(Modifier, "vecv4pos")) {
+    if (Imm < 0)
+      Imm = 0;
+    O << "_" << vecelem[Imm % 4];
+  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
+    if ((Imm < 0) || (Imm > 1))
       O << "//";
-  }
-  else if(0 == strcmp(Modifier, "vecv2comm2")) {
-    if((Imm < 2) || (Imm > 3))
+  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
+    if ((Imm < 2) || (Imm > 3))
       O << "//";
-  }
-  else if(0 == strcmp(Modifier, "vecv2pos")) {
-    if(Imm < 0) Imm = 0;
-    O << "_" << vecelem[Imm%2];
-  }
-  else
+  } else if (0 == strcmp(Modifier, "vecv2pos")) {
+    if (Imm < 0)
+      Imm = 0;
+    O << "_" << vecelem[Imm % 2];
+  } else
     llvm_unreachable("Unknown Modifier on immediate operand");
 }
 
@@ -565,7 +565,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
           emitVirtualRegister(MO.getReg(), true, O);
         else
           llvm_unreachable(
-                 "Don't know how to handle the modifier on virtual register.");
+              "Don't know how to handle the modifier on virtual register.");
       }
     }
     return;
@@ -576,7 +576,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
     else if (strstr(Modifier, "vec") == Modifier)
       printVecModifiedImmediate(MO, Modifier, O);
     else
-      llvm_unreachable("Don't know how to handle modifier on immediate operand");
+      llvm_unreachable(
+          "Don't know how to handle modifier on immediate operand");
     return;
 
   case MachineOperand::MO_FPImmediate:
@@ -588,18 +589,16 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
     break;
 
   case MachineOperand::MO_ExternalSymbol: {
-    const char * symbname = MO.getSymbolName();
+    const char *symbname = MO.getSymbolName();
     if (strstr(symbname, ".PARAM") == symbname) {
       unsigned index;
-      sscanf(symbname+6, "%u[];", &index);
+      sscanf(symbname + 6, "%u[];", &index);
       printParamName(index, O);
-    }
-    else if (strstr(symbname, ".HLPPARAM") == symbname) {
+    } else if (strstr(symbname, ".HLPPARAM") == symbname) {
       unsigned index;
-      sscanf(symbname+9, "%u[];", &index);
+      sscanf(symbname + 9, "%u[];", &index);
       O << *CurrentFnSym << "_param_" << index << "_offset";
-    }
-    else
+    } else
       O << symbname;
     break;
   }
@@ -613,8 +612,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
   }
 }
 
-void NVPTXAsmPrinter::
-printImplicitDef(const MachineInstr *MI, raw_ostream &O) const {
+void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI,
+                                       raw_ostream &O) const {
 #ifndef __OPTIMIZE__
   O << "\t// Implicit def :";
   //printOperand(MI, 0);
@@ -628,32 +627,41 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
 
   if (Modifier && !strcmp(Modifier, "add")) {
     O << ", ";
-    printOperand(MI, opNum+1, O);
+    printOperand(MI, opNum + 1, O);
   } else {
-    if (MI->getOperand(opNum+1).isImm() &&
-        MI->getOperand(opNum+1).getImm() == 0)
+    if (MI->getOperand(opNum + 1).isImm() &&
+        MI->getOperand(opNum + 1).getImm() == 0)
       return; // don't print ',0' or '+0'
     O << "+";
-    printOperand(MI, opNum+1, O);
+    printOperand(MI, opNum + 1, O);
   }
 }
 
 void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
-                                    raw_ostream &O, const char *Modifier)
-{
+                                    raw_ostream &O, const char *Modifier) {
   if (Modifier) {
     const MachineOperand &MO = MI->getOperand(opNum);
-    int Imm = (int)MO.getImm();
+    int Imm = (int) MO.getImm();
     if (!strcmp(Modifier, "volatile")) {
       if (Imm)
         O << ".volatile";
     } else if (!strcmp(Modifier, "addsp")) {
       switch (Imm) {
-      case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break;
-      case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break;
-      case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break;
-      case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break;
-      case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break;
+      case NVPTX::PTXLdStInstCode::GLOBAL:
+        O << ".global";
+        break;
+      case NVPTX::PTXLdStInstCode::SHARED:
+        O << ".shared";
+        break;
+      case NVPTX::PTXLdStInstCode::LOCAL:
+        O << ".local";
+        break;
+      case NVPTX::PTXLdStInstCode::PARAM:
+        O << ".param";
+        break;
+      case NVPTX::PTXLdStInstCode::CONSTANT:
+        O << ".const";
+        break;
       case NVPTX::PTXLdStInstCode::GENERIC:
         if (!nvptxSubtarget.hasGenericLdSt())
           O << ".global";
@@ -661,31 +669,27 @@ void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
       default:
         llvm_unreachable("Wrong Address Space");
       }
-    }
-    else if (!strcmp(Modifier, "sign")) {
-      if (Imm==NVPTX::PTXLdStInstCode::Signed)
+    } else if (!strcmp(Modifier, "sign")) {
+      if (Imm == NVPTX::PTXLdStInstCode::Signed)
         O << "s";
-      else if (Imm==NVPTX::PTXLdStInstCode::Unsigned)
+      else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
         O << "u";
       else
         O << "f";
-    }
-    else if (!strcmp(Modifier, "vec")) {
-      if (Imm==NVPTX::PTXLdStInstCode::V2)
+    } else if (!strcmp(Modifier, "vec")) {
+      if (Imm == NVPTX::PTXLdStInstCode::V2)
         O << ".v2";
-      else if (Imm==NVPTX::PTXLdStInstCode::V4)
+      else if (Imm == NVPTX::PTXLdStInstCode::V4)
         O << ".v4";
-    }
-    else
+    } else
       llvm_unreachable("Unknown Modifier");
-  }
-  else
+  } else
     llvm_unreachable("Empty Modifier");
 }
 
-void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
 
-  emitLinkageDirective(F,O);
+  emitLinkageDirective(F, O);
   if (llvm::isKernelFunction(*F))
     O << ".entry ";
   else
@@ -696,8 +700,7 @@ void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
   O << ";\n";
 }
 
-static bool usedInGlobalVarDef(const Constant *C)
-{
+static bool usedInGlobalVarDef(const Constant *C) {
   if (!C)
     return false;
 
@@ -707,8 +710,8 @@ static bool usedInGlobalVarDef(const Constant *C)
     return true;
   }
 
-  for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
-      ui!=ue; ++ui) {
+  for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+       ui != ue; ++ui) {
     const Constant *C = dyn_cast<Constant>(*ui);
     if (usedInGlobalVarDef(C))
       return true;
@@ -716,8 +719,7 @@ static bool usedInGlobalVarDef(const Constant *C)
   return false;
 }
 
-static bool usedInOneFunc(const User *U, Function const *&oneFunc)
-{
+static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
     if (othergv->getName().str() == "llvm.used")
       return true;
@@ -730,19 +732,17 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc)
         return false;
       oneFunc = curFunc;
       return true;
-    }
-    else
+    } else
       return false;
   }
 
   if (const MDNode *md = dyn_cast<MDNode>(U))
     if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
-        (md->getName().str() == "llvm.dbg.sp")))
+                          (md->getName().str() == "llvm.dbg.sp")))
       return true;
 
-
-  for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end();
-      ui!=ue; ++ui) {
+  for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end();
+       ui != ue; ++ui) {
     if (usedInOneFunc(*ui, oneFunc) == false)
       return false;
   }
@@ -776,16 +776,18 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
 
 static bool useFuncSeen(const Constant *C,
                         llvm::DenseMap<const Function *, bool> &seenMap) {
-  for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
-      ui!=ue; ++ui) {
+  for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+       ui != ue; ++ui) {
     if (const Constant *cu = dyn_cast<Constant>(*ui)) {
       if (useFuncSeen(cu, seenMap))
         return true;
     } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
       const BasicBlock *bb = I->getParent();
-      if (!bb) continue;
+      if (!bb)
+        continue;
       const Function *caller = bb->getParent();
-      if (!caller) continue;
+      if (!caller)
+        continue;
       if (seenMap.find(caller) != seenMap.end())
         return true;
     }
@@ -793,10 +795,9 @@ static bool useFuncSeen(const Constant *C,
   return false;
 }
 
-void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
   llvm::DenseMap<const Function *, bool> seenMap;
-  for (Module::const_iterator FI=M.begin(), FE=M.end();
-      FI!=FE; ++FI) {
+  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
     const Function *F = FI;
 
     if (F->isDeclaration()) {
@@ -808,8 +809,9 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
       emitDeclaration(F, O);
       continue;
     }
-    for (Value::const_use_iterator iter=F->use_begin(),
-        iterEnd=F->use_end(); iter!=iterEnd; ++iter) {
+    for (Value::const_use_iterator iter = F->use_begin(),
+                                   iterEnd = F->use_end();
+         iter != iterEnd; ++iter) {
       if (const Constant *C = dyn_cast<Constant>(*iter)) {
         if (usedInGlobalVarDef(C)) {
           // The use is in the initialization of a global variable
@@ -828,12 +830,15 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
         }
       }
 
-      if (!isa<Instruction>(*iter)) continue;
+      if (!isa<Instruction>(*iter))
+        continue;
       const Instruction *instr = cast<Instruction>(*iter);
       const BasicBlock *bb = instr->getParent();
-      if (!bb) continue;
+      if (!bb)
+        continue;
       const Function *caller = bb->getParent();
-      if (!caller) continue;
+      if (!caller)
+        continue;
 
       // If a caller has already been seen, then the caller is
       // appearing in the module before the callee. so print out
@@ -852,9 +857,10 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
   DebugInfoFinder DbgFinder;
   DbgFinder.processModule(M);
 
-  unsigned i=1;
+  unsigned i = 1;
   for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
-      E = DbgFinder.compile_unit_end(); I != E; ++I) {
+                                 E = DbgFinder.compile_unit_end();
+       I != E; ++I) {
     DICompileUnit DIUnit(*I);
     StringRef Filename(DIUnit.getFilename());
     StringRef Dirname(DIUnit.getDirectory());
@@ -871,7 +877,8 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
   }
 
   for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
-      E = DbgFinder.subprogram_end(); I != E; ++I) {
+                                 E = DbgFinder.subprogram_end();
+       I != E; ++I) {
     DISubprogram SP(*I);
     StringRef Filename(SP.getFilename());
     StringRef Dirname(SP.getDirectory());
@@ -887,7 +894,7 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
   }
 }
 
-bool NVPTXAsmPrinter::doInitialization (Module &M) {
+bool NVPTXAsmPrinter::doInitialization(Module &M) {
 
   SmallString<128> Str1;
   raw_svector_ostream OS1(Str1);
@@ -899,8 +906,8 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
   //bool Result = AsmPrinter::doInitialization(M);
 
   // Initialize TargetLoweringObjectFile.
-  const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
-          .Initialize(OutContext, TM);
+  const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
+      .Initialize(OutContext, TM);
 
   Mang = new Mangler(OutContext, *TM.getDataLayout());
 
@@ -908,11 +915,9 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
   emitHeader(M, OS1);
   OutStreamer.EmitRawText(OS1.str());
 
-
   // Already commented out
   //bool Result = AsmPrinter::doInitialization(M);
 
-
   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
     recordAndEmitFilenames(M);
 
@@ -926,16 +931,16 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
   // global variable in order, and ensure that we emit it *after* its dependent
   // globals. We use a little extra memory maintaining both a set and a list to
   // have fast searches while maintaining a strict ordering.
-  SmallVector<GlobalVariable*,8> Globals;
-  DenseSet<GlobalVariable*> GVVisited;
-  DenseSet<GlobalVariable*> GVVisiting;
+  SmallVector<GlobalVariable *, 8> Globals;
+  DenseSet<GlobalVariable *> GVVisited;
+  DenseSet<GlobalVariable *> GVVisiting;
 
   // Visit each global variable, in order
-  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-       I != E; ++I)
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
+       ++I)
     VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
 
-  assert(GVVisited.size() == M.getGlobalList().size() && 
+  assert(GVVisited.size() == M.getGlobalList().size() &&
          "Missed a global variable");
   assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
 
@@ -946,10 +951,10 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
   OS2 << '\n';
 
   OutStreamer.EmitRawText(OS2.str());
-  return false;  // success
+  return false; // success
 }
 
-void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
   O << "//\n";
   O << "// Generated by LLVM NVPTX Back-End\n";
   O << "//\n";
@@ -989,12 +994,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
 
   Module::GlobalListType &global_list = M.getGlobalList();
   int i, n = global_list.size();
-  GlobalVariable **gv_array = new GlobalVariable* [n];
+  GlobalVariable **gv_array = new GlobalVariable *[n];
 
   // first, back-up GlobalVariable in gv_array
   i = 0;
   for (Module::global_iterator I = global_list.begin(), E = global_list.end();
-      I != E; ++I)
+       I != E; ++I)
     gv_array[i++] = &*I;
 
   // second, empty global_list
@@ -1005,13 +1010,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
   bool ret = AsmPrinter::doFinalization(M);
 
   // now we restore global variables
-  for (i = 0; i < n; i ++)
+  for (i = 0; i < n; i++)
     global_list.insert(global_list.end(), gv_array[i]);
 
   delete[] gv_array;
   return ret;
 
-
   //bool Result = AsmPrinter::doFinalization(M);
   // Instead of calling the parents doFinalization, we may
   // clone parents doFinalization and customize here.
@@ -1031,8 +1035,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
 // external without init                  -> .extern
 // appending                              -> not allowed, assert.
 
-void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
-{
+void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
+                                           raw_ostream &O) {
   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
     if (V->hasExternalLinkage()) {
       if (isa<GlobalVariable>(V)) {
@@ -1059,8 +1063,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
   }
 }
 
-
-void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
+void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
                                          bool processDemoted) {
 
   // Skip meta data
@@ -1111,30 +1114,48 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
     if (Initializer)
       CI = dyn_cast<ConstantInt>(Initializer);
     if (CI) {
-      unsigned sample=CI->getZExtValue();
+      unsigned sample = CI->getZExtValue();
 
       O << " = { ";
 
-      for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >>
-          __CLK_ADDRESS_BASE) ; i < 3 ; i++) {
+      for (int i = 0,
+               addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
+           i < 3; i++) {
         O << "addr_mode_" << i << " = ";
         switch (addr) {
-        case 0: O << "wrap"; break;
-        case 1: O << "clamp_to_border"; break;
-        case 2: O << "clamp_to_edge"; break;
-        case 3: O << "wrap"; break;
-        case 4: O << "mirror"; break;
+        case 0:
+          O << "wrap";
+          break;
+        case 1:
+          O << "clamp_to_border";
+          break;
+        case 2:
+          O << "clamp_to_edge";
+          break;
+        case 3:
+          O << "wrap";
+          break;
+        case 4:
+          O << "mirror";
+          break;
         }
-        O <<", ";
+        O << ", ";
       }
       O << "filter_mode = ";
-      switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) {
-      case 0: O << "nearest"; break;
-      case 1: O << "linear";  break;
-      case 2: assert ( 0 && "Anisotropic filtering is not supported");
-      default: O << "nearest"; break;
+      switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
+      case 0:
+        O << "nearest";
+        break;
+      case 1:
+        O << "linear";
+        break;
+      case 2:
+        assert(0 && "Anisotropic filtering is not supported");
+      default:
+        O << "nearest";
+        break;
       }
-      if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) {
+      if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
         O << ", force_unnormalized_coords = 1";
       }
       O << " }";
@@ -1176,7 +1197,6 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
   else
     O << " .align " << GVar->getAlignment();
 
-
   if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
     O << " .";
     O << getPTXFundamentalTypeStr(ETy, false);
@@ -1186,17 +1206,17 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
     // Ptx allows variable initilization only for constant and global state
     // spaces.
     if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
-        (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
-        (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
-        && GVar->hasInitializer()) {
+         (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+         (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+        GVar->hasInitializer()) {
       Constant *Initializer = GVar->getInitializer();
       if (!Initializer->isNullValue()) {
-        O << " = " ;
+        O << " = ";
         printScalarConstant(Initializer, O);
       }
     }
   } else {
-    unsigned int ElementSize =0;
+    unsigned int ElementSize = 0;
 
     // Although PTX has direct support for struct type and array type and
     // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
@@ -1210,54 +1230,49 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
       // Ptx allows variable initilization only for constant and
       // global state spaces.
       if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
-          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
-          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
-          && GVar->hasInitializer()) {
+           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+          GVar->hasInitializer()) {
         Constant *Initializer = GVar->getInitializer();
-        if (!isa<UndefValue>(Initializer) &&
-            !Initializer->isNullValue()) {
+        if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
           AggBuffer aggBuffer(ElementSize, O, *this);
           bufferAggregateConstant(Initializer, &aggBuffer);
           if (aggBuffer.numSymbols) {
             if (nvptxSubtarget.is64Bit()) {
-              O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ;
-              O << ElementSize/8;
-            }
-            else {
-              O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ;
-              O << ElementSize/4;
+              O << " .u64 " << *Mang->getSymbol(GVar) << "[";
+              O << ElementSize / 8;
+            } else {
+              O << " .u32 " << *Mang->getSymbol(GVar) << "[";
+              O << ElementSize / 4;
             }
             O << "]";
-          }
-          else {
-            O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+          } else {
+            O << " .b8 " << *Mang->getSymbol(GVar) << "[";
             O << ElementSize;
             O << "]";
           }
-          O << " = {" ;
+          O << " = {";
           aggBuffer.print();
           O << "}";
-        }
-        else {
-          O << " .b8 " << *Mang->getSymbol(GVar) ;
+        } else {
+          O << " .b8 " << *Mang->getSymbol(GVar);
           if (ElementSize) {
-            O <<"[" ;
+            O << "[";
             O << ElementSize;
             O << "]";
           }
         }
-      }
-      else {
+      } else {
         O << " .b8 " << *Mang->getSymbol(GVar);
         if (ElementSize) {
-          O <<"[" ;
+          O << "[";
           O << ElementSize;
           O << "]";
         }
       }
       break;
     default:
-      assert( 0 && "type not supported yet");
+      assert(0 && "type not supported yet");
     }
 
   }
@@ -1270,7 +1285,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
 
   std::vector<GlobalVariable *> &gvars = localDecls[f];
 
-  for (unsigned i=0, e=gvars.size(); i!=e; ++i) {
+  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
     O << "\t// demoted variable\n\t";
     printModuleLevelGV(gvars[i], O, true);
   }
@@ -1280,24 +1295,24 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
                                           raw_ostream &O) const {
   switch (AddressSpace) {
   case llvm::ADDRESS_SPACE_LOCAL:
-    O << "local" ;
+    O << "local";
     break;
   case llvm::ADDRESS_SPACE_GLOBAL:
-    O << "global" ;
+    O << "global";
     break;
   case llvm::ADDRESS_SPACE_CONST:
     // This logic should be consistent with that in
     // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp)
     if (nvptxSubtarget.hasGenericLdSt())
-      O << "global" ;
+      O << "global";
     else
-      O << "const" ;
+      O << "const";
     break;
   case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
-    O << "const" ;
+    O << "const";
     break;
   case llvm::ADDRESS_SPACE_SHARED:
-    O << "shared" ;
+    O << "shared";
     break;
   default:
     report_fatal_error("Bad address space found while emitting PTX");
@@ -1305,8 +1320,8 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
   }
 }
 
-std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
-                                                      bool useB4PTR) const {
+std::string
+NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
   switch (Ty->getTypeID()) {
   default:
     llvm_unreachable("unexpected type");
@@ -1330,17 +1345,20 @@ std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
     return "f64";
   case Type::PointerTyID:
     if (nvptxSubtarget.is64Bit())
-      if (useB4PTR) return "b64";
-      else return "u64";
+      if (useB4PTR)
+        return "b64";
+      else
+        return "u64";
+    else if (useB4PTR)
+      return "b32";
     else
-      if (useB4PTR) return "b32";
-      else return "u32";
+      return "u32";
   }
   llvm_unreachable("unexpected type");
   return NULL;
 }
 
-void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
+void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
                                             raw_ostream &O) {
 
   const DataLayout *TD = TM.getDataLayout();
@@ -1364,7 +1382,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
     return;
   }
 
-  int64_t ElementSize =0;
+  int64_t ElementSize = 0;
 
   // Although PTX has direct support for struct type and array type and LLVM IR
   // is very similar to PTX, the LLVM CodeGen does not support for targets that
@@ -1375,22 +1393,19 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
   case Type::ArrayTyID:
   case Type::VectorTyID:
     ElementSize = TD->getTypeStoreSize(ETy);
-    O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+    O << " .b8 " << *Mang->getSymbol(GVar) << "[";
     if (ElementSize) {
-      O << itostr(ElementSize) ;
+      O << itostr(ElementSize);
     }
     O << "]";
     break;
   default:
-    assert( 0 && "type not supported yet");
+    assert(0 && "type not supported yet");
   }
-  return ;
+  return;
 }
 
-
-static unsigned int
-getOpenCLAlignment(const DataLayout *TD,
-                   Type *Ty) {
+static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
   if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
     return TD->getPrefTypeAlignment(Ty);
 
@@ -1404,9 +1419,9 @@ getOpenCLAlignment(const DataLayout *TD,
     unsigned int numE = VTy->getNumElements();
     unsigned int alignE = TD->getPrefTypeAlignment(ETy);
     if (numE == 3)
-      return 4*alignE;
+      return 4 * alignE;
     else
-      return numE*alignE;
+      return numE * alignE;
   }
 
   const StructType *STy = dyn_cast<StructType>(Ty);
@@ -1414,7 +1429,7 @@ getOpenCLAlignment(const DataLayout *TD,
     unsigned int alignStruct = 1;
     // Go through each element of the struct and find the
     // largest alignment.
-    for (unsigned i=0, e=STy->getNumElements(); i != e; i++) {
+    for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
       Type *ETy = STy->getElementType(i);
       unsigned int align = getOpenCLAlignment(TD, ETy);
       if (align > alignStruct)
@@ -1458,7 +1473,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
   }
 
   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
-    if (i==paramIndex) {
+    if (i == paramIndex) {
       printParamName(I, paramIndex, O);
       return;
     }
@@ -1466,8 +1481,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
   llvm_unreachable("paramIndex out of bound");
 }
 
-void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
-                                            raw_ostream &O) {
+void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
   const DataLayout *TD = TM.getDataLayout();
   const AttributeSet &PAL = F->getAttributes();
   const TargetLowering *TLI = TM.getTargetLowering();
@@ -1481,7 +1495,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
   O << "(\n";
 
   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
-    const Type *Ty = I->getType();
+    Type *Ty = I->getType();
 
     if (!first)
       O << ",\n";
@@ -1496,14 +1510,28 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
           O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
         else // Default image is read_only
           O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
-      }
-      else // Should be llvm::isSampler(*I)
+      } else // Should be llvm::isSampler(*I)
         O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
-        << paramIndex;
+          << paramIndex;
       continue;
     }
 
-    if (PAL.hasAttribute(paramIndex+1, Attribute::ByVal) == false) {
+    if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+      if (Ty->isVectorTy()) {
+        // Just print .param .b8 .align <a> .param[size];
+        // <a> = PAL.getparamalignment
+        // size = typeallocsize of element type
+        unsigned align = PAL.getParamAlignment(paramIndex + 1);
+        if (align == 0)
+          align = TD->getABITypeAlignment(Ty);
+
+        unsigned sz = TD->getTypeAllocSize(Ty);
+        O << "\t.param .align " << align << " .b8 ";
+        printParamName(I, paramIndex, O);
+        O << "[" << sz << "]";
+
+        continue;
+      }
       // Just a scalar
       const PointerType *PTy = dyn_cast<PointerType>(Ty);
       if (isKernelFunc) {
@@ -1514,7 +1542,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
           if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
             Type *ETy = PTy->getElementType();
             int addrSpace = PTy->getAddressSpace();
-            switch(addrSpace) {
+            switch (addrSpace) {
             default:
               O << ".ptr ";
               break;
@@ -1529,15 +1557,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
               O << ".ptr .global ";
               break;
             }
-            O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " ";
+            O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
           }
           printParamName(I, paramIndex, O);
           continue;
         }
 
         // non-pointer scalar to kernel func
-        O << "\t.param ."
-            << getPTXFundamentalTypeStr(Ty) << " ";
+        O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " ";
         printParamName(I, paramIndex, O);
         continue;
       }
@@ -1546,9 +1573,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
       unsigned sz = 0;
       if (isa<IntegerType>(Ty)) {
         sz = cast<IntegerType>(Ty)->getBitWidth();
-        if (sz < 32) sz = 32;
-      }
-      else if (isa<PointerType>(Ty))
+        if (sz < 32)
+          sz = 32;
+      } else if (isa<PointerType>(Ty))
         sz = thePointerTy.getSizeInBits();
       else
         sz = Ty->getPrimitiveSizeInBits();
@@ -1562,21 +1589,19 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
 
     // param has byVal attribute. So should be a pointer
     const PointerType *PTy = dyn_cast<PointerType>(Ty);
-    assert(PTy &&
-           "Param with byval attribute should be a pointer type");
+    assert(PTy && "Param with byval attribute should be a pointer type");
     Type *ETy = PTy->getElementType();
 
     if (isABI || isKernelFunc) {
       // Just print .param .b8 .align <a> .param[size];
       // <a> = PAL.getparamalignment
       // size = typeallocsize of element type
-      unsigned align = PAL.getParamAlignment(paramIndex+1);
+      unsigned align = PAL.getParamAlignment(paramIndex + 1);
       if (align == 0)
         align = TD->getABITypeAlignment(ETy);
 
       unsigned sz = TD->getTypeAllocSize(ETy);
-      O << "\t.param .align " << align
-          << " .b8 ";
+      O << "\t.param .align " << align << " .b8 ";
       printParamName(I, paramIndex, O);
       O << "[" << sz << "]";
       continue;
@@ -1587,7 +1612,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
       // each vector element.
       SmallVector<EVT, 16> vtparts;
       ComputeValueVTs(*TLI, ETy, vtparts);
-      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
         unsigned elems = 1;
         EVT elemtype = vtparts[i];
         if (vtparts[i].isVector()) {
@@ -1595,15 +1620,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
           elemtype = vtparts[i].getVectorElementType();
         }
 
-        for (unsigned j=0,je=elems; j!=je; ++j) {
+        for (unsigned j = 0, je = elems; j != je; ++j) {
           unsigned sz = elemtype.getSizeInBits();
-          if (elemtype.isInteger() && (sz < 32)) sz = 32;
+          if (elemtype.isInteger() && (sz < 32))
+            sz = 32;
           O << "\t.reg .b" << sz << " ";
           printParamName(I, paramIndex, O);
-          if (j<je-1) O << ",\n";
+          if (j < je - 1)
+            O << ",\n";
           ++paramIndex;
         }
-        if (i<e-1)
+        if (i < e - 1)
           O << ",\n";
       }
       --paramIndex;
@@ -1620,9 +1647,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
   emitFunctionParamList(F, O);
 }
 
-
-void NVPTXAsmPrinter::
-setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
+void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
+    const MachineFunction &MF) {
   SmallString<128> Str;
   raw_svector_ostream O(Str);
 
@@ -1635,14 +1661,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   int NumBytes = (int) MFI->getStackSize();
   if (NumBytes) {
-    O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t"
-        << DEPOTNAME
-        << getFunctionNumber() << "[" << NumBytes << "];\n";
+    O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
+      << getFunctionNumber() << "[" << NumBytes << "];\n";
     if (nvptxSubtarget.is64Bit()) {
       O << "\t.reg .b64 \t%SP;\n";
       O << "\t.reg .b64 \t%SPL;\n";
-    }
-    else {
+    } else {
       O << "\t.reg .b32 \t%SP;\n";
       O << "\t.reg .b32 \t%SPL;\n";
     }
@@ -1653,12 +1677,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
   // register number and the per class virtual register number.
   // We use the per class virtual register number in the ptx output.
   unsigned int numVRs = MRI->getNumVirtRegs();
-  for (unsigned i=0; i< numVRs; i++) {
+  for (unsigned i = 0; i < numVRs; i++) {
     unsigned int vr = TRI->index2VirtReg(i);
     const TargetRegisterClass *RC = MRI->getRegClass(vr);
     std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
     int n = regmap.size();
-    regmap.insert(std::make_pair(vr, n+1));
+    regmap.insert(std::make_pair(vr, n + 1));
   }
 
   // Emit register declarations
@@ -1702,23 +1726,20 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
   OutStreamer.EmitRawText(O.str());
 }
 
-
 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
-  APFloat APF = APFloat(Fp->getValueAPF());  // make a copy
+  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
   bool ignored;
   unsigned int numHex;
   const char *lead;
 
-  if (Fp->getType()->getTypeID()==Type::FloatTyID) {
+  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
     numHex = 8;
     lead = "0f";
-    APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
-                &ignored);
+    APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
   } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
     numHex = 16;
     lead = "0d";
-    APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
-                &ignored);
+    APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
   } else
     llvm_unreachable("unsupported fp type");
 
@@ -1760,7 +1781,6 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
   llvm_unreachable("Not scalar type found in printScalarConstant()");
 }
 
-
 void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
                                    AggBuffer *aggBuffer) {
 
@@ -1768,7 +1788,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
 
   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
     int s = TD->getTypeAllocSize(CPV->getType());
-    if (s<Bytes)
+    if (s < Bytes)
       s = Bytes;
     aggBuffer->addZeros(s);
     return;
@@ -1779,28 +1799,26 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
 
   case Type::IntegerTyID: {
     const Type *ETy = CPV->getType();
-    if ( ETy == Type::getInt8Ty(CPV->getContext()) ){
+    if (ETy == Type::getInt8Ty(CPV->getContext())) {
       unsigned char c =
           (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
       ptr = &c;
       aggBuffer->addBytes(ptr, 1, Bytes);
-    } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) {
-      short int16 =
-          (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
-      ptr = (unsigned char*)&int16;
+    } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
+      short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+      ptr = (unsigned char *)&int16;
       aggBuffer->addBytes(ptr, 2, Bytes);
-    } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) {
+    } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
       if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
-        int int32 =(int)(constInt->getZExtValue());
-        ptr = (unsigned char*)&int32;
+        int int32 = (int)(constInt->getZExtValue());
+        ptr = (unsigned char *)&int32;
         aggBuffer->addBytes(ptr, 4, Bytes);
         break;
       } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
-        if (ConstantInt *constInt =
-            dyn_cast<ConstantInt>(ConstantFoldConstantExpression(
-                Cexpr, TD))) {
-          int int32 =(int)(constInt->getZExtValue());
-          ptr = (unsigned char*)&int32;
+        if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+                ConstantFoldConstantExpression(Cexpr, TD))) {
+          int int32 = (int)(constInt->getZExtValue());
+          ptr = (unsigned char *)&int32;
           aggBuffer->addBytes(ptr, 4, Bytes);
           break;
         }
@@ -1812,17 +1830,17 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
         }
       }
       llvm_unreachable("unsupported integer const type");
-    } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) {
+    } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
       if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
-        long long int64 =(long long)(constInt->getZExtValue());
-        ptr = (unsigned char*)&int64;
+        long long int64 = (long long)(constInt->getZExtValue());
+        ptr = (unsigned char *)&int64;
         aggBuffer->addBytes(ptr, 8, Bytes);
         break;
       } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
         if (ConstantInt *constInt = dyn_cast<ConstantInt>(
-            ConstantFoldConstantExpression(Cexpr, TD))) {
-          long long int64 =(long long)(constInt->getZExtValue());
-          ptr = (unsigned char*)&int64;
+                ConstantFoldConstantExpression(Cexpr, TD))) {
+          long long int64 = (long long)(constInt->getZExtValue());
+          ptr = (unsigned char *)&int64;
           aggBuffer->addBytes(ptr, 8, Bytes);
           break;
         }
@@ -1841,17 +1859,16 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
   case Type::FloatTyID:
   case Type::DoubleTyID: {
     ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
-    const Type* Ty = CFP->getType();
+    const Type *Ty = CFP->getType();
     if (Ty == Type::getFloatTy(CPV->getContext())) {
-      float float32 = (float)CFP->getValueAPF().convertToFloat();
-      ptr = (unsigned char*)&float32;
+      float float32 = (float) CFP->getValueAPF().convertToFloat();
+      ptr = (unsigned char *)&float32;
       aggBuffer->addBytes(ptr, 4, Bytes);
     } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
       double float64 = CFP->getValueAPF().convertToDouble();
-      ptr = (unsigned char*)&float64;
+      ptr = (unsigned char *)&float64;
       aggBuffer->addBytes(ptr, 8, Bytes);
-    }
-    else {
+    } else {
       llvm_unreachable("unsupported fp const type");
     }
     break;
@@ -1859,8 +1876,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
   case Type::PointerTyID: {
     if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
       aggBuffer->addSymbol(GVar);
-    }
-    else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+    } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
       Value *v = Cexpr->stripPointerCasts();
       aggBuffer->addSymbol(v);
     }
@@ -1876,10 +1892,9 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
         isa<ConstantStruct>(CPV)) {
       int ElementSize = TD->getTypeAllocSize(CPV->getType());
       bufferAggregateConstant(CPV, aggBuffer);
-      if ( Bytes > ElementSize )
-        aggBuffer->addZeros(Bytes-ElementSize);
-    }
-    else if (isa<ConstantAggregateZero>(CPV))
+      if (Bytes > ElementSize)
+        aggBuffer->addZeros(Bytes - ElementSize);
+    } else if (isa<ConstantAggregateZero>(CPV))
       aggBuffer->addZeros(Bytes);
     else
       llvm_unreachable("Unexpected Constant type");
@@ -1905,7 +1920,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
   }
 
   if (const ConstantDataSequential *CDS =
-      dyn_cast<ConstantDataSequential>(CPV)) {
+          dyn_cast<ConstantDataSequential>(CPV)) {
     if (CDS->getNumElements())
       for (unsigned i = 0; i < CDS->getNumElements(); ++i)
         bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
@@ -1913,20 +1928,18 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
     return;
   }
 
-
   if (isa<ConstantStruct>(CPV)) {
     if (CPV->getNumOperands()) {
       StructType *ST = cast<StructType>(CPV->getType());
       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
-        if ( i == (e - 1))
+        if (i == (e - 1))
           Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
-          TD->getTypeAllocSize(ST)
-          - TD->getStructLayout(ST)->getElementOffset(i);
+                  TD->getTypeAllocSize(ST) -
+                  TD->getStructLayout(ST)->getElementOffset(i);
         else
-          Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) -
-          TD->getStructLayout(ST)->getElementOffset(i);
-        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes,
-                     aggBuffer);
+          Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
+                  TD->getStructLayout(ST)->getElementOffset(i);
+        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
       }
     }
     return;
@@ -1937,15 +1950,13 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
 // buildTypeNameMap - Run through symbol table looking for type names.
 //
 
-
 bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
 
   std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
 
-  if (PI != TypeNameMap.end() &&
-      (!PI->second.compare("struct._image1d_t") ||
-          !PI->second.compare("struct._image2d_t") ||
-          !PI->second.compare("struct._image3d_t")))
+  if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
+                                  !PI->second.compare("struct._image2d_t") ||
+                                  !PI->second.compare("struct._image3d_t")))
     return true;
 
   return false;
@@ -1955,10 +1966,10 @@ bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
 ///
 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                       unsigned AsmVariant,
-                                      const char *ExtraCode,
-                                      raw_ostream &O) {
+                                      const char *ExtraCode, raw_ostream &O) {
   if (ExtraCode && ExtraCode[0]) {
-    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+    if (ExtraCode[1] != 0)
+      return true; // Unknown modifier.
 
     switch (ExtraCode[0]) {
     default:
@@ -1974,13 +1985,11 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
   return false;
 }
 
-bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                            unsigned OpNo,
-                                            unsigned AsmVariant,
-                                            const char *ExtraCode,
-                                            raw_ostream &O) {
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
+    const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
+    const char *ExtraCode, raw_ostream &O) {
   if (ExtraCode && ExtraCode[0])
-    return true;  // Unknown modifier
+    return true; // Unknown modifier
 
   O << '[';
   printMemOperand(MI, OpNo, O);
@@ -1989,41 +1998,69 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   return false;
 }
 
-bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
-{
-  switch(MI.getOpcode()) {
+bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
   default:
     return false;
-  case NVPTX::CallArgBeginInst:  case NVPTX::CallArgEndInst0:
-  case NVPTX::CallArgEndInst1:  case NVPTX::CallArgF32:
-  case NVPTX::CallArgF64:  case NVPTX::CallArgI16:
-  case NVPTX::CallArgI32:  case NVPTX::CallArgI32imm:
-  case NVPTX::CallArgI64:  case NVPTX::CallArgI8:
-  case NVPTX::CallArgParam:  case NVPTX::CallVoidInst:
-  case NVPTX::CallVoidInstReg:  case NVPTX::Callseq_End:
+  case NVPTX::CallArgBeginInst:
+  case NVPTX::CallArgEndInst0:
+  case NVPTX::CallArgEndInst1:
+  case NVPTX::CallArgF32:
+  case NVPTX::CallArgF64:
+  case NVPTX::CallArgI16:
+  case NVPTX::CallArgI32:
+  case NVPTX::CallArgI32imm:
+  case NVPTX::CallArgI64:
+  case NVPTX::CallArgI8:
+  case NVPTX::CallArgParam:
+  case NVPTX::CallVoidInst:
+  case NVPTX::CallVoidInstReg:
+  case NVPTX::Callseq_End:
   case NVPTX::CallVoidInstReg64:
-  case NVPTX::DeclareParamInst:  case NVPTX::DeclareRetMemInst:
-  case NVPTX::DeclareRetRegInst:  case NVPTX::DeclareRetScalarInst:
-  case NVPTX::DeclareScalarParamInst:  case NVPTX::DeclareScalarRegInst:
-  case NVPTX::StoreParamF32:  case NVPTX::StoreParamF64:
-  case NVPTX::StoreParamI16:  case NVPTX::StoreParamI32:
-  case NVPTX::StoreParamI64:  case NVPTX::StoreParamI8:
-  case NVPTX::StoreParamS32I8:  case NVPTX::StoreParamU32I8:
-  case NVPTX::StoreParamS32I16:  case NVPTX::StoreParamU32I16:
-  case NVPTX::StoreRetvalF32:  case NVPTX::StoreRetvalF64:
-  case NVPTX::StoreRetvalI16:  case NVPTX::StoreRetvalI32:
-  case NVPTX::StoreRetvalI64:  case NVPTX::StoreRetvalI8:
-  case NVPTX::LastCallArgF32:  case NVPTX::LastCallArgF64:
-  case NVPTX::LastCallArgI16:  case NVPTX::LastCallArgI32:
-  case NVPTX::LastCallArgI32imm:  case NVPTX::LastCallArgI64:
-  case NVPTX::LastCallArgI8:  case NVPTX::LastCallArgParam:
-  case NVPTX::LoadParamMemF32:  case NVPTX::LoadParamMemF64:
-  case NVPTX::LoadParamMemI16:  case NVPTX::LoadParamMemI32:
-  case NVPTX::LoadParamMemI64:  case NVPTX::LoadParamMemI8:
-  case NVPTX::LoadParamRegF32:  case NVPTX::LoadParamRegF64:
-  case NVPTX::LoadParamRegI16:  case NVPTX::LoadParamRegI32:
-  case NVPTX::LoadParamRegI64:  case NVPTX::LoadParamRegI8:
-  case NVPTX::PrototypeInst:   case NVPTX::DBG_VALUE:
+  case NVPTX::DeclareParamInst:
+  case NVPTX::DeclareRetMemInst:
+  case NVPTX::DeclareRetRegInst:
+  case NVPTX::DeclareRetScalarInst:
+  case NVPTX::DeclareScalarParamInst:
+  case NVPTX::DeclareScalarRegInst:
+  case NVPTX::StoreParamF32:
+  case NVPTX::StoreParamF64:
+  case NVPTX::StoreParamI16:
+  case NVPTX::StoreParamI32:
+  case NVPTX::StoreParamI64:
+  case NVPTX::StoreParamI8:
+  case NVPTX::StoreParamS32I8:
+  case NVPTX::StoreParamU32I8:
+  case NVPTX::StoreParamS32I16:
+  case NVPTX::StoreParamU32I16:
+  case NVPTX::StoreRetvalF32:
+  case NVPTX::StoreRetvalF64:
+  case NVPTX::StoreRetvalI16:
+  case NVPTX::StoreRetvalI32:
+  case NVPTX::StoreRetvalI64:
+  case NVPTX::StoreRetvalI8:
+  case NVPTX::LastCallArgF32:
+  case NVPTX::LastCallArgF64:
+  case NVPTX::LastCallArgI16:
+  case NVPTX::LastCallArgI32:
+  case NVPTX::LastCallArgI32imm:
+  case NVPTX::LastCallArgI64:
+  case NVPTX::LastCallArgI8:
+  case NVPTX::LastCallArgParam:
+  case NVPTX::LoadParamMemF32:
+  case NVPTX::LoadParamMemF64:
+  case NVPTX::LoadParamMemI16:
+  case NVPTX::LoadParamMemI32:
+  case NVPTX::LoadParamMemI64:
+  case NVPTX::LoadParamMemI8:
+  case NVPTX::LoadParamRegF32:
+  case NVPTX::LoadParamRegF64:
+  case NVPTX::LoadParamRegI16:
+  case NVPTX::LoadParamRegI32:
+  case NVPTX::LoadParamRegI64:
+  case NVPTX::LoadParamRegI8:
+  case NVPTX::PrototypeInst:
+  case NVPTX::DBG_VALUE:
     return true;
   }
   return false;
@@ -2035,10 +2072,9 @@ extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
 }
 
-
 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
   std::stringstream temp;
-  LineReader * reader = this->getReader(filename.str());
+  LineReader *reader = this->getReader(filename.str());
   temp << "\n//";
   temp << filename.str();
   temp << ":";
@@ -2049,29 +2085,26 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
   this->OutStreamer.EmitRawText(Twine(temp.str()));
 }
 
-
 LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
-  if (reader == NULL)  {
-    reader =  new LineReader(filename);
+  if (reader == NULL) {
+    reader = new LineReader(filename);
   }
 
   if (reader->fileName() != filename) {
     delete reader;
-    reader =  new LineReader(filename);
+    reader = new LineReader(filename);
   }
 
   return reader;
 }
 
-
-std::string
-LineReader::readLine(unsigned lineNum) {
+std::string LineReader::readLine(unsigned lineNum) {
   if (lineNum < theCurLine) {
     theCurLine = 0;
-    fstr.seekg(0,std::ios::beg);
+    fstr.seekg(0, std::ios::beg);
   }
   while (theCurLine < lineNum) {
-    fstr.getline(buff,500);
+    fstr.getline(buff, 500);
     theCurLine++;
   }
   return buff;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 42498f0bf7c5..6dc9fc0ffeff 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -43,15 +43,15 @@
 // This is defined in AsmPrinter.cpp.
 // Used to process the constant expressions in initializers.
 namespace nvptx {
-const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
-                                  llvm::AsmPrinter &AP) ;
+const llvm::MCExpr *
+LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP);
 }
 
 namespace llvm {
 
 class LineReader {
 private:
-  unsigned theCurLine ;
+  unsigned theCurLine;
   std::ifstream fstr;
   char buff[512];
   std::string theFileName;
@@ -63,17 +63,12 @@ class LineReader {
     theFileName = filename;
   }
   std::string fileName() { return theFileName; }
-  ~LineReader() {
-    fstr.close();
-  }
+  ~LineReader() { fstr.close(); }
   std::string readLine(unsigned line);
 };
 
-
-
 class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
 
-
   class AggBuffer {
     // Used to buffer the emitted string for initializing global
     // aggregates.
@@ -92,7 +87,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
     // Once we have this AggBuffer setup, we can choose how to print
     // it out.
   public:
-    unsigned size;   // size of the buffer in bytes
+    unsigned size;         // size of the buffer in bytes
     unsigned char *buffer; // the buffer
     unsigned numSymbols;   // number of symbol addresses
     SmallVector<unsigned, 4> symbolPosInBuffer;
@@ -105,33 +100,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
 
   public:
     AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
-    :O(_O),AP(_AP) {
+        : O(_O), AP(_AP) {
       buffer = new unsigned char[_size];
       size = _size;
       curpos = 0;
       numSymbols = 0;
     }
-    ~AggBuffer() {
-      delete [] buffer;
-    }
+    ~AggBuffer() { delete[] buffer; }
     unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
-      assert((curpos+Num) <= size);
-      assert((curpos+Bytes) <= size);
-      for ( int i= 0; i < Num; ++i) {
+      assert((curpos + Num) <= size);
+      assert((curpos + Bytes) <= size);
+      for (int i = 0; i < Num; ++i) {
         buffer[curpos] = Ptr[i];
-        curpos ++;
+        curpos++;
       }
-      for ( int i=Num; i < Bytes ; ++i) {
+      for (int i = Num; i < Bytes; ++i) {
         buffer[curpos] = 0;
-        curpos ++;
+        curpos++;
       }
       return curpos;
     }
     unsigned addZeros(int Num) {
-      assert((curpos+Num) <= size);
-      for ( int i= 0; i < Num; ++i) {
+      assert((curpos + Num) <= size);
+      for (int i = 0; i < Num; ++i) {
         buffer[curpos] = 0;
-        curpos ++;
+        curpos++;
       }
       return curpos;
     }
@@ -143,10 +136,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
     void print() {
       if (numSymbols == 0) {
         // print out in bytes
-        for (unsigned i=0; i<size; i++) {
+        for (unsigned i = 0; i < size; i++) {
           if (i)
             O << ", ";
-          O << (unsigned int)buffer[i];
+          O << (unsigned int) buffer[i];
         }
       } else {
         // print out in 4-bytes or 8-bytes
@@ -156,7 +149,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
         unsigned int nBytes = 4;
         if (AP.nvptxSubtarget.is64Bit())
           nBytes = 8;
-        for (pos=0; pos<size; pos+=nBytes) {
+        for (pos = 0; pos < size; pos += nBytes) {
           if (pos)
             O << ", ";
           if (pos == nextSymbolPos) {
@@ -164,22 +157,19 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
             if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
               MCSymbol *Name = AP.Mang->getSymbol(GVar);
               O << *Name;
-            }
-            else if (ConstantExpr *Cexpr =
-                dyn_cast<ConstantExpr>(v)) {
+            } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
               O << *nvptx::LowerConstant(Cexpr, AP);
             } else
               llvm_unreachable("symbol type unknown");
             nSym++;
             if (nSym >= numSymbols)
-              nextSymbolPos = size+1;
+              nextSymbolPos = size + 1;
             else
               nextSymbolPos = symbolPosInBuffer[nSym];
-          } else
-            if (nBytes == 4)
-              O << *(unsigned int*)(buffer+pos);
-            else
-              O << *(unsigned long long*)(buffer+pos);
+          } else if (nBytes == 4)
+            O << *(unsigned int *)(buffer + pos);
+          else
+            O << *(unsigned long long *)(buffer + pos);
         }
       }
     }
@@ -189,10 +179,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
 
   virtual void emitSrcInText(StringRef filename, unsigned line);
 
-private :
-  virtual const char *getPassName() const {
-    return "NVPTX Assembly Printer";
-  }
+private:
+  virtual const char *getPassName() const { return "NVPTX Assembly Printer"; }
 
   const Function *F;
   std::string CurrentFnName;
@@ -207,31 +195,28 @@ private :
 
   void printGlobalVariable(const GlobalVariable *GVar);
   void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                    const char *Modifier=0);
+                    const char *Modifier = 0);
   void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
-                     const char *Modifier=0);
-  void printVecModifiedImmediate(const MachineOperand &MO,
-                                 const char *Modifier, raw_ostream &O);
+                     const char *Modifier = 0);
+  void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
+                                 raw_ostream &O);
   void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                       const char *Modifier=0);
+                       const char *Modifier = 0);
   void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
   // definition autogenerated.
   void printInstruction(const MachineInstr *MI, raw_ostream &O);
-  void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
-                          bool=false);
+  void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
   void printParamName(int paramIndex, raw_ostream &O);
   void printParamName(Function::const_arg_iterator I, int paramIndex,
                       raw_ostream &O);
   void emitHeader(Module &M, raw_ostream &O);
-  void emitKernelFunctionDirectives(const Function& F,
-                                    raw_ostream &O) const;
+  void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
   void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
   void emitFunctionExternParamList(const MachineFunction &MF);
   void emitFunctionParamList(const Function *, raw_ostream &O);
   void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
   void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
-  void emitFunctionTempData(const MachineFunction &MF,
-                            unsigned &FrameSize);
+  void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize);
   bool isImageType(const Type *Ty);
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                        unsigned AsmVariant, const char *ExtraCode,
@@ -269,17 +254,16 @@ private :
   void recordAndEmitFilenames(Module &);
 
   void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
-  void emitPTXAddressSpace(unsigned int AddressSpace,
-                           raw_ostream &O) const;
-  std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ;
-  void printScalarConstant(Constant *CPV, raw_ostream &O) ;
-  void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ;
-  void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ;
-  void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ;
+  void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
+  std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
+  void printScalarConstant(Constant *CPV, raw_ostream &O);
+  void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
+  void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+  void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
 
   void printOperandProper(const MachineOperand &MO);
 
-  void emitLinkageDirective(const GlobalValue* V, raw_ostream &O);
+  void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
   void emitDeclarations(Module &, raw_ostream &O);
   void emitDeclaration(const Function *, raw_ostream &O);
 
@@ -289,10 +273,9 @@ private :
   LineReader *reader;
   LineReader *getReader(std::string);
 public:
-  NVPTXAsmPrinter(TargetMachine &TM,
-                  MCStreamer &Streamer)
-  : AsmPrinter(TM, Streamer),
-    nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+  NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer),
+        nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
     CurrentBankselLabelInBasicBlock = "";
     VRidGlobal2LocalMap = NULL;
     reader = NULL;
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index bb2c55ceed8a..6533da5102b0 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -25,9 +25,7 @@
 
 using namespace llvm;
 
-bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
-  return true;
-}
+bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
 
 void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
   if (MF.getFrameInfo()->hasStackObjects()) {
@@ -42,46 +40,39 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
       // mov %SPL, %depot;
       // cvta.local %SP, %SPL;
       if (is64bit) {
-        MachineInstr *MI = BuildMI(MBB, MBBI, dl,
-                               tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
-                                   NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
-        BuildMI(MBB, MI, dl,
-                tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal)
-        .addReg(NVPTX::VRDepot);
+        MachineInstr *MI = BuildMI(
+            MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+            NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+        BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+                NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
       } else {
-        MachineInstr *MI = BuildMI(MBB, MBBI, dl,
-                                  tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
-                                   NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
-        BuildMI(MBB, MI, dl,
-                tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal)
-        .addReg(NVPTX::VRDepot);
+        MachineInstr *MI = BuildMI(
+            MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
+            NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+        BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+                NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
       }
-    }
-    else {
+    } else {
       // mov %SP, %depot;
       if (is64bit)
-        BuildMI(MBB, MBBI, dl,
-                tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame)
-                .addReg(NVPTX::VRDepot);
+        BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+                NVPTX::VRFrame).addReg(NVPTX::VRDepot);
       else
-        BuildMI(MBB, MBBI, dl,
-                tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame)
-                .addReg(NVPTX::VRDepot);
+        BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+                NVPTX::VRFrame).addReg(NVPTX::VRDepot);
     }
   }
 }
 
 void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
-                                      MachineBasicBlock &MBB) const {
-}
+                                      MachineBasicBlock &MBB) const {}
 
 // This function eliminates ADJCALLSTACKDOWN,
 // ADJCALLSTACKUP pseudo instructions
-void NVPTXFrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator I) const {
+void NVPTXFrameLowering::eliminateCallFramePseudoInstr(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator I) const {
   // Simply discard ADJCALLSTACKDOWN,
   // ADJCALLSTACKUP instructions.
   MBB.erase(I);
 }
-
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index d34e7bec1da0..819f1dd3f4be 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -16,7 +16,6 @@
 
 #include "llvm/Target/TargetFrameLowering.h"
 
-
 namespace llvm {
 class NVPTXTargetMachine;
 
@@ -26,13 +25,12 @@ class NVPTXFrameLowering : public TargetFrameLowering {
 
 public:
   explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
-  : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
-    tm(_tm), is64bit(_is64bit) {}
+      : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),
+        is64bit(_is64bit) {}
 
   virtual bool hasFP(const MachineFunction &MF) const;
   virtual void emitPrologue(MachineFunction &MF) const;
-  virtual void emitEpilogue(MachineFunction &MF,
-                            MachineBasicBlock &MBB) const;
+  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
 
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 481f13afd187..0f4c8dbce5e3 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #include "NVPTXISelDAGToDAG.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instructions.h"
@@ -26,27 +25,22 @@
 
 using namespace llvm;
 
-
-static cl::opt<bool>
-UseFMADInstruction("nvptx-mad-enable",
-                   cl::ZeroOrMore,
-                cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
-                   cl::init(false));
+static cl::opt<bool> UseFMADInstruction(
+    "nvptx-mad-enable", cl::ZeroOrMore,
+    cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
+    cl::init(false));
 
 static cl::opt<int>
-FMAContractLevel("nvptx-fma-level",
-                 cl::ZeroOrMore,
+FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
                  cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
-                     " 1: do it  2: do it aggressively"),
-                     cl::init(2));
-
+                          " 1: do it  2: do it aggressively"),
+                 cl::init(2));
 
-static cl::opt<int>
-UsePrecDivF32("nvptx-prec-divf32",
-              cl::ZeroOrMore,
-             cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
-                  " IEEE Compliant F32 div.rnd if avaiable."),
-                  cl::init(2));
+static cl::opt<int> UsePrecDivF32(
+    "nvptx-prec-divf32", cl::ZeroOrMore,
+    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
+             " IEEE Compliant F32 div.rnd if avaiable."),
+    cl::init(2));
 
 /// createNVPTXISelDag - This pass converts a legalized DAG into a
 /// NVPTX-specific DAG, ready for instruction scheduling.
@@ -55,26 +49,22 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
   return new NVPTXDAGToDAGISel(TM, OptLevel);
 }
 
-
 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                      CodeGenOpt::Level OptLevel)
-: SelectionDAGISel(tm, OptLevel),
-  Subtarget(tm.getSubtarget<NVPTXSubtarget>())
-{
+    : SelectionDAGISel(tm, OptLevel),
+      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
   // Always do fma.f32 fpcontract if the target supports the instruction.
   // Always do fma.f64 fpcontract if the target supports the instruction.
   // Do mad.f32 is nvptx-mad-enable is specified and the target does not
   // support fma.f32.
 
   doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
-  doFMAF32 =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
-      (FMAContractLevel>=1);
-  doFMAF64 =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
-      (FMAContractLevel>=1);
-  doFMAF32AGG =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
-      (FMAContractLevel==2);
-  doFMAF64AGG =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
-      (FMAContractLevel==2);
+  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
+  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
+  doFMAF32AGG =
+      (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
+  doFMAF64AGG =
+      (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
 
   allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
 
@@ -92,10 +82,10 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
 
 /// Select - Select instructions not customized! Used for
 /// expanded, promoted and normal instructions.
-SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
 
   if (N->isMachineOpcode())
-    return NULL;   // Already selected.
+    return NULL; // Already selected.
 
   SDNode *ResNode = NULL;
   switch (N->getOpcode()) {
@@ -119,30 +109,34 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
   case NVPTXISD::StoreV4:
     ResNode = SelectStoreVector(N);
     break;
-  default: break;
+  default:
+    break;
   }
   if (ResNode)
     return ResNode;
   return SelectCode(N);
 }
 
-
-static unsigned int
-getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
-{
+static unsigned int getCodeAddrSpace(MemSDNode *N,
+                                     const NVPTXSubtarget &Subtarget) {
   const Value *Src = N->getSrcValue();
   if (!Src)
     return NVPTX::PTXLdStInstCode::LOCAL;
 
   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
     switch (PT->getAddressSpace()) {
-    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
-    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
-    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
+    case llvm::ADDRESS_SPACE_LOCAL:
+      return NVPTX::PTXLdStInstCode::LOCAL;
+    case llvm::ADDRESS_SPACE_GLOBAL:
+      return NVPTX::PTXLdStInstCode::GLOBAL;
+    case llvm::ADDRESS_SPACE_SHARED:
+      return NVPTX::PTXLdStInstCode::SHARED;
     case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
       return NVPTX::PTXLdStInstCode::CONSTANT;
-    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
-    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
+    case llvm::ADDRESS_SPACE_GENERIC:
+      return NVPTX::PTXLdStInstCode::GENERIC;
+    case llvm::ADDRESS_SPACE_PARAM:
+      return NVPTX::PTXLdStInstCode::PARAM;
     case llvm::ADDRESS_SPACE_CONST:
       // If the arch supports generic address space, translate it to GLOBAL
       // for correctness.
@@ -153,18 +147,18 @@ getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
         return NVPTX::PTXLdStInstCode::GLOBAL;
       else
         return NVPTX::PTXLdStInstCode::CONSTANT;
-    default: break;
+    default:
+      break;
     }
   }
   return NVPTX::PTXLdStInstCode::LOCAL;
 }
 
-
-SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
   LoadSDNode *LD = cast<LoadSDNode>(N);
   EVT LoadedVT = LD->getMemoryVT();
-  SDNode *NVPTXLD= NULL;
+  SDNode *NVPTXLD = NULL;
 
   // do not support pre/post inc/dec
   if (LD->isIndexed())
@@ -204,7 +198,7 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
   //          type is integer
   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
   MVT ScalarVT = SimpleVT.getScalarType();
-  unsigned fromTypeWidth =  ScalarVT.getSizeInBits();
+  unsigned fromTypeWidth = ScalarVT.getSizeInBits();
   unsigned int fromType;
   if ((LD->getExtensionType() == ISD::SEXTLOAD))
     fromType = NVPTX::PTXLdStInstCode::Signed;
@@ -223,105 +217,166 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
 
   if (SelectDirectAddr(N1, Addr)) {
     switch (TargetVT) {
-    case MVT::i8:    Opcode = NVPTX::LD_i8_avar; break;
-    case MVT::i16:   Opcode = NVPTX::LD_i16_avar; break;
-    case MVT::i32:   Opcode = NVPTX::LD_i32_avar; break;
-    case MVT::i64:   Opcode = NVPTX::LD_i64_avar; break;
-    case MVT::f32:   Opcode = NVPTX::LD_f32_avar; break;
-    case MVT::f64:   Opcode = NVPTX::LD_f64_avar; break;
-    default: return NULL;
+    case MVT::i8:
+      Opcode = NVPTX::LD_i8_avar;
+      break;
+    case MVT::i16:
+      Opcode = NVPTX::LD_i16_avar;
+      break;
+    case MVT::i32:
+      Opcode = NVPTX::LD_i32_avar;
+      break;
+    case MVT::i64:
+      Opcode = NVPTX::LD_i64_avar;
+      break;
+    case MVT::f32:
+      Opcode = NVPTX::LD_f32_avar;
+      break;
+    case MVT::f64:
+      Opcode = NVPTX::LD_f64_avar;
+      break;
+    default:
+      return NULL;
     }
-    SDValue Ops[] = { getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(fromType),
-                      getI32Imm(fromTypeWidth),
-                      Addr, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
-                                     MVT::Other, Ops, 7);
-  } else if (Subtarget.is64Bit()?
-      SelectADDRsi64(N1.getNode(), N1, Base, Offset):
-      SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(fromType),
+                      getI32Imm(fromTypeWidth), Addr, Chain };
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
+                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
     switch (TargetVT) {
-    case MVT::i8:    Opcode = NVPTX::LD_i8_asi; break;
-    case MVT::i16:   Opcode = NVPTX::LD_i16_asi; break;
-    case MVT::i32:   Opcode = NVPTX::LD_i32_asi; break;
-    case MVT::i64:   Opcode = NVPTX::LD_i64_asi; break;
-    case MVT::f32:   Opcode = NVPTX::LD_f32_asi; break;
-    case MVT::f64:   Opcode = NVPTX::LD_f64_asi; break;
-    default: return NULL;
+    case MVT::i8:
+      Opcode = NVPTX::LD_i8_asi;
+      break;
+    case MVT::i16:
+      Opcode = NVPTX::LD_i16_asi;
+      break;
+    case MVT::i32:
+      Opcode = NVPTX::LD_i32_asi;
+      break;
+    case MVT::i64:
+      Opcode = NVPTX::LD_i64_asi;
+      break;
+    case MVT::f32:
+      Opcode = NVPTX::LD_f32_asi;
+      break;
+    case MVT::f64:
+      Opcode = NVPTX::LD_f64_asi;
+      break;
+    default:
+      return NULL;
     }
-    SDValue Ops[] = { getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(fromType),
-                      getI32Imm(fromTypeWidth),
-                      Base, Offset, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
-                                     MVT::Other, Ops, 8);
-  } else if (Subtarget.is64Bit()?
-      SelectADDRri64(N1.getNode(), N1, Base, Offset):
-      SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(fromType),
+                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
     if (Subtarget.is64Bit()) {
       switch (TargetVT) {
-      case MVT::i8:    Opcode = NVPTX::LD_i8_ari_64; break;
-      case MVT::i16:   Opcode = NVPTX::LD_i16_ari_64; break;
-      case MVT::i32:   Opcode = NVPTX::LD_i32_ari_64; break;
-      case MVT::i64:   Opcode = NVPTX::LD_i64_ari_64; break;
-      case MVT::f32:   Opcode = NVPTX::LD_f32_ari_64; break;
-      case MVT::f64:   Opcode = NVPTX::LD_f64_ari_64; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LD_i8_ari_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LD_i16_ari_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LD_i32_ari_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LD_i64_ari_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LD_f32_ari_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LD_f64_ari_64;
+        break;
+      default:
+        return NULL;
       }
     } else {
       switch (TargetVT) {
-      case MVT::i8:    Opcode = NVPTX::LD_i8_ari; break;
-      case MVT::i16:   Opcode = NVPTX::LD_i16_ari; break;
-      case MVT::i32:   Opcode = NVPTX::LD_i32_ari; break;
-      case MVT::i64:   Opcode = NVPTX::LD_i64_ari; break;
-      case MVT::f32:   Opcode = NVPTX::LD_f32_ari; break;
-      case MVT::f64:   Opcode = NVPTX::LD_f64_ari; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LD_i8_ari;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LD_i16_ari;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LD_i32_ari;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LD_i64_ari;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LD_f32_ari;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LD_f64_ari;
+        break;
+      default:
+        return NULL;
       }
     }
-    SDValue Ops[] = { getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(fromType),
-                      getI32Imm(fromTypeWidth),
-                      Base, Offset, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
-                                     MVT::Other, Ops, 8);
-  }
-  else {
+    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(fromType),
+                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
+  } else {
     if (Subtarget.is64Bit()) {
       switch (TargetVT) {
-      case MVT::i8:    Opcode = NVPTX::LD_i8_areg_64; break;
-      case MVT::i16:   Opcode = NVPTX::LD_i16_areg_64; break;
-      case MVT::i32:   Opcode = NVPTX::LD_i32_areg_64; break;
-      case MVT::i64:   Opcode = NVPTX::LD_i64_areg_64; break;
-      case MVT::f32:   Opcode = NVPTX::LD_f32_areg_64; break;
-      case MVT::f64:   Opcode = NVPTX::LD_f64_areg_64; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LD_i8_areg_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LD_i16_areg_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LD_i32_areg_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LD_i64_areg_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LD_f32_areg_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LD_f64_areg_64;
+        break;
+      default:
+        return NULL;
       }
     } else {
       switch (TargetVT) {
-      case MVT::i8:    Opcode = NVPTX::LD_i8_areg; break;
-      case MVT::i16:   Opcode = NVPTX::LD_i16_areg; break;
-      case MVT::i32:   Opcode = NVPTX::LD_i32_areg; break;
-      case MVT::i64:   Opcode = NVPTX::LD_i64_areg; break;
-      case MVT::f32:   Opcode = NVPTX::LD_f32_areg; break;
-      case MVT::f64:   Opcode = NVPTX::LD_f64_areg; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LD_i8_areg;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LD_i16_areg;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LD_i32_areg;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LD_i64_areg;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LD_f32_areg;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LD_f64_areg;
+        break;
+      default:
+        return NULL;
       }
     }
-    SDValue Ops[] = { getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(fromType),
-                      getI32Imm(fromTypeWidth),
-                      N1, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
-                                     MVT::Other, Ops, 7);
+    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(fromType),
+                      getI32Imm(fromTypeWidth), N1, Chain };
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
   }
 
   if (NVPTXLD != NULL) {
@@ -344,9 +399,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
   MemSDNode *MemSD = cast<MemSDNode>(N);
   EVT LoadedVT = MemSD->getMemoryVT();
 
-
   if (!LoadedVT.isSimple())
-     return NULL;
+    return NULL;
 
   // Address Space Setting
   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
@@ -369,11 +423,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
   //          type is integer
   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
   MVT ScalarVT = SimpleVT.getScalarType();
-  unsigned FromTypeWidth =  ScalarVT.getSizeInBits();
+  unsigned FromTypeWidth = ScalarVT.getSizeInBits();
   unsigned int FromType;
   // The last operand holds the original LoadSDNode::getExtensionType() value
-  unsigned ExtensionType =
-    cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue();
+  unsigned ExtensionType = cast<ConstantSDNode>(
+      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
   if (ExtensionType == ISD::SEXTLOAD)
     FromType = NVPTX::PTXLdStInstCode::Signed;
   else if (ScalarVT.isFloatingPoint())
@@ -384,198 +438,329 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
   unsigned VecType;
 
   switch (N->getOpcode()) {
-  case NVPTXISD::LoadV2:  VecType = NVPTX::PTXLdStInstCode::V2; break;
-  case NVPTXISD::LoadV4:  VecType = NVPTX::PTXLdStInstCode::V4; break;
-  default: return NULL;
+  case NVPTXISD::LoadV2:
+    VecType = NVPTX::PTXLdStInstCode::V2;
+    break;
+  case NVPTXISD::LoadV4:
+    VecType = NVPTX::PTXLdStInstCode::V4;
+    break;
+  default:
+    return NULL;
   }
 
   EVT EltVT = N->getValueType(0);
 
   if (SelectDirectAddr(Op1, Addr)) {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::LoadV2:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_avar; break;
-      case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_avar; break;
-      case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_avar; break;
-      case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_avar; break;
-      case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_avar; break;
-      case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_avar; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LDV_i8_v2_avar;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LDV_i16_v2_avar;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LDV_i32_v2_avar;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LDV_i64_v2_avar;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LDV_f32_v2_avar;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LDV_f64_v2_avar;
+        break;
       }
       break;
     case NVPTXISD::LoadV4:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_avar; break;
-      case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_avar; break;
-      case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_avar; break;
-      case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_avar; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LDV_i8_v4_avar;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LDV_i16_v4_avar;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LDV_i32_v4_avar;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LDV_f32_v4_avar;
+        break;
       }
       break;
     }
 
-    SDValue Ops[] = { getI32Imm(IsVolatile),
-                      getI32Imm(CodeAddrSpace),
-                      getI32Imm(VecType),
-                      getI32Imm(FromType),
-                      getI32Imm(FromTypeWidth),
-                      Addr, Chain };
-    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
-  } else if (Subtarget.is64Bit()?
-             SelectADDRsi64(Op1.getNode(), Op1, Base, Offset):
-             SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType), getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth), Addr, Chain };
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
+                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::LoadV2:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_asi; break;
-      case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_asi; break;
-      case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_asi; break;
-      case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_asi; break;
-      case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_asi; break;
-      case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_asi; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LDV_i8_v2_asi;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LDV_i16_v2_asi;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LDV_i32_v2_asi;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LDV_i64_v2_asi;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LDV_f32_v2_asi;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LDV_f64_v2_asi;
+        break;
       }
       break;
     case NVPTXISD::LoadV4:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_asi; break;
-      case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_asi; break;
-      case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_asi; break;
-      case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_asi; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LDV_i8_v4_asi;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LDV_i16_v4_asi;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LDV_i32_v4_asi;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LDV_f32_v4_asi;
+        break;
       }
       break;
     }
 
-    SDValue Ops[] = { getI32Imm(IsVolatile),
-                      getI32Imm(CodeAddrSpace),
-                      getI32Imm(VecType),
-                      getI32Imm(FromType),
-                      getI32Imm(FromTypeWidth),
-                      Base, Offset, Chain };
-    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
-  } else if (Subtarget.is64Bit()?
-             SelectADDRri64(Op1.getNode(), Op1, Base, Offset):
-             SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType), getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
     if (Subtarget.is64Bit()) {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::LoadV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_ari_64; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_ari_64; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_ari_64; break;
-        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_ari_64; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_ari_64; break;
-        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_ari_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v2_ari_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v2_ari_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v2_ari_64;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::LDV_i64_v2_ari_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v2_ari_64;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::LDV_f64_v2_ari_64;
+          break;
         }
         break;
       case NVPTXISD::LoadV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_ari_64; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_ari_64; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_ari_64; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_ari_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v4_ari_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v4_ari_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v4_ari_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v4_ari_64;
+          break;
         }
         break;
       }
     } else {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::LoadV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_ari; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_ari; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_ari; break;
-        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_ari; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_ari; break;
-        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_ari; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v2_ari;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v2_ari;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v2_ari;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::LDV_i64_v2_ari;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v2_ari;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::LDV_f64_v2_ari;
+          break;
         }
         break;
       case NVPTXISD::LoadV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_ari; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_ari; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_ari; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_ari; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v4_ari;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v4_ari;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v4_ari;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v4_ari;
+          break;
         }
         break;
       }
     }
 
-    SDValue Ops[] = { getI32Imm(IsVolatile),
-                      getI32Imm(CodeAddrSpace),
-                      getI32Imm(VecType),
-                      getI32Imm(FromType),
-                      getI32Imm(FromTypeWidth),
-                      Base, Offset, Chain };
+    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType), getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
 
-    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   } else {
     if (Subtarget.is64Bit()) {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::LoadV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_areg_64; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_areg_64; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_areg_64; break;
-        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_areg_64; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_areg_64; break;
-        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_areg_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v2_areg_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v2_areg_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v2_areg_64;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::LDV_i64_v2_areg_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v2_areg_64;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::LDV_f64_v2_areg_64;
+          break;
         }
         break;
       case NVPTXISD::LoadV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_areg_64; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_areg_64; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_areg_64; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_areg_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v4_areg_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v4_areg_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v4_areg_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v4_areg_64;
+          break;
         }
         break;
       }
     } else {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::LoadV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_areg; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_areg; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_areg; break;
-        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_areg; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_areg; break;
-        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_areg; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v2_areg;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v2_areg;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v2_areg;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::LDV_i64_v2_areg;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v2_areg;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::LDV_f64_v2_areg;
+          break;
         }
         break;
       case NVPTXISD::LoadV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_areg; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_areg; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_areg; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_areg; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v4_areg;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v4_areg;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v4_areg;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v4_areg;
+          break;
         }
         break;
       }
     }
 
-    SDValue Ops[] = { getI32Imm(IsVolatile),
-                      getI32Imm(CodeAddrSpace),
-                      getI32Imm(VecType),
-                      getI32Imm(FromType),
-                      getI32Imm(FromTypeWidth),
-                      Op1, Chain };
-    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType), getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth), Op1, Chain };
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   }
 
   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
@@ -598,96 +783,186 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
   // Select opcode
   if (Subtarget.is64Bit()) {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::LDGV2:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break;
-      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break;
-      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64;
+        break;
       }
       break;
     case NVPTXISD::LDGV4:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64;
+        break;
       }
       break;
     case NVPTXISD::LDUV2:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break;
-      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break;
-      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
+        break;
       }
       break;
     case NVPTXISD::LDUV4:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
+        break;
       }
       break;
     }
   } else {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::LDGV2:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break;
-      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break;
-      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32;
+        break;
       }
       break;
     case NVPTXISD::LDGV4:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32;
+        break;
       }
       break;
     case NVPTXISD::LDUV2:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break;
-      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break;
-      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
+        break;
       }
       break;
     case NVPTXISD::LDUV4:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
+        break;
       }
       break;
     }
   }
 
   SDValue Ops[] = { Op1, Chain };
-  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
+  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
 
   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
@@ -696,8 +971,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
   return LD;
 }
 
-
-SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
   StoreSDNode *ST = cast<StoreSDNode>(N);
   EVT StoreVT = ST->getMemoryVT();
@@ -738,7 +1012,7 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
   // - for integer type, always use 'u'
   //
   MVT ScalarVT = SimpleVT.getScalarType();
-  unsigned toTypeWidth =  ScalarVT.getSizeInBits();
+  unsigned toTypeWidth = ScalarVT.getSizeInBits();
   unsigned int toType;
   if (ScalarVT.isFloatingPoint())
     toType = NVPTX::PTXLdStInstCode::Float;
@@ -757,108 +1031,166 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
 
   if (SelectDirectAddr(N2, Addr)) {
     switch (SourceVT) {
-    case MVT::i8:    Opcode = NVPTX::ST_i8_avar; break;
-    case MVT::i16:   Opcode = NVPTX::ST_i16_avar; break;
-    case MVT::i32:   Opcode = NVPTX::ST_i32_avar; break;
-    case MVT::i64:   Opcode = NVPTX::ST_i64_avar; break;
-    case MVT::f32:   Opcode = NVPTX::ST_f32_avar; break;
-    case MVT::f64:   Opcode = NVPTX::ST_f64_avar; break;
-    default: return NULL;
+    case MVT::i8:
+      Opcode = NVPTX::ST_i8_avar;
+      break;
+    case MVT::i16:
+      Opcode = NVPTX::ST_i16_avar;
+      break;
+    case MVT::i32:
+      Opcode = NVPTX::ST_i32_avar;
+      break;
+    case MVT::i64:
+      Opcode = NVPTX::ST_i64_avar;
+      break;
+    case MVT::f32:
+      Opcode = NVPTX::ST_f32_avar;
+      break;
+    case MVT::f64:
+      Opcode = NVPTX::ST_f64_avar;
+      break;
+    default:
+      return NULL;
     }
-    SDValue Ops[] = { N1,
-                      getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(toType),
-                      getI32Imm(toTypeWidth),
-                      Addr, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
-                                     MVT::Other, Ops, 8);
-  } else if (Subtarget.is64Bit()?
-      SelectADDRsi64(N2.getNode(), N2, Base, Offset):
-      SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(toType),
+                      getI32Imm(toTypeWidth), Addr, Chain };
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
     switch (SourceVT) {
-    case MVT::i8:    Opcode = NVPTX::ST_i8_asi; break;
-    case MVT::i16:   Opcode = NVPTX::ST_i16_asi; break;
-    case MVT::i32:   Opcode = NVPTX::ST_i32_asi; break;
-    case MVT::i64:   Opcode = NVPTX::ST_i64_asi; break;
-    case MVT::f32:   Opcode = NVPTX::ST_f32_asi; break;
-    case MVT::f64:   Opcode = NVPTX::ST_f64_asi; break;
-    default: return NULL;
+    case MVT::i8:
+      Opcode = NVPTX::ST_i8_asi;
+      break;
+    case MVT::i16:
+      Opcode = NVPTX::ST_i16_asi;
+      break;
+    case MVT::i32:
+      Opcode = NVPTX::ST_i32_asi;
+      break;
+    case MVT::i64:
+      Opcode = NVPTX::ST_i64_asi;
+      break;
+    case MVT::f32:
+      Opcode = NVPTX::ST_f32_asi;
+      break;
+    case MVT::f64:
+      Opcode = NVPTX::ST_f64_asi;
+      break;
+    default:
+      return NULL;
     }
-    SDValue Ops[] = { N1,
-                      getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(toType),
-                      getI32Imm(toTypeWidth),
-                      Base, Offset, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
-                                     MVT::Other, Ops, 9);
-  } else if (Subtarget.is64Bit()?
-      SelectADDRri64(N2.getNode(), N2, Base, Offset):
-      SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(toType),
+                      getI32Imm(toTypeWidth), Base, Offset, Chain };
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
     if (Subtarget.is64Bit()) {
       switch (SourceVT) {
-      case MVT::i8:    Opcode = NVPTX::ST_i8_ari_64; break;
-      case MVT::i16:   Opcode = NVPTX::ST_i16_ari_64; break;
-      case MVT::i32:   Opcode = NVPTX::ST_i32_ari_64; break;
-      case MVT::i64:   Opcode = NVPTX::ST_i64_ari_64; break;
-      case MVT::f32:   Opcode = NVPTX::ST_f32_ari_64; break;
-      case MVT::f64:   Opcode = NVPTX::ST_f64_ari_64; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::ST_i8_ari_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::ST_i16_ari_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::ST_i32_ari_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::ST_i64_ari_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::ST_f32_ari_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::ST_f64_ari_64;
+        break;
+      default:
+        return NULL;
       }
     } else {
       switch (SourceVT) {
-      case MVT::i8:    Opcode = NVPTX::ST_i8_ari; break;
-      case MVT::i16:   Opcode = NVPTX::ST_i16_ari; break;
-      case MVT::i32:   Opcode = NVPTX::ST_i32_ari; break;
-      case MVT::i64:   Opcode = NVPTX::ST_i64_ari; break;
-      case MVT::f32:   Opcode = NVPTX::ST_f32_ari; break;
-      case MVT::f64:   Opcode = NVPTX::ST_f64_ari; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::ST_i8_ari;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::ST_i16_ari;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::ST_i32_ari;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::ST_i64_ari;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::ST_f32_ari;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::ST_f64_ari;
+        break;
+      default:
+        return NULL;
       }
     }
-    SDValue Ops[] = { N1,
-                      getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(toType),
-                      getI32Imm(toTypeWidth),
-                      Base, Offset, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
-                                     MVT::Other, Ops, 9);
+    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(toType),
+                      getI32Imm(toTypeWidth), Base, Offset, Chain };
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   } else {
     if (Subtarget.is64Bit()) {
       switch (SourceVT) {
-      case MVT::i8:    Opcode = NVPTX::ST_i8_areg_64; break;
-      case MVT::i16:   Opcode = NVPTX::ST_i16_areg_64; break;
-      case MVT::i32:   Opcode = NVPTX::ST_i32_areg_64; break;
-      case MVT::i64:   Opcode = NVPTX::ST_i64_areg_64; break;
-      case MVT::f32:   Opcode = NVPTX::ST_f32_areg_64; break;
-      case MVT::f64:   Opcode = NVPTX::ST_f64_areg_64; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::ST_i8_areg_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::ST_i16_areg_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::ST_i32_areg_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::ST_i64_areg_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::ST_f32_areg_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::ST_f64_areg_64;
+        break;
+      default:
+        return NULL;
       }
     } else {
       switch (SourceVT) {
-      case MVT::i8:    Opcode = NVPTX::ST_i8_areg; break;
-      case MVT::i16:   Opcode = NVPTX::ST_i16_areg; break;
-      case MVT::i32:   Opcode = NVPTX::ST_i32_areg; break;
-      case MVT::i64:   Opcode = NVPTX::ST_i64_areg; break;
-      case MVT::f32:   Opcode = NVPTX::ST_f32_areg; break;
-      case MVT::f64:   Opcode = NVPTX::ST_f64_areg; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::ST_i8_areg;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::ST_i16_areg;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::ST_i32_areg;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::ST_i64_areg;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::ST_f32_areg;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::ST_f64_areg;
+        break;
+      default:
+        return NULL;
       }
     }
-    SDValue Ops[] = { N1,
-                      getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(toType),
-                      getI32Imm(toTypeWidth),
-                      N2, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
-                                     MVT::Other, Ops, 8);
+    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(toType),
+                      getI32Imm(toTypeWidth), N2, Chain };
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   }
 
   if (NVPTXST != NULL) {
@@ -901,14 +1233,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
   // - for integer type, always use 'u'
   assert(StoreVT.isSimple() && "Store value is not simple");
   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
-  unsigned ToTypeWidth =  ScalarVT.getSizeInBits();
+  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
   unsigned ToType;
   if (ScalarVT.isFloatingPoint())
     ToType = NVPTX::PTXLdStInstCode::Float;
   else
     ToType = NVPTX::PTXLdStInstCode::Unsigned;
 
-
   SmallVector<SDValue, 12> StOps;
   SDValue N2;
   unsigned VecType;
@@ -928,7 +1259,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
     StOps.push_back(N->getOperand(4));
     N2 = N->getOperand(5);
     break;
-  default: return NULL;
+  default:
+    return NULL;
   }
 
   StOps.push_back(getI32Imm(IsVolatile));
@@ -939,105 +1271,197 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
 
   if (SelectDirectAddr(N2, Addr)) {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::StoreV2:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::STV_i8_v2_avar; break;
-      case MVT::i16:  Opcode = NVPTX::STV_i16_v2_avar; break;
-      case MVT::i32:  Opcode = NVPTX::STV_i32_v2_avar; break;
-      case MVT::i64:  Opcode = NVPTX::STV_i64_v2_avar; break;
-      case MVT::f32:  Opcode = NVPTX::STV_f32_v2_avar; break;
-      case MVT::f64:  Opcode = NVPTX::STV_f64_v2_avar; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::STV_i8_v2_avar;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::STV_i16_v2_avar;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::STV_i32_v2_avar;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::STV_i64_v2_avar;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::STV_f32_v2_avar;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::STV_f64_v2_avar;
+        break;
       }
       break;
     case NVPTXISD::StoreV4:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::STV_i8_v4_avar; break;
-      case MVT::i16:  Opcode = NVPTX::STV_i16_v4_avar; break;
-      case MVT::i32:  Opcode = NVPTX::STV_i32_v4_avar; break;
-      case MVT::f32:  Opcode = NVPTX::STV_f32_v4_avar; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::STV_i8_v4_avar;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::STV_i16_v4_avar;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::STV_i32_v4_avar;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::STV_f32_v4_avar;
+        break;
       }
       break;
     }
     StOps.push_back(Addr);
-  } else if (Subtarget.is64Bit()?
-             SelectADDRsi64(N2.getNode(), N2, Base, Offset):
-             SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::StoreV2:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::STV_i8_v2_asi; break;
-      case MVT::i16:  Opcode = NVPTX::STV_i16_v2_asi; break;
-      case MVT::i32:  Opcode = NVPTX::STV_i32_v2_asi; break;
-      case MVT::i64:  Opcode = NVPTX::STV_i64_v2_asi; break;
-      case MVT::f32:  Opcode = NVPTX::STV_f32_v2_asi; break;
-      case MVT::f64:  Opcode = NVPTX::STV_f64_v2_asi; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::STV_i8_v2_asi;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::STV_i16_v2_asi;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::STV_i32_v2_asi;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::STV_i64_v2_asi;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::STV_f32_v2_asi;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::STV_f64_v2_asi;
+        break;
       }
       break;
     case NVPTXISD::StoreV4:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::STV_i8_v4_asi; break;
-      case MVT::i16:  Opcode = NVPTX::STV_i16_v4_asi; break;
-      case MVT::i32:  Opcode = NVPTX::STV_i32_v4_asi; break;
-      case MVT::f32:  Opcode = NVPTX::STV_f32_v4_asi; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::STV_i8_v4_asi;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::STV_i16_v4_asi;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::STV_i32_v4_asi;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::STV_f32_v4_asi;
+        break;
       }
       break;
     }
     StOps.push_back(Base);
     StOps.push_back(Offset);
-  } else if (Subtarget.is64Bit()?
-             SelectADDRri64(N2.getNode(), N2, Base, Offset):
-             SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
     if (Subtarget.is64Bit()) {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::StoreV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_ari_64; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_ari_64; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_ari_64; break;
-        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_ari_64; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_ari_64; break;
-        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_ari_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v2_ari_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v2_ari_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v2_ari_64;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::STV_i64_v2_ari_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v2_ari_64;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::STV_f64_v2_ari_64;
+          break;
         }
         break;
       case NVPTXISD::StoreV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_ari_64; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_ari_64; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_ari_64; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_ari_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v4_ari_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v4_ari_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v4_ari_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v4_ari_64;
+          break;
         }
         break;
       }
     } else {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::StoreV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_ari; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_ari; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_ari; break;
-        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_ari; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_ari; break;
-        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_ari; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v2_ari;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v2_ari;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v2_ari;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::STV_i64_v2_ari;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v2_ari;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::STV_f64_v2_ari;
+          break;
         }
         break;
       case NVPTXISD::StoreV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_ari; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_ari; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_ari; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_ari; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v4_ari;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v4_ari;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v4_ari;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v4_ari;
+          break;
         }
         break;
       }
@@ -1047,49 +1471,95 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
   } else {
     if (Subtarget.is64Bit()) {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::StoreV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_areg_64; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_areg_64; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_areg_64; break;
-        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_areg_64; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_areg_64; break;
-        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_areg_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v2_areg_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v2_areg_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v2_areg_64;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::STV_i64_v2_areg_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v2_areg_64;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::STV_f64_v2_areg_64;
+          break;
         }
         break;
       case NVPTXISD::StoreV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_areg_64; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_areg_64; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_areg_64; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_areg_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v4_areg_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v4_areg_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v4_areg_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v4_areg_64;
+          break;
         }
         break;
       }
     } else {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::StoreV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_areg; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_areg; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_areg; break;
-        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_areg; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_areg; break;
-        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_areg; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v2_areg;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v2_areg;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v2_areg;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::STV_i64_v2_areg;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v2_areg;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::STV_f64_v2_areg;
+          break;
         }
         break;
       case NVPTXISD::StoreV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_areg; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_areg; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_areg; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_areg; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v4_areg;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v4_areg;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v4_areg;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v4_areg;
+          break;
         }
         break;
       }
@@ -1099,7 +1569,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
 
   StOps.push_back(Chain);
 
-  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
+  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
 
   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
@@ -1112,8 +1582,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
 // A direct address could be a globaladdress or externalsymbol.
 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
   // Return true if TGA or ES.
-  if (N.getOpcode() == ISD::TargetGlobalAddress
-      || N.getOpcode() == ISD::TargetExternalSymbol) {
+  if (N.getOpcode() == ISD::TargetGlobalAddress ||
+      N.getOpcode() == ISD::TargetExternalSymbol) {
     Address = N;
     return true;
   }
@@ -1131,12 +1601,11 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
 }
 
 // symbol+offset
-bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
-                                         SDValue &Base, SDValue &Offset,
-                                         MVT mvt) {
+bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
+    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   if (Addr.getOpcode() == ISD::ADD) {
     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
-      SDValue base=Addr.getOperand(0);
+      SDValue base = Addr.getOperand(0);
       if (SelectDirectAddr(base, Base)) {
         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
         return true;
@@ -1159,9 +1628,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
 }
 
 // register+offset
-bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
-                                         SDValue &Base, SDValue &Offset,
-                                         MVT mvt) {
+bool NVPTXDAGToDAGISel::SelectADDRri_imp(
+    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
     Offset = CurDAG->getTargetConstant(0, mvt);
@@ -1169,7 +1637,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
   }
   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
       Addr.getOpcode() == ISD::TargetGlobalAddress)
-    return false;  // direct calls.
+    return false; // direct calls.
 
   if (Addr.getOpcode() == ISD::ADD) {
     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
@@ -1177,7 +1645,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
     }
     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
       if (FrameIndexSDNode *FIN =
-          dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
         // Constant offset from frame ref.
         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
       else
@@ -1209,8 +1677,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
   // (See SelectionDAGNodes.h). So we need to check for both.
   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
     Src = mN->getSrcValue();
-  }
-  else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
+  } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
     Src = mN->getSrcValue();
   }
   if (!Src)
@@ -1222,13 +1689,13 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
 
 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 /// inline asm expressions.
-bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
-                                                     char ConstraintCode,
-                                                 std::vector<SDValue> &OutOps) {
+bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
+    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
   SDValue Op0, Op1;
   switch (ConstraintCode) {
-  default: return true;
-  case 'm':   // memory
+  default:
+    return true;
+  case 'm': // memory
     if (SelectDirectAddr(Op, Op0)) {
       OutOps.push_back(Op0);
       OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
@@ -1251,10 +1718,8 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
 // pattern matcher inserts a bunch of IMOVi8rr to convert
 // the imm to i8imm, and this causes instruction selection
 // to fail.
-bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
-                                   SDValue &Retval) {
-  if (!(N.getOpcode() == ISD::UNDEF) &&
-      !(N.getOpcode() == ISD::Constant))
+bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
+  if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
     return false;
 
   if (N.getOpcode() == ISD::UNDEF)
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 4ec924117a0b..70e8e464297d 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -64,11 +64,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
 
   const NVPTXSubtarget &Subtarget;
 
-  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
-                                            char ConstraintCode,
-                                            std::vector<SDValue> &OutOps);
+  virtual bool SelectInlineAsmMemoryOperand(
+      const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps);
 private:
-  // Include the pieces autogenerated from the target description.
+// Include the pieces autogenerated from the target description.
 #include "NVPTXGenDAGISel.inc"
 
   SDNode *Select(SDNode *N);
@@ -99,7 +98,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
   bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
                       SDValue &Offset);
 
-
   bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
 
   bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index e9a9fbfd04fb..6e01a5a82071 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #include "NVPTXISelLowering.h"
 #include "NVPTX.h"
 #include "NVPTXTargetMachine.h"
@@ -44,14 +43,14 @@ using namespace llvm;
 
 static unsigned int uniqueCallSite = 0;
 
-static cl::opt<bool>
-sched4reg("nvptx-sched4reg",
-          cl::desc("NVPTX Specific: schedule for register pressue"),
-          cl::init(false));
+static cl::opt<bool> sched4reg(
+    "nvptx-sched4reg",
+    cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
 
 static bool IsPTXVectorType(MVT VT) {
   switch (VT.SimpleTy) {
-  default: return false;
+  default:
+    return false;
   case MVT::v2i8:
   case MVT::v4i8:
   case MVT::v2i16:
@@ -62,22 +61,21 @@ static bool IsPTXVectorType(MVT VT) {
   case MVT::v2f32:
   case MVT::v4f32:
   case MVT::v2f64:
-  return true;
+    return true;
   }
 }
 
 // NVPTXTargetLowering Constructor.
 NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
-: TargetLowering(TM, new NVPTXTargetObjectFile()),
-  nvTM(&TM),
-  nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+    : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
+      nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
 
   // always lower memset, memcpy, and memmove intrinsics to load/store
   // instructions, rather
   // then generating calls to memset, mempcy or memmove.
-  MaxStoresPerMemset = (unsigned)0xFFFFFFFF;
-  MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
-  MaxStoresPerMemmove = (unsigned)0xFFFFFFFF;
+  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
+  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
+  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
 
   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 
@@ -100,52 +98,50 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
 
   // Operations not directly supported by NVPTX.
-  setOperationAction(ISD::SELECT_CC,         MVT::Other, Expand);
-  setOperationAction(ISD::BR_CC,             MVT::f32, Expand);
-  setOperationAction(ISD::BR_CC,             MVT::f64, Expand);
-  setOperationAction(ISD::BR_CC,             MVT::i1,  Expand);
-  setOperationAction(ISD::BR_CC,             MVT::i8,  Expand);
-  setOperationAction(ISD::BR_CC,             MVT::i16, Expand);
-  setOperationAction(ISD::BR_CC,             MVT::i32, Expand);
-  setOperationAction(ISD::BR_CC,             MVT::i64, Expand);
+  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 
   if (nvptxSubtarget.hasROT64()) {
-    setOperationAction(ISD::ROTL , MVT::i64, Legal);
-    setOperationAction(ISD::ROTR , MVT::i64, Legal);
-  }
-  else {
-    setOperationAction(ISD::ROTL , MVT::i64, Expand);
-    setOperationAction(ISD::ROTR , MVT::i64, Expand);
+    setOperationAction(ISD::ROTL, MVT::i64, Legal);
+    setOperationAction(ISD::ROTR, MVT::i64, Legal);
+  } else {
+    setOperationAction(ISD::ROTL, MVT::i64, Expand);
+    setOperationAction(ISD::ROTR, MVT::i64, Expand);
   }
   if (nvptxSubtarget.hasROT32()) {
-    setOperationAction(ISD::ROTL , MVT::i32, Legal);
-    setOperationAction(ISD::ROTR , MVT::i32, Legal);
-  }
-  else {
-    setOperationAction(ISD::ROTL , MVT::i32, Expand);
-    setOperationAction(ISD::ROTR , MVT::i32, Expand);
+    setOperationAction(ISD::ROTL, MVT::i32, Legal);
+    setOperationAction(ISD::ROTR, MVT::i32, Legal);
+  } else {
+    setOperationAction(ISD::ROTL, MVT::i32, Expand);
+    setOperationAction(ISD::ROTR, MVT::i32, Expand);
   }
 
-  setOperationAction(ISD::ROTL , MVT::i16, Expand);
-  setOperationAction(ISD::ROTR , MVT::i16, Expand);
-  setOperationAction(ISD::ROTL , MVT::i8, Expand);
-  setOperationAction(ISD::ROTR , MVT::i8, Expand);
-  setOperationAction(ISD::BSWAP , MVT::i16, Expand);
-  setOperationAction(ISD::BSWAP , MVT::i32, Expand);
-  setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+  setOperationAction(ISD::ROTL, MVT::i16, Expand);
+  setOperationAction(ISD::ROTR, MVT::i16, Expand);
+  setOperationAction(ISD::ROTL, MVT::i8, Expand);
+  setOperationAction(ISD::ROTR, MVT::i8, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
 
   // Indirect branch is not supported.
   // This also disables Jump Table creation.
-  setOperationAction(ISD::BR_JT,             MVT::Other, Expand);
-  setOperationAction(ISD::BRIND,             MVT::Other, Expand);
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
 
-  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
-  setOperationAction(ISD::GlobalAddress   , MVT::i64  , Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 
   // We want to legalize constant related memmove and memcopy
   // intrinsics.
@@ -168,16 +164,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   setTruncStoreAction(MVT::i8, MVT::i1, Expand);
 
   // This is legal in NVPTX
-  setOperationAction(ISD::ConstantFP,         MVT::f64, Legal);
-  setOperationAction(ISD::ConstantFP,         MVT::f32, Legal);
+  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 
   // TRAP can be lowered to PTX trap
-  setOperationAction(ISD::TRAP,               MVT::Other, Legal);
+  setOperationAction(ISD::TRAP, MVT::Other, Legal);
 
   // Register custom handling for vector loads/stores
-  for (int i = MVT::FIRST_VECTOR_VALUETYPE;
-       i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
-    MVT VT = (MVT::SimpleValueType)i;
+  for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
+       ++i) {
+    MVT VT = (MVT::SimpleValueType) i;
     if (IsPTXVectorType(VT)) {
       setOperationAction(ISD::LOAD, VT, Custom);
       setOperationAction(ISD::STORE, VT, Custom);
@@ -190,49 +186,86 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   computeRegisterProperties();
 }
 
-
 const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch (Opcode) {
-  default: return 0;
-  case NVPTXISD::CALL:            return "NVPTXISD::CALL";
-  case NVPTXISD::RET_FLAG:        return "NVPTXISD::RET_FLAG";
-  case NVPTXISD::Wrapper:         return "NVPTXISD::Wrapper";
-  case NVPTXISD::NVBuiltin:       return "NVPTXISD::NVBuiltin";
-  case NVPTXISD::DeclareParam:    return "NVPTXISD::DeclareParam";
+  default:
+    return 0;
+  case NVPTXISD::CALL:
+    return "NVPTXISD::CALL";
+  case NVPTXISD::RET_FLAG:
+    return "NVPTXISD::RET_FLAG";
+  case NVPTXISD::Wrapper:
+    return "NVPTXISD::Wrapper";
+  case NVPTXISD::NVBuiltin:
+    return "NVPTXISD::NVBuiltin";
+  case NVPTXISD::DeclareParam:
+    return "NVPTXISD::DeclareParam";
   case NVPTXISD::DeclareScalarParam:
     return "NVPTXISD::DeclareScalarParam";
-  case NVPTXISD::DeclareRet:      return "NVPTXISD::DeclareRet";
-  case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
-  case NVPTXISD::PrintCall:       return "NVPTXISD::PrintCall";
-  case NVPTXISD::LoadParam:       return "NVPTXISD::LoadParam";
-  case NVPTXISD::StoreParam:      return "NVPTXISD::StoreParam";
-  case NVPTXISD::StoreParamS32:   return "NVPTXISD::StoreParamS32";
-  case NVPTXISD::StoreParamU32:   return "NVPTXISD::StoreParamU32";
-  case NVPTXISD::MoveToParam:     return "NVPTXISD::MoveToParam";
-  case NVPTXISD::CallArgBegin:    return "NVPTXISD::CallArgBegin";
-  case NVPTXISD::CallArg:         return "NVPTXISD::CallArg";
-  case NVPTXISD::LastCallArg:     return "NVPTXISD::LastCallArg";
-  case NVPTXISD::CallArgEnd:      return "NVPTXISD::CallArgEnd";
-  case NVPTXISD::CallVoid:        return "NVPTXISD::CallVoid";
-  case NVPTXISD::CallVal:         return "NVPTXISD::CallVal";
-  case NVPTXISD::CallSymbol:      return "NVPTXISD::CallSymbol";
-  case NVPTXISD::Prototype:       return "NVPTXISD::Prototype";
-  case NVPTXISD::MoveParam:       return "NVPTXISD::MoveParam";
-  case NVPTXISD::MoveRetval:      return "NVPTXISD::MoveRetval";
-  case NVPTXISD::MoveToRetval:    return "NVPTXISD::MoveToRetval";
-  case NVPTXISD::StoreRetval:     return "NVPTXISD::StoreRetval";
-  case NVPTXISD::PseudoUseParam:  return "NVPTXISD::PseudoUseParam";
-  case NVPTXISD::RETURN:          return "NVPTXISD::RETURN";
-  case NVPTXISD::CallSeqBegin:    return "NVPTXISD::CallSeqBegin";
-  case NVPTXISD::CallSeqEnd:      return "NVPTXISD::CallSeqEnd";
-  case NVPTXISD::LoadV2:          return "NVPTXISD::LoadV2";
-  case NVPTXISD::LoadV4:          return "NVPTXISD::LoadV4";
-  case NVPTXISD::LDGV2:           return "NVPTXISD::LDGV2";
-  case NVPTXISD::LDGV4:           return "NVPTXISD::LDGV4";
-  case NVPTXISD::LDUV2:           return "NVPTXISD::LDUV2";
-  case NVPTXISD::LDUV4:           return "NVPTXISD::LDUV4";
-  case NVPTXISD::StoreV2:         return "NVPTXISD::StoreV2";
-  case NVPTXISD::StoreV4:         return "NVPTXISD::StoreV4";
+  case NVPTXISD::DeclareRet:
+    return "NVPTXISD::DeclareRet";
+  case NVPTXISD::DeclareRetParam:
+    return "NVPTXISD::DeclareRetParam";
+  case NVPTXISD::PrintCall:
+    return "NVPTXISD::PrintCall";
+  case NVPTXISD::LoadParam:
+    return "NVPTXISD::LoadParam";
+  case NVPTXISD::StoreParam:
+    return "NVPTXISD::StoreParam";
+  case NVPTXISD::StoreParamS32:
+    return "NVPTXISD::StoreParamS32";
+  case NVPTXISD::StoreParamU32:
+    return "NVPTXISD::StoreParamU32";
+  case NVPTXISD::MoveToParam:
+    return "NVPTXISD::MoveToParam";
+  case NVPTXISD::CallArgBegin:
+    return "NVPTXISD::CallArgBegin";
+  case NVPTXISD::CallArg:
+    return "NVPTXISD::CallArg";
+  case NVPTXISD::LastCallArg:
+    return "NVPTXISD::LastCallArg";
+  case NVPTXISD::CallArgEnd:
+    return "NVPTXISD::CallArgEnd";
+  case NVPTXISD::CallVoid:
+    return "NVPTXISD::CallVoid";
+  case NVPTXISD::CallVal:
+    return "NVPTXISD::CallVal";
+  case NVPTXISD::CallSymbol:
+    return "NVPTXISD::CallSymbol";
+  case NVPTXISD::Prototype:
+    return "NVPTXISD::Prototype";
+  case NVPTXISD::MoveParam:
+    return "NVPTXISD::MoveParam";
+  case NVPTXISD::MoveRetval:
+    return "NVPTXISD::MoveRetval";
+  case NVPTXISD::MoveToRetval:
+    return "NVPTXISD::MoveToRetval";
+  case NVPTXISD::StoreRetval:
+    return "NVPTXISD::StoreRetval";
+  case NVPTXISD::PseudoUseParam:
+    return "NVPTXISD::PseudoUseParam";
+  case NVPTXISD::RETURN:
+    return "NVPTXISD::RETURN";
+  case NVPTXISD::CallSeqBegin:
+    return "NVPTXISD::CallSeqBegin";
+  case NVPTXISD::CallSeqEnd:
+    return "NVPTXISD::CallSeqEnd";
+  case NVPTXISD::LoadV2:
+    return "NVPTXISD::LoadV2";
+  case NVPTXISD::LoadV4:
+    return "NVPTXISD::LoadV4";
+  case NVPTXISD::LDGV2:
+    return "NVPTXISD::LDGV2";
+  case NVPTXISD::LDGV4:
+    return "NVPTXISD::LDGV4";
+  case NVPTXISD::LDUV2:
+    return "NVPTXISD::LDUV2";
+  case NVPTXISD::LDUV4:
+    return "NVPTXISD::LDUV4";
+  case NVPTXISD::StoreV2:
+    return "NVPTXISD::StoreV2";
+  case NVPTXISD::StoreV4:
+    return "NVPTXISD::StoreV4";
   }
 }
 
@@ -248,10 +281,9 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
 }
 
-std::string NVPTXTargetLowering::getPrototype(Type *retTy,
-                                              const ArgListTy &Args,
-                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                              unsigned retAlignment) const {
+std::string NVPTXTargetLowering::getPrototype(
+    Type *retTy, const ArgListTy &Args,
+    const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const {
 
   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
 
@@ -267,54 +299,47 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
         unsigned size = 0;
         if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
           size = ITy->getBitWidth();
-          if (size < 32) size = 32;
-        }
-        else {
+          if (size < 32)
+            size = 32;
+        } else {
           assert(retTy->isFloatingPointTy() &&
                  "Floating point type expected here");
           size = retTy->getPrimitiveSizeInBits();
         }
 
         O << ".param .b" << size << " _";
-      }
-      else if (isa<PointerType>(retTy))
-        O << ".param .b" << getPointerTy().getSizeInBits()
-        << " _";
+      } else if (isa<PointerType>(retTy))
+        O << ".param .b" << getPointerTy().getSizeInBits() << " _";
       else {
         if ((retTy->getTypeID() == Type::StructTyID) ||
             isa<VectorType>(retTy)) {
           SmallVector<EVT, 16> vtparts;
           ComputeValueVTs(*this, retTy, vtparts);
           unsigned totalsz = 0;
-          for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+          for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
             unsigned elems = 1;
             EVT elemtype = vtparts[i];
             if (vtparts[i].isVector()) {
               elems = vtparts[i].getVectorNumElements();
               elemtype = vtparts[i].getVectorElementType();
             }
-            for (unsigned j=0, je=elems; j!=je; ++j) {
+            for (unsigned j = 0, je = elems; j != je; ++j) {
               unsigned sz = elemtype.getSizeInBits();
-              if (elemtype.isInteger() && (sz < 8)) sz = 8;
-              totalsz += sz/8;
+              if (elemtype.isInteger() && (sz < 8))
+                sz = 8;
+              totalsz += sz / 8;
             }
           }
-          O << ".param .align "
-              << retAlignment
-              << " .b8 _["
-              << totalsz << "]";
-        }
-        else {
-          assert(false &&
-                 "Unknown return type");
+          O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
+        } else {
+          assert(false && "Unknown return type");
         }
       }
-    }
-    else {
+    } else {
       SmallVector<EVT, 16> vtparts;
       ComputeValueVTs(*this, retTy, vtparts);
       unsigned idx = 0;
-      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
         unsigned elems = 1;
         EVT elemtype = vtparts[i];
         if (vtparts[i].isVector()) {
@@ -322,14 +347,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
           elemtype = vtparts[i].getVectorElementType();
         }
 
-        for (unsigned j=0, je=elems; j!=je; ++j) {
+        for (unsigned j = 0, je = elems; j != je; ++j) {
           unsigned sz = elemtype.getSizeInBits();
-          if (elemtype.isInteger() && (sz < 32)) sz = 32;
+          if (elemtype.isInteger() && (sz < 32))
+            sz = 32;
           O << ".reg .b" << sz << " _";
-          if (j<je-1) O << ", ";
+          if (j < je - 1)
+            O << ", ";
           ++idx;
         }
-        if (i < e-1)
+        if (i < e - 1)
           O << ", ";
       }
     }
@@ -340,7 +367,7 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
   bool first = true;
   MVT thePointerTy = getPointerTy();
 
-  for (unsigned i=0,e=Args.size(); i!=e; ++i) {
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
     const Type *Ty = Args[i].Ty;
     if (!first) {
       O << ", ";
@@ -351,9 +378,9 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
       unsigned sz = 0;
       if (isa<IntegerType>(Ty)) {
         sz = cast<IntegerType>(Ty)->getBitWidth();
-        if (sz < 32) sz = 32;
-      }
-      else if (isa<PointerType>(Ty))
+        if (sz < 32)
+          sz = 32;
+      } else if (isa<PointerType>(Ty))
         sz = thePointerTy.getSizeInBits();
       else
         sz = Ty->getPrimitiveSizeInBits();
@@ -365,23 +392,20 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
       continue;
     }
     const PointerType *PTy = dyn_cast<PointerType>(Ty);
-    assert(PTy &&
-           "Param with byval attribute should be a pointer type");
+    assert(PTy && "Param with byval attribute should be a pointer type");
     Type *ETy = PTy->getElementType();
 
     if (isABI) {
       unsigned align = Outs[i].Flags.getByValAlign();
       unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
-      O << ".param .align " << align
-          << " .b8 ";
+      O << ".param .align " << align << " .b8 ";
       O << "_";
       O << "[" << sz << "]";
       continue;
-    }
-    else {
+    } else {
       SmallVector<EVT, 16> vtparts;
       ComputeValueVTs(*this, ETy, vtparts);
-      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
         unsigned elems = 1;
         EVT elemtype = vtparts[i];
         if (vtparts[i].isVector()) {
@@ -389,14 +413,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
           elemtype = vtparts[i].getVectorElementType();
         }
 
-        for (unsigned j=0,je=elems; j!=je; ++j) {
+        for (unsigned j = 0, je = elems; j != je; ++j) {
           unsigned sz = elemtype.getSizeInBits();
-          if (elemtype.isInteger() && (sz < 32)) sz = 32;
+          if (elemtype.isInteger() && (sz < 32))
+            sz = 32;
           O << ".reg .b" << sz << " ";
           O << "_";
-          if (j<je-1) O << ", ";
+          if (j < je - 1)
+            O << ", ";
         }
-        if (i<e-1)
+        if (i < e - 1)
           O << ", ";
       }
       continue;
@@ -406,27 +432,25 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
   return O.str();
 }
 
-
-SDValue
-NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
-                               SmallVectorImpl<SDValue> &InVals) const {
-  SelectionDAG &DAG                     = CLI.DAG;
-  DebugLoc &dl                          = CLI.DL;
+SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                                       SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  DebugLoc &dl = CLI.DL;
   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
-  SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
-  SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
-  SDValue Chain                         = CLI.Chain;
-  SDValue Callee                        = CLI.Callee;
-  bool &isTailCall                      = CLI.IsTailCall;
-  ArgListTy &Args                       = CLI.Args;
-  Type *retTy                           = CLI.RetTy;
-  ImmutableCallSite *CS                 = CLI.CS;
+  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  bool &isTailCall = CLI.IsTailCall;
+  ArgListTy &Args = CLI.Args;
+  Type *retTy = CLI.RetTy;
+  ImmutableCallSite *CS = CLI.CS;
 
   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
 
   SDValue tempChain = Chain;
-  Chain = DAG.getCALLSEQ_START(Chain,
-                               DAG.getIntPtrConstant(uniqueCallSite, true));
+  Chain =
+      DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true));
   SDValue InFlag = Chain.getValue(1);
 
   assert((Outs.size() == Args.size()) &&
@@ -434,7 +458,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   unsigned paramCount = 0;
   // Declare the .params or .reg need to pass values
   // to the function
-  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
     EVT VT = Outs[i].VT;
 
     if (Outs[i].Flags.isByVal() == false) {
@@ -445,19 +469,20 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       if (isABI)
         isReg = 0;
       unsigned sz = VT.getSizeInBits();
-      if (VT.isInteger() && (sz < 32)) sz = 32;
+      if (VT.isInteger() && (sz < 32))
+        sz = 32;
       SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
       SDValue DeclareParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(sz, MVT::i32),
-                                    DAG.getConstant(isReg, MVT::i32),
-                                    InFlag };
+                                    DAG.getConstant(isReg, MVT::i32), InFlag };
       Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                           DeclareParamOps, 5);
       InFlag = Chain.getValue(1);
       SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
       SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
-                             DAG.getConstant(0, MVT::i32), OutVals[i], InFlag };
+                                 DAG.getConstant(0, MVT::i32), OutVals[i],
+                                 InFlag };
 
       unsigned opcode = NVPTXISD::StoreParam;
       if (isReg)
@@ -477,8 +502,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // struct or vector
     SmallVector<EVT, 16> vtparts;
     const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
-    assert(PTy &&
-           "Type of a byval parameter should be pointer");
+    assert(PTy && "Type of a byval parameter should be pointer");
     ComputeValueVTs(*this, PTy->getElementType(), vtparts);
 
     if (isABI) {
@@ -488,40 +512,41 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       // The ByValAlign in the Outs[i].Flags is alway set at this point, so we
       // don't need to
       // worry about natural alignment or not. See TargetLowering::LowerCallTo()
-      SDValue DeclareParamOps[] = { Chain,
-                       DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
-                                    DAG.getConstant(paramCount, MVT::i32),
-                                    DAG.getConstant(sz, MVT::i32),
-                                    InFlag };
+      SDValue DeclareParamOps[] = {
+        Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
+        DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
+        InFlag
+      };
       Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                           DeclareParamOps, 5);
       InFlag = Chain.getValue(1);
       unsigned curOffset = 0;
-      for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+      for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
         unsigned elems = 1;
         EVT elemtype = vtparts[j];
         if (vtparts[j].isVector()) {
           elems = vtparts[j].getVectorNumElements();
           elemtype = vtparts[j].getVectorElementType();
         }
-        for (unsigned k=0,ke=elems; k!=ke; ++k) {
+        for (unsigned k = 0, ke = elems; k != ke; ++k) {
           unsigned sz = elemtype.getSizeInBits();
-          if (elemtype.isInteger() && (sz < 8)) sz = 8;
-          SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
-                                        OutVals[i],
-                                        DAG.getConstant(curOffset,
-                                                        getPointerTy()));
-          SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
-                                MachinePointerInfo(), false, false, false, 0);
+          if (elemtype.isInteger() && (sz < 8))
+            sz = 8;
+          SDValue srcAddr =
+              DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+                          DAG.getConstant(curOffset, getPointerTy()));
+          SDValue theVal =
+              DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+                          MachinePointerInfo(), false, false, false, 0);
           SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-          SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
-                                                            MVT::i32),
-                                           DAG.getConstant(curOffset, MVT::i32),
-                                                            theVal, InFlag };
+          SDValue CopyParamOps[] = { Chain,
+                                     DAG.getConstant(paramCount, MVT::i32),
+                                     DAG.getConstant(curOffset, MVT::i32),
+                                     theVal, InFlag };
           Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                               CopyParamOps, 5);
           InFlag = Chain.getValue(1);
-          curOffset += sz/8;
+          curOffset += sz / 8;
         }
       }
       ++paramCount;
@@ -530,30 +555,31 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // Non-abi, struct or vector
     // Declare a bunch or .reg .b<size> .param<n>
     unsigned curOffset = 0;
-    for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
       unsigned elems = 1;
       EVT elemtype = vtparts[j];
       if (vtparts[j].isVector()) {
         elems = vtparts[j].getVectorNumElements();
         elemtype = vtparts[j].getVectorElementType();
       }
-      for (unsigned k=0,ke=elems; k!=ke; ++k) {
+      for (unsigned k = 0, ke = elems; k != ke; ++k) {
         unsigned sz = elemtype.getSizeInBits();
-        if (elemtype.isInteger() && (sz < 32)) sz = 32;
+        if (elemtype.isInteger() && (sz < 32))
+          sz = 32;
         SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
-                                                             MVT::i32),
-                                                  DAG.getConstant(sz, MVT::i32),
-                                                   DAG.getConstant(1, MVT::i32),
-                                                             InFlag };
+        SDValue DeclareParamOps[] = { Chain,
+                                      DAG.getConstant(paramCount, MVT::i32),
+                                      DAG.getConstant(sz, MVT::i32),
+                                      DAG.getConstant(1, MVT::i32), InFlag };
         Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                             DeclareParamOps, 5);
         InFlag = Chain.getValue(1);
-        SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
-                                      DAG.getConstant(curOffset,
-                                                      getPointerTy()));
-        SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
-                                  MachinePointerInfo(), false, false, false, 0);
+        SDValue srcAddr =
+            DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+                        DAG.getConstant(curOffset, getPointerTy()));
+        SDValue theVal =
+            DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(),
+                        false, false, false, 0);
         SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
         SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), theVal,
@@ -578,20 +604,21 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
     // individual .reg .b<size> func_retval<0..> for non ABI
     unsigned resultsz = 0;
-    for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
+    for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
       unsigned elems = 1;
       EVT elemtype = resvtparts[i];
       if (resvtparts[i].isVector()) {
         elems = resvtparts[i].getVectorNumElements();
         elemtype = resvtparts[i].getVectorElementType();
       }
-      for (unsigned j=0,je=elems; j!=je; ++j) {
+      for (unsigned j = 0, je = elems; j != je; ++j) {
         unsigned sz = elemtype.getSizeInBits();
         if (isABI == false) {
-          if (elemtype.isInteger() && (sz < 32)) sz = 32;
-        }
-        else {
-          if (elemtype.isInteger() && (sz < 8)) sz = 8;
+          if (elemtype.isInteger() && (sz < 32))
+            sz = 32;
+        } else {
+          if (elemtype.isInteger() && (sz < 8))
+            sz = 8;
         }
         if (isABI == false) {
           SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -609,7 +636,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     }
     if (isABI) {
       if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
-          retTy->isPointerTy() ) {
+          retTy->isPointerTy()) {
         // Scalar needs to be at least 32bit wide
         if (resultsz < 32)
           resultsz = 32;
@@ -620,8 +647,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                             DeclareRetOps, 5);
         InFlag = Chain.getValue(1);
-      }
-      else {
+      } else {
         if (Func) { // direct call
           if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
             retAlignment = getDataLayout()->getABITypeAlignment(retTy);
@@ -631,10 +657,10 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
             retAlignment = getDataLayout()->getABITypeAlignment(retTy);
         }
         SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
-                                                           MVT::i32),
-                                          DAG.getConstant(resultsz/8, MVT::i32),
-                                         DAG.getConstant(0, MVT::i32), InFlag };
+        SDValue DeclareRetOps[] = { Chain,
+                                    DAG.getConstant(retAlignment, MVT::i32),
+                                    DAG.getConstant(resultsz / 8, MVT::i32),
+                                    DAG.getConstant(0, MVT::i32), InFlag };
         Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                             DeclareRetOps, 5);
         InFlag = Chain.getValue(1);
@@ -652,24 +678,24 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // INLINEASM SDNode.
     SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
     std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
-    const char *asmstr = nvTM->getManagedStrPool()->
-        getManagedString(proto_string.c_str())->c_str();
-    SDValue InlineAsmOps[] = { Chain,
-                               DAG.getTargetExternalSymbol(asmstr,
-                                                           getPointerTy()),
-                                                           DAG.getMDNode(0),
-                                   DAG.getTargetConstant(0, MVT::i32), InFlag };
+    const char *asmstr = nvTM->getManagedStrPool()
+        ->getManagedString(proto_string.c_str())->c_str();
+    SDValue InlineAsmOps[] = {
+      Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()),
+      DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag
+    };
     Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
     InFlag = Chain.getValue(1);
   }
   // Op to just print "call"
   SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue PrintCallOps[] = { Chain,
-                             DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
-                                 : retCount, MVT::i32),
-                                   InFlag };
-  Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl,
-      PrintCallVTs, PrintCallOps, 3);
+  SDValue PrintCallOps[] = {
+    Chain,
+    DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32),
+    InFlag
+  };
+  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
+                      dl, PrintCallVTs, PrintCallOps, 3);
   InFlag = Chain.getValue(1);
 
   // Ops to print out the function name
@@ -685,31 +711,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                       CallArgBeginOps, 2);
   InFlag = Chain.getValue(1);
 
-  for (unsigned i=0, e=paramCount; i!=e; ++i) {
+  for (unsigned i = 0, e = paramCount; i != e; ++i) {
     unsigned opcode;
-    if (i==(e-1))
+    if (i == (e - 1))
       opcode = NVPTXISD::LastCallArg;
     else
       opcode = NVPTXISD::CallArg;
     SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
     SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
-                             DAG.getConstant(i, MVT::i32),
-                             InFlag };
+                             DAG.getConstant(i, MVT::i32), InFlag };
     Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
     InFlag = Chain.getValue(1);
   }
   SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue CallArgEndOps[] = { Chain,
-                              DAG.getConstant(Func ? 1 : 0, MVT::i32),
+  SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
                               InFlag };
-  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
-                      3);
+  Chain =
+      DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3);
   InFlag = Chain.getValue(1);
 
   if (!Func) {
     SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-    SDValue PrototypeOps[] = { Chain,
-                               DAG.getConstant(uniqueCallSite, MVT::i32),
+    SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
                                InFlag };
     Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
     InFlag = Chain.getValue(1);
@@ -719,32 +742,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   if (Ins.size() > 0) {
     if (isABI) {
       unsigned resoffset = 0;
-      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
         unsigned sz = Ins[i].VT.getSizeInBits();
-        if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
+        if (Ins[i].VT.isInteger() && (sz < 8))
+          sz = 8;
         EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
-        SDValue LoadRetOps[] = {
-          Chain,
-          DAG.getConstant(1, MVT::i32),
-          DAG.getConstant(resoffset, MVT::i32),
-          InFlag
-        };
+        SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+                                 DAG.getConstant(resoffset, MVT::i32), InFlag };
         SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
                                      LoadRetOps, array_lengthof(LoadRetOps));
         Chain = retval.getValue(1);
         InFlag = retval.getValue(2);
         InVals.push_back(retval);
-        resoffset += sz/8;
+        resoffset += sz / 8;
       }
-    }
-    else {
+    } else {
       SmallVector<EVT, 16> resvtparts;
       ComputeValueVTs(*this, retTy, resvtparts);
 
       assert(Ins.size() == resvtparts.size() &&
              "Unexpected number of return values in non-ABI case");
       unsigned paramNum = 0;
-      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
         assert(EVT(Ins[i].VT) == resvtparts[i] &&
                "Unexpected EVT type in non-ABI case");
         unsigned numelems = 1;
@@ -754,14 +773,11 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
           elemtype = Ins[i].VT.getVectorElementType();
         }
         std::vector<SDValue> tempRetVals;
-        for (unsigned j=0; j<numelems; ++j) {
+        for (unsigned j = 0; j < numelems; ++j) {
           EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
-          SDValue MoveRetOps[] = {
-            Chain,
-            DAG.getConstant(0, MVT::i32),
-            DAG.getConstant(paramNum, MVT::i32),
-            InFlag
-          };
+          SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32),
+                                   DAG.getConstant(paramNum, MVT::i32),
+                                   InFlag };
           SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
                                        MoveRetOps, array_lengthof(MoveRetOps));
           Chain = retval.getValue(1);
@@ -777,9 +793,8 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       }
     }
   }
-  Chain = DAG.getCALLSEQ_END(Chain,
-                             DAG.getIntPtrConstant(uniqueCallSite, true),
-                             DAG.getIntPtrConstant(uniqueCallSite+1, true),
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
+                             DAG.getIntPtrConstant(uniqueCallSite + 1, true),
                              InFlag);
   uniqueCallSite++;
 
@@ -792,45 +807,51 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
 // (see LegalizeDAG.cpp). This is slow and uses local memory.
 // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
-SDValue NVPTXTargetLowering::
-LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+SDValue
+NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   DebugLoc dl = Node->getDebugLoc();
   SmallVector<SDValue, 8> Ops;
   unsigned NumOperands = Node->getNumOperands();
-  for (unsigned i=0; i < NumOperands; ++i) {
+  for (unsigned i = 0; i < NumOperands; ++i) {
     SDValue SubOp = Node->getOperand(i);
     EVT VVT = SubOp.getNode()->getValueType(0);
     EVT EltVT = VVT.getVectorElementType();
     unsigned NumSubElem = VVT.getVectorNumElements();
-    for (unsigned j=0; j < NumSubElem; ++j) {
+    for (unsigned j = 0; j < NumSubElem; ++j) {
       Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
                                 DAG.getIntPtrConstant(j)));
     }
   }
-  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
-                     &Ops[0], Ops.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0],
+                     Ops.size());
 }
 
-SDValue NVPTXTargetLowering::
-LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+SDValue
+NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
-  case ISD::RETURNADDR: return SDValue();
-  case ISD::FRAMEADDR:  return SDValue();
-  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
-  case ISD::INTRINSIC_W_CHAIN: return Op;
+  case ISD::RETURNADDR:
+    return SDValue();
+  case ISD::FRAMEADDR:
+    return SDValue();
+  case ISD::GlobalAddress:
+    return LowerGlobalAddress(Op, DAG);
+  case ISD::INTRINSIC_W_CHAIN:
+    return Op;
   case ISD::BUILD_VECTOR:
   case ISD::EXTRACT_SUBVECTOR:
     return Op;
-  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
-  case ISD::STORE: return LowerSTORE(Op, DAG);
-  case ISD::LOAD: return LowerLOAD(Op, DAG);
+  case ISD::CONCAT_VECTORS:
+    return LowerCONCAT_VECTORS(Op, DAG);
+  case ISD::STORE:
+    return LowerSTORE(Op, DAG);
+  case ISD::LOAD:
+    return LowerLOAD(Op, DAG);
   default:
     llvm_unreachable("Custom lowering not defined for operation");
   }
 }
 
-
 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   if (Op.getValueType() == MVT::i1)
     return LowerLOADi1(Op, DAG);
@@ -842,24 +863,22 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 //   =>
 // v1 = ld i8* addr
 // v = trunc v1 to i1
-SDValue NVPTXTargetLowering::
-LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   LoadSDNode *LD = cast<LoadSDNode>(Node);
   DebugLoc dl = Node->getDebugLoc();
-  assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ;
+  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
   assert(Node->getValueType(0) == MVT::i1 &&
          "Custom lowering for i1 load only");
-  SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
-                              LD->getPointerInfo(),
-                              LD->isVolatile(), LD->isNonTemporal(),
-                              LD->isInvariant(),
-                              LD->getAlignment());
+  SDValue newLD =
+      DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
+                  LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
+                  LD->isInvariant(), LD->getAlignment());
   SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
   // The legalizer (the caller) is expecting two values from the legalized
   // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
   // in LegalizeDAG.cpp which also uses MergeValues.
-  SDValue Ops[] = {result, LD->getChain()};
+  SDValue Ops[] = { result, LD->getChain() };
   return DAG.getMergeValues(Ops, 2, dl);
 }
 
@@ -887,7 +906,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
     if (!ValVT.isSimple())
       return SDValue();
     switch (ValVT.getSimpleVT().SimpleTy) {
-    default: return SDValue();
+    default:
+      return SDValue();
     case MVT::v2i8:
     case MVT::v2i16:
     case MVT::v2i32:
@@ -914,7 +934,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
       NeedExt = true;
 
     switch (NumElts) {
-    default:  return SDValue();
+    default:
+      return SDValue();
     case 2:
       Opcode = NVPTXISD::StoreV2;
       break;
@@ -947,11 +968,9 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
 
     MemSDNode *MemSD = cast<MemSDNode>(N);
 
-    SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL,
-                                            DAG.getVTList(MVT::Other), &Ops[0],
-                                            Ops.size(), MemSD->getMemoryVT(),
-                                            MemSD->getMemOperand());
-
+    SDValue NewSt = DAG.getMemIntrinsicNode(
+        Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(),
+        MemSD->getMemoryVT(), MemSD->getMemOperand());
 
     //return DCI.CombineTo(N, NewSt, true);
     return NewSt;
@@ -964,8 +983,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
 //    =>
 // v1 = zxt v to i8
 // st i8, addr
-SDValue NVPTXTargetLowering::
-LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   DebugLoc dl = Node->getDebugLoc();
   StoreSDNode *ST = cast<StoreSDNode>(Node);
@@ -976,18 +994,14 @@ LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
   unsigned Alignment = ST->getAlignment();
   bool isVolatile = ST->isVolatile();
   bool isNonTemporal = ST->isNonTemporal();
-  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
-                     MVT::i8, Tmp3);
-  SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
-                                ST->getPointerInfo(), isVolatile,
-                                isNonTemporal, Alignment);
+  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
+  SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+                                isVolatile, isNonTemporal, Alignment);
   return Result;
 }
 
-
-SDValue
-NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
-                                EVT v) const {
+SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
+                                        int idx, EVT v) const {
   std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
   std::stringstream suffix;
   suffix << idx;
@@ -1000,19 +1014,16 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
   return getExtSymb(DAG, ".PARAM", idx, v);
 }
 
-SDValue
-NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
+SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
   return getExtSymb(DAG, ".HLPPARAM", idx);
 }
 
 // Check to see if the kernel argument is image*_t or sampler_t
 
 bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
-  static const char *const specialTypes[] = {
-                                             "struct._image2d_t",
-                                             "struct._image3d_t",
-                                             "struct._sampler_t"
-  };
+  static const char *const specialTypes[] = { "struct._image2d_t",
+                                              "struct._image3d_t",
+                                              "struct._sampler_t" };
 
   const Type *Ty = arg->getType();
   const PointerType *PTy = dyn_cast<PointerType>(Ty);
@@ -1033,12 +1044,10 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
   return false;
 }
 
-SDValue
-NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
-                                        CallingConv::ID CallConv, bool isVarArg,
-                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                                          DebugLoc dl, SelectionDAG &DAG,
-                                       SmallVectorImpl<SDValue> &InVals) const {
+SDValue NVPTXTargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
   const DataLayout *TD = getDataLayout();
 
@@ -1054,34 +1063,43 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
   std::vector<Type *> argTypes;
   std::vector<const Argument *> theArgs;
   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
-      I != E; ++I) {
+       I != E; ++I) {
     theArgs.push_back(I);
     argTypes.push_back(I->getType());
   }
-  assert(argTypes.size() == Ins.size() &&
-         "Ins types and function types did not match");
+  //assert(argTypes.size() == Ins.size() &&
+  //       "Ins types and function types did not match");
 
   int idx = 0;
-  for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
+  for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) {
     Type *Ty = argTypes[i];
     EVT ObjectVT = getValueType(Ty);
-    assert(ObjectVT == Ins[i].VT &&
-           "Ins type did not match function type");
+    //assert(ObjectVT == Ins[i].VT &&
+    //       "Ins type did not match function type");
 
     // If the kernel argument is image*_t or sampler_t, convert it to
     // a i32 constant holding the parameter position. This can later
     // matched in the AsmPrinter to output the correct mangled name.
-    if (isImageOrSamplerVal(theArgs[i],
-                           (theArgs[i]->getParent() ?
-                               theArgs[i]->getParent()->getParent() : 0))) {
+    if (isImageOrSamplerVal(
+            theArgs[i],
+            (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
+                                     : 0))) {
       assert(isKernel && "Only kernels can have image/sampler params");
-      InVals.push_back(DAG.getConstant(i+1, MVT::i32));
+      InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
       continue;
     }
 
     if (theArgs[i]->use_empty()) {
       // argument is dead
-      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+      if (ObjectVT.isVector()) {
+        EVT EltVT = ObjectVT.getVectorElementType();
+        unsigned NumElts = ObjectVT.getVectorNumElements();
+        for (unsigned vi = 0; vi < NumElts; ++vi) {
+          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT));
+        }
+      } else {
+        InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+      }
       continue;
     }
 
@@ -1089,31 +1107,52 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
     // to newly created nodes. The SDNOdes for params have to
     // appear in the same order as their order of appearance
     // in the original function. "idx+1" holds that order.
-    if (PAL.hasAttribute(i+1, Attribute::ByVal) == false) {
+    if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
+      if (ObjectVT.isVector()) {
+        unsigned NumElts = ObjectVT.getVectorNumElements();
+        EVT EltVT = ObjectVT.getVectorElementType();
+        unsigned Offset = 0;
+        for (unsigned vi = 0; vi < NumElts; ++vi) {
+          SDValue A = getParamSymbol(DAG, idx, getPointerTy());
+          SDValue B = DAG.getIntPtrConstant(Offset);
+          SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                                     //getParamSymbol(DAG, idx, EltVT),
+                                     //DAG.getConstant(Offset, getPointerTy()));
+                                     A, B);
+          Value *SrcValue = Constant::getNullValue(PointerType::get(
+              EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
+          SDValue Ld = DAG.getLoad(
+              EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false,
+              false,
+              TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
+          Offset += EltVT.getStoreSizeInBits() / 8;
+          InVals.push_back(Ld);
+        }
+        continue;
+      }
+
       // A plain scalar.
       if (isABI || isKernel) {
         // If ABI, load from the param symbol
         SDValue Arg = getParamSymbol(DAG, idx);
         // Conjure up a value that we can get the address space from.
         // FIXME: Using a constant here is a hack.
-        Value *srcValue = Constant::getNullValue(PointerType::get(
-                              ObjectVT.getTypeForEVT(F->getContext()),
-                              llvm::ADDRESS_SPACE_PARAM));
-        SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
-                                MachinePointerInfo(srcValue), false, false,
-                                false,
-                                TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
-                                  F->getContext())));
+        Value *srcValue = Constant::getNullValue(
+            PointerType::get(ObjectVT.getTypeForEVT(F->getContext()),
+                             llvm::ADDRESS_SPACE_PARAM));
+        SDValue p = DAG.getLoad(
+            ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false,
+            false,
+            TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
         if (p.getNode())
-          DAG.AssignOrdering(p.getNode(), idx+1);
+          DAG.AssignOrdering(p.getNode(), idx + 1);
         InVals.push_back(p);
-      }
-      else {
+      } else {
         // If no ABI, just move the param symbol
         SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
         SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
         if (p.getNode())
-          DAG.AssignOrdering(p.getNode(), idx+1);
+          DAG.AssignOrdering(p.getNode(), idx + 1);
         InVals.push_back(p);
       }
       continue;
@@ -1130,47 +1169,49 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
       SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
       SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
       if (p.getNode())
-        DAG.AssignOrdering(p.getNode(), idx+1);
+        DAG.AssignOrdering(p.getNode(), idx + 1);
       if (isKernel)
         InVals.push_back(p);
       else {
-        SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
-                    DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
-                                 p);
+        SDValue p2 = DAG.getNode(
+            ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
+            DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
         InVals.push_back(p2);
       }
     } else {
       // Have to move a set of param symbols to registers and
       // store them locally and return the local pointer in InVals
       const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
-      assert(elemPtrType &&
-             "Byval parameter should be a pointer type");
+      assert(elemPtrType && "Byval parameter should be a pointer type");
       Type *elemType = elemPtrType->getElementType();
       // Compute the constituent parts
       SmallVector<EVT, 16> vtparts;
       SmallVector<uint64_t, 16> offsets;
       ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
       unsigned totalsize = 0;
-      for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
+      for (unsigned j = 0, je = vtparts.size(); j != je; ++j)
         totalsize += vtparts[j].getStoreSizeInBits();
-      SDValue localcopy =  DAG.getFrameIndex(MF.getFrameInfo()->
-                                      CreateStackObject(totalsize/8, 16, false),
-                                             getPointerTy());
+      SDValue localcopy = DAG.getFrameIndex(
+          MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false),
+          getPointerTy());
       unsigned sizesofar = 0;
       std::vector<SDValue> theChains;
-      for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
+      for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
         unsigned numElems = 1;
-        if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
-        for (unsigned k=0, ke=numElems; k!=ke; ++k) {
+        if (vtparts[j].isVector())
+          numElems = vtparts[j].getVectorNumElements();
+        for (unsigned k = 0, ke = numElems; k != ke; ++k) {
           EVT tmpvt = vtparts[j];
-          if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
+          if (tmpvt.isVector())
+            tmpvt = tmpvt.getVectorElementType();
           SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
                                     getParamSymbol(DAG, idx, tmpvt));
-          SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
-                                    DAG.getConstant(sizesofar, getPointerTy()));
-          theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
-                                        MachinePointerInfo(), false, false, 0));
-          sizesofar += tmpvt.getStoreSizeInBits()/8;
+          SDValue addr =
+              DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
+                          DAG.getConstant(sizesofar, getPointerTy()));
+          theChains.push_back(DAG.getStore(
+              Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0));
+          sizesofar += tmpvt.getStoreSizeInBits() / 8;
           ++idx;
         }
       }
@@ -1190,43 +1231,42 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
   //}
 
   if (!OutChains.empty())
-    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                            &OutChains[0], OutChains.size()));
+    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0],
+                            OutChains.size()));
 
   return Chain;
 }
 
-SDValue
-NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
-                                 bool isVarArg,
-                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                 const SmallVectorImpl<SDValue> &OutVals,
-                                 DebugLoc dl, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerReturn(
+    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+    SelectionDAG &DAG) const {
 
   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
 
   unsigned sizesofar = 0;
   unsigned idx = 0;
-  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
     SDValue theVal = OutVals[i];
     EVT theValType = theVal.getValueType();
     unsigned numElems = 1;
-    if (theValType.isVector()) numElems = theValType.getVectorNumElements();
-    for (unsigned j=0,je=numElems; j!=je; ++j) {
+    if (theValType.isVector())
+      numElems = theValType.getVectorNumElements();
+    for (unsigned j = 0, je = numElems; j != je; ++j) {
       SDValue tmpval = theVal;
       if (theValType.isVector())
         tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                             theValType.getVectorElementType(),
-                             tmpval, DAG.getIntPtrConstant(j));
-      Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval,
-          dl, MVT::Other,
-          Chain,
-          DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
+                             theValType.getVectorElementType(), tmpval,
+                             DAG.getIntPtrConstant(j));
+      Chain = DAG.getNode(
+          isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl,
+          MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
           tmpval);
       if (theValType.isVector())
-        sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
+        sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8;
       else
-        sizesofar += theValType.getStoreSizeInBits()/8;
+        sizesofar += theValType.getStoreSizeInBits() / 8;
       ++idx;
     }
   }
@@ -1234,12 +1274,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
 }
 
-void
-NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
-                                                  std::string &Constraint,
-                                                  std::vector<SDValue> &Ops,
-                                                  SelectionDAG &DAG) const
-{
+void NVPTXTargetLowering::LowerAsmOperandForConstraint(
+    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+    SelectionDAG &DAG) const {
   if (Constraint.length() > 1)
     return;
   else
@@ -1249,8 +1286,7 @@ NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 // NVPTX suuport vector of legal types of any length in Intrinsics because the
 // NVPTX specific type legalizer
 // will legalize them to the PTX supported length.
-bool
-NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
+bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
   if (isTypeLegal(VT))
     return true;
   if (VT.isVector()) {
@@ -1261,15 +1297,13 @@ NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
   return false;
 }
 
-
 // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
 // TgtMemIntrinsic
 // because we need the information that is only available in the "Value" type
 // of destination
 // pointer. In particular, the address space information.
-bool
-NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
-                                        unsigned Intrinsic) const {
+bool NVPTXTargetLowering::getTgtMemIntrinsic(
+    IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
   switch (Intrinsic) {
   default:
     return false;
@@ -1325,9 +1359,8 @@ NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
 /// Used to guide target specific optimizations, like loop strength reduction
 /// (LoopStrengthReduce.cpp) and memory optimization for address mode
 /// (CodeGenPrepare.cpp)
-bool
-NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
-                                           Type *Ty) const {
+bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                                Type *Ty) const {
 
   // AddrMode - This represents an addressing mode of:
   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
@@ -1345,10 +1378,10 @@ NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
   }
 
   switch (AM.Scale) {
-  case 0:  // "r", "r+i" or "i" is allowed
+  case 0: // "r", "r+i" or "i" is allowed
     break;
   case 1:
-    if (AM.HasBaseReg)  // "r+r+i" or "r+r" is not allowed.
+    if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
       return false;
     // Otherwise we have r+i.
     break;
@@ -1385,8 +1418,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
   return TargetLowering::getConstraintType(Constraint);
 }
 
-
-std::pair<unsigned, const TargetRegisterClass*>
+std::pair<unsigned, const TargetRegisterClass *>
 NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                   EVT VT) const {
   if (Constraint.size() == 1) {
@@ -1409,8 +1441,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 }
 
-
-
 /// getFunctionAlignment - Return the Log2 alignment of this function.
 unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
   return 4;
@@ -1418,7 +1448,7 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
 
 /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
 static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
-                              SmallVectorImpl<SDValue>& Results) {
+                              SmallVectorImpl<SDValue> &Results) {
   EVT ResVT = N->getValueType(0);
   DebugLoc DL = N->getDebugLoc();
 
@@ -1429,7 +1459,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
   // but I'm leaving that as a TODO for now.
   assert(ResVT.isSimple() && "Can only handle simple types");
   switch (ResVT.getSimpleVT().SimpleTy) {
-  default: return;
+  default:
+    return;
   case MVT::v2i8:
   case MVT::v2i16:
   case MVT::v2i32:
@@ -1460,7 +1491,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
   SDVTList LdResVTs;
 
   switch (NumElts) {
-  default:  return;
+  default:
+    return;
   case 2:
     Opcode = NVPTXISD::LoadV2;
     LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
@@ -1500,14 +1532,14 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
 
   SDValue LoadChain = NewLD.getValue(NumElts);
 
-  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+  SDValue BuildVec =
+      DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
 
   Results.push_back(BuildVec);
   Results.push_back(LoadChain);
 }
 
-static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
-                                     SelectionDAG &DAG,
+static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &Results) {
   SDValue Chain = N->getOperand(0);
   SDValue Intrin = N->getOperand(1);
@@ -1515,8 +1547,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
 
   // Get the intrinsic ID
   unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
-  switch(IntrinNo) {
-  default: return;
+  switch (IntrinNo) {
+  default:
+    return;
   case Intrinsic::nvvm_ldg_global_i:
   case Intrinsic::nvvm_ldg_global_f:
   case Intrinsic::nvvm_ldg_global_p:
@@ -1544,10 +1577,12 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
       SDVTList LdResVTs;
 
       switch (NumElts) {
-      default:  return;
+      default:
+        return;
       case 2:
-        switch(IntrinNo) {
-        default: return;
+        switch (IntrinNo) {
+        default:
+          return;
         case Intrinsic::nvvm_ldg_global_i:
         case Intrinsic::nvvm_ldg_global_f:
         case Intrinsic::nvvm_ldg_global_p:
@@ -1562,8 +1597,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
         LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
         break;
       case 4: {
-        switch(IntrinNo) {
-        default: return;
+        switch (IntrinNo) {
+        default:
+          return;
         case Intrinsic::nvvm_ldg_global_i:
         case Intrinsic::nvvm_ldg_global_f:
         case Intrinsic::nvvm_ldg_global_p:
@@ -1586,29 +1622,31 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
       // Copy regular operands
 
       OtherOps.push_back(Chain); // Chain
-      // Skip operand 1 (intrinsic ID)
-      // Others
+                                 // Skip operand 1 (intrinsic ID)
+                                 // Others
       for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
         OtherOps.push_back(N->getOperand(i));
 
       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
 
-      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
-                                              OtherOps.size(), MemSD->getMemoryVT(),
-                                              MemSD->getMemOperand());
+      SDValue NewLD = DAG.getMemIntrinsicNode(
+          Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(),
+          MemSD->getMemoryVT(), MemSD->getMemOperand());
 
       SmallVector<SDValue, 4> ScalarRes;
 
       for (unsigned i = 0; i < NumElts; ++i) {
         SDValue Res = NewLD.getValue(i);
         if (NeedTrunc)
-          Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+          Res =
+              DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
         ScalarRes.push_back(Res);
       }
 
       SDValue LoadChain = NewLD.getValue(NumElts);
 
-      SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+      SDValue BuildVec =
+          DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
 
       Results.push_back(BuildVec);
       Results.push_back(LoadChain);
@@ -1629,10 +1667,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
 
       // We make sure the memory type is i8, which will be used during isel
       // to select the proper instruction.
-      SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
-                                              LdResVTs, &Ops[0],
-                                              Ops.size(), MVT::i8,
-                                              MemSD->getMemOperand());
+      SDValue NewLD =
+          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0],
+                                  Ops.size(), MVT::i8, MemSD->getMemOperand());
 
       Results.push_back(NewLD.getValue(0));
       Results.push_back(NewLD.getValue(1));
@@ -1641,11 +1678,11 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
   }
 }
 
-void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N,
-                                             SmallVectorImpl<SDValue> &Results,
-                                             SelectionDAG &DAG) const {
+void NVPTXTargetLowering::ReplaceNodeResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
   switch (N->getOpcode()) {
-  default: report_fatal_error("Unhandled custom legalization");
+  default:
+    report_fatal_error("Unhandled custom legalization");
   case ISD::LOAD:
     ReplaceLoadVector(N, DAG, Results);
     return;
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 14afc148cbbd..3cd49d38af76 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -87,7 +87,7 @@ class NVPTXTargetLowering : public TargetLowering {
 
   bool isTypeSupportedInIntrinsic(MVT VT) const;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                           unsigned Intrinsic) const;
 
   /// isLegalAddressingMode - Return true if the addressing mode represented
@@ -107,14 +107,13 @@ class NVPTXTargetLowering : public TargetLowering {
   }
 
   ConstraintType getConstraintType(const std::string &Constraint) const;
-  std::pair<unsigned, const TargetRegisterClass*>
+  std::pair<unsigned, const TargetRegisterClass *>
   getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
 
-  virtual SDValue
-  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
-                       const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
-                       SelectionDAG &DAG,
-                       SmallVectorImpl<SDValue> &InVals) const;
+  virtual SDValue LowerFormalArguments(
+      SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+      const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+      SmallVectorImpl<SDValue> &InVals) const;
 
   virtual SDValue
   LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const;
@@ -136,17 +135,15 @@ class NVPTXTargetLowering : public TargetLowering {
   NVPTXTargetMachine *nvTM;
 
   // PTX always uses 32-bit shift amounts
-  virtual MVT getScalarShiftAmountTy(EVT LHSTy) const {
-    return MVT::i32;
-  }
+  virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
 
   virtual bool shouldSplitVectorElementType(EVT VT) const;
 
 private:
-  const NVPTXSubtarget &nvptxSubtarget;  // cache the subtarget here
+  const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
 
-  SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
-                         MVT::i32) const;
+  SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
+                     EVT = MVT::i32) const;
   SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
   SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
 
@@ -159,8 +156,7 @@ class NVPTXTargetLowering : public TargetLowering {
   SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
 
-  virtual void ReplaceNodeResults(SDNode *N,
-                                  SmallVectorImpl<SDValue> &Results,
+  virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                   SelectionDAG &DAG) const;
 };
 } // namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 9e73d80c2851..33a63c26f4e2 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -23,61 +23,55 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include <cstdio>
 
-
 using namespace llvm;
 
 // FIXME: Add the subtarget support on this constructor.
 NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
-: NVPTXGenInstrInfo(),
-  TM(tm),
-  RegInfo(*this, *TM.getSubtargetImpl()) {}
-
+    : NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {}
 
-void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator I, DebugLoc DL,
-                                  unsigned DestReg, unsigned SrcReg,
-                                  bool KillSrc) const {
+void NVPTXInstrInfo::copyPhysReg(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+    unsigned DestReg, unsigned SrcReg, bool KillSrc) const {
   if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
       NVPTX::Int32RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
-      NVPTX::Int8RegsRegClass.contains(SrcReg))
+           NVPTX::Int8RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
-      NVPTX::Int1RegsRegClass.contains(SrcReg))
+           NVPTX::Int1RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
-      NVPTX::Float32RegsRegClass.contains(SrcReg))
+           NVPTX::Float32RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
-      NVPTX::Int16RegsRegClass.contains(SrcReg))
+           NVPTX::Int16RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
-      NVPTX::Int64RegsRegClass.contains(SrcReg))
+           NVPTX::Int64RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
-      NVPTX::Float64RegsRegClass.contains(SrcReg))
+           NVPTX::Float64RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else {
     llvm_unreachable("Don't know how to copy a register");
   }
 }
 
-bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
-                                 unsigned &SrcReg,
+bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
                                  unsigned &DestReg) const {
   // Look for the appropriate part of TSFlags
   bool isMove = false;
 
-  unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >>
-      NVPTX::SimpleMoveShift;
+  unsigned TSFlags =
+      (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> NVPTX::SimpleMoveShift;
   isMove = (TSFlags == 1);
 
   if (isMove) {
@@ -94,10 +88,10 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
   return false;
 }
 
-bool  NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
-{
+bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const {
   switch (MI.getOpcode()) {
-  default: return false;
+  default:
+    return false;
   case NVPTX::INT_PTX_SREG_NTID_X:
   case NVPTX::INT_PTX_SREG_NTID_Y:
   case NVPTX::INT_PTX_SREG_NTID_Z:
@@ -115,12 +109,11 @@ bool  NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
   }
 }
 
-
 bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
                                  unsigned &AddrSpace) const {
   bool isLoad = false;
-  unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >>
-      NVPTX::isLoadShift;
+  unsigned TSFlags =
+      (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> NVPTX::isLoadShift;
   isLoad = (TSFlags == 1);
   if (isLoad)
     AddrSpace = getLdStCodeAddrSpace(MI);
@@ -130,15 +123,14 @@ bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
 bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
                                   unsigned &AddrSpace) const {
   bool isStore = false;
-  unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >>
-      NVPTX::isStoreShift;
+  unsigned TSFlags =
+      (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift;
   isStore = (TSFlags == 1);
   if (isStore)
     AddrSpace = getLdStCodeAddrSpace(MI);
   return isStore;
 }
 
-
 bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
   unsigned addrspace = 0;
   if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
@@ -152,7 +144,6 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
   return true;
 }
 
-
 /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
 /// true if it cannot be understood (e.g. it's a switch dispatch or isn't
 /// implemented for a target).  Upon success, this returns false and returns
@@ -176,11 +167,9 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
 /// Note that RemoveBranch and InsertBranch must be implemented to support
 /// cases where this method returns success.
 ///
-bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
-                                   MachineBasicBlock *&TBB,
-                                   MachineBasicBlock *&FBB,
-                                   SmallVectorImpl<MachineOperand> &Cond,
-                                   bool AllowModify) const {
+bool NVPTXInstrInfo::AnalyzeBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+    SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const {
   // If the block has no terminators, it just falls into the block after it.
   MachineBasicBlock::iterator I = MBB.end();
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
@@ -208,14 +197,13 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   MachineInstr *SecondLastInst = I;
 
   // If there are three terminators, we don't know what sort of block this is.
-  if (SecondLastInst && I != MBB.begin() &&
-      isUnpredicatedTerminator(--I))
+  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
     return true;
 
   // If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it.
   if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
       LastInst->getOpcode() == NVPTX::GOTO) {
-    TBB =  SecondLastInst->getOperand(1).getMBB();
+    TBB = SecondLastInst->getOperand(1).getMBB();
     Cond.push_back(SecondLastInst->getOperand(0));
     FBB = LastInst->getOperand(0).getMBB();
     return false;
@@ -238,7 +226,8 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 
 unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) return 0;
+  if (I == MBB.begin())
+    return 0;
   --I;
   if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch)
     return 0;
@@ -248,7 +237,8 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
 
   I = MBB.end();
 
-  if (I == MBB.begin()) return 1;
+  if (I == MBB.begin())
+    return 1;
   --I;
   if (I->getOpcode() != NVPTX::CBranch)
     return 1;
@@ -258,11 +248,9 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   return 2;
 }
 
-unsigned
-NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-                             MachineBasicBlock *FBB,
-                             const SmallVectorImpl<MachineOperand> &Cond,
-                             DebugLoc DL) const {
+unsigned NVPTXInstrInfo::InsertBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -270,17 +258,16 @@ NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
 
   // One-way branch.
   if (FBB == 0) {
-    if (Cond.empty())   // Unconditional branch
+    if (Cond.empty()) // Unconditional branch
       BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
-    else                // Conditional branch
-      BuildMI(&MBB, DL, get(NVPTX::CBranch))
-      .addReg(Cond[0].getReg()).addMBB(TBB);
+    else // Conditional branch
+      BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg())
+          .addMBB(TBB);
     return 1;
   }
 
   // Two-way Conditional Branch.
-  BuildMI(&MBB, DL, get(NVPTX::CBranch))
-  .addReg(Cond[0].getReg()).addMBB(TBB);
+  BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB);
   BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
   return 2;
 }
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 7b8e218b05b6..b1972e9b7254 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -23,8 +23,7 @@
 
 namespace llvm {
 
-class NVPTXInstrInfo : public NVPTXGenInstrInfo
-{
+class NVPTXInstrInfo : public NVPTXGenInstrInfo {
   NVPTXTargetMachine &TM;
   const NVPTXRegisterInfo RegInfo;
 public:
@@ -50,30 +49,26 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo
    *                               const TargetRegisterClass *RC) const;
    */
 
-  virtual void copyPhysReg(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator I, DebugLoc DL,
-                           unsigned DestReg, unsigned SrcReg,
-                           bool KillSrc) const ;
-  virtual bool isMoveInstr(const MachineInstr &MI,
-                           unsigned &SrcReg,
+  virtual void copyPhysReg(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+      unsigned DestReg, unsigned SrcReg, bool KillSrc) const;
+  virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
                            unsigned &DestReg) const;
   bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
   bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
   bool isReadSpecialReg(MachineInstr &MI) const;
 
-  virtual bool CanTailMerge(const MachineInstr *MI) const ;
+  virtual bool CanTailMerge(const MachineInstr *MI) const;
   // Branch analysis.
-  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
-                             MachineBasicBlock *&FBB,
-                             SmallVectorImpl<MachineOperand> &Cond,
-                             bool AllowModify) const;
+  virtual bool AnalyzeBranch(
+      MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+      SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
   virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-  virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
-                                MachineBasicBlock *FBB,
-                                const SmallVectorImpl<MachineOperand> &Cond,
-                                DebugLoc DL) const;
+  virtual unsigned InsertBranch(
+      MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+      const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
   unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
-    return  MI.getOperand(2).getImm();
+    return MI.getOperand(2).getImm();
   }
 
 };
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index f7fa7aa61df5..7c257b4c6a89 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -25,18 +25,15 @@
 
 using namespace llvm;
 
-namespace llvm {
-FunctionPass *createLowerAggrCopies();
-}
+namespace llvm { FunctionPass *createLowerAggrCopies(); }
 
 char NVPTXLowerAggrCopies::ID = 0;
 
 // Lower MemTransferInst or load-store pair to loop
-static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
-                                  Value *dstAddr, Value *len,
-                                  //unsigned numLoads,
-                                  bool srcVolatile, bool dstVolatile,
-                                  LLVMContext &Context, Function &F) {
+static void convertTransferToLoop(
+    Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
+    //unsigned numLoads,
+    bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
   Type *indType = len->getType();
 
   BasicBlock *origBB = splitAt->getParent();
@@ -48,10 +45,8 @@ static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
 
   // srcAddr and dstAddr are expected to be pointer types,
   // so no check is made here.
-  unsigned srcAS =
-      dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
-  unsigned dstAS =
-      dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+  unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
+  unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
 
   // Cast pointers to (char *)
   srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
@@ -86,12 +81,11 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
   origBB->getTerminator()->setSuccessor(0, loopBB);
   IRBuilder<> builder(origBB, origBB->getTerminator());
 
-  unsigned dstAS =
-      dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+  unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
 
   // Cast pointer to the type of value getting stored
-  dstAddr = builder.CreateBitCast(dstAddr,
-                                  PointerType::get(val->getType(), dstAS));
+  dstAddr =
+      builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
 
   IRBuilder<> loop(loopBB);
   PHINode *ind = loop.CreatePHI(len->getType(), 0);
@@ -120,24 +114,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
     //BasicBlock *bb = BI;
     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
-        ++II) {
-      if (LoadInst * load = dyn_cast<LoadInst>(II)) {
+         ++II) {
+      if (LoadInst *load = dyn_cast<LoadInst>(II)) {
 
-        if (load->hasOneUse() == false) continue;
+        if (load->hasOneUse() == false)
+          continue;
 
-        if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue;
+        if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+          continue;
 
         User *use = *(load->use_begin());
-        if (StoreInst * store = dyn_cast<StoreInst>(use)) {
+        if (StoreInst *store = dyn_cast<StoreInst>(use)) {
           if (store->getOperand(0) != load) //getValueOperand
-          continue;
+            continue;
           aggrLoads.push_back(load);
         }
-      } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) {
+      } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
         Value *len = intr->getLength();
         // If the number of elements being copied is greater
         // than MaxAggrCopySize, lower it to a loop
-        if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) {
+        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
           if (len_int->getZExtValue() >= MaxAggrCopySize) {
             aggrMemcpys.push_back(intr);
           }
@@ -145,9 +141,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
           // turn variable length memcpy/memmov into loop
           aggrMemcpys.push_back(intr);
         }
-      } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) {
+      } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
         Value *len = memsetintr->getLength();
-        if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) {
+        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
           if (len_int->getZExtValue() >= MaxAggrCopySize) {
             aggrMemsets.push_back(memsetintr);
           }
@@ -158,8 +154,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
       }
     }
   }
-  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0)
-      && (aggrMemsets.size() == 0)) return false;
+  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
+      (aggrMemsets.size() == 0))
+    return false;
 
   //
   // Do the transformation of an aggr load/copy/set to a loop
diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h
index b4a4dbce98a9..a95c16b1e67e 100644
--- a/lib/Target/NVPTX/NVPTXNumRegisters.h
+++ b/lib/Target/NVPTX/NVPTXNumRegisters.h
@@ -11,10 +11,6 @@
 #ifndef NVPTX_NUM_REGISTERS_H
 #define NVPTX_NUM_REGISTERS_H
 
-namespace llvm {
-
-const unsigned NVPTXNumRegisters = 396;
-
-}
+namespace llvm { const unsigned NVPTXNumRegisters = 396; }
 
 #endif
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 350a2c555158..282465359b07 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -23,69 +23,54 @@
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
-
 using namespace llvm;
 
-namespace llvm
-{
-std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
+namespace llvm {
+std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
   if (RC == &NVPTX::Float32RegsRegClass) {
     return ".f32";
   }
   if (RC == &NVPTX::Float64RegsRegClass) {
     return ".f64";
-  }
-  else if (RC == &NVPTX::Int64RegsRegClass) {
+  } else if (RC == &NVPTX::Int64RegsRegClass) {
     return ".s64";
-  }
-  else if (RC == &NVPTX::Int32RegsRegClass) {
+  } else if (RC == &NVPTX::Int32RegsRegClass) {
     return ".s32";
-  }
-  else if (RC == &NVPTX::Int16RegsRegClass) {
+  } else if (RC == &NVPTX::Int16RegsRegClass) {
     return ".s16";
   }
-  // Int8Regs become 16-bit registers in PTX
-  else if (RC == &NVPTX::Int8RegsRegClass) {
+      // Int8Regs become 16-bit registers in PTX
+      else if (RC == &NVPTX::Int8RegsRegClass) {
     return ".s16";
-  }
-  else if (RC == &NVPTX::Int1RegsRegClass) {
+  } else if (RC == &NVPTX::Int1RegsRegClass) {
     return ".pred";
-  }
-  else if (RC == &NVPTX::SpecialRegsRegClass) {
+  } else if (RC == &NVPTX::SpecialRegsRegClass) {
     return "!Special!";
-  }
-  else {
+  } else {
     return "INTERNAL";
   }
   return "";
 }
 
-std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
+std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
   if (RC == &NVPTX::Float32RegsRegClass) {
     return "%f";
   }
   if (RC == &NVPTX::Float64RegsRegClass) {
     return "%fd";
-  }
-  else if (RC == &NVPTX::Int64RegsRegClass) {
+  } else if (RC == &NVPTX::Int64RegsRegClass) {
     return "%rd";
-  }
-  else if (RC == &NVPTX::Int32RegsRegClass) {
+  } else if (RC == &NVPTX::Int32RegsRegClass) {
     return "%r";
-  }
-  else if (RC == &NVPTX::Int16RegsRegClass) {
+  } else if (RC == &NVPTX::Int16RegsRegClass) {
     return "%rs";
-  }
-  else if (RC == &NVPTX::Int8RegsRegClass) {
+  } else if (RC == &NVPTX::Int8RegsRegClass) {
     return "%rc";
-  }
-  else if (RC == &NVPTX::Int1RegsRegClass) {
+  } else if (RC == &NVPTX::Int1RegsRegClass) {
     return "%p";
-  }
-  else if (RC == &NVPTX::SpecialRegsRegClass) {
+  } else if (RC == &NVPTX::SpecialRegsRegClass) {
     return "!Special!";
-  }
-  else {
+  } else {
     return "INTERNAL";
   }
   return "";
@@ -94,23 +79,22 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
 
 NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
                                      const NVPTXSubtarget &st)
-  : NVPTXGenRegisterInfo(0),
-    Is64Bit(st.is64Bit()) {}
+    : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {}
 
 #define GET_REGINFO_TARGET_DESC
 #include "NVPTXGenRegisterInfo.inc"
 
 /// NVPTX Callee Saved Registers
-const uint16_t* NVPTXRegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const {
+const uint16_t *
+NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   static const uint16_t CalleeSavedRegs[] = { 0 };
   return CalleeSavedRegs;
 }
 
 // NVPTX Callee Saved Reg Classes
-const TargetRegisterClass* const*
+const TargetRegisterClass *const *
 NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
-  static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+  static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 };
   return CalleeSavedRegClasses;
 }
 
@@ -119,10 +103,9 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-void NVPTXRegisterInfo::
-eliminateFrameIndex(MachineBasicBlock::iterator II,
-                    int SPAdj, unsigned FIOperandNum,
-                    RegScavenger *RS) const {
+void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                            int SPAdj, unsigned FIOperandNum,
+                                            RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
 
   MachineInstr &MI = *II;
@@ -130,15 +113,14 @@ eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   MachineFunction &MF = *MI.getParent()->getParent();
   int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
-      MI.getOperand(FIOperandNum+1).getImm();
+               MI.getOperand(FIOperandNum + 1).getImm();
 
   // Using I0 as the frame pointer
   MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false);
-  MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
 }
 
-int NVPTXRegisterInfo::
-getDwarfRegNum(unsigned RegNum, bool isEH) const {
+int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
   return 0;
 }
 
@@ -146,7 +128,4 @@ unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   return NVPTX::VRFrame;
 }
 
-unsigned NVPTXRegisterInfo::getRARegister() const {
-  return 0;
-}
-
+unsigned NVPTXRegisterInfo::getRARegister() const { return 0; }
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 69f73f213c34..d40682066142 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -17,7 +17,6 @@
 #include "ManagedStringPool.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 
-
 #define GET_REGINFO_HEADER
 #include "NVPTXGenRegisterInfo.inc"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -33,30 +32,28 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
 private:
   bool Is64Bit;
   // Hold Strings that can be free'd all together with NVPTXRegisterInfo
-  ManagedStringPool     ManagedStrPool;
+  ManagedStringPool ManagedStrPool;
 
 public:
-  NVPTXRegisterInfo(const TargetInstrInfo &tii,
-                    const NVPTXSubtarget &st);
-
+  NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st);
 
   //------------------------------------------------------
   // Pure virtual functions from TargetRegisterInfo
   //------------------------------------------------------
 
   // NVPTX callee saved registers
-  virtual const uint16_t*
+  virtual const uint16_t *
   getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
   // NVPTX callee saved register classes
-  virtual const TargetRegisterClass* const *
+  virtual const TargetRegisterClass *const *
   getCalleeSavedRegClasses(const MachineFunction *MF) const;
 
   virtual BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
-                                   int SPAdj, unsigned FIOperandNum,
-                                   RegScavenger *RS=NULL) const;
+  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+                                   unsigned FIOperandNum,
+                                   RegScavenger *RS = NULL) const;
 
   virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
   virtual unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -74,11 +71,9 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
 
 };
 
-
-std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
-std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
+std::string getNVPTXRegClassName(const TargetRegisterClass *RC);
+std::string getNVPTXRegClassStr(const TargetRegisterClass *RC);
 
 } // end namespace llvm
 
-
 #endif
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index e166be5a68e4..e57ace92e850 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -32,7 +32,8 @@ class NVPTXSection : public MCSection {
   /// Override this as NVPTX has its own way of printing switching
   /// to a section.
   virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
-                                    raw_ostream &OS) const {}
+                                    raw_ostream &OS,
+                                    const MCExpr *Subsection) const {}
 
   /// Base address of PTX sections is zero.
   virtual bool isBaseAddressKnownZero() const { return true; }
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
index babe29500dfd..83dfe120899a 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -21,9 +21,7 @@
 
 using namespace llvm;
 
-namespace llvm {
-FunctionPass *createSplitBBatBarPass();
-}
+namespace llvm { FunctionPass *createSplitBBatBarPass(); }
 
 char NVPTXSplitBBatBar::ID = 0;
 
@@ -72,6 +70,4 @@ bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
 // This interface will most likely not be necessary, because this pass will
 // not be invoked by the driver, but will be used as a prerequisite to
 // another pass.
-FunctionPass *llvm::createSplitBBatBarPass() {
-  return new NVPTXSplitBBatBar();
-}
+FunctionPass *llvm::createSplitBBatBarPass() { return new NVPTXSplitBBatBar(); }
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 7b62cce2c65c..2dcd73dcff9c 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -22,27 +22,23 @@ using namespace llvm;
 // Select Driver Interface
 #include "llvm/Support/CommandLine.h"
 namespace {
-cl::opt<NVPTX::DrvInterface>
-DriverInterface(cl::desc("Choose driver interface:"),
-                cl::values(
-                    clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
-                    clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
-                    clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
-                    clEnumValEnd),
-                    cl::init(NVPTX::NVCL));
+cl::opt<NVPTX::DrvInterface> DriverInterface(
+    cl::desc("Choose driver interface:"),
+    cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
+               clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
+               clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd),
+    cl::init(NVPTX::NVCL));
 }
 
 NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
                                const std::string &FS, bool is64Bit)
-: NVPTXGenSubtargetInfo(TT, CPU, FS),
-  Is64Bit(is64Bit),
-  PTXVersion(0),
-  SmVersion(10) {
+    : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
+      SmVersion(20) {
 
   drvInterface = DriverInterface;
 
   // Provide the default CPU if none
-  std::string defCPU = "sm_10";
+  std::string defCPU = "sm_20";
 
   ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS);
 
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index beea77e38d8a..670077daaa69 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -25,7 +25,7 @@
 namespace llvm {
 
 class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
-  
+
   std::string TargetName;
   NVPTX::DrvInterface drvInterface;
   bool Is64Bit;
@@ -61,13 +61,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasLDU() const { return SmVersion >= 20; }
   bool hasGenericLdSt() const { return SmVersion >= 20; }
   inline bool hasHWROT32() const { return false; }
-  inline bool hasSWROT32() const {
-    return true;
-  }
-  inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; }
+  inline bool hasSWROT32() const { return true; }
+  inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
   inline bool hasROT64() const { return SmVersion >= 20; }
 
-
   bool is64Bit() const { return Is64Bit; }
 
   unsigned int getSmVersion() const { return SmVersion; }
@@ -96,4 +93,4 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
 
 } // End llvm namespace
 
-#endif  // NVPTXSUBTARGET_H
+#endif // NVPTXSUBTARGET_H
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index cd765fa8cb1b..67ca6b58e5a6 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -45,9 +45,11 @@
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Scalar.h"
 
-
 using namespace llvm;
 
+namespace llvm {
+void initializeNVVMReflectPass(PassRegistry&);
+}
 
 extern "C" void LLVMInitializeNVPTXTarget() {
   // Register the target.
@@ -57,52 +59,42 @@ extern "C" void LLVMInitializeNVPTXTarget() {
   RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
   RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
 
+  // FIXME: This pass is really intended to be invoked during IR optimization,
+  // but it's very NVPTX-specific.
+  initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
 }
 
-NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
-                                       StringRef TT,
-                                       StringRef CPU,
-                                       StringRef FS,
-                                       const TargetOptions& Options,
-                                       Reloc::Model RM,
-                                       CodeModel::Model CM,
-                                       CodeGenOpt::Level OL,
-                                       bool is64bit)
-: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-  Subtarget(TT, CPU, FS, is64bit),
-  DL(Subtarget.getDataLayout()),
-  InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
-/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
-}
-
-
+NVPTXTargetMachine::NVPTXTargetMachine(
+    const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+    CodeGenOpt::Level OL, bool is64bit)
+    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+      Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()),
+      InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+      FrameLowering(
+          *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {}
 
 void NVPTXTargetMachine32::anchor() {}
 
-NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
-                                           StringRef CPU, StringRef FS,
-                                           const TargetOptions &Options,
-                                           Reloc::Model RM, CodeModel::Model CM,
-                                           CodeGenOpt::Level OL)
-: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
-}
+NVPTXTargetMachine32::NVPTXTargetMachine32(
+    const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+    CodeGenOpt::Level OL)
+    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
 
 void NVPTXTargetMachine64::anchor() {}
 
-NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
-                                           StringRef CPU, StringRef FS,
-                                           const TargetOptions &Options,
-                                           Reloc::Model RM, CodeModel::Model CM,
-                                           CodeGenOpt::Level OL)
-: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
-}
-
+NVPTXTargetMachine64::NVPTXTargetMachine64(
+    const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+    CodeGenOpt::Level OL)
+    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
 
 namespace llvm {
 class NVPTXPassConfig : public TargetPassConfig {
 public:
   NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
-  : TargetPassConfig(TM, PM) {}
+      : TargetPassConfig(TM, PM) {}
 
   NVPTXTargetMachine &getNVPTXTargetMachine() const {
     return getTM<NVPTXTargetMachine>();
@@ -126,6 +118,4 @@ bool NVPTXPassConfig::addInstSelector() {
   return false;
 }
 
-bool NVPTXPassConfig::addPreRegAlloc() {
-  return false;
-}
+bool NVPTXPassConfig::addPreRegAlloc() { return false; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 1a732be1ade3..5fbcf735b48f 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #ifndef NVPTX_TARGETMACHINE_H
 #define NVPTX_TARGETMACHINE_H
 
@@ -31,42 +30,40 @@ namespace llvm {
 /// NVPTXTargetMachine
 ///
 class NVPTXTargetMachine : public LLVMTargetMachine {
-  NVPTXSubtarget        Subtarget;
-  const DataLayout      DL;       // Calculates type size & alignment
-  NVPTXInstrInfo        InstrInfo;
-  NVPTXTargetLowering   TLInfo;
-  TargetSelectionDAGInfo   TSInfo;
+  NVPTXSubtarget Subtarget;
+  const DataLayout DL; // Calculates type size & alignment
+  NVPTXInstrInfo InstrInfo;
+  NVPTXTargetLowering TLInfo;
+  TargetSelectionDAGInfo TSInfo;
 
   // NVPTX does not have any call stack frame, but need a NVPTX specific
   // FrameLowering class because TargetFrameLowering is abstract.
-  NVPTXFrameLowering       FrameLowering;
+  NVPTXFrameLowering FrameLowering;
 
   // Hold Strings that can be free'd all together with NVPTXTargetMachine
-  ManagedStringPool     ManagedStrPool;
+  ManagedStringPool ManagedStrPool;
 
   //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
   //                            bool DisableVerify, MCContext *&OutCtx);
 
 public:
-  NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
-                     StringRef FS, const TargetOptions &Options,
-                     Reloc::Model RM, CodeModel::Model CM,
-                     CodeGenOpt::Level OP,
-                     bool is64bit);
+  NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+                     const TargetOptions &Options, Reloc::Model RM,
+                     CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
 
   virtual const TargetFrameLowering *getFrameLowering() const {
     return &FrameLowering;
   }
-  virtual const NVPTXInstrInfo *getInstrInfo() const  { return &InstrInfo; }
-  virtual const DataLayout *getDataLayout() const     { return &DL;}
-  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}
+  virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+  virtual const DataLayout *getDataLayout() const { return &DL; }
+  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
 
   virtual const NVPTXRegisterInfo *getRegisterInfo() const {
     return &(InstrInfo.getRegisterInfo());
   }
 
   virtual NVPTXTargetLowering *getTargetLowering() const {
-    return const_cast<NVPTXTargetLowering*>(&TLInfo);
+    return const_cast<NVPTXTargetLowering *>(&TLInfo);
   }
 
   virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
@@ -79,22 +76,19 @@ class NVPTXTargetMachine : public LLVMTargetMachine {
   //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
 
   ManagedStringPool *getManagedStrPool() const {
-    return const_cast<ManagedStringPool*>(&ManagedStrPool);
+    return const_cast<ManagedStringPool *>(&ManagedStrPool);
   }
 
   virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
 
   // Emission of machine code through JITCodeEmitter is not supported.
-  virtual bool addPassesToEmitMachineCode(PassManagerBase &,
-                                          JITCodeEmitter &,
+  virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
                                           bool = true) {
     return true;
   }
 
   // Emission of machine code through MCJIT is not supported.
-  virtual bool addPassesToEmitMC(PassManagerBase &,
-                                 MCContext *&,
-                                 raw_ostream &,
+  virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
                                  bool = true) {
     return true;
   }
@@ -119,7 +113,6 @@ class NVPTXTargetMachine64 : public NVPTXTargetMachine {
                        CodeGenOpt::Level OL);
 };
 
-
 } // end namespace llvm
 
 #endif
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index b5698a2fc08f..6ab0e08ad091 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -46,45 +46,43 @@ class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
   }
 
   virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
-    TextSection = new NVPTXSection(MCSection::SV_ELF,
-                                   SectionKind::getText());
-    DataSection = new NVPTXSection(MCSection::SV_ELF,
-                                   SectionKind::getDataRel());
-    BSSSection = new NVPTXSection(MCSection::SV_ELF,
-                                  SectionKind::getBSS());
-    ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
-                                       SectionKind::getReadOnly());
+    TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
+    DataSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
+    BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS());
+    ReadOnlySection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly());
 
-    StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
-                                         SectionKind::getMetadata());
-    StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
-                                         SectionKind::getMetadata());
-    LSDASection = new NVPTXSection(MCSection::SV_ELF,
-                                   SectionKind::getMetadata());
-    EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
-                                      SectionKind::getMetadata());
-    DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
-                                          SectionKind::getMetadata());
-    DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
-                                        SectionKind::getMetadata());
-    DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
-                                        SectionKind::getMetadata());
-    DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
-                                         SectionKind::getMetadata());
-    DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
-                                            SectionKind::getMetadata());
-    DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
-                                               SectionKind::getMetadata());
-    DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
-                                       SectionKind::getMetadata());
-    DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
-                                       SectionKind::getMetadata());
-    DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
-                                           SectionKind::getMetadata());
-    DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
-                                          SectionKind::getMetadata());
-    DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
-                                             SectionKind::getMetadata());
+    StaticCtorSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    StaticDtorSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    LSDASection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    EHFrameSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfAbbrevSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfInfoSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfLineSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfFrameSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfPubTypesSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfDebugInlineSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfStrSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfLocSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfARangesSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfRangesSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfMacroInfoSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
   }
 
   virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
@@ -93,8 +91,7 @@ class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
 
   virtual const MCSection *
   getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
-                           Mangler *Mang,
-                           const TargetMachine &TM) const {
+                           Mangler *Mang, const TargetMachine &TM) const {
     return DataSection;
   }
 
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 1ccc9f7c0229..6786eb02240c 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -34,7 +34,6 @@ typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
 
 ManagedStatic<per_module_annot_t> annotationCache;
 
-
 static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
   assert(md && "Invalid mdnode for annotation");
   assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
@@ -46,7 +45,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
     assert(prop && "Annotation property not a string");
 
     // value
-    ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1));
+    ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1));
     assert(Val && "Value operand not a constant int");
 
     std::string keyname = prop->getString().str();
@@ -120,9 +119,9 @@ bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
 bool llvm::isTexture(const llvm::Value &val) {
   if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
     unsigned annot;
-    if (llvm::findOneNVVMAnnotation(gv,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
-                                   annot)) {
+    if (llvm::findOneNVVMAnnotation(
+            gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
+            annot)) {
       assert((annot == 1) && "Unexpected annotation on a texture symbol");
       return true;
     }
@@ -133,9 +132,9 @@ bool llvm::isTexture(const llvm::Value &val) {
 bool llvm::isSurface(const llvm::Value &val) {
   if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
     unsigned annot;
-    if (llvm::findOneNVVMAnnotation(gv,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
-                                   annot)) {
+    if (llvm::findOneNVVMAnnotation(
+            gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
+            annot)) {
       assert((annot == 1) && "Unexpected annotation on a surface symbol");
       return true;
     }
@@ -146,9 +145,9 @@ bool llvm::isSurface(const llvm::Value &val) {
 bool llvm::isSampler(const llvm::Value &val) {
   if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
     unsigned annot;
-    if (llvm::findOneNVVMAnnotation(gv,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
-                                   annot)) {
+    if (llvm::findOneNVVMAnnotation(
+            gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+            annot)) {
       assert((annot == 1) && "Unexpected annotation on a sampler symbol");
       return true;
     }
@@ -156,9 +155,9 @@ bool llvm::isSampler(const llvm::Value &val) {
   if (const Argument *arg = dyn_cast<Argument>(&val)) {
     const Function *func = arg->getParent();
     std::vector<unsigned> annot;
-    if (llvm::findAllNVVMAnnotation(func,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
-                                   annot)) {
+    if (llvm::findAllNVVMAnnotation(
+            func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+            annot)) {
       if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
         return true;
     }
@@ -171,8 +170,9 @@ bool llvm::isImageReadOnly(const llvm::Value &val) {
     const Function *func = arg->getParent();
     std::vector<unsigned> annot;
     if (llvm::findAllNVVMAnnotation(func,
-          llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
-                                   annot)) {
+                                    llvm::PropertyAnnotationNames[
+                                        llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
+                                    annot)) {
       if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
         return true;
     }
@@ -185,8 +185,9 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) {
     const Function *func = arg->getParent();
     std::vector<unsigned> annot;
     if (llvm::findAllNVVMAnnotation(func,
-         llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
-                                   annot)) {
+                                    llvm::PropertyAnnotationNames[
+                                        llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
+                                    annot)) {
       if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
         return true;
     }
@@ -214,52 +215,44 @@ std::string llvm::getSamplerName(const llvm::Value &val) {
 }
 
 bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X],
-                                      x));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x));
 }
 
 bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y],
-                                      y));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y));
 }
 
 bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z],
-                                      z));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z));
 }
 
 bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X],
-                                      x));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x));
 }
 
 bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y],
-                                      y));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y));
 }
 
 bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z],
-                                      z));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z));
 }
 
 bool llvm::getMinCTASm(const Function &F, unsigned &x) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                    llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM],
-                                      x));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x));
 }
 
 bool llvm::isKernelFunction(const Function &F) {
   unsigned x = 0;
-  bool retval = llvm::findOneNVVMAnnotation(&F,
-               llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
-                                            x);
+  bool retval = llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
   if (retval == false) {
     // There is no NVVM metadata, check the calling convention
     if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
@@ -267,20 +260,19 @@ bool llvm::isKernelFunction(const Function &F) {
     else
       return false;
   }
-  return (x==1);
+  return (x == 1);
 }
 
 bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
   std::vector<unsigned> Vs;
-  bool retval = llvm::findAllNVVMAnnotation(&F,
-                           llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
-                                            Vs);
+  bool retval = llvm::findAllNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
   if (retval == false)
     return false;
-  for (int i=0, e=Vs.size(); i<e; i++) {
+  for (int i = 0, e = Vs.size(); i < e; i++) {
     unsigned v = Vs[i];
-    if ( (v >> 16) == index ) {
-      align =  v & 0xFFFF;
+    if ((v >> 16) == index) {
+      align = v & 0xFFFF;
       return true;
     }
   }
@@ -289,16 +281,15 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
 
 bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
   if (MDNode *alignNode = I.getMetadata("callalign")) {
-    for (int i=0, n = alignNode->getNumOperands();
-        i<n; i++) {
+    for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
       if (const ConstantInt *CI =
-          dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+              dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
         unsigned v = CI->getZExtValue();
-        if ( (v>>16) == index ) {
+        if ((v >> 16) == index) {
           align = v & 0xFFFF;
           return true;
         }
-        if ( (v>>16) > index ) {
+        if ((v >> 16) > index) {
           return false;
         }
       }
@@ -337,8 +328,8 @@ bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
 // consider several special intrinsics in striping pointer casts, and
 // provide an option to ignore GEP indicies for find out the base address only
 // which could be used in simple alias disambigurate.
-const Value *llvm::skipPointerTransfer(const Value *V,
-                                       bool ignore_GEP_indices) {
+const Value *
+llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
   V = V->stripPointerCasts();
   while (true) {
     if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
@@ -360,8 +351,8 @@ const Value *llvm::skipPointerTransfer(const Value *V,
 // - ignore GEP indicies for find out the base address only, and
 // - tracking PHINode
 // which could be used in simple alias disambigurate.
-const Value *llvm::skipPointerTransfer(const Value *V,
-                                       std::set<const Value *> &processed) {
+const Value *
+llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
   if (processed.find(V) != processed.end())
     return NULL;
   processed.insert(V);
@@ -406,7 +397,6 @@ const Value *llvm::skipPointerTransfer(const Value *V,
   return V;
 }
 
-
 // The following are some useful utilities for debuggung
 
 BasicBlock *llvm::getParentBlock(Value *v) {
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index 247e09b8bc69..a208004297d0 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -23,8 +23,7 @@
 #include <string>
 #include <vector>
 
-namespace llvm
-{
+namespace llvm {
 
 #define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
 #define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
@@ -64,8 +63,7 @@ bool isBarrierIntrinsic(llvm::Intrinsic::ID);
 /// to pass into type construction of CallInst ctors.  This turns a null
 /// terminated list of pointers (or other value types) into a real live vector.
 ///
-template<typename T>
-inline std::vector<T> make_vector(T A, ...) {
+template <typename T> inline std::vector<T> make_vector(T A, ...) {
   va_list Args;
   va_start(Args, A);
   std::vector<T> Result;
@@ -78,8 +76,8 @@ inline std::vector<T> make_vector(T A, ...) {
 
 bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
 const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
-const Value *skipPointerTransfer(const Value *V,
-                                 std::set<const Value *> &processed);
+const Value *
+skipPointerTransfer(const Value *V, std::set<const Value *> &processed);
 BasicBlock *getParentBlock(Value *v);
 Function *getParentFunction(Value *v);
 void dumpBlock(Value *v, char *blockName);
diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp
index 6a0e5328f62f..5f074b33a2d4 100644
--- a/lib/Target/NVPTX/NVPTXutil.cpp
+++ b/lib/Target/NVPTX/NVPTXutil.cpp
@@ -18,8 +18,7 @@ using namespace llvm;
 
 namespace llvm {
 
-bool isParamLoad(const MachineInstr *MI)
-{
+bool isParamLoad(const MachineInstr *MI) {
   if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
       (MI->getOpcode() != NVPTX::LD_i64_avar))
     return false;
@@ -30,13 +29,11 @@ bool isParamLoad(const MachineInstr *MI)
   return true;
 }
 
-#define DATA_MASK     0x7f
-#define DIGIT_WIDTH   7
-#define MORE_BYTES    0x80
+#define DATA_MASK 0x7f
+#define DIGIT_WIDTH 7
+#define MORE_BYTES 0x80
 
-static int encode_leb128(uint64_t val, int *nbytes,
-                         char *space, int splen)
-{
+static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
   char *a;
   char *end = space + splen;
 
@@ -61,29 +58,30 @@ static int encode_leb128(uint64_t val, int *nbytes,
 #undef DIGIT_WIDTH
 #undef MORE_BYTES
 
-uint64_t encode_leb128(const char *str)
-{
-  union { uint64_t x; char a[8]; } temp64;
+uint64_t encode_leb128(const char *str) {
+  union {
+    uint64_t x;
+    char a[8];
+  } temp64;
 
   temp64.x = 0;
 
-  for (unsigned i=0,e=strlen(str); i!=e; ++i)
-    temp64.a[i] = str[e-1-i];
+  for (unsigned i = 0, e = strlen(str); i != e; ++i)
+    temp64.a[i] = str[e - 1 - i];
 
   char encoded[16];
   int nbytes;
 
   int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
 
-  (void)retval;
-  assert(retval == 0 &&
-         "Encoding to leb128 failed");
+  (void) retval;
+  assert(retval == 0 && "Encoding to leb128 failed");
 
   assert(nbytes <= 8 &&
          "Cannot support register names with leb128 encoding > 8 bytes");
 
   temp64.x = 0;
-  for (int i=0; i<nbytes; ++i)
+  for (int i = 0; i < nbytes; ++i)
     temp64.a[i] = encoded[i];
 
   return temp64.x;
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
new file mode 100644
index 000000000000..0ad62ce39b0d
--- /dev/null
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -0,0 +1,177 @@
+//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces occurences of __nvvm_reflect("string") with an
+// integer based on -nvvm-reflect-list string=<int> option given to this pass.
+// If an undefined string value is seen in a call to __nvvm_reflect("string"),
+// a default value of 0 will be used.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#define NVVM_REFLECT_FUNCTION "__nvvm_reflect"
+
+using namespace llvm;
+
+namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
+
+namespace {
+class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass {
+private:
+  StringMap<int> VarMap;
+  typedef DenseMap<std::string, int>::iterator VarMapIter;
+  Function *ReflectFunction;
+
+public:
+  static char ID;
+  NVVMReflect() : ModulePass(ID) {
+    VarMap.clear();
+    ReflectFunction = 0;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
+  virtual bool runOnModule(Module &);
+
+  void setVarMap();
+};
+}
+
+static cl::opt<bool>
+NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true),
+                   cl::desc("NVVM reflection, enabled by default"));
+
+char NVVMReflect::ID = 0;
+INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
+                "Replace occurences of __nvvm_reflect() calls with 0/1", false,
+                false)
+
+static cl::list<std::string>
+ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"),
+            cl::desc("A list of string=num assignments"),
+            cl::ValueRequired);
+
+/// The command line can look as follows :
+/// -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0 -R e=2
+/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the
+/// ReflectList vector. First, each of ReflectList[i] is 'split'
+/// using "," as the delimiter. Then each of this part is split
+/// using "=" as the delimiter.
+void NVVMReflect::setVarMap() {
+  for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
+    DEBUG(dbgs() << "Option : "  << ReflectList[i] << "\n");
+    SmallVector<StringRef, 4> NameValList;
+    StringRef(ReflectList[i]).split(NameValList, ",");
+    for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
+      SmallVector<StringRef, 2> NameValPair;
+      NameValList[j].split(NameValPair, "=");
+      assert(NameValPair.size() == 2 && "name=val expected");
+      std::stringstream ValStream(NameValPair[1]);
+      int Val;
+      ValStream >> Val;
+      assert((!(ValStream.fail())) && "integer value expected");
+      VarMap[NameValPair[0]] = Val;
+    }
+  }
+}
+
+bool NVVMReflect::runOnModule(Module &M) {
+  if (!NVVMReflectEnabled)
+    return false;
+
+  setVarMap();
+
+  ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION);
+
+  // If reflect function is not used, then there will be
+  // no entry in the module.
+  if (ReflectFunction == 0)
+    return false;
+
+  // Validate _reflect function
+  assert(ReflectFunction->isDeclaration() &&
+         "_reflect function should not have a body");
+  assert(ReflectFunction->getReturnType()->isIntegerTy() &&
+         "_reflect's return type should be integer");
+
+  std::vector<Instruction *> ToRemove;
+
+  // Go through the uses of ReflectFunction in this Function.
+  // Each of them should a CallInst with a ConstantArray argument.
+  // First validate that. If the c-string corresponding to the
+  // ConstantArray can be found successfully, see if it can be
+  // found in VarMap. If so, replace the uses of CallInst with the
+  // value found in VarMap. If not, replace the use  with value 0.
+  for (Value::use_iterator I = ReflectFunction->use_begin(),
+                           E = ReflectFunction->use_end();
+       I != E; ++I) {
+    assert(isa<CallInst>(*I) && "Only a call instruction can use _reflect");
+    CallInst *Reflect = cast<CallInst>(*I);
+
+    assert((Reflect->getNumOperands() == 2) &&
+           "Only one operand expect for _reflect function");
+    // In cuda, we will have an extra constant-to-generic conversion of
+    // the string.
+    const Value *conv = Reflect->getArgOperand(0);
+    assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion");
+    const CallInst *ConvCall = cast<CallInst>(conv);
+    const Value *str = ConvCall->getArgOperand(0);
+    assert(isa<ConstantExpr>(str) &&
+           "Format of _reflect function not recognized");
+    const ConstantExpr *GEP = cast<ConstantExpr>(str);
+
+    const Value *Sym = GEP->getOperand(0);
+    assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");
+
+    const Constant *SymStr = cast<Constant>(Sym);
+
+    assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&
+           "Format of _reflect function not recognized");
+
+    assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&
+           "Format of _reflect function not recognized");
+
+    std::string ReflectArg =
+        cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();
+
+    ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
+    DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
+
+    int ReflectVal = 0; // The default value is 0
+    if (VarMap.find(ReflectArg) != VarMap.end()) {
+      ReflectVal = VarMap[ReflectArg];
+    }
+    Reflect->replaceAllUsesWith(
+        ConstantInt::get(Reflect->getType(), ReflectVal));
+    ToRemove.push_back(Reflect);
+  }
+  if (ToRemove.size() == 0)
+    return false;
+
+  for (unsigned i = 0, e = ToRemove.size(); i != e; ++i)
+    ToRemove[i]->eraseFromParent();
+  return true;
+}
diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index 6c801b875e4b..cc7d4dc5ece7 100644
--- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -17,7 +17,7 @@ Target llvm::TheNVPTXTarget64;
 
 extern "C" void LLVMInitializeNVPTXTargetInfo() {
   RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
-    "NVIDIA PTX 32-bit");
+                                  "NVIDIA PTX 32-bit");
   RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
-    "NVIDIA PTX 64-bit");
+                                    "NVIDIA PTX 64-bit");
 }
diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h
index a7347efd7850..45cc0b8b67f2 100644
--- a/lib/Target/NVPTX/cl_common_defines.h
+++ b/lib/Target/NVPTX/cl_common_defines.h
@@ -24,22 +24,21 @@ enum {
   CLK_LUMINANCE = 0x10B9
 
 #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
-  ,
+      ,
   CLK_Rx = 0x10BA,
   CLK_RGx = 0x10BB,
   CLK_RGBx = 0x10BC
 #endif
 };
 
-
 typedef enum clk_channel_type {
   // valid formats for float return types
-  CLK_SNORM_INT8 = 0x10D0,            // four channel RGBA unorm8
-  CLK_SNORM_INT16 = 0x10D1,           // four channel RGBA unorm16
-  CLK_UNORM_INT8 = 0x10D2,            // four channel RGBA unorm8
-  CLK_UNORM_INT16 = 0x10D3,           // four channel RGBA unorm16
-  CLK_HALF_FLOAT = 0x10DD,            // four channel RGBA half
-  CLK_FLOAT = 0x10DE,                 // four channel RGBA float
+  CLK_SNORM_INT8 = 0x10D0,  // four channel RGBA unorm8
+  CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
+  CLK_UNORM_INT8 = 0x10D2,  // four channel RGBA unorm8
+  CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
+  CLK_HALF_FLOAT = 0x10DD,  // four channel RGBA half
+  CLK_FLOAT = 0x10DE,       // four channel RGBA float
 
 #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
   CLK_UNORM_SHORT_565 = 0x10D4,
@@ -48,7 +47,7 @@ typedef enum clk_channel_type {
 #endif
 
   // valid only for integer return types
-  CLK_SIGNED_INT8 =  0x10D7,
+  CLK_SIGNED_INT8 = 0x10D7,
   CLK_SIGNED_INT16 = 0x10D8,
   CLK_SIGNED_INT32 = 0x10D9,
   CLK_UNSIGNED_INT8 = 0x10DA,
@@ -56,70 +55,68 @@ typedef enum clk_channel_type {
   CLK_UNSIGNED_INT32 = 0x10DC,
 
   // CI SPI for CPU
-  __CLK_UNORM_INT8888 ,         // four channel ARGB unorm8
-  __CLK_UNORM_INT8888R,        // four channel BGRA unorm8
+  __CLK_UNORM_INT8888,  // four channel ARGB unorm8
+  __CLK_UNORM_INT8888R, // four channel BGRA unorm8
 
   __CLK_VALID_IMAGE_TYPE_COUNT,
   __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
-  __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4,         // number of bits required to
-                                                // represent any image type
-  __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
-}clk_channel_type;
+  __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
+                                        // represent any image type
+  __CLK_VALID_IMAGE_TYPE_MASK = (1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS) - 1
+} clk_channel_type;
 
 typedef enum clk_sampler_type {
-    __CLK_ADDRESS_BASE             = 0,
-    CLK_ADDRESS_NONE               = 0 << __CLK_ADDRESS_BASE,
-    CLK_ADDRESS_CLAMP              = 1 << __CLK_ADDRESS_BASE,
-    CLK_ADDRESS_CLAMP_TO_EDGE      = 2 << __CLK_ADDRESS_BASE,
-    CLK_ADDRESS_REPEAT             = 3 << __CLK_ADDRESS_BASE,
-    CLK_ADDRESS_MIRROR             = 4 << __CLK_ADDRESS_BASE,
+  __CLK_ADDRESS_BASE = 0,
+  CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
 
 #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
-    CLK_ADDRESS_MIRRORED_REPEAT    = CLK_ADDRESS_MIRROR,
+  CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
 #endif
-    __CLK_ADDRESS_MASK             = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
-                                     CLK_ADDRESS_CLAMP_TO_EDGE |
-                                     CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
-    __CLK_ADDRESS_BITS             = 3,        // number of bits required to
-                                               // represent address info
-
-    __CLK_NORMALIZED_BASE          = __CLK_ADDRESS_BITS,
-    CLK_NORMALIZED_COORDS_FALSE    = 0,
-    CLK_NORMALIZED_COORDS_TRUE     = 1 << __CLK_NORMALIZED_BASE,
-    __CLK_NORMALIZED_MASK          = CLK_NORMALIZED_COORDS_FALSE |
-                                     CLK_NORMALIZED_COORDS_TRUE,
-    __CLK_NORMALIZED_BITS          = 1,        // number of bits required to
-                                               // represent normalization
-
-    __CLK_FILTER_BASE              = __CLK_NORMALIZED_BASE +
-                                     __CLK_NORMALIZED_BITS,
-    CLK_FILTER_NEAREST             = 0 << __CLK_FILTER_BASE,
-    CLK_FILTER_LINEAR              = 1 << __CLK_FILTER_BASE,
-    CLK_FILTER_ANISOTROPIC         = 2 << __CLK_FILTER_BASE,
-    __CLK_FILTER_MASK              = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
-                                     CLK_FILTER_ANISOTROPIC,
-    __CLK_FILTER_BITS              = 2,        // number of bits required to
-                                               // represent address info
-
-    __CLK_MIP_BASE                 = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
-    CLK_MIP_NEAREST                = 0 << __CLK_MIP_BASE,
-    CLK_MIP_LINEAR                 = 1 << __CLK_MIP_BASE,
-    CLK_MIP_ANISOTROPIC            = 2 << __CLK_MIP_BASE,
-    __CLK_MIP_MASK                 = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
-                                     CLK_MIP_ANISOTROPIC,
-    __CLK_MIP_BITS                 = 2,
-
-    __CLK_SAMPLER_BITS             = __CLK_MIP_BASE + __CLK_MIP_BITS,
-    __CLK_SAMPLER_MASK             = __CLK_MIP_MASK | __CLK_FILTER_MASK |
-                                     __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
-
-    __CLK_ANISOTROPIC_RATIO_BITS   = 5,
-    __CLK_ANISOTROPIC_RATIO_MASK   = (int) 0x80000000 >>
-                                      (__CLK_ANISOTROPIC_RATIO_BITS-1)
+  __CLK_ADDRESS_MASK =
+      CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE |
+      CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
+  __CLK_ADDRESS_BITS = 3, // number of bits required to
+                          // represent address info
+
+  __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
+  CLK_NORMALIZED_COORDS_FALSE = 0,
+  CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
+  __CLK_NORMALIZED_MASK =
+      CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE,
+  __CLK_NORMALIZED_BITS = 1, // number of bits required to
+                             // represent normalization
+
+  __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS,
+  CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
+  CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
+  CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
+  __CLK_FILTER_MASK =
+      CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC,
+  __CLK_FILTER_BITS = 2, // number of bits required to
+                         // represent address info
+
+  __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
+  CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
+  CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
+  CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
+  __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC,
+  __CLK_MIP_BITS = 2,
+
+  __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
+  __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
+                       __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
+
+  __CLK_ANISOTROPIC_RATIO_BITS = 5,
+  __CLK_ANISOTROPIC_RATIO_MASK =
+      (int) 0x80000000 >> (__CLK_ANISOTROPIC_RATIO_BITS - 1)
 } clk_sampler_type;
 
 // Memory synchronization
-#define CLK_LOCAL_MEM_FENCE     (1 << 0)
-#define CLK_GLOBAL_MEM_FENCE    (1 << 1)
+#define CLK_LOCAL_MEM_FENCE (1 << 0)
+#define CLK_GLOBAL_MEM_FENCE (1 << 1)
 
 #endif // __CL_COMMON_DEFINES_H__
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 3d583060d1ef..bacc108c62b4 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -13,7 +13,7 @@
 
 #define DEBUG_TYPE "asm-printer"
 #include "PPCInstPrinter.h"
-#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCPredicates.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -87,35 +87,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
                                            raw_ostream &O, 
                                            const char *Modifier) {
   unsigned Code = MI->getOperand(OpNo).getImm();
-  if (!Modifier) {
-    unsigned CCReg = MI->getOperand(OpNo+1).getReg();
-    unsigned RegNo;
-    switch (CCReg) {
-    default: llvm_unreachable("Unknown CR register");
-    case PPC::CR0: RegNo = 0; break;
-    case PPC::CR1: RegNo = 1; break;
-    case PPC::CR2: RegNo = 2; break;
-    case PPC::CR3: RegNo = 3; break;
-    case PPC::CR4: RegNo = 4; break;
-    case PPC::CR5: RegNo = 5; break;
-    case PPC::CR6: RegNo = 6; break;
-    case PPC::CR7: RegNo = 7; break;
-    }
-
-    // Print the CR bit number. The Code is ((BI << 5) | BO) for a
-    // BCC, but we must have the positive form here (BO == 12)
-    unsigned BI = Code >> 5;
-    assert((Code & 0xF) == 12 &&
-           "BO in predicate bit must have the positive form");
-
-    unsigned Value = 4*RegNo + BI;
-    O << Value;
-    return;
-  }
 
   if (StringRef(Modifier) == "cc") {
     switch ((PPC::Predicate)Code) {
-    case PPC::PRED_ALWAYS: return; // Don't print anything for always.
     case PPC::PRED_LT: O << "lt"; return;
     case PPC::PRED_LE: O << "le"; return;
     case PPC::PRED_EQ: O << "eq"; return;
@@ -129,8 +103,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
   
   assert(StringRef(Modifier) == "reg" &&
          "Need to specify 'cc' or 'reg' as predicate op modifier!");
-  // Don't print the register for 'always'.
-  if (Code == PPC::PRED_ALWAYS) return;
   printOperand(MI, OpNo+1, O);
 }
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index f24edf62ed71..ec2657403e0c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -30,13 +30,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
   case FK_Data_2:
   case FK_Data_4:
   case FK_Data_8:
-  case PPC::fixup_ppc_toc:
   case PPC::fixup_ppc_tlsreg:
   case PPC::fixup_ppc_nofixup:
     return Value;
-  case PPC::fixup_ppc_lo14:
-  case PPC::fixup_ppc_toc16_ds:
-    return (Value & 0xffff) << 2;
   case PPC::fixup_ppc_brcond14:
     return Value & 0xfffc;
   case PPC::fixup_ppc_br24:
@@ -48,8 +44,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
   case PPC::fixup_ppc_ha16:
     return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
   case PPC::fixup_ppc_lo16:
-  case PPC::fixup_ppc_toc16:
     return Value & 0xffff;
+  case PPC::fixup_ppc_lo16_ds:
+    return Value & 0xfffc;
   }
 }
 
@@ -82,10 +79,7 @@ const Target &TheTarget;
       { "fixup_ppc_brcond14",    16,     14,   MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_ppc_lo16",        16,     16,   0 },
       { "fixup_ppc_ha16",        16,     16,   0 },
-      { "fixup_ppc_lo14",        16,     14,   0 },
-      { "fixup_ppc_toc",          0,     64,   0 },
-      { "fixup_ppc_toc16",       16,     16,   0 },
-      { "fixup_ppc_toc16_ds",    16,     14,   0 },
+      { "fixup_ppc_lo16_ds",     16,     14,   0 },
       { "fixup_ppc_tlsreg",       0,      0,   0 },
       { "fixup_ppc_nofixup",      0,      0,   0 }
     };
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
deleted file mode 100644
index 9c975c089ea6..000000000000
--- a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone helper functions and enum definitions for
-// the PPC target useful for the compiler back-end and the MC libraries.
-// As such, it deliberately does not include references to LLVM core
-// code gen types, passes, etc..
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PPCBASEINFO_H
-#define PPCBASEINFO_H
-
-#include "PPCMCTargetDesc.h"
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-
-/// getPPCRegisterNumbering - Given the enum value for some register, e.g.
-/// PPC::F14, return the number that it corresponds to (e.g. 14).
-inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) {
-  using namespace PPC;
-  switch (RegEnum) {
-  case 0: return 0;
-  case R0 :  case X0 :  case F0 :  case V0 : case CR0:  case CR0LT: return  0;
-  case R1 :  case X1 :  case F1 :  case V1 : case CR1:  case CR0GT: return  1;
-  case R2 :  case X2 :  case F2 :  case V2 : case CR2:  case CR0EQ: return  2;
-  case R3 :  case X3 :  case F3 :  case V3 : case CR3:  case CR0UN: return  3;
-  case R4 :  case X4 :  case F4 :  case V4 : case CR4:  case CR1LT: return  4;
-  case R5 :  case X5 :  case F5 :  case V5 : case CR5:  case CR1GT: return  5;
-  case R6 :  case X6 :  case F6 :  case V6 : case CR6:  case CR1EQ: return  6;
-  case R7 :  case X7 :  case F7 :  case V7 : case CR7:  case CR1UN: return  7;
-  case R8 :  case X8 :  case F8 :  case V8 : case CR2LT: return  8;
-  case R9 :  case X9 :  case F9 :  case V9 : case CR2GT: return  9;
-  case R10:  case X10:  case F10:  case V10: case CR2EQ: return 10;
-  case R11:  case X11:  case F11:  case V11: case CR2UN: return 11;
-  case R12:  case X12:  case F12:  case V12: case CR3LT: return 12;
-  case R13:  case X13:  case F13:  case V13: case CR3GT: return 13;
-  case R14:  case X14:  case F14:  case V14: case CR3EQ: return 14;
-  case R15:  case X15:  case F15:  case V15: case CR3UN: return 15;
-  case R16:  case X16:  case F16:  case V16: case CR4LT: return 16;
-  case R17:  case X17:  case F17:  case V17: case CR4GT: return 17;
-  case R18:  case X18:  case F18:  case V18: case CR4EQ: return 18;
-  case R19:  case X19:  case F19:  case V19: case CR4UN: return 19;
-  case R20:  case X20:  case F20:  case V20: case CR5LT: return 20;
-  case R21:  case X21:  case F21:  case V21: case CR5GT: return 21;
-  case R22:  case X22:  case F22:  case V22: case CR5EQ: return 22;
-  case R23:  case X23:  case F23:  case V23: case CR5UN: return 23;
-  case R24:  case X24:  case F24:  case V24: case CR6LT: return 24;
-  case R25:  case X25:  case F25:  case V25: case CR6GT: return 25;
-  case R26:  case X26:  case F26:  case V26: case CR6EQ: return 26;
-  case R27:  case X27:  case F27:  case V27: case CR6UN: return 27;
-  case R28:  case X28:  case F28:  case V28: case CR7LT: return 28;
-  case R29:  case X29:  case F29:  case V29: case CR7GT: return 29;
-  case R30:  case X30:  case F30:  case V30: case CR7EQ: return 30;
-  case R31:  case X31:  case F31:  case V31: case CR7UN: return 31;
-  default:
-    llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
-  }
-}
-
-} // end namespace llvm;
-
-#endif
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 61868d446fe4..84e4175e635b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -133,6 +133,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
       case MCSymbolRefExpr::VK_None:
         Type = ELF::R_PPC_ADDR16_LO;
 	break;
+      case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+        Type = ELF::R_PPC64_TOC16;
+        break;
       case MCSymbolRefExpr::VK_PPC_TOC16_LO:
         Type = ELF::R_PPC64_TOC16_LO;
         break;
@@ -144,35 +147,12 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
         break;
       }
       break;
-    case PPC::fixup_ppc_lo14:
-      Type = ELF::R_PPC_ADDR14;
-      break;
-    case PPC::fixup_ppc_toc:
-      Type = ELF::R_PPC64_TOC;
-      break;
-    case PPC::fixup_ppc_toc16:
+    case PPC::fixup_ppc_lo16_ds:
       switch (Modifier) {
       default: llvm_unreachable("Unsupported Modifier");
-      case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
-        Type = ELF::R_PPC64_TPREL16_LO;
-        break;
-      case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
-        Type = ELF::R_PPC64_DTPREL16_LO;
-        break;
       case MCSymbolRefExpr::VK_None:
-        Type = ELF::R_PPC64_TOC16;
-	break;
-      case MCSymbolRefExpr::VK_PPC_TOC16_LO:
-        Type = ELF::R_PPC64_TOC16_LO;
-        break;
-      case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
-        Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+        Type = ELF::R_PPC64_ADDR16_DS;
         break;
-      }
-      break;
-    case PPC::fixup_ppc_toc16_ds:
-      switch (Modifier) {
-      default: llvm_unreachable("Unsupported Modifier");
       case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
         Type = ELF::R_PPC64_TOC16_DS;
 	break;
@@ -253,8 +233,7 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
   switch ((unsigned)Fixup.getKind()) {
     case PPC::fixup_ppc_ha16:
     case PPC::fixup_ppc_lo16:
-    case PPC::fixup_ppc_toc16:
-    case PPC::fixup_ppc_toc16_ds:
+    case PPC::fixup_ppc_lo16_ds:
       RelocOffset += 2;
       break;
     default:
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 709daa4e4bfd..86c44f57a5e2 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -33,19 +33,9 @@ enum Fixups {
   /// like 'lis'.
   fixup_ppc_ha16,
   
-  /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
-  /// like 'std'.
-  fixup_ppc_lo14,
-
-  /// fixup_ppc_toc - Insert value of TOC base (.TOC.).
-  fixup_ppc_toc,
-
-  /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base.
-  fixup_ppc_toc16,
-
-  /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with
-  /// implied 2 zero bits
-  fixup_ppc_toc16_ds,
+  /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with
+  /// implied 2 zero bits for instrs like 'std'.
+  fixup_ppc_lo16_ds,
 
   /// fixup_ppc_tlsreg - Insert thread-pointer register number.
   fixup_ppc_tlsreg,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index d048426d43a4..2223cd623cb5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -13,10 +13,10 @@
 
 #define DEBUG_TYPE "mccodeemitter"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
-#include "MCTargetDesc/PPCBaseInfo.h"
 #include "MCTargetDesc/PPCFixupKinds.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -33,24 +33,17 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
   void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
 
   const MCSubtargetInfo &STI;
+  const MCContext &CTX;
   Triple TT;
 
 public:
   PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
                    MCContext &ctx)
-    : STI(sti), TT(STI.getTargetTriple()) {
+    : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) {
   }
   
   ~PPCMCCodeEmitter() {}
 
-  bool is64BitMode() const {
-    return (STI.getFeatureBits() & PPC::Feature64Bit) != 0;
-  }
-
-  bool isSVR4ABI() const {
-    return TT.isMacOSX() == 0;
-  }
-
   unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
                                SmallVectorImpl<MCFixup> &Fixups) const;
   unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
@@ -81,12 +74,11 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
                          SmallVectorImpl<MCFixup> &Fixups) const {
     uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
 
-    // BL8_NOP_ELF, BLA8_NOP_ELF, etc., all have a size of 8 because of the
-    // following 'nop'.
+    // BL8_NOP etc. all have a size of 8 because of the following 'nop'.
     unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
     unsigned Opcode = MI.getOpcode();
-    if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF ||
-        Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD)
+    if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
+        Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD)
       Size = 8;
     
     // Output the constant in big endian byte order.
@@ -121,11 +113,11 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
                                    (MCFixupKind)PPC::fixup_ppc_br24));
 
   // For special TLS calls, add another fixup for the symbol.  Apparently
-  // BL8_NOP_ELF, BL8_NOP_ELF_TLSGD, and BL8_NOP_ELF_TLSLD are sufficiently
+  // BL8_NOP, BL8_NOP_TLSGD, and BL8_NOP_TLSLD are sufficiently
   // similar that TblGen will not generate a separate case for the latter
   // two, so this is the only way to get the extra fixup generated.
   unsigned Opcode = MI.getOpcode();
-  if (Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) {
+  if (Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) {
     const MCOperand &MO2 = MI.getOperand(OpNo+1);
     Fixups.push_back(MCFixup::Create(0, MO2.getExpr(),
                                      (MCFixupKind)PPC::fixup_ppc_nofixup));
@@ -178,12 +170,8 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
     return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
   
   // Add a fixup for the displacement field.
-  if (isSVR4ABI() && is64BitMode())
-    Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
-                                     (MCFixupKind)PPC::fixup_ppc_toc16));
-  else
-    Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
-                                     (MCFixupKind)PPC::fixup_ppc_lo16));
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_lo16));
   return RegBits;
 }
 
@@ -199,13 +187,9 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
   if (MO.isImm())
     return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
   
-  // Add a fixup for the branch target.
-  if (isSVR4ABI() && is64BitMode())
-    Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
-                                     (MCFixupKind)PPC::fixup_ppc_toc16_ds));
-  else
-    Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
-                                     (MCFixupKind)PPC::fixup_ppc_lo14));
+  // Add a fixup for the displacement field.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_lo16_ds));
   return RegBits;
 }
 
@@ -220,7 +204,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
   // Return the thread-pointer register's encoding.
   Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
                                    (MCFixupKind)PPC::fixup_ppc_tlsreg));
-  return getPPCRegisterNumbering(PPC::X13);
+  return CTX.getRegisterInfo().getEncodingValue(PPC::X13);
 }
 
 unsigned PPCMCCodeEmitter::
@@ -231,7 +215,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
           MI.getOpcode() == PPC::MFOCRF ||
           MI.getOpcode() == PPC::MTCRF8) &&
          (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
-  return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+  return 0x80 >> CTX.getRegisterInfo().getEncodingValue(MO.getReg());
 }
 
 
@@ -243,7 +227,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
     // The GPR operand should come through here though.
     assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
            MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
-    return getPPCRegisterNumbering(MO.getReg());
+    return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
   }
   
   assert(MO.isImm() &&
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index 12bb0a143406..d84eb9c6aa03 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -18,7 +18,6 @@ using namespace llvm;
 
 PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
   switch (Opcode) {
-  default: llvm_unreachable("Unknown PPC branch opcode!");
   case PPC::PRED_EQ: return PPC::PRED_NE;
   case PPC::PRED_NE: return PPC::PRED_EQ;
   case PPC::PRED_LT: return PPC::PRED_GE;
@@ -28,4 +27,5 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
   case PPC::PRED_NU: return PPC::PRED_UN;
   case PPC::PRED_UN: return PPC::PRED_NU;
   }
+  llvm_unreachable("Unknown PPC branch opcode!");
 }
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index b0680fbb8c65..ad2b01812816 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -25,7 +25,6 @@ namespace llvm {
 namespace PPC {
   /// Predicate - These are "(BI << 5) | BO"  for various predicates.
   enum Predicate {
-    PRED_ALWAYS = (0 << 5) | 20,
     PRED_LT     = (0 << 5) | 12,
     PRED_LE     = (1 << 5) |  4,
     PRED_EQ     = (2 << 5) | 12,
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index f71979f245c3..b4be51a8caec 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -15,7 +15,6 @@
 #ifndef LLVM_TARGET_POWERPC_H
 #define LLVM_TARGET_POWERPC_H
 
-#include "MCTargetDesc/PPCBaseInfo.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include <string>
 
@@ -32,6 +31,7 @@ namespace llvm {
   class MCInst;
 
   FunctionPass *createPPCCTRLoops();
+  FunctionPass *createPPCEarlyReturnPass();
   FunctionPass *createPPCBranchSelectionPass();
   FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
   FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
@@ -41,7 +41,7 @@ namespace llvm {
 
   /// \brief Creates an PPC-specific Target Transformation Info pass.
   ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
-  
+
   namespace PPCII {
     
   /// Target Operand Flag enum.
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 992913602a10..649ffc1abeaa 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -57,10 +57,30 @@ def FeatureMFOCRF    : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
                                         "Enable the MFOCRF instruction">;
 def FeatureFSqrt     : SubtargetFeature<"fsqrt","HasFSQRT", "true",
                                         "Enable the fsqrt instruction">;
+def FeatureFRE       : SubtargetFeature<"fre", "HasFRE", "true",
+                                        "Enable the fre instruction">;
+def FeatureFRES      : SubtargetFeature<"fres", "HasFRES", "true",
+                                        "Enable the fres instruction">;
+def FeatureFRSQRTE   : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true",
+                                        "Enable the frsqrte instruction">;
+def FeatureFRSQRTES  : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true",
+                                        "Enable the frsqrtes instruction">;
+def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true",
+                              "Assume higher precision reciprocal estimates">;
 def FeatureSTFIWX    : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
                                         "Enable the stfiwx instruction">;
+def FeatureLFIWAX    : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
+                                        "Enable the lfiwax instruction">;
+def FeatureFPRND     : SubtargetFeature<"fprnd", "HasFPRND", "true",
+                                        "Enable the fri[mnpz] instructions">;
+def FeatureFPCVT     : SubtargetFeature<"fpcvt", "HasFPCVT", "true",
+  "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">;
 def FeatureISEL      : SubtargetFeature<"isel","HasISEL", "true",
                                         "Enable the isel instruction">;
+def FeaturePOPCNTD   : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
+                                        "Enable the popcnt[dw] instructions">;
+def FeatureLDBRX     : SubtargetFeature<"ldbrx","HasLDBRX", "true",
+                                        "Enable the ldbrx instruction">;
 def FeatureBookE     : SubtargetFeature<"booke", "IsBookE", "true",
                                         "Enable Book E instructions">;
 def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
@@ -71,18 +91,46 @@ def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
 //
 // CMPB         p6, p6x, p7        cmpb
 // DFP          p6, p6x, p7        decimal floating-point instructions
-// FLT_CVT      p7                 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz
-// FPRND        p5x, p6, p6x, p7   frim, frin, frip, friz
-// FRE          p5 through p7      fre (vs. fres, available since p3)
-// FRSQRTES     p5 through p7      frsqrtes (vs. frsqrte, available since p3)
-// LDBRX        p7                 load with byte reversal
-// LFIWAX       p6, p6x, p7        lfiwax
-// LFIWZX       p7                 lfiwzx
 // POPCNTB      p5 through p7      popcntb and related instructions
-// POPCNTD      p7                 popcntd and related instructions
-// RECIP_PREC   p6, p6x, p7        higher precision reciprocal estimates
 // VSX          p7                 vector-scalar instruction set
 
+//===----------------------------------------------------------------------===//
+// Classes used for relation maps.
+//===----------------------------------------------------------------------===//
+// RecFormRel - Filter class used to relate non-record-form instructions with
+// their record-form variants.
+class RecFormRel;
+
+//===----------------------------------------------------------------------===//
+// Relation Map Definitions.
+//===----------------------------------------------------------------------===//
+
+def getRecordFormOpcode : InstrMapping {
+  let FilterClass = "RecFormRel";
+  // Instructions with the same BaseName and Interpretation64Bit values
+  // form a row.
+  let RowFields = ["BaseName", "Interpretation64Bit"];
+  // Instructions with the same RC value form a column.
+  let ColFields = ["RC"];
+  // The key column are the non-record-form instructions.
+  let KeyCol = ["0"];
+  // Value columns RC=1
+  let ValueCols = [["1"]];
+}
+
+def getNonRecordFormOpcode : InstrMapping {
+  let FilterClass = "RecFormRel";
+  // Instructions with the same BaseName and Interpretation64Bit values
+  // form a row.
+  let RowFields = ["BaseName", "Interpretation64Bit"];
+  // Instructions with the same RC value form a column.
+  let ColFields = ["RC"];
+  // The key column are the record-form instructions.
+  let KeyCol = ["1"];
+  // Value columns are RC=0
+  let ValueCols = [["0"]];
+}
+
 //===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//
@@ -97,30 +145,46 @@ include "PPCInstrInfo.td"
 
 def : Processor<"generic", G3Itineraries, [Directive32]>;
 def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
+                                           FeatureFRES, FeatureFRSQRTE,
                                            FeatureBookE]>;
 def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
+                                           FeatureFRES, FeatureFRSQRTE,
                                            FeatureBookE]>;
 def : Processor<"601", G3Itineraries, [Directive601]>;
 def : Processor<"602", G3Itineraries, [Directive602]>;
-def : Processor<"603", G3Itineraries, [Directive603]>;
-def : Processor<"603e", G3Itineraries, [Directive603]>;
-def : Processor<"603ev", G3Itineraries, [Directive603]>;
-def : Processor<"604", G3Itineraries, [Directive604]>;
-def : Processor<"604e", G3Itineraries, [Directive604]>;
-def : Processor<"620", G3Itineraries, [Directive620]>;
-def : Processor<"750", G4Itineraries, [Directive750]>;
-def : Processor<"g3", G3Itineraries, [Directive750]>;
-def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"970", G5Itineraries,
+def : Processor<"603", G3Itineraries, [Directive603,
+                                       FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603e", G3Itineraries, [Directive603,
+                                        FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603ev", G3Itineraries, [Directive603,
+                                         FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604", G3Itineraries, [Directive604,
+                                       FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604e", G3Itineraries, [Directive604,
+                                        FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"620", G3Itineraries, [Directive620,
+                                       FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"750", G4Itineraries, [Directive750,
+                                       FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g3", G3Itineraries, [Directive750,
+                                      FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec,
+                                        FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec,
+                                      FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
+                                            FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
+                                           FeatureFRES, FeatureFRSQRTE]>;
+def : ProcessorModel<"970", G5Model,
                   [Directive970, FeatureAltivec,
-                   FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+                   FeatureMFOCRF, FeatureFSqrt,
+                   FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX,
                    Feature64Bit /*, Feature64BitRegs */]>;
-def : Processor<"g5", G5Itineraries,
+def : ProcessorModel<"g5", G5Model,
                   [Directive970, FeatureAltivec,
                    FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+                   FeatureFRES, FeatureFRSQRTE,
                    Feature64Bit /*, Feature64BitRegs */]>;
 def : ProcessorModel<"e500mc", PPCE500mcModel,
                   [DirectiveE500mc, FeatureMFOCRF,
@@ -128,46 +192,67 @@ def : ProcessorModel<"e500mc", PPCE500mcModel,
 def : ProcessorModel<"e5500", PPCE5500Model,
                   [DirectiveE5500, FeatureMFOCRF, Feature64Bit,
                    FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
-def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
-                                         FeatureMFOCRF, FeatureFSqrt,
-                                         FeatureSTFIWX, FeatureISEL,
-                                         Feature64Bit
-                                     /*, Feature64BitRegs */]>;
-def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
-                                          FeatureMFOCRF, FeatureFSqrt,
-                                          FeatureSTFIWX, FeatureISEL,
-                                          Feature64Bit /*, Feature64BitRegs */,
-                                          FeatureQPX]>;
-def : Processor<"pwr3", G5Itineraries,
-                  [DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
+def : ProcessorModel<"a2", PPCA2Model,
+                  [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+                   FeatureFSqrt, FeatureFRE, FeatureFRES,
+                   FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+                   FeatureSTFIWX, FeatureLFIWAX,
+                   FeatureFPRND, FeatureFPCVT, FeatureISEL,
+                   FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+               /*, Feature64BitRegs */]>;
+def : ProcessorModel<"a2q", PPCA2Model,
+                  [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+                   FeatureFSqrt, FeatureFRE, FeatureFRES,
+                   FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+                   FeatureSTFIWX, FeatureLFIWAX,
+                   FeatureFPRND, FeatureFPCVT, FeatureISEL,
+                   FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+               /*, Feature64BitRegs */, FeatureQPX]>;
+def : ProcessorModel<"pwr3", G5Model,
+                  [DirectivePwr3, FeatureAltivec,
+                   FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
                    FeatureSTFIWX, Feature64Bit]>;
-def : Processor<"pwr4", G5Itineraries,
+def : ProcessorModel<"pwr4", G5Model,
                   [DirectivePwr4, FeatureAltivec, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
-def : Processor<"pwr5", G5Itineraries,
+                   FeatureFSqrt, FeatureFRES, FeatureFRSQRTE,
+                   FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr5", G5Model,
                   [DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
-def : Processor<"pwr5x", G5Itineraries,
+                   FeatureFSqrt, FeatureFRE, FeatureFRES,
+                   FeatureFRSQRTE, FeatureFRSQRTES,
+                   FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr5x", G5Model,
                   [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
-def : Processor<"pwr6", G5Itineraries,
+                   FeatureFSqrt, FeatureFRE, FeatureFRES,
+                   FeatureFRSQRTE, FeatureFRSQRTES,
+                   FeatureSTFIWX, FeatureFPRND, Feature64Bit]>;
+def : ProcessorModel<"pwr6", G5Model,
                   [DirectivePwr6, FeatureAltivec,
-                   FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
-                   Feature64Bit /*, Feature64BitRegs */]>;
-def : Processor<"pwr6x", G5Itineraries,
+                   FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+                   FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+                   FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+                   FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"pwr6x", G5Model,
                   [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
-def : Processor<"pwr7", G5Itineraries,
+                   FeatureFSqrt, FeatureFRE, FeatureFRES,
+                   FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+                   FeatureSTFIWX, FeatureLFIWAX,
+                   FeatureFPRND, Feature64Bit]>;
+def : ProcessorModel<"pwr7", G5Model,
                   [DirectivePwr7, FeatureAltivec,
-                   FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
-                   FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>;
+                   FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+                   FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+                   FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+                   FeatureFPRND, FeatureFPCVT, FeatureISEL,
+                   FeaturePOPCNTD, FeatureLDBRX,
+                   Feature64Bit /*, Feature64BitRegs */]>;
 def : Processor<"ppc", G3Itineraries, [Directive32]>;
-def : Processor<"ppc64", G5Itineraries,
+def : ProcessorModel<"ppc64", G5Model,
                   [Directive64, FeatureAltivec,
-                   FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+                   FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
+                   FeatureFRSQRTE, FeatureSTFIWX,
                    Feature64Bit /*, Feature64BitRegs */]>;
 
-
 //===----------------------------------------------------------------------===//
 // Calling Conventions
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index eae9b7b7fb2c..3c7cc4ed0eb3 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -370,7 +370,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     MCSymbol *PICBase = MF->getPICBaseSymbol();
     
     // Emit the 'bl'.
-    OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL_Darwin) // Darwin vs SVR4 doesn't matter here.
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL)
       // FIXME: We would like an efficient form for this, so we don't have to do
       // a lot of extra uniquing.
       .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
@@ -458,11 +458,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     // Transform %Xd = LDtocL <ga:@sym>, %Xs
     LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
 
-    // Change the opcode to LDrs, which is a form of LD with the offset
-    // specified by a SymbolLo.  If the global address is external, has
+    // Change the opcode to LD.  If the global address is external, has
     // common linkage, or is a jump table address, then reference the
     // associated TOC entry.  Otherwise reference the symbol directly.
-    TmpInst.setOpcode(PPC::LDrs);
+    TmpInst.setOpcode(PPC::LD);
     const MachineOperand &MO = MI->getOperand(1);
     assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
            "Invalid operand for LDtocL!");
@@ -496,10 +495,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     // Transform %Xd = ADDItocL %Xs, <ga:@sym>
     LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
 
-    // Change the opcode to ADDI8L.  If the global address is external, then
+    // Change the opcode to ADDI8.  If the global address is external, then
     // generate a TOC entry and reference that.  Otherwise reference the
     // symbol directly.
-    TmpInst.setOpcode(PPC::ADDI8L);
+    TmpInst.setOpcode(PPC::ADDI8);
     const MachineOperand &MO = MI->getOperand(2);
     assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
     MCSymbol *MOSymbol = 0;
@@ -548,9 +547,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
     LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
 
-    // Change the opcode to LDrs, which is a form of LD with the offset
-    // specified by a SymbolLo.
-    TmpInst.setOpcode(PPC::LDrs);
+    // Change the opcode to LD.
+    TmpInst.setOpcode(PPC::LD);
     const MachineOperand &MO = MI->getOperand(1);
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = Mang->getSymbol(GValue);
@@ -579,7 +577,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   }
   case PPC::ADDItlsgdL: {
     // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
-    // Into:      %Xd = ADDI8L %Xs, sym@got@tlsgd@l
+    // Into:      %Xd = ADDI8 %Xs, sym@got@tlsgd@l
     assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
@@ -587,7 +585,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const MCExpr *SymGotTlsGD =
       MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO,
                               OutContext);
-    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
                                 .addReg(MI->getOperand(0).getReg())
                                 .addReg(MI->getOperand(1).getReg())
                                 .addExpr(SymGotTlsGD));
@@ -595,7 +593,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   }
   case PPC::GETtlsADDR: {
     // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
-    // Into:      BL8_NOP_ELF_TLSGD __tls_get_addr(sym@tlsgd)
+    // Into:      BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd)
     assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
 
     StringRef Name = "__tls_get_addr";
@@ -608,7 +606,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const MCExpr *SymVar =
       MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
                               OutContext);
-    OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSGD)
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD)
                                 .addExpr(TlsRef)
                                 .addExpr(SymVar));
     return;
@@ -631,7 +629,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   }
   case PPC::ADDItlsldL: {
     // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
-    // Into:      %Xd = ADDI8L %Xs, sym@got@tlsld@l
+    // Into:      %Xd = ADDI8 %Xs, sym@got@tlsld@l
     assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
@@ -639,7 +637,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const MCExpr *SymGotTlsLD =
       MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO,
                               OutContext);
-    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
                                 .addReg(MI->getOperand(0).getReg())
                                 .addReg(MI->getOperand(1).getReg())
                                 .addExpr(SymGotTlsLD));
@@ -647,7 +645,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   }
   case PPC::GETtlsldADDR: {
     // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
-    // Into:      BL8_NOP_ELF_TLSLD __tls_get_addr(sym@tlsld)
+    // Into:      BL8_NOP_TLSLD __tls_get_addr(sym@tlsld)
     assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
 
     StringRef Name = "__tls_get_addr";
@@ -660,7 +658,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const MCExpr *SymVar =
       MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
                               OutContext);
-    OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSLD)
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD)
                                 .addExpr(TlsRef)
                                 .addExpr(SymVar));
     return;
@@ -683,7 +681,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   }
   case PPC::ADDIdtprelL: {
     // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
-    // Into:      %Xd = ADDI8L %Xs, sym@dtprel@l
+    // Into:      %Xd = ADDI8 %Xs, sym@dtprel@l
     assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
@@ -691,7 +689,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const MCExpr *SymDtprel =
       MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO,
                               OutContext);
-    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
                                 .addReg(MI->getOperand(0).getReg())
                                 .addReg(MI->getOperand(1).getReg())
                                 .addExpr(SymDtprel));
@@ -723,7 +721,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
     return AsmPrinter::EmitFunctionEntryLabel();
     
   // Emit an official procedure descriptor.
-  const MCSection *Current = OutStreamer.getCurrentSection();
+  MCSectionSubPair Current = OutStreamer.getCurrentSection();
   const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
       ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
       SectionKind::getReadOnly());
@@ -743,7 +741,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
                         8/*size*/);
   // Emit a null environment pointer.
   OutStreamer.EmitIntValue(0, 8 /* size */);
-  OutStreamer.SwitchSection(Current);
+  OutStreamer.SwitchSection(Current.first, Current.second);
 
   MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
                           ".L." + Twine(CurrentFnSym->getName()));
@@ -911,18 +909,19 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
       OutStreamer.EmitLabel(Stub);
       OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
 
+      const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
+
       // mflr r0
       OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
-      // FIXME: MCize this.
-      OutStreamer.EmitRawText("\tbcl 20, 31, " + Twine(AnonSymbol->getName()));
+      // bcl 20, 31, AnonSymbol
+      OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon));
       OutStreamer.EmitLabel(AnonSymbol);
       // mflr r11
       OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
       // addis r11, r11, ha16(LazyPtr - AnonSymbol)
       const MCExpr *Sub =
         MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext),
-                                MCSymbolRefExpr::Create(AnonSymbol, OutContext),
-                                OutContext);
+                                Anon, OutContext);
       OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
         .addReg(PPC::R11)
         .addReg(PPC::R11)
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index b98cc489f6d7..81a54d7015b0 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -189,12 +189,23 @@ INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
 
 /// isCompareEquals - Returns true if the instruction is a compare equals
 /// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
-  if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
+                               bool &Int64Cmp) {
+  if (MI->getOpcode() == PPC::CMPWI) {
     SignedCmp = true;
+    Int64Cmp = false;
     return true;
-  } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+  } else if (MI->getOpcode() == PPC::CMPDI) {
+    SignedCmp = true;
+    Int64Cmp = true;
+    return true;
+  } else if (MI->getOpcode() == PPC::CMPLWI) {
+    SignedCmp = false;
+    Int64Cmp = false;
+    return true;
+  } else if (MI->getOpcode() == PPC::CMPLDI) {
     SignedCmp = false;
+    Int64Cmp = true;
     return true;
   }
 
@@ -353,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
          RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
          RI != RE; ++RI) {
       IV_Opnd = &RI.getOperand();
-      bool SignedCmp;
+      bool SignedCmp, Int64Cmp;
       MachineInstr *MI = IV_Opnd->getParent();
-      if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+      if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
           MI->getOperand(0).getReg() == PredReg) {
 
         OldInsts.push_back(MI);
@@ -380,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
         assert(InitialValue->isReg() && "Expecting register for init value");
         unsigned InitialValueReg = InitialValue->getReg();
   
-        const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+        MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
   
         // Here we need to look for an immediate load (an li or lis/ori pair).
         if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
                          DefInstr->getOpcode() == PPC::ORI)) {
-          int64_t start = (short) DefInstr->getOperand(2).getImm();
-          const MachineInstr *DefInstr2 =
-            MRI->getVRegDef(DefInstr->getOperand(0).getReg());
+          int64_t start = DefInstr->getOperand(2).getImm();
+          MachineInstr *DefInstr2 =
+            MRI->getVRegDef(DefInstr->getOperand(1).getReg());
           if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
                             DefInstr2->getOpcode() == PPC::LIS)) {
             DEBUG(dbgs() << "  initial constant: " << *DefInstr);
@@ -399,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
             if ((count % iv_value) != 0) {
               return 0;
             }
-            return new CountValue(count/iv_value);
+
+            OldInsts.push_back(DefInstr);
+            OldInsts.push_back(DefInstr2);
+
+            // count/iv_value, the trip count, should be positive here. If it
+            // is negative, that indicates that the counter will wrap.
+            if (Int64Cmp)
+              return new CountValue(count/iv_value);
+            else
+              return new CountValue(uint32_t(count/iv_value));
           }
         } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
                                 DefInstr->getOpcode() == PPC::LI)) {
           DEBUG(dbgs() << "  initial constant: " << *DefInstr);
 
-          int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
+          int64_t count = ImmVal -
+            int64_t(short(DefInstr->getOperand(1).getImm()));
           if ((count % iv_value) != 0) {
             return 0;
           }
-          return new CountValue(count/iv_value);
+
+          OldInsts.push_back(DefInstr);
+
+          if (Int64Cmp)
+            return new CountValue(count/iv_value);
+          else
+            return new CountValue(uint32_t(count/iv_value));
         } else if (iv_value == 1 || iv_value == -1) {
           // We can't determine a constant starting value.
           if (ImmVal == 0) {
@@ -417,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
           }
           // FIXME: handle non-zero end value.
         }
-        // FIXME: handle non-unit increments (we might not want to introduce division
-        // but we can handle some 2^n cases with shifts).
+        // FIXME: handle non-unit increments (we might not want to introduce
+        // division but we can handle some 2^n cases with shifts).
   
       }
     }
@@ -489,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI,
     if (MO.isReg() && MO.isDef()) {
       unsigned Reg = MO.getReg();
       if (!MRI->use_nodbg_empty(Reg)) {
-        // This instruction has users, but if the only user is the phi node for the
-        // parent block, and the only use of that phi node is this instruction, then
-        // this instruction is dead: both it (and the phi node) can be removed.
+        // This instruction has users, but if the only user is the phi node for
+        // the parent block, and the only use of that phi node is this
+        // instruction, then this instruction is dead: both it (and the phi
+        // node) can be removed.
         MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
         if (llvm::next(I) == MRI->use_end() &&
             I.getOperand().getParent()->isPHI()) {
@@ -594,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
     DEBUG(dbgs() << "failed to get trip count!\n");
     return false;
   }
+
+  if (TripCount->isImm()) {
+    DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
+
+    // FIXME: We currently can't form 64-bit constants
+    // (including 32-bit unsigned constants)
+    if (!isInt<32>(TripCount->getImm()))
+      return false;
+  }
+
   // Does the loop contain any invalid instructions?
   if (containsInvalidInstruction(L)) {
     return false;
@@ -647,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
     const TargetRegisterClass *SrcRC =
       MF->getRegInfo().getRegClass(TripCount->getReg());
     CountReg = MF->getRegInfo().createVirtualRegister(RC);
-    unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
+    unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
                         (unsigned) PPC::EXTSW_32_64 :
                         (unsigned) TargetOpcode::COPY;
     BuildMI(*Preheader, InsertPos, dl,
@@ -664,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
     // Put the trip count in a register for transfer into the count register.
 
     int64_t CountImm = TripCount->getImm();
-    assert(!TripCount->isNeg() && "Constant trip count must be positive");
+    if (TripCount->isNeg())
+      CountImm = -CountImm;
 
     CountReg = MF->getRegInfo().createVirtualRegister(RC);
-    if (CountImm > 0xFFFF) {
+    if (abs64(CountImm) > 0x7FFF) {
       BuildMI(*Preheader, InsertPos, dl,
               TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
-              CountReg).addImm(CountImm >> 16);
+              CountReg).addImm((CountImm >> 16) & 0xFFFF);
       unsigned CountReg1 = CountReg;
       CountReg = MF->getRegInfo().createVirtualRegister(RC);
       BuildMI(*Preheader, InsertPos, dl,
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index caeb1796f7a4..c8a29a3d2cfe 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -136,3 +136,9 @@ def CSR_SVR464   : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV
                                         F27, F28, F29, F30, F31, CR2, CR3, CR4,
                                         V20, V21, V22, V23, V24, V25, V26, V27,
                                         V28, V29, V30, V31)>;
+
+def CSR_NoRegs : CalleeSavedRegs<(add VRSAVE)>;
+def CSR_NoRegs_Darwin : CalleeSavedRegs<(add)>;
+
+def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>;
+
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index d68bfd12e4c2..64787185138b 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -142,7 +142,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
   assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
             MI.getOpcode() == PPC::MFOCRF) &&
          (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
-  return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+  return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
 }
 
 MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO, 
@@ -260,7 +260,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
     assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
              MI.getOpcode() != PPC::MFOCRF) ||
            MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
-    return getPPCRegisterNumbering(MO.getReg());
+    return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
   }
   
   assert(MO.isImm() &&
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 353560d7f97d..9ec10f62f595 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -103,6 +103,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
 // transform this into the appropriate ORI instruction.
 static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
   MachineFunction *MF = MI->getParent()->getParent();
+  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
   DebugLoc dl = MI->getDebugLoc();
 
   unsigned UsedRegMask = 0;
@@ -115,7 +116,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
   for (MachineRegisterInfo::livein_iterator
        I = MF->getRegInfo().livein_begin(),
        E = MF->getRegInfo().livein_end(); I != E; ++I) {
-    unsigned RegNo = getPPCRegisterNumbering(I->first);
+    unsigned RegNo = TRI->getEncodingValue(I->first);
     if (VRRegNo[RegNo] == I->first)        // If this really is a vector reg.
       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
   }
@@ -131,7 +132,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
       const MachineOperand &MO = Ret.getOperand(I);
       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
         continue;
-      unsigned RegNo = getPPCRegisterNumbering(MO.getReg());
+      unsigned RegNo = TRI->getEncodingValue(MO.getReg());
       UsedRegMask &= ~(1 << (31-RegNo));
     }
   }
@@ -188,6 +189,11 @@ static bool spillsCR(const MachineFunction &MF) {
   return FuncInfo->isCRSpilled();
 }
 
+static bool spillsVRSAVE(const MachineFunction &MF) {
+  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  return FuncInfo->isVRSAVESpilled();
+}
+
 static bool hasSpills(const MachineFunction &MF) {
   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
   return FuncInfo->hasSpills();
@@ -217,9 +223,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
 
   // If we are a leaf function, and use up to 224 bytes of stack space,
   // don't have a frame pointer, calls, or dynamic alloca then we do not need
-  // to adjust the stack pointer (we fit in the Red Zone).  For 64-bit
-  // SVR4, we also require a stack frame if we need to spill the CR,
-  // since this spill area is addressed relative to the stack pointer.
+  // to adjust the stack pointer (we fit in the Red Zone).
   // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
   // stackless code if all local vars are reg-allocated.
   bool DisableRedZone = MF.getFunction()->getAttributes().
@@ -231,9 +235,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
       FrameSize <= 224 &&                          // Fits in red zone.
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
       !MFI->adjustsStack() &&                      // No calls.
-      !(Subtarget.isPPC64() &&                     // No 64-bit SVR4 CRsave.
-	Subtarget.isSVR4ABI()
-	&& spillsCR(MF)) &&
       (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
     // No need for frame
     if (UpdateMF)
@@ -299,6 +300,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
 }
 
+void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
+  bool is31 = needsFP(MF);
+  unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
+  unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
+
+  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+       BI != BE; ++BI)
+    for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+      --MBBI;
+      for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+        MachineOperand &MO = MBBI->getOperand(I);
+        if (!MO.isReg())
+          continue;
+
+        switch (MO.getReg()) {
+        case PPC::FP:
+          MO.setReg(FPReg);
+          break;
+        case PPC::FP8:
+          MO.setReg(FP8Reg);
+          break;
+        }
+      }
+    }
+}
 
 void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
@@ -332,6 +358,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
   unsigned FrameSize = determineFrameLayout(MF);
   int NegFrameSize = -FrameSize;
 
+  if (MFI->isFrameAddressTaken())
+    replaceFPWithRealFP(MF);
+
   // Get processor type.
   bool isPPC64 = Subtarget.isPPC64();
   // Get operating system
@@ -339,6 +368,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
   // Check if the link register (LR) must be saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool MustSaveLR = FI->mustSaveLR();
+  const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs();
   // Do we have a frame pointer for this function?
   bool HasFP = hasFP(MF);
 
@@ -360,6 +390,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
 
+    if (!MustSaveCRs.empty()) {
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), PPC::X12);
+      for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+        MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
+    }
+
     if (HasFP)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
         .addReg(PPC::X31)
@@ -371,6 +408,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
         .addReg(PPC::X0)
         .addImm(LROffset / 4)
         .addReg(PPC::X1);
+
+    if (!MustSaveCRs.empty())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
+        .addReg(PPC::X12, getKillRegState(true))
+        .addImm(8)
+        .addReg(PPC::X1);
   } else {
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
@@ -383,6 +426,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
         .addImm(FPOffset)
         .addReg(PPC::R1);
 
+    assert(MustSaveCRs.empty() &&
+           "Prologue CR saving supported only in 64-bit mode");
+
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
         .addReg(PPC::R0)
@@ -546,7 +592,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
       // spilled CRs.
       if (Subtarget.isSVR4ABI()
 	  && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
-	  && !spillsCR(MF))
+	  && MustSaveCRs.empty())
 	continue;
 
       // For 64-bit SVR4 when we have spilled CRs, the spill location
@@ -602,6 +648,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
   // Check if the link register (LR) has been saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool MustSaveLR = FI->mustSaveLR();
+  const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs();
   // Do we have a frame pointer for this function?
   bool HasFP = hasFP(MF);
 
@@ -702,10 +749,19 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
         .addImm(LROffset/4).addReg(PPC::X1);
 
+    if (!MustSaveCRs.empty())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12)
+        .addImm(8).addReg(PPC::X1);
+
     if (HasFP)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
         .addImm(FPOffset/4).addReg(PPC::X1);
 
+    if (!MustSaveCRs.empty())
+      for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::MTCRF8), MustSaveCRs[i])
+          .addReg(PPC::X12, getKillRegState(i == e-1));
+
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
   } else {
@@ -713,6 +769,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
           .addImm(LROffset).addReg(PPC::R1);
 
+    assert(MustSaveCRs.empty() &&
+           "Epilogue CR restoring supported only in 64-bit mode");
+
     if (HasFP)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
           .addImm(FPOffset).addReg(PPC::R1);
@@ -917,6 +976,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
   }
 
   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
 
   int64_t LowerBound = 0;
 
@@ -936,7 +996,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
       FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
     }
 
-    LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8;
+    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
   }
 
   // Check whether the frame pointer register is allocated. If so, make sure it
@@ -970,8 +1030,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
     }
 
     unsigned MinReg =
-      std::min<unsigned>(getPPCRegisterNumbering(MinGPR),
-                         getPPCRegisterNumbering(MinG8R));
+      std::min<unsigned>(TRI->getEncodingValue(MinGPR),
+                         TRI->getEncodingValue(MinG8R));
 
     if (Subtarget.isPPC64()) {
       LowerBound -= (31 - MinReg + 1) * 8;
@@ -1053,14 +1113,21 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
   // needed alignment padding.
   unsigned StackSize = determineFrameLayout(MF, false, true);
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  if (MFI->hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
-      (hasSpills(MF) && !isInt<16>(StackSize))) {
+  if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
+      hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
     const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
     const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
-    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+    RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                        RC->getAlignment(),
                                                        false));
+
+    // These kinds of spills might need two registers.
+    if (spillsCR(MF) || spillsVRSAVE(MF))
+      RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                         RC->getAlignment(),
+                                                         false));
+
   }
 }
 
@@ -1080,44 +1147,41 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
     *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
   DebugLoc DL;
   bool CRSpilled = false;
+  MachineInstrBuilder CRMIB;
   
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
     // CR2 through CR4 are the nonvolatile CR fields.
     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
 
-    if (CRSpilled && IsCRField)
-      continue;
-
     // Add the callee-saved register as live-in; it's killed at the spill.
     MBB.addLiveIn(Reg);
 
+    if (CRSpilled && IsCRField) {
+      CRMIB.addReg(Reg, RegState::ImplicitKill);
+      continue;
+    }
+
     // Insert the spill to the stack frame.
     if (IsCRField) {
-      CRSpilled = true;
-      // The first time we see a CR field, store the whole CR into the
-      // save slot via GPR12 (available in the prolog for 32- and 64-bit).
+      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
       if (Subtarget.isPPC64()) {
-	// 64-bit:  SP+8
-	MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12));
-	MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW))
-			       .addReg(PPC::X12,
-				       getKillRegState(true))
-			       .addImm(8)
-			       .addReg(PPC::X1));
+        // The actual spill will happen at the start of the prologue.
+        FuncInfo->addMustSaveCR(Reg);
       } else {
+        CRSpilled = true;
+
 	// 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
 	// the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
-	MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12));
+	CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
+                  .addReg(Reg, RegState::ImplicitKill);
+
+	MBB.insert(MI, CRMIB);
 	MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
 					 .addReg(PPC::R12,
 						 getKillRegState(true)),
 					 CSI[i].getFrameIdx()));
       }
-      
-      // Record that we spill the CR in this function.
-      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
-      FuncInfo->setSpillsCR();
     } else {
       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
       TII.storeRegToStackSlot(MBB, MI, Reg, true,
@@ -1128,7 +1192,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 }
 
 static void
-restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
+restoreCRs(bool isPPC64, bool is31,
+           bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
 	   MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
 	   const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
 
@@ -1138,14 +1203,10 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
   DebugLoc DL;
   unsigned RestoreOp, MoveReg;
 
-  if (isPPC64) {
-    // 64-bit:  SP+8
-    MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12)
-	       .addImm(8)
-	       .addReg(PPC::X1));
-    RestoreOp = PPC::MTCRF8;
-    MoveReg = PPC::X12;
-  } else {
+  if (isPPC64)
+    // This is handled during epilogue generation.
+    return;
+  else {
     // 32-bit:  FP-relative
     MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
 					     PPC::R12),
@@ -1156,15 +1217,15 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
   
   if (CR2Spilled)
     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
-	       .addReg(MoveReg));
+               .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
 
   if (CR3Spilled)
     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
-	       .addReg(MoveReg));
+               .addReg(MoveReg, getKillRegState(!CR4Spilled)));
 
   if (CR4Spilled)
     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
-	       .addReg(MoveReg));
+               .addReg(MoveReg, getKillRegState(true)));
 }
 
 void PPCFrameLowering::
@@ -1255,7 +1316,9 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
       // least one CR register, restore all spilled CRs together.
       if ((CR2Spilled || CR3Spilled || CR4Spilled)
 	  && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
-	restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
+        bool is31 = needsFP(*MF);
+        restoreCRs(Subtarget.isPPC64(), is31,
+                   CR2Spilled, CR3Spilled, CR4Spilled,
 		   MBB, I, CSI, CSIIndex);
 	CR2Spilled = CR3Spilled = CR4Spilled = false;
       }
@@ -1278,9 +1341,11 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   }
 
   // If we haven't yet spilled the CRs, do so now.
-  if (CR2Spilled || CR3Spilled || CR4Spilled)
-    restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
+  if (CR2Spilled || CR3Spilled || CR4Spilled) {
+    bool is31 = needsFP(*MF); 
+    restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
 	       MBB, I, CSI, CSIIndex);
+  }
 
   return true;
 }
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 53ee32601f2d..6f5f9368c6c6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -43,6 +43,7 @@ class PPCFrameLowering: public TargetFrameLowering {
 
   bool hasFP(const MachineFunction &MF) const;
   bool needsFP(const MachineFunction &MF) const;
+  void replaceFPWithRealFP(MachineFunction &MF) const;
 
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                             RegScavenger *RS = NULL) const;
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 6ed1fb9e6a3c..4bf1e3396429 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -179,7 +179,7 @@ getHazardType(SUnit *SU, int Stalls) {
   }
 
   // Do not allow MTCTR and BCTRL to be in the same dispatch group.
-  if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
+  if (HasCTRSet && Opcode == PPC::BCTRL)
     return NoopHazard;
 
   // If this is a load following a store, make sure it's not to the same or
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 17bea8a6a609..aed0fbb6c84e 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -120,10 +120,10 @@ namespace {
     }
 
     /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
-    /// immediate field.  Because preinc imms have already been validated, just
-    /// accept it.
+    /// immediate field.  Note that the operand at this point is already the
+    /// result of a prior SelectAddressRegImm call.
     bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
-      if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
+      if (N.getOpcode() == ISD::TargetConstant ||
           N.getOpcode() == ISD::TargetGlobalAddress) {
         Out = N;
         return true;
@@ -132,18 +132,6 @@ namespace {
       return false;
     }
 
-    /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc
-    /// index field.  Because preinc imms have already been validated, just
-    /// accept it.
-    bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const {
-      if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
-          N.getOpcode() == ISD::TargetGlobalAddress)
-        return false;
-
-      Out = N;
-      return true;
-    }
-
     /// SelectAddrIdx - Given the specified addressed, check to see if it can be
     /// represented as an indexed [r+r] operation.  Returns false if it can
     /// be represented by [r+imm], which are preferred.
@@ -164,6 +152,12 @@ namespace {
       return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
     }
 
+    // Select an address into a single register.
+    bool SelectAddr(SDValue N, SDValue &Base) {
+      Base = N;
+      return true;
+    }
+
     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
     /// inline asm expressions.  It is always correct to compute the value into
     /// a register.  The case of adding a (possibly relocatable) constant to a
@@ -463,7 +457,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
       SH &= 31;
       SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
                           getI32Imm(ME) };
-      return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+      return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
     }
   }
   return 0;
@@ -786,7 +780,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
       }
       case ISD::SETGT: {
         SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
-        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
+        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
                      0);
         return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
                                     getI32Imm(1));
@@ -879,7 +873,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
 
   // Get the specified bit.
   SDValue Tmp =
-    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
   if (Inv) {
     assert(OtherCondIdx == -1 && "Can't have split plus negation");
     return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
@@ -891,7 +885,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
   // Get the other bit of the comparison.
   Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
   SDValue OtherCond =
-    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
 
   return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
 }
@@ -1050,7 +1044,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       break;
 
     SDValue Offset = LD->getOffset();
-    if (isa<ConstantSDNode>(Offset) ||
+    if (Offset.getOpcode() == ISD::TargetConstant ||
         Offset.getOpcode() == ISD::TargetGlobalAddress) {
 
       unsigned Opcode;
@@ -1085,7 +1079,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       SDValue Ops[] = { Offset, Base, Chain };
       return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
                                     PPCLowering.getPointerTy(),
-                                    MVT::Other, Ops, 3);
+                                    MVT::Other, Ops);
     } else {
       unsigned Opcode;
       bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
@@ -1117,10 +1111,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
 
       SDValue Chain = LD->getChain();
       SDValue Base = LD->getBasePtr();
-      SDValue Ops[] = { Offset, Base, Chain };
+      SDValue Ops[] = { Base, Offset, Chain };
       return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
                                     PPCLowering.getPointerTy(),
-                                    MVT::Other, Ops, 3);
+                                    MVT::Other, Ops);
     }
   }
 
@@ -1169,7 +1163,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
         SDValue Ops[] = { N->getOperand(0).getOperand(0),
                             N->getOperand(0).getOperand(1),
                             getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
-        return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+        return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
       }
     }
 
@@ -1483,8 +1477,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
     default: continue;
 
     case PPC::ADDI8:
-    case PPC::ADDI8L:
-    case PPC::ADDIL:
+    case PPC::ADDI:
       // In some cases (such as TLS) the relocation information
       // is already in place on the operand, so copying the operand
       // is sufficient.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index a7e9d56fa9d6..333976b06c53 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -70,6 +70,8 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
 PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
   const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+  PPCRegInfo = TM.getRegisterInfo();
+  PPCII = TM.getInstrInfo();
 
   setPow2DivIsCheap();
 
@@ -115,6 +117,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
 
   // PowerPC has no SREM/UREM instructions
   setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -149,26 +152,58 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 
   // If we're enabling GP optimizations, use hardware square root
-  if (!Subtarget->hasFSQRT()) {
+  if (!Subtarget->hasFSQRT() &&
+      !(TM.Options.UnsafeFPMath &&
+        Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+
+  if (!Subtarget->hasFSQRT() &&
+      !(TM.Options.UnsafeFPMath &&
+        Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
-  }
 
   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 
+  if (Subtarget->hasFPRND()) {
+    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+
+    // frin does not implement "ties to even." Thus, this is safe only in
+    // fast-math mode.
+    if (TM.Options.UnsafeFPMath) {
+      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+
+      // These need to set FE_INEXACT, and use a custom inserter.
+      setOperationAction(ISD::FRINT, MVT::f64, Legal);
+      setOperationAction(ISD::FRINT, MVT::f32, Legal);
+    }
+  }
+
   // PowerPC does not have BSWAP, CTPOP or CTTZ
   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
-  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
   setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
-  setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
   setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
 
+  if (Subtarget->hasPOPCNTD()) {
+    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
+    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
+  } else {
+    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
+    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
+  }
+
   // PowerPC does not have ROTR
   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
@@ -211,6 +246,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
   setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
 
+  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
+  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
+  // support continuation, user-level threading, and etc.. As a result, no
+  // other SjLj exception interfaces are implemented and please don't build
+  // your own exception handling based on them.
+  // LLVM/Clang supports zero-cost DWARF exception handling.
+  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 
   // We want to legalize GlobalAddress and ConstantPool nodes into the
   // appropriate instructions to materialize the address.
@@ -290,15 +333,28 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     // We cannot do this with Promote because i64 is not a legal type.
     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 
-    // FIXME: disable this lowered code.  This generates 64-bit register values,
-    // and we don't model the fact that the top part is clobbered by calls.  We
-    // need to flag these together so that the value isn't live across a call.
-    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+    if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
   } else {
     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
   }
 
+  // With the instructions enabled under FPCVT, we can do everything.
+  if (PPCSubTarget.hasFPCVT()) {
+    if (Subtarget->has64BitSupport()) {
+      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+    }
+
+    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+  }
+
   if (Subtarget->use64BitRegs()) {
     // 64-bit PowerPC implementations can support i64 types directly
     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
@@ -420,6 +476,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
 
     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+
+    if (TM.Options.UnsafeFPMath) {
+      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+    }
+
     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -470,6 +532,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   setTargetDAGCombine(ISD::BR_CC);
   setTargetDAGCombine(ISD::BSWAP);
 
+  // Use reciprocal estimates.
+  if (TM.Options.UnsafeFPMath) {
+    setTargetDAGCombine(ISD::FDIV);
+    setTargetDAGCombine(ISD::FSQRT);
+  }
+
   // Darwin long double math library functions have $LDBL128 appended.
   if (Subtarget->isDarwin()) {
     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -511,7 +579,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     MaxStoresPerMemmoveOptSize = 8;
 
     setPrefFunctionAlignment(4);
-    BenefitFromCodePlacementOpt = true;
   }
 }
 
@@ -542,6 +609,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::FCFID:           return "PPCISD::FCFID";
   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
+  case PPCISD::FRE:             return "PPCISD::FRE";
+  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
   case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
@@ -557,16 +626,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::SRL:             return "PPCISD::SRL";
   case PPCISD::SRA:             return "PPCISD::SRA";
   case PPCISD::SHL:             return "PPCISD::SHL";
-  case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
-  case PPCISD::STD_32:          return "PPCISD::STD_32";
-  case PPCISD::CALL_SVR4:       return "PPCISD::CALL_SVR4";
-  case PPCISD::CALL_NOP_SVR4:   return "PPCISD::CALL_NOP_SVR4";
-  case PPCISD::CALL_Darwin:     return "PPCISD::CALL_Darwin";
-  case PPCISD::NOP:             return "PPCISD::NOP";
+  case PPCISD::CALL:            return "PPCISD::CALL";
+  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
-  case PPCISD::BCTRL_Darwin:    return "PPCISD::BCTRL_Darwin";
-  case PPCISD::BCTRL_SVR4:      return "PPCISD::BCTRL_SVR4";
+  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
+  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
+  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
   case PPCISD::MFCR:            return "PPCISD::MFCR";
   case PPCISD::VCMP:            return "PPCISD::VCMP";
   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
@@ -576,10 +642,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::STCX:            return "PPCISD::STCX";
   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
   case PPCISD::MFFS:            return "PPCISD::MFFS";
-  case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
-  case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
-  case PPCISD::MTFSF:           return "PPCISD::MTFSF";
   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
@@ -1031,7 +1094,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
     short Imm;
     if (isIntS16Immediate(CN, Imm)) {
       Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
-      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                              CN->getValueType(0));
       return true;
     }
@@ -1080,7 +1143,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
   }
 
   // Otherwise, do it the hard way, using R0 as the base register.
-  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                          N.getValueType());
   Index = N;
   return true;
@@ -1143,7 +1206,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
       short Imm;
       if (isIntS16Immediate(CN, Imm)) {
         Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
-        Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+        Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                                CN->getValueType(0));
         return true;
       }
@@ -1181,15 +1244,19 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                   SelectionDAG &DAG) const {
   if (DisablePPCPreinc) return false;
 
+  bool isLoad = true;
   SDValue Ptr;
   EVT VT;
+  unsigned Alignment;
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
     Ptr = LD->getBasePtr();
     VT = LD->getMemoryVT();
-
+    Alignment = LD->getAlignment();
   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
     Ptr = ST->getBasePtr();
     VT  = ST->getMemoryVT();
+    Alignment = ST->getAlignment();
+    isLoad = false;
   } else
     return false;
 
@@ -1197,7 +1264,25 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
   if (VT.isVector())
     return false;
 
-  if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
+
+    // Common code will reject creating a pre-inc form if the base pointer
+    // is a frame index, or if N is a store and the base pointer is either
+    // the same as or a predecessor of the value being stored.  Check for
+    // those situations here, and try with swapped Base/Offset instead.
+    bool Swap = false;
+
+    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
+      Swap = true;
+    else if (!isLoad) {
+      SDValue Val = cast<StoreSDNode>(N)->getValue();
+      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
+        Swap = true;
+    }
+
+    if (Swap)
+      std::swap(Base, Offset);
+
     AM = ISD::PRE_INC;
     return true;
   }
@@ -1208,6 +1293,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
       return false;
   } else {
+    // LDU/STU need an address with at least 4-byte alignment.
+    if (Alignment < 4)
+      return false;
+
     // reg + imm * 4.
     if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
       return false;
@@ -3099,7 +3188,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
   NodeTys.push_back(MVT::Other);   // Returns a chain
   NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
 
-  unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
+  unsigned CallOpc = PPCISD::CALL;
 
   bool needIndirectCall = true;
   if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
@@ -3232,8 +3321,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
     NodeTys.push_back(MVT::Other);
     NodeTys.push_back(MVT::Glue);
     Ops.push_back(Chain);
-    CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
+    CallOpc = PPCISD::BCTRL;
     Callee.setNode(0);
+    // Add use of X11 (holding environment pointer)
+    if (isSVR4ABI && isPPC64)
+      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
     // Add CTR register as callee so a bctr can be emitted later.
     if (isTailCall)
       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
@@ -3372,7 +3464,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
 
   bool needsTOCRestore = false;
   if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
-    if (CallOpc == PPCISD::BCTRL_SVR4) {
+    if (CallOpc == PPCISD::BCTRL) {
       // This is a call through a function pointer.
       // Restore the caller TOC from the save area into R2.
       // See PrepareCall() for more information about calls through function
@@ -3383,9 +3475,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
       // from allocating it), resulting in an additional register being
       // allocated and an unnecessary move instruction being generated.
       needsTOCRestore = true;
-    } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) {
+    } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
       // Otherwise insert NOP for non-local calls.
-      CallOpc = PPCISD::CALL_NOP_SVR4;
+      CallOpc = PPCISD::CALL_NOP;
     }
   }
 
@@ -4558,6 +4650,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
 }
 
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+                     DAG.getVTList(MVT::i32, MVT::Other),
+                     Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+                     Op.getOperand(0), Op.getOperand(1));
+}
+
 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
 /// possible.
 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -4566,10 +4673,14 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
       !Op.getOperand(2).getValueType().isFloatingPoint())
     return Op;
 
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  // We might be able to do better than this under some circumstances, but in
+  // general, fsel-based lowering of select is a finite-math-only optimization.
+  // For more information, see section F.3 of the 2.06 ISA specification.
+  if (!DAG.getTarget().Options.NoInfsFPMath ||
+      !DAG.getTarget().Options.NoNaNsFPMath)
+    return Op;
 
-  // Cannot handle SETEQ/SETNE.
-  if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
 
   EVT ResVT = Op.getValueType();
   EVT CmpVT = Op.getOperand(0).getValueType();
@@ -4579,9 +4690,20 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
 
   // If the RHS of the comparison is a 0.0, we don't need to do the
   // subtraction at all.
+  SDValue Sel1;
   if (isFloatingPointZero(RHS))
     switch (CC) {
     default: break;       // SETUO etc aren't handled by fsel.
+    case ISD::SETNE:
+      std::swap(TV, FV);
+    case ISD::SETEQ:
+      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
+        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
+        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
     case ISD::SETULT:
     case ISD::SETLT:
       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
@@ -4604,30 +4726,41 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue Cmp;
   switch (CC) {
   default: break;       // SETUO etc aren't handled by fsel.
+  case ISD::SETNE:
+    std::swap(TV, FV);
+  case ISD::SETEQ:
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
+      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
+      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
   case ISD::SETULT:
   case ISD::SETLT:
     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
   case ISD::SETOGE:
   case ISD::SETGE:
     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
   case ISD::SETUGT:
   case ISD::SETGT:
     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
   case ISD::SETOLE:
   case ISD::SETLE:
     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
   }
   return Op;
 }
@@ -4645,37 +4778,72 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
   case MVT::i32:
     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
-                                                         PPCISD::FCTIDZ,
+                        (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+                                                   PPCISD::FCTIDZ),
                       dl, MVT::f64, Src);
     break;
   case MVT::i64:
-    Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
+    assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+           "i64 FP_TO_UINT is supported only with FPCVT");
+    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+                                                        PPCISD::FCTIDUZ,
+                      dl, MVT::f64, Src);
     break;
   }
 
   // Convert the FP value to an int value through memory.
-  SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
+  bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
+    (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
+  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
+  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
 
   // Emit a store to the stack slot.
-  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
-                               MachinePointerInfo(), false, false, 0);
+  SDValue Chain;
+  if (i32Stack) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
+    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
+    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+              DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+              MVT::i32, MMO);
+  } else
+    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+                         MPI, false, false, 0);
 
   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
   // add in a bias.
-  if (Op.getValueType() == MVT::i32)
+  if (Op.getValueType() == MVT::i32 && !i32Stack) {
     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                         DAG.getConstant(4, FIPtr.getValueType()));
-  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
+    MPI = MachinePointerInfo();
+  }
+
+  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
                      false, false, false, 0);
 }
 
-SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
+SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
     return SDValue();
 
+  assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+         "UINT_TO_FP is supported only with FPCVT");
+
+  // If we have FCFIDS, then use it when converting to single-precision.
+  // Otherwise, convert to double-precision and then round.
+  unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+                   (Op.getOpcode() == ISD::UINT_TO_FP ?
+                    PPCISD::FCFIDUS : PPCISD::FCFIDS) :
+                   (Op.getOpcode() == ISD::UINT_TO_FP ?
+                    PPCISD::FCFIDU : PPCISD::FCFID);
+  MVT      FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+                   MVT::f32 : MVT::f64;
+
   if (Op.getOperand(0).getValueType() == MVT::i64) {
     SDValue SINT = Op.getOperand(0);
     // When converting to single-precision, we actually need to convert
@@ -4689,6 +4857,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
     // However, if -enable-unsafe-fp-math is in effect, accept double
     // rounding to avoid the extra overhead.
     if (Op.getValueType() == MVT::f32 &&
+        !PPCSubTarget.hasFPCVT() &&
         !DAG.getTarget().Options.UnsafeFPMath) {
 
       // Twiddle input to make sure the low 11 bits are zero.  (If this
@@ -4722,44 +4891,69 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
 
       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
     }
+
     SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
-    SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
-    if (Op.getValueType() == MVT::f32)
+    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
+
+    if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
       FP = DAG.getNode(ISD::FP_ROUND, dl,
                        MVT::f32, FP, DAG.getIntPtrConstant(0));
     return FP;
   }
 
   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
-         "Unhandled SINT_TO_FP type in custom expander!");
+         "Unhandled INT_TO_FP type in custom expander!");
   // Since we only generate this in 64-bit mode, we can take advantage of
   // 64-bit registers.  In particular, sign extend the input value into the
   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
   // then lfd it and fcfid it.
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *FrameInfo = MF.getFrameInfo();
-  int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
 
-  SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+  SDValue Ld;
+  if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+    int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+                                 MachinePointerInfo::getFixedStack(FrameIdx),
+                                 false, false, 0);
+
+    assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+           "Expected an i32 store");
+    MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+                              MachineMemOperand::MOLoad, 4, 4);
+    SDValue Ops[] = { Store, FIdx };
+    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
+                                   PPCISD::LFIWZX : PPCISD::LFIWAX,
+                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
+                                 Ops, 2, MVT::i32, MMO);
+  } else {
+    assert(PPCSubTarget.isPPC64() &&
+           "i32->FP without LFIWAX supported only on PPC64");
+
+    int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                 Op.getOperand(0));
 
-  // STD the extended value into the stack slot.
-  MachineMemOperand *MMO =
-    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOStore, 8, 8);
-  SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
-  SDValue Store =
-    DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
-                            Ops, 4, MVT::i64, MMO);
-  // Load the value as a double.
-  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
-                           false, false, false, 0);
+    // STD the extended value into the stack slot.
+    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
+                                 MachinePointerInfo::getFixedStack(FrameIdx),
+                                 false, false, 0);
+
+    // Load the value as a double.
+    Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
+                     MachinePointerInfo::getFixedStack(FrameIdx),
+                     false, false, false, 0);
+  }
 
   // FCFID it and return it.
-  SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
-  if (Op.getValueType() == MVT::f32)
+  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
+  if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
   return FP;
 }
@@ -5554,11 +5748,15 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::DYNAMIC_STACKALLOC:
     return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
 
+  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
+  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
   case ISD::FP_TO_UINT:
   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
                                                        Op.getDebugLoc());
-  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
+  case ISD::UINT_TO_FP:
+  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
 
   // Lower 64-bit shifts.
@@ -5612,50 +5810,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                              MVT::f64, N->getOperand(0),
                              DAG.getIntPtrConstant(1));
 
-    // This sequence changes FPSCR to do round-to-zero, adds the two halves
-    // of the long double, and puts FPSCR back the way it was.  We do not
-    // actually model FPSCR.
-    std::vector<EVT> NodeTys;
-    SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
-
-    NodeTys.push_back(MVT::f64);   // Return register
-    NodeTys.push_back(MVT::Glue);    // Returns a flag for later insns
-    Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
-    MFFSreg = Result.getValue(0);
-    InFlag = Result.getValue(1);
-
-    NodeTys.clear();
-    NodeTys.push_back(MVT::Glue);   // Returns a flag
-    Ops[0] = DAG.getConstant(31, MVT::i32);
-    Ops[1] = InFlag;
-    Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
-    InFlag = Result.getValue(0);
-
-    NodeTys.clear();
-    NodeTys.push_back(MVT::Glue);   // Returns a flag
-    Ops[0] = DAG.getConstant(30, MVT::i32);
-    Ops[1] = InFlag;
-    Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
-    InFlag = Result.getValue(0);
-
-    NodeTys.clear();
-    NodeTys.push_back(MVT::f64);    // result of add
-    NodeTys.push_back(MVT::Glue);   // Returns a flag
-    Ops[0] = Lo;
-    Ops[1] = Hi;
-    Ops[2] = InFlag;
-    Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
-    FPreg = Result.getValue(0);
-    InFlag = Result.getValue(1);
-
-    NodeTys.clear();
-    NodeTys.push_back(MVT::f64);
-    Ops[0] = DAG.getConstant(1, MVT::i32);
-    Ops[1] = MFFSreg;
-    Ops[2] = FPreg;
-    Ops[3] = InFlag;
-    Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
-    FPreg = Result.getValue(0);
+    // Add the two halves of the long double in round-to-zero mode.
+    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
 
     // We know the low half is about to be thrown away, so just use something
     // convenient.
@@ -5747,7 +5903,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
   // registers without caring whether they're 32 or 64, but here we're
   // doing actual arithmetic on the addresses.
   bool is64bit = PPCSubTarget.isPPC64();
-  unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
 
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   MachineFunction *F = BB->getParent();
@@ -5851,7 +6007,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
     .addReg(TmpReg).addReg(MaskReg);
   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
     .addReg(Tmp3Reg).addReg(Tmp2Reg);
-  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+  BuildMI(BB, dl, TII->get(PPC::STWCX))
     .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
   BuildMI(BB, dl, TII->get(PPC::BCC))
     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
@@ -5866,9 +6022,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
   return BB;
 }
 
+llvm::MachineBasicBlock*
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+                                    MachineBasicBlock *MBB) const {
+  DebugLoc DL = MI->getDebugLoc();
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  const BasicBlock *BB = MBB->getBasicBlock();
+  MachineFunction::iterator I = MBB;
+  ++I;
+
+  // Memory Reference
+  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+  unsigned DstReg = MI->getOperand(0).getReg();
+  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+  assert(RC->hasType(MVT::i32) && "Invalid destination!");
+  unsigned mainDstReg = MRI.createVirtualRegister(RC);
+  unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+  MVT PVT = getPointerTy();
+  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+         "Invalid Pointer Size!");
+  // For v = setjmp(buf), we generate
+  //
+  // thisMBB:
+  //  SjLjSetup mainMBB
+  //  bl mainMBB
+  //  v_restore = 1
+  //  b sinkMBB
+  //
+  // mainMBB:
+  //  buf[LabelOffset] = LR
+  //  v_main = 0
+  //
+  // sinkMBB:
+  //  v = phi(main, restore)
+  //
+
+  MachineBasicBlock *thisMBB = MBB;
+  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+  MF->insert(I, mainMBB);
+  MF->insert(I, sinkMBB);
+
+  MachineInstrBuilder MIB;
+
+  // Transfer the remainder of BB and its successor edges to sinkMBB.
+  sinkMBB->splice(sinkMBB->begin(), MBB,
+                  llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // Note that the structure of the jmp_buf used here is not compatible
+  // with that used by libc, and is not designed to be. Specifically, it
+  // stores only those 'reserved' registers that LLVM does not otherwise
+  // understand how to spill. Also, by convention, by the time this
+  // intrinsic is called, Clang has already stored the frame address in the
+  // first slot of the buffer and stack address in the third. Following the
+  // X86 target code, we'll store the jump address in the second slot. We also
+  // need to save the TOC pointer (R2) to handle jumps between shared
+  // libraries, and that will be stored in the fourth slot. The thread
+  // identifier (R13) is not affected.
+
+  // thisMBB:
+  const int64_t LabelOffset = 1 * PVT.getStoreSize();
+  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
+
+  // Prepare IP either in reg.
+  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+  unsigned BufReg = MI->getOperand(1).getReg();
+
+  if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+            .addReg(PPC::X2)
+            .addImm(TOCOffset / 4)
+            .addReg(BufReg);
+
+    MIB.setMemRefs(MMOBegin, MMOEnd);
+  }
+
+  // Setup
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
+  MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+
+  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+          .addMBB(mainMBB);
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+  thisMBB->addSuccessor(mainMBB, /* weight */ 0);
+  thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+
+  // mainMBB:
+  //  mainDstReg = 0
+  MIB = BuildMI(mainMBB, DL,
+    TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+  // Store IP
+  if (PPCSubTarget.isPPC64()) {
+    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+            .addReg(LabelReg)
+            .addImm(LabelOffset / 4)
+            .addReg(BufReg);
+  } else {
+    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+            .addReg(LabelReg)
+            .addImm(LabelOffset)
+            .addReg(BufReg);
+  }
+
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+
+  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+  mainMBB->addSuccessor(sinkMBB);
+
+  // sinkMBB:
+  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+          TII->get(PPC::PHI), DstReg)
+    .addReg(mainDstReg).addMBB(mainMBB)
+    .addReg(restoreDstReg).addMBB(thisMBB);
+
+  MI->eraseFromParent();
+  return sinkMBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+                                     MachineBasicBlock *MBB) const {
+  DebugLoc DL = MI->getDebugLoc();
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  // Memory Reference
+  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+  MVT PVT = getPointerTy();
+  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+         "Invalid Pointer Size!");
+
+  const TargetRegisterClass *RC =
+    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+  unsigned Tmp = MRI.createVirtualRegister(RC);
+  // Since FP is only updated here but NOT referenced, it's treated as GPR.
+  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+  MachineInstrBuilder MIB;
+
+  const int64_t LabelOffset = 1 * PVT.getStoreSize();
+  const int64_t SPOffset    = 2 * PVT.getStoreSize();
+  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
+
+  unsigned BufReg = MI->getOperand(0).getReg();
+
+  // Reload FP (the jumped-to function may not have had a
+  // frame pointer, and if so, then its r31 will be restored
+  // as necessary).
+  if (PVT == MVT::i64) {
+    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+            .addImm(0)
+            .addReg(BufReg);
+  } else {
+    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+            .addImm(0)
+            .addReg(BufReg);
+  }
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+
+  // Reload IP
+  if (PVT == MVT::i64) {
+    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+            .addImm(LabelOffset / 4)
+            .addReg(BufReg);
+  } else {
+    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+            .addImm(LabelOffset)
+            .addReg(BufReg);
+  }
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+
+  // Reload SP
+  if (PVT == MVT::i64) {
+    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+            .addImm(SPOffset / 4)
+            .addReg(BufReg);
+  } else {
+    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+            .addImm(SPOffset)
+            .addReg(BufReg);
+  }
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+
+  // FIXME: When we also support base pointers, that register must also be
+  // restored here.
+
+  // Reload TOC
+  if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+            .addImm(TOCOffset / 4)
+            .addReg(BufReg);
+
+    MIB.setMemRefs(MMOBegin, MMOEnd);
+  }
+
+  // Jump
+  BuildMI(*MBB, MI, DL,
+          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
+
+  MI->eraseFromParent();
+  return MBB;
+}
+
 MachineBasicBlock *
 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
+  if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+      MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+    return emitEHSjLjSetJmp(MI, BB);
+  } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+             MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+    return emitEHSjLjLongJmp(MI, BB);
+  }
+
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
 
   // To "insert" these instructions we actually have to insert their
@@ -5881,29 +6266,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 
   if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
                                  MI->getOpcode() == PPC::SELECT_CC_I8)) {
-    unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ?
-                                         PPC::ISEL8 : PPC::ISEL;
-    unsigned SelectPred = MI->getOperand(4).getImm();
-    DebugLoc dl = MI->getDebugLoc();
-
-    // The SelectPred is ((BI << 5) | BO) for a BCC
-    unsigned BO = SelectPred & 0xF;
-    assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel");
-
-    unsigned TrueOpNo, FalseOpNo;
-    if (BO == 12) {
-      TrueOpNo = 2;
-      FalseOpNo = 3;
-    } else {
-      TrueOpNo = 3;
-      FalseOpNo = 2;
-      SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred);
-    }
+    SmallVector<MachineOperand, 2> Cond;
+    Cond.push_back(MI->getOperand(4));
+    Cond.push_back(MI->getOperand(1));
 
-    BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
-      .addReg(MI->getOperand(TrueOpNo).getReg())
-      .addReg(MI->getOperand(FalseOpNo).getReg())
-      .addImm(SelectPred).addReg(MI->getOperand(1).getReg());
+    DebugLoc dl = MI->getDebugLoc();
+    PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
+                        MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
   } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
              MI->getOpcode() == PPC::SELECT_CC_I8 ||
              MI->getOpcode() == PPC::SELECT_CC_F4 ||
@@ -6136,7 +6505,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
     unsigned Ptr1Reg;
     unsigned TmpReg = RegInfo.createVirtualRegister(RC);
-    unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
     //  thisMBB:
     //   ...
     //   fallthrough --> loopMBB
@@ -6239,6 +6608,75 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     BB = exitMBB;
     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
       .addReg(ShiftReg);
+  } else if (MI->getOpcode() == PPC::FADDrtz) {
+    // This pseudo performs an FADD with rounding mode temporarily forced
+    // to round-to-zero.  We emit this via custom inserter since the FPSCR
+    // is not modeled at the SelectionDAG level.
+    unsigned Dest = MI->getOperand(0).getReg();
+    unsigned Src1 = MI->getOperand(1).getReg();
+    unsigned Src2 = MI->getOperand(2).getReg();
+    DebugLoc dl   = MI->getDebugLoc();
+
+    MachineRegisterInfo &RegInfo = F->getRegInfo();
+    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+    // Save FPSCR value.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
+
+    // Set rounding mode to round-to-zero.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
+    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
+
+    // Perform addition.
+    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+
+    // Restore FPSCR value.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+  } else if (MI->getOpcode() == PPC::FRINDrint ||
+             MI->getOpcode() == PPC::FRINSrint) {
+    bool isf32 = MI->getOpcode() == PPC::FRINSrint;
+    unsigned Dest = MI->getOperand(0).getReg();
+    unsigned Src = MI->getOperand(1).getReg();
+    DebugLoc dl   = MI->getDebugLoc();
+
+    MachineRegisterInfo &RegInfo = F->getRegInfo();
+    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+
+    // Perform the rounding.
+    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
+      .addReg(Src);
+
+    // Compare the results.
+    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
+      .addReg(Dest).addReg(Src);
+
+    // If the results were not equal, then set the FPSCR XX bit.
+    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+    F->insert(It, midMBB);
+    F->insert(It, exitMBB);
+    exitMBB->splice(exitMBB->begin(), BB,
+                    llvm::next(MachineBasicBlock::iterator(MI)),
+                    BB->end());
+    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+    BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
+      .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
+
+    BB->addSuccessor(midMBB);
+    BB->addSuccessor(exitMBB);
+
+    BB = midMBB;
+
+    // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
+    // the FI bit here because that will not automatically set XX also,
+    // and XX is what libm interprets as the FE_INEXACT flag.
+    BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
+    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+
+    BB->addSuccessor(exitMBB);
+
+    BB = exitMBB;
   } else {
     llvm_unreachable("Unexpected instr type to insert");
   }
@@ -6251,6 +6689,139 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 // Target Optimization Hooks
 //===----------------------------------------------------------------------===//
 
+SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
+                                               DAGCombinerInfo &DCI) const {
+  if (DCI.isAfterLegalizeVectorOps())
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+
+  if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
+      (VT == MVT::f64 && PPCSubTarget.hasFRE())  ||
+      (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+    // For the reciprocal, we need to find the zero of the function:
+    //   F(X) = A X - 1 [which has a zero at X = 1/A]
+    //     =>
+    //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+    //     does not require additional intermediate precision]
+
+    // Convergence is quadratic, so we essentially double the number of digits
+    // correct after every iteration. The minimum architected relative
+    // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+    // 23 digits and double has 52 digits.
+    int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+    if (VT.getScalarType() == MVT::f64)
+      ++Iterations;
+
+    SelectionDAG &DAG = DCI.DAG;
+    DebugLoc dl = Op.getDebugLoc();
+
+    SDValue FPOne =
+      DAG.getConstantFP(1.0, VT.getScalarType());
+    if (VT.isVector()) {
+      assert(VT.getVectorNumElements() == 4 &&
+             "Unknown vector type");
+      FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+                          FPOne, FPOne, FPOne, FPOne);
+    }
+
+    SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
+    DCI.AddToWorklist(Est.getNode());
+
+    // Newton iterations: Est = Est + Est (1 - Arg * Est)
+    for (int i = 0; i < Iterations; ++i) {
+      SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
+      DCI.AddToWorklist(NewEst.getNode());
+
+      NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
+      DCI.AddToWorklist(NewEst.getNode());
+
+      NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+      DCI.AddToWorklist(NewEst.getNode());
+
+      Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
+      DCI.AddToWorklist(Est.getNode());
+    }
+
+    return Est;
+  }
+
+  return SDValue();
+}
+
+SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
+                                             DAGCombinerInfo &DCI) const {
+  if (DCI.isAfterLegalizeVectorOps())
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+
+  if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
+      (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE())  ||
+      (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+    // For the reciprocal sqrt, we need to find the zero of the function:
+    //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+    //     =>
+    //   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+    // As a result, we precompute A/2 prior to the iteration loop.
+
+    // Convergence is quadratic, so we essentially double the number of digits
+    // correct after every iteration. The minimum architected relative
+    // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+    // 23 digits and double has 52 digits.
+    int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+    if (VT.getScalarType() == MVT::f64)
+      ++Iterations;
+
+    SelectionDAG &DAG = DCI.DAG;
+    DebugLoc dl = Op.getDebugLoc();
+
+    SDValue FPThreeHalves =
+      DAG.getConstantFP(1.5, VT.getScalarType());
+    if (VT.isVector()) {
+      assert(VT.getVectorNumElements() == 4 &&
+             "Unknown vector type");
+      FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+                                  FPThreeHalves, FPThreeHalves,
+                                  FPThreeHalves, FPThreeHalves);
+    }
+
+    SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
+    DCI.AddToWorklist(Est.getNode());
+
+    // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
+    // this entire sequence requires only one FP constant.
+    SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
+    DCI.AddToWorklist(HalfArg.getNode());
+
+    HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
+    DCI.AddToWorklist(HalfArg.getNode());
+
+    // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+    for (int i = 0; i < Iterations; ++i) {
+      SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
+      DCI.AddToWorklist(NewEst.getNode());
+
+      NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
+      DCI.AddToWorklist(NewEst.getNode());
+
+      NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
+      DCI.AddToWorklist(NewEst.getNode());
+
+      Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+      DCI.AddToWorklist(Est.getNode());
+    }
+
+    return Est;
+  }
+
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   const TargetMachine &TM = getTargetMachine();
@@ -6277,7 +6848,72 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
         return N->getOperand(0);
     }
     break;
+  case ISD::FDIV: {
+    assert(TM.Options.UnsafeFPMath &&
+           "Reciprocal estimates require UnsafeFPMath");
+
+    if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
+      SDValue RV =
+        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
+      if (RV.getNode() != 0) {
+        DCI.AddToWorklist(RV.getNode());
+        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+                           N->getOperand(0), RV);
+      }
+    } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
+               N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+      SDValue RV =
+        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+                                 DCI);
+      if (RV.getNode() != 0) {
+        DCI.AddToWorklist(RV.getNode());
+        RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+                         N->getValueType(0), RV);
+        DCI.AddToWorklist(RV.getNode());
+        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+                           N->getOperand(0), RV);
+      }
+    } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
+               N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+      SDValue RV =
+        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+                                 DCI);
+      if (RV.getNode() != 0) {
+        DCI.AddToWorklist(RV.getNode());
+        RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+                         N->getValueType(0), RV,
+                         N->getOperand(1).getOperand(1));
+        DCI.AddToWorklist(RV.getNode());
+        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+                           N->getOperand(0), RV);
+      }
+    }
 
+    SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
+    if (RV.getNode() != 0) {
+      DCI.AddToWorklist(RV.getNode());
+      return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+                         N->getOperand(0), RV);
+    }
+
+    }
+    break;
+  case ISD::FSQRT: {
+    assert(TM.Options.UnsafeFPMath &&
+           "Reciprocal estimates require UnsafeFPMath");
+
+    // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
+    // reciprocal sqrt.
+    SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
+    if (RV.getNode() != 0) {
+      DCI.AddToWorklist(RV.getNode());
+      RV = DAGCombineFastRecip(RV, DCI);
+      if (RV.getNode() != 0)
+        return RV;
+    }
+
+    }
+    break;
   case ISD::SINT_TO_FP:
     if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
       if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
@@ -6324,8 +6960,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
       Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
       DCI.AddToWorklist(Val.getNode());
 
-      Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
-                        N->getOperand(2), N->getOperand(3));
+      SDValue Ops[] = {
+        N->getOperand(0), Val, N->getOperand(2),
+        DAG.getValueType(N->getOperand(1).getValueType())
+      };
+
+      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+              DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+              cast<StoreSDNode>(N)->getMemoryVT(),
+              cast<StoreSDNode>(N)->getMemOperand());
       DCI.AddToWorklist(Val.getNode());
       return Val;
     }
@@ -6335,7 +6978,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
         N->getOperand(1).getOpcode() == ISD::BSWAP &&
         N->getOperand(1).getNode()->hasOneUse() &&
         (N->getOperand(1).getValueType() == MVT::i32 ||
-         N->getOperand(1).getValueType() == MVT::i16)) {
+         N->getOperand(1).getValueType() == MVT::i16 ||
+         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+          N->getOperand(1).getValueType() == MVT::i64))) {
       SDValue BSwapOp = N->getOperand(1).getOperand(0);
       // Do an any-extend to 32-bits if this is a half-word input.
       if (BSwapOp.getValueType() == MVT::i16)
@@ -6356,7 +7002,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
         N->getOperand(0).hasOneUse() &&
-        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
+         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+          N->getValueType(0) == MVT::i64))) {
       SDValue Load = N->getOperand(0);
       LoadSDNode *LD = cast<LoadSDNode>(Load);
       // Create the byte-swapping load.
@@ -6367,8 +7016,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
       };
       SDValue BSLoad =
         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
-                                DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
-                                LD->getMemoryVT(), LD->getMemOperand());
+                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+                                              MVT::i64 : MVT::i32, MVT::Other),
+                                Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
 
       // If this is an i16 load, insert the truncate.
       SDValue ResVal = BSLoad;
@@ -6625,6 +7275,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
     // GCC RS6000 Constraint Letters
     switch (Constraint[0]) {
     case 'b':   // R1-R31
+      if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
     case 'r':   // R0-R31
       if (VT == MVT::i64 && PPCSubTarget.isPPC64())
         return std::make_pair(0U, &PPC::G8RCRegClass);
@@ -6809,13 +7462,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MFI->setFrameAddressIsTaken(true);
-  bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
-               MFI->hasVarSizedObjects()) &&
-                  MFI->getStackSize() &&
-                  !MF.getFunction()->getAttributes().
-                    hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked);
-  unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
-                                (is31 ? PPC::R31 : PPC::R1);
+
+  // Naked functions never have a frame pointer, and so we use r1. For all
+  // other functions, this decision must be delayed until during PEI.
+  unsigned FrameReg;
+  if (MF.getFunction()->getAttributes().hasAttribute(
+        AttributeSet::FunctionIndex, Attribute::Naked))
+    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+  else
+    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
+
   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                          PtrVT);
   while (Depth--)
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 8d44d9ff46b0..423e9839807b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -16,6 +16,8 @@
 #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
 
 #include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "PPCRegisterInfo.h"
 #include "PPCSubtarget.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetLowering.h"
@@ -35,14 +37,21 @@ namespace llvm {
       /// was temporarily in the f64 operand.
       FCFID,
 
+      /// Newer FCFID[US] integer-to-floating-point conversion instructions for
+      /// unsigned integers and single-precision outputs.
+      FCFIDU, FCFIDS, FCFIDUS,
+
       /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
       /// operand, producing an f64 value containing the integer representation
       /// of that FP value.
       FCTIDZ, FCTIWZ,
 
-      /// STFIWX - The STFIWX instruction.  The first operand is an input token
-      /// chain, then an f64 value to store, then an address to store it to.
-      STFIWX,
+      /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
+      /// unsigned integers.
+      FCTIDUZ, FCTIWUZ,
+
+      /// Reciprocal estimate instructions (unary FP ops).
+      FRE, FRSQRTE,
 
       // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
       // three v4f32 operands and producing a v4f32 result.
@@ -90,17 +99,10 @@ namespace llvm {
       /// code.
       SRL, SRA, SHL,
 
-      /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
-      /// registers.
-      EXTSW_32,
-
       /// CALL - A direct function call.
-      /// CALL_NOP_SVR4 is a call with the special  NOP which follows 64-bit
+      /// CALL_NOP is a call with the special NOP which follows 64-bit
       /// SVR4 calls.
-      CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4,
-
-      /// NOP - Special NOP which follows 64-bit SVR4 calls.
-      NOP,
+      CALL, CALL_NOP,
 
       /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
       /// MTCTR instruction.
@@ -108,7 +110,7 @@ namespace llvm {
 
       /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
       /// BCTRL instruction.
-      BCTRL_Darwin, BCTRL_SVR4,
+      BCTRL,
 
       /// Return with a flag operand, matched by 'blr'
       RET_FLAG,
@@ -119,6 +121,12 @@ namespace llvm {
       /// are undefined.
       MFCR,
 
+      // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+      EH_SJLJ_SETJMP,
+
+      // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+      EH_SJLJ_LONGJMP,
+
       /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
       /// instructions.  For lack of better number, we use the opcode number
       /// encoding for the OPC field to identify the compare.  For example, 838
@@ -138,26 +146,13 @@ namespace llvm {
       /// an optional input flag argument.
       COND_BRANCH,
 
-      // The following 5 instructions are used only as part of the
-      // long double-to-int conversion sequence.
-
-      /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
-      /// register.
-      MFFS,
-
-      /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
-      MTFSB0,
-
-      /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
-      MTFSB1,
-
-      /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
-      /// rounding towards zero.  It has flags added so it won't move past the
-      /// FPSCR-setting instructions.
+      /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+      /// towards zero.  Used only as part of the long double-to-int
+      /// conversion sequence.
       FADDRTZ,
 
-      /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
-      MTFSF,
+      /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+      MFFS,
 
       /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
       /// reserve indexed. This is used to implement atomic operations.
@@ -243,14 +238,11 @@ namespace llvm {
       /// optimizations due to constant folding.
       VADD_SPLAT,
 
-      /// STD_32 - This is the STD instruction for use with "32-bit" registers.
-      STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
-
       /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
       /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
       /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
       /// i32.
-      STBRX,
+      STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
 
       /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
       /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
@@ -258,6 +250,20 @@ namespace llvm {
       /// or i32.
       LBRX,
 
+      /// STFIWX - The STFIWX instruction.  The first operand is an input token
+      /// chain, then an f64 value to store, then an address to store it to.
+      STFIWX,
+
+      /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+      /// load which sign-extends from a 32-bit integer value into the
+      /// destination 64-bit register.
+      LFIWAX,
+
+      /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
+      /// load which zero-extends from a 32-bit integer value into the
+      /// destination 64-bit register.
+      LFIWZX,
+
       /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
       /// produces an ADDIS8 instruction that adds the TOC base register to
       /// sym@toc@ha.
@@ -321,6 +327,8 @@ namespace llvm {
 
   class PPCTargetLowering : public TargetLowering {
     const PPCSubtarget &PPCSubTarget;
+    const PPCRegisterInfo *PPCRegInfo;
+    const PPCInstrInfo *PPCII;
 
   public:
     explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -395,6 +403,12 @@ namespace llvm {
                                                 MachineBasicBlock *MBB,
                                             bool is8bit, unsigned Opcode) const;
 
+    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+                                        MachineBasicBlock *MBB) const;
+
+    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+                                         MachineBasicBlock *MBB) const;
+
     ConstraintType getConstraintType(const std::string &Constraint) const;
 
     /// Examine constraint string and operand type and determine a weight value.
@@ -498,7 +512,7 @@ namespace llvm {
                                       const PPCSubtarget &Subtarget) const;
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
-    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
@@ -608,6 +622,12 @@ namespace llvm {
                      const SmallVectorImpl<ISD::InputArg> &Ins,
                      DebugLoc dl, SelectionDAG &DAG,
                      SmallVectorImpl<SDValue> &InVals) const;
+
+    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+
+    SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
+    SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
   };
 }
 
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 01201304f74f..7f76751be4bb 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -30,12 +30,7 @@ def symbolLo64 : Operand<i64> {
   let EncoderMethod = "getLO16Encoding";
 }
 def tocentry : Operand<iPTR> {
-  let MIOperandInfo = (ops i32imm:$imm);
-}
-def memrs : Operand<iPTR> {   // memri where the immediate is a symbolLo64
-  let PrintMethod = "printMemRegImm";
-  let EncoderMethod = "getMemRIXEncoding";
-  let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg);
+  let MIOperandInfo = (ops i64imm:$imm);
 }
 def tlsreg : Operand<i64> {
   let EncoderMethod = "getTLSRegEncoding";
@@ -71,135 +66,136 @@ def HI48_64 : SDNodeXForm<imm, [{
 // Calls.
 //
 
+let Interpretation64Bit = 1 in {
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+  let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
+    def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+        Requires<[In64BitMode]>;
+
+    let isCodeGenOnly = 1 in
+    def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+                             "b${cond:cc}ctr ${cond:reg}", BrB, []>,
+        Requires<[In64BitMode]>;
+  }
+}
+
 let Defs = [LR8] in
   def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
                     PPC970_Unit_BRU;
 
-// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
-  // Convenient aliases for call instructions
-  let Uses = [RM] in {
-    def BL8_Darwin  : IForm<18, 0, 1,
-                            (outs), (ins calltarget:$func),
-                            "bl $func", BrB, []>;  // See Pat patterns below.
-    def BLA8_Darwin : IForm<18, 1, 1,
-                          (outs), (ins aaddr:$func),
-                          "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+  let Defs = [CTR8], Uses = [CTR8] in {
+    def BDZ8  : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+                        "bdz $dst">;
+    def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+                        "bdnz $dst">;
   }
-  let Uses = [CTR8, RM] in {
-    def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, 
-                                  (outs), (ins),
-                                  "bctrl", BrB,
-                                  [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>;
+
+  let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in {
+    def BDZLR8  : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+                              "bdzlr", BrB, []>;
+    def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+                              "bdnzlr", BrB, []>;
   }
 }
 
-// ELF 64 ABI Calls = Darwin ABI Calls
-// Used to define BL8_ELF and BLA8_ELF
+
+
 let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
   // Convenient aliases for call instructions
   let Uses = [RM] in {
-    def BL8_ELF  : IForm<18, 0, 1,
-                         (outs), (ins calltarget:$func),
-                         "bl $func", BrB, []>;  // See Pat patterns below.
+    def BL8  : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+                     "bl $func", BrB, []>;  // See Pat patterns below.
 
-    let isCodeGenOnly = 1 in
-    def BL8_NOP_ELF  : IForm_and_DForm_4_zero<18, 0, 1, 24,
+    def BLA8 : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+                     "bla $func", BrB, [(PPCcall (i64 imm:$func))]>;
+  }
+  let Uses = [RM], isCodeGenOnly = 1 in {
+    def BL8_NOP  : IForm_and_DForm_4_zero<18, 0, 1, 24,
                              (outs), (ins calltarget:$func),
                              "bl $func\n\tnop", BrB, []>;
 
-    let isCodeGenOnly = 1 in
-    def BL8_NOP_ELF_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+    def BL8_NOP_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
                                   (outs), (ins calltarget:$func, tlsgd:$sym),
                                   "bl $func($sym)\n\tnop", BrB, []>;
 
-    let isCodeGenOnly = 1 in
-    def BL8_NOP_ELF_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+    def BL8_NOP_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24,
                                   (outs), (ins calltarget:$func, tlsgd:$sym),
                                   "bl $func($sym)\n\tnop", BrB, []>;
 
-    def BLA8_ELF : IForm<18, 1, 1,
-                         (outs), (ins aaddr:$func),
-                         "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
-
-    let isCodeGenOnly = 1 in
-    def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
+    def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
                              (outs), (ins aaddr:$func),
                              "bla $func\n\tnop", BrB,
-                             [(PPCcall_nop_SVR4 (i64 imm:$func))]>;
+                             [(PPCcall_nop (i64 imm:$func))]>;
   }
-  let Uses = [X11, CTR8, RM] in {
-    def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
-                               (outs), (ins),
-                               "bctrl", BrB,
-                               [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>;
+  let Uses = [CTR8, RM] in {
+    def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+                              "bctrl", BrB, [(PPCbctrl)]>,
+                 Requires<[In64BitMode]>;
+
+    let isCodeGenOnly = 1 in
+    def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+                              "b${cond:cc}ctrl ${cond:reg}", BrB, []>,
+        Requires<[In64BitMode]>;
   }
 }
-
+} // Interpretation64Bit
 
 // Calls
-def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)),
-          (BL8_Darwin tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
-          (BL8_Darwin texternalsym:$dst)>;
-
-def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
-          (BL8_ELF tglobaladdr:$dst)>;
-def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)),
-          (BL8_NOP_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
+          (BL8 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)),
+          (BL8_NOP tglobaladdr:$dst)>;
 
-def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
-          (BL8_ELF texternalsym:$dst)>;
-def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)),
-          (BL8_NOP_ELF texternalsym:$dst)>;
-
-def : Pat<(PPCnop),
-          (NOP)>;
+def : Pat<(PPCcall (i64 texternalsym:$dst)),
+          (BL8 texternalsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
+          (BL8_NOP texternalsym:$dst)>;
 
 // Atomic operations
 let usesCustomInserter = 1 in {
   let Defs = [CR0] in {
     def ATOMIC_LOAD_ADD_I64 : Pseudo<
       (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64",
-      [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
+      [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
     def ATOMIC_LOAD_SUB_I64 : Pseudo<
       (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64",
-      [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
+      [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
     def ATOMIC_LOAD_OR_I64 : Pseudo<
       (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64",
-      [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
+      [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
     def ATOMIC_LOAD_XOR_I64 : Pseudo<
       (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64",
-      [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
+      [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
     def ATOMIC_LOAD_AND_I64 : Pseudo<
       (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64",
-      [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
+      [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
     def ATOMIC_LOAD_NAND_I64 : Pseudo<
       (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64",
-      [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
+      [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
 
     def ATOMIC_CMP_SWAP_I64 : Pseudo<
       (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64",
-      [(set G8RC:$dst, 
-                    (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+      [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
 
     def ATOMIC_SWAP_I64 : Pseudo<
       (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64",
-      [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
+      [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
   }
 }
 
 // Instructions to support atomic operations
 def LDARX : XForm_1<31,  84, (outs G8RC:$rD), (ins memrr:$ptr),
                    "ldarx $rD, $ptr", LdStLDARX,
-                   [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
+                   [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
 
 let Defs = [CR0] in
 def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
                    "stdcx. $rS, $dst", LdStSTDCX,
-                   [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+                   [(PPCstcx i64:$rS, xoaddr:$dst)]>,
                    isDOT;
 
+let Interpretation64Bit = 1 in {
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
 def TCRETURNdi8 :Pseudo< (outs),
                         (ins calltarget:$dst, i32imm:$offset),
@@ -216,17 +212,12 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
                  "#TC_RETURNr8 $dst $offset",
                  []>;
 
+let isCodeGenOnly = 1 in {
 
 let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
-    isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in {
-  let isReturn = 1 in {
-    def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
-        Requires<[In64BitMode]>;
-  }
-
-  def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
-      Requires<[In64BitMode]>;
-}
+    isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+    Requires<[In64BitMode]>;
 
 
 let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
@@ -242,6 +233,9 @@ def TAILBA8   : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
                   "ba $dst", BrB,
                   []>;
 
+}
+} // Interpretation64Bit
+
 def : Pat<(PPCtc_return (i64 tglobaladdr:$dst),  imm:$imm),
           (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
 
@@ -251,28 +245,37 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
 def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
           (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
 
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
-  let Defs = [CTR8], Uses = [CTR8] in {
-    def BDZ8  : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
-                        "bdz $dst">;
-    def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
-                        "bdnz $dst">;
-  }
-}
 
-// 64-but CR instructions
+// 64-bit CR instructions
+let Interpretation64Bit = 1 in {
+let neverHasSideEffects = 1 in {
 def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
                       "mtcrf $FXM, $rS", BrMCRX>,
             PPC970_MicroCode, PPC970_Unit_CRU;
 
+let isCodeGenOnly = 1 in
 def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
                        "#MFCR8pseud", SprMFCR>,
             PPC970_MicroCode, PPC970_Unit_CRU;
-            
+} // neverHasSideEffects = 1
+
+let neverHasSideEffects = 1 in
 def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
                      "mfcr $rT", SprMFCR>,
                      PPC970_MicroCode, PPC970_Unit_CRU;
 
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+  def EH_SjLj_SetJmp64  : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+                            "#EH_SJLJ_SETJMP64",
+                            [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+                          Requires<[In64BitMode]>;
+  let isTerminator = 1 in
+  def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+                            "#EH_SJLJ_LONGJMP64",
+                            [(PPCeh_sjlj_longjmp addr:$buf)]>,
+                          Requires<[In64BitMode]>;
+}
+
 //===----------------------------------------------------------------------===//
 // 64-bit SPR manipulation instrs.
 
@@ -281,13 +284,13 @@ def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
                            "mfctr $rT", SprMFSPR>,
              PPC970_DGroup_First, PPC970_Unit_FXU;
 }
-let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in {
+let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
 def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
                            "mtctr $rS", SprMTSPR>,
              PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 
-let Pattern = [(set G8RC:$rT, readcyclecounter)] in
+let Pattern = [(set i64:$rT, readcyclecounter)] in
 def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
                           "mfspr $rT, 268", SprMFTB>,
             PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -298,8 +301,8 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
 
 let Defs = [X1], Uses = [X1] in
 def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8",
-                       [(set G8RC:$result,
-                             (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
+                       [(set i64:$result,
+                             (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
 
 let Defs = [LR8] in {
 def MTLR8  : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
@@ -311,234 +314,251 @@ def MFLR8  : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
                            "mflr $rT", SprMFSPR>,
              PPC970_DGroup_First, PPC970_Unit_FXU;
 }
+} // Interpretation64Bit
 
 //===----------------------------------------------------------------------===//
 // Fixed point instructions.
 //
 
 let PPC970_Unit = 1 in {  // FXU Operations.
+let Interpretation64Bit = 1 in {
+let neverHasSideEffects = 1 in {
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
 def LI8  : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
                       "li $rD, $imm", IntSimple,
-                      [(set G8RC:$rD, immSExt16:$imm)]>;
+                      [(set i64:$rD, immSExt16:$imm)]>;
 def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
                       "lis $rD, $imm", IntSimple,
-                      [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+                      [(set i64:$rD, imm16ShiftedSExt:$imm)]>;
 }
 
 // Logical ops.
-def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "nand $rA, $rS, $rB", IntSimple,
-                   [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
-def AND8 : XForm_6<31,  28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "and $rA, $rS, $rB", IntSimple,
-                   [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
-def ANDC8: XForm_6<31,  60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "andc $rA, $rS, $rB", IntSimple,
-                   [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
-def OR8  : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "or $rA, $rS, $rB", IntSimple,
-                   [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
-def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "nor $rA, $rS, $rB", IntSimple,
-                   [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
-def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "orc $rA, $rS, $rB", IntSimple,
-                   [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
-def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "eqv $rA, $rS, $rB", IntSimple,
-                   [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
-def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "xor $rA, $rS, $rB", IntSimple,
-                   [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
+defm NAND8: XForm_6r<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+                     "nand", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
+defm AND8 : XForm_6r<31,  28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+                     "and", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
+defm ANDC8: XForm_6r<31,  60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+                     "andc", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
+defm OR8  : XForm_6r<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+                     "or", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
+defm NOR8 : XForm_6r<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+                     "nor", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
+defm ORC8 : XForm_6r<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+                     "orc", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
+defm EQV8 : XForm_6r<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+                     "eqv", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
+defm XOR8 : XForm_6r<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+                     "xor", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
 
 // Logical ops with immediate.
+let Defs = [CR0] in {
 def ANDIo8  : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
                       "andi. $dst, $src1, $src2", IntGeneral,
-                      [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>,
+                      [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
                       isDOT;
 def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
                      "andis. $dst, $src1, $src2", IntGeneral,
-                    [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
+                    [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
                      isDOT;
+}
 def ORI8    : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
                       "ori $dst, $src1, $src2", IntSimple,
-                      [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
+                      [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
 def ORIS8   : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
                       "oris $dst, $src1, $src2", IntSimple,
-                    [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+                    [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
 def XORI8   : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
                       "xori $dst, $src1, $src2", IntSimple,
-                      [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
+                      [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
 def XORIS8  : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
                       "xoris $dst, $src1, $src2", IntSimple,
-                   [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+                   [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
 
-def ADD8  : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "add $rT, $rA, $rB", IntSimple,
-                     [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+defm ADD8  : XOForm_1r<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+                       "add", "$rT, $rA, $rB", IntSimple,
+                       [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
 // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
 // initial-exec thread-local storage model.
+let isCodeGenOnly = 1 in
 def ADD8TLS  : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
                         "add $rT, $rA, $rB@tls", IntSimple,
-                        [(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>;
+                        [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
                      
-let Defs = [CARRY] in {
-def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "addc $rT, $rA, $rB", IntGeneral,
-                     [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
-                     PPC970_DGroup_Cracked;
+defm ADDC8 : XOForm_1rc<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+                        "addc", "$rT, $rA, $rB", IntGeneral,
+                        [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
+                        PPC970_DGroup_Cracked;
+let Defs = [CARRY] in
 def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
                      "addic $rD, $rA, $imm", IntGeneral,
-                     [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
-}
-def ADDI8  : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
-                     "addi $rD, $rA, $imm", IntSimple,
-                     [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDI8L  : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm),
+                     [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>;
+def ADDI8  : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm),
                      "addi $rD, $rA, $imm", IntSimple,
-                     [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+                     [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm),
                      "addis $rD, $rA, $imm", IntSimple,
-                     [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+                     [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
 
 let Defs = [CARRY] in {
 def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
                      "subfic $rD, $rA, $imm", IntGeneral,
-                     [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
-def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                      "subfc $rT, $rA, $rB", IntGeneral,
-                      [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
-                      PPC970_DGroup_Cracked;
-}
-def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "subf $rT, $rA, $rB", IntGeneral,
-                     [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
-def NEG8    : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
-                       "neg $rT, $rA", IntSimple,
-                       [(set G8RC:$rT, (ineg G8RC:$rA))]>;
-let Uses = [CARRY], Defs = [CARRY] in {
-def ADDE8   : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                       "adde $rT, $rA, $rB", IntGeneral,
-                       [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
-def ADDME8  : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
-                       "addme $rT, $rA", IntGeneral,
-                       [(set G8RC:$rT, (adde G8RC:$rA, -1))]>;
-def ADDZE8  : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
-                       "addze $rT, $rA", IntGeneral,
-                       [(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
-def SUBFE8  : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                       "subfe $rT, $rA, $rB", IntGeneral,
-                       [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
-def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
-                       "subfme $rT, $rA", IntGeneral,
-                       [(set G8RC:$rT, (sube -1, G8RC:$rA))]>;
-def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
-                       "subfze $rT, $rA", IntGeneral,
-                       [(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
-}
-
-
-def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "mulhd $rT, $rA, $rB", IntMulHW,
-                     [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>;
-def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "mulhdu $rT, $rA, $rB", IntMulHWU,
-                     [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>;
-
-def CMPD   : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
-                          "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
-def CMPLD  : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
-                          "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
-def CMPDI  : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm),
-                         "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
-def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
-                         "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
-
-def SLD  : XForm_6<31,  27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
-                   "sld $rA, $rS, $rB", IntRotateD,
-                   [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64;
-def SRD  : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
-                   "srd $rA, $rS, $rB", IntRotateD,
-                   [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64;
-let Defs = [CARRY] in {
-def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
-                   "srad $rA, $rS, $rB", IntRotateD,
-                   [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+                     [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>;
+defm SUBFC8 : XOForm_1r<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+                        "subfc", "$rT, $rA, $rB", IntGeneral,
+                        [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
+                        PPC970_DGroup_Cracked;
+}
+defm SUBF8 : XOForm_1r<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+                       "subf", "$rT, $rA, $rB", IntGeneral,
+                       [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
+defm NEG8    : XOForm_3r<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+                        "neg", "$rT, $rA", IntSimple,
+                        [(set i64:$rT, (ineg i64:$rA))]>;
+let Uses = [CARRY] in {
+defm ADDE8   : XOForm_1rc<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+                          "adde", "$rT, $rA, $rB", IntGeneral,
+                          [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
+defm ADDME8  : XOForm_3rc<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+                          "addme", "$rT, $rA", IntGeneral,
+                          [(set i64:$rT, (adde i64:$rA, -1))]>;
+defm ADDZE8  : XOForm_3rc<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+                          "addze", "$rT, $rA", IntGeneral,
+                          [(set i64:$rT, (adde i64:$rA, 0))]>;
+defm SUBFE8  : XOForm_1rc<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+                          "subfe", "$rT, $rA, $rB", IntGeneral,
+                          [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
+defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+                          "subfme", "$rT, $rA", IntGeneral,
+                          [(set i64:$rT, (sube -1, i64:$rA))]>;
+defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+                          "subfze", "$rT, $rA", IntGeneral,
+                          [(set i64:$rT, (sube 0, i64:$rA))]>;
 }
-                   
-def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
-                      "extsb $rA, $rS", IntSimple,
-                      [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
-def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
-                      "extsh $rA, $rS", IntSimple,
-                      [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
-
-def EXTSW  : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
-                      "extsw $rA, $rS", IntSimple,
-                      [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
-/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
-def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
-                      "extsw $rA, $rS", IntSimple,
-                      [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
-def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
-                      "extsw $rA, $rS", IntSimple,
-                      [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
 
-let Defs = [CARRY] in {
-def SRADI  : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
-                      "sradi $rA, $rS, $SH", IntRotateDI,
-                      [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
-}
-def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
-                      "cntlzd $rA, $rS", IntGeneral,
-                      [(set G8RC:$rA, (ctlz G8RC:$rS))]>;
-
-def DIVD  : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "divd $rT, $rA, $rB", IntDivD,
-                     [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
-                     PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "divdu $rT, $rA, $rB", IntDivD,
-                     [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
-                     PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "mulld $rT, $rA, $rB", IntMulHD,
-                     [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64;
 
+defm MULHD : XOForm_1r<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+                       "mulhd", "$rT, $rA, $rB", IntMulHW,
+                       [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
+defm MULHDU : XOForm_1r<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+                       "mulhdu", "$rT, $rA, $rB", IntMulHWU,
+                       [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
+}
+} // Interpretation64Bit
+
+let isCompare = 1, neverHasSideEffects = 1 in {
+  def CMPD   : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+                            "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+  def CMPLD  : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+                            "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
+  def CMPDI  : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm),
+                           "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
+  def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
+                           "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
+}
+
+let neverHasSideEffects = 1 in {
+defm SLD  : XForm_6r<31,  27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+                     "sld", "$rA, $rS, $rB", IntRotateD,
+                     [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
+defm SRD  : XForm_6r<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+                     "srd", "$rA, $rS, $rB", IntRotateD,
+                     [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
+defm SRAD : XForm_6rc<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+                      "srad", "$rA, $rS, $rB", IntRotateD,
+                      [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
+
+let Interpretation64Bit = 1 in { 
+defm EXTSB8 : XForm_11r<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
+                        "extsb", "$rA, $rS", IntSimple,
+                        [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
+defm EXTSH8 : XForm_11r<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
+                        "extsh", "$rA, $rS", IntSimple,
+                        [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
+} // Interpretation64Bit
+
+defm EXTSW  : XForm_11r<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
+                        "extsw", "$rA, $rS", IntSimple,
+                        [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
+let Interpretation64Bit = 1 in
+defm EXTSW_32_64 : XForm_11r<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
+                             "extsw", "$rA, $rS", IntSimple,
+                             [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
+
+defm SRADI  : XSForm_1rc<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
+                         "sradi", "$rA, $rS, $SH", IntRotateDI,
+                         [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
+defm CNTLZD : XForm_11r<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
+                        "cntlzd", "$rA, $rS", IntGeneral,
+                        [(set i64:$rA, (ctlz i64:$rS))]>;
+defm POPCNTD : XForm_11r<31, 506, (outs G8RC:$rA), (ins G8RC:$rS),
+                         "popcntd", "$rA, $rS", IntGeneral,
+                         [(set i64:$rA, (ctpop i64:$rS))]>;
+
+// popcntw also does a population count on the high 32 bits (storing the
+// results in the high 32-bits of the output). We'll ignore that here (which is
+// safe because we never separately use the high part of the 64-bit registers).
+defm POPCNTW : XForm_11r<31, 378, (outs GPRC:$rA), (ins GPRC:$rS),
+                         "popcntw", "$rA, $rS", IntGeneral,
+                         [(set i32:$rA, (ctpop i32:$rS))]>;
+
+defm DIVD  : XOForm_1r<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+                       "divd", "$rT, $rA, $rB", IntDivD,
+                       [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
+                       PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVDU : XOForm_1r<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+                       "divdu", "$rT, $rA, $rB", IntDivD,
+                       [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
+                       PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm MULLD : XOForm_1r<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+                       "mulld", "$rT, $rA, $rB", IntMulHD,
+                       [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+}
 
+let neverHasSideEffects = 1 in {
 let isCommutable = 1 in {
-def RLDIMI : MDForm_1<30, 3,
-                      (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
-                      "rldimi $rA, $rS, $SH, $MB", IntRotateDI,
-                      []>, isPPC64, RegConstraint<"$rSi = $rA">,
-                      NoEncode<"$rSi">;
+defm RLDIMI : MDForm_1r<30, 3, (outs G8RC:$rA),
+                        (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
+                        "rldimi", "$rA, $rS, $SH, $MB", IntRotateDI,
+                        []>, isPPC64, RegConstraint<"$rSi = $rA">,
+                        NoEncode<"$rSi">;
 }
 
 // Rotate instructions.
-def RLDCL  : MDForm_1<30, 0,
-                      (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE),
-                      "rldcl $rA, $rS, $rB, $MBE", IntRotateD,
-                      []>, isPPC64;
-def RLDICL : MDForm_1<30, 0,
-                      (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
-                      "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
-                      []>, isPPC64;
-def RLDICR : MDForm_1<30, 1,
-                      (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
-                      "rldicr $rA, $rS, $SH, $MBE", IntRotateDI,
-                      []>, isPPC64;
-
-def RLWINM8 : MForm_2<21,
-                     (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
-                     "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
-                     []>;
-
+defm RLDCL  : MDForm_1r<30, 0,
+                        (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE),
+                        "rldcl", "$rA, $rS, $rB, $MBE", IntRotateD,
+                        []>, isPPC64;
+defm RLDICL : MDForm_1r<30, 0,
+                        (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
+                        "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
+                        []>, isPPC64;
+defm RLDICR : MDForm_1r<30, 1,
+                        (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
+                        "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,
+                        []>, isPPC64;
+
+let Interpretation64Bit = 1 in {
+defm RLWINM8 : MForm_2r<21, (outs G8RC:$rA),
+                        (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+                        "rlwinm", "$rA, $rS, $SH, $MB, $ME", IntGeneral,
+                        []>;
+
+let isSelect = 1 in
 def ISEL8   : AForm_4<31, 15,
-                     (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
+                     (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond),
                      "isel $rT, $rA, $rB, $cond", IntGeneral,
                      []>;
+}  // Interpretation64Bit
+}  // neverHasSideEffects = 1
 }  // End FXU Operations.
 
 
@@ -549,130 +569,135 @@ def ISEL8   : AForm_4<31, 15,
 
 // Sign extending loads.
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let Interpretation64Bit = 1 in
 def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
                   "lha $rD, $src", LdStLHA,
-                  [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>,
+                  [(set i64:$rD, (sextloadi16 iaddr:$src))]>,
                   PPC970_DGroup_Cracked;
 def LWA  : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
                     "lwa $rD, $src", LdStLWA,
-                    [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+                    [(set i64:$rD,
+                          (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
                     PPC970_DGroup_Cracked;
+let Interpretation64Bit = 1 in
 def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
                    "lhax $rD, $src", LdStLHA,
-                   [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>,
+                   [(set i64:$rD, (sextloadi16 xaddr:$src))]>,
                    PPC970_DGroup_Cracked;
 def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
                    "lwax $rD, $src", LdStLHA,
-                   [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+                   [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
                    PPC970_DGroup_Cracked;
 
 // Update forms.
-let mayLoad = 1 in
-def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
-                            ptr_rc:$rA),
-                    "lhau $rD, $disp($rA)", LdStLHAU,
-                    []>, RegConstraint<"$rA = $ea_result">,
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Interpretation64Bit = 1 in
+def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+                    (ins memri:$addr),
+                    "lhau $rD, $addr", LdStLHAU,
+                    []>, RegConstraint<"$addr.reg = $ea_result">,
                     NoEncode<"$ea_result">;
 // NO LWAU!
 
-def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+let Interpretation64Bit = 1 in
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
                     (ins memrr:$addr),
                     "lhaux $rD, $addr", LdStLHAU,
-                    []>, RegConstraint<"$addr.offreg = $ea_result">,
+                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                     NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
                     (ins memrr:$addr),
                     "lwaux $rD, $addr", LdStLHAU,
-                    []>, RegConstraint<"$addr.offreg = $ea_result">,
+                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                     NoEncode<"$ea_result">, isPPC64;
 }
+}
 
+let Interpretation64Bit = 1 in {
 // Zero extending loads.
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {
 def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
                   "lbz $rD, $src", LdStLoad,
-                  [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
+                  [(set i64:$rD, (zextloadi8 iaddr:$src))]>;
 def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
                   "lhz $rD, $src", LdStLoad,
-                  [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
+                  [(set i64:$rD, (zextloadi16 iaddr:$src))]>;
 def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
                   "lwz $rD, $src", LdStLoad,
-                  [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+                  [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
 
 def LBZX8 : XForm_1<31,  87, (outs G8RC:$rD), (ins memrr:$src),
                    "lbzx $rD, $src", LdStLoad,
-                   [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
+                   [(set i64:$rD, (zextloadi8 xaddr:$src))]>;
 def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
                    "lhzx $rD, $src", LdStLoad,
-                   [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
+                   [(set i64:$rD, (zextloadi16 xaddr:$src))]>;
 def LWZX8 : XForm_1<31,  23, (outs G8RC:$rD), (ins memrr:$src),
                    "lwzx $rD, $src", LdStLoad,
-                   [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
+                   [(set i64:$rD, (zextloadi32 xaddr:$src))]>;
                    
                    
 // Update forms.
-let mayLoad = 1 in {
-def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                     "lbzu $rD, $addr", LdStLoadUpd,
                     []>, RegConstraint<"$addr.reg = $ea_result">,
                     NoEncode<"$ea_result">;
-def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                     "lhzu $rD, $addr", LdStLoadUpd,
                     []>, RegConstraint<"$addr.reg = $ea_result">,
                     NoEncode<"$ea_result">;
-def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                     "lwzu $rD, $addr", LdStLoadUpd,
                     []>, RegConstraint<"$addr.reg = $ea_result">,
                     NoEncode<"$ea_result">;
 
-def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lbzux $rD, $addr", LdStLoadUpd,
-                   []>, RegConstraint<"$addr.offreg = $ea_result">,
+                   []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lhzux $rD, $addr", LdStLoadUpd,
-                   []>, RegConstraint<"$addr.offreg = $ea_result">,
+                   []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
-def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lwzux $rD, $addr", LdStLoadUpd,
-                   []>, RegConstraint<"$addr.offreg = $ea_result">,
+                   []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 }
 }
+} // Interpretation64Bit
 
 
 // Full 8-byte loads.
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {
 def LD   : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
                     "ld $rD, $src", LdStLD,
-                    [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
-def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src),
-                    "ld $rD, $src", LdStLD,
-                    []>, isPPC64;
+                    [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
 // The following three definitions are selected for small code model only.
 // Otherwise, we need to create two instructions to form a 32-bit offset,
 // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
 def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
                   "#LDtoc",
-                  [(set G8RC:$rD,
-                     (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+                  [(set i64:$rD,
+                     (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
 def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
                   "#LDtocJTI",
-                  [(set G8RC:$rD,
-                     (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64;
+                  [(set i64:$rD,
+                     (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
 def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
                   "#LDtocCPT",
-                  [(set G8RC:$rD,
-                     (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64;
+                  [(set i64:$rD,
+                     (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
 
-let hasSideEffects = 1 in { 
+let hasSideEffects = 1, isCodeGenOnly = 1 in {
 let RST = 2, DS = 2 in
 def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg),
                     "ld 2, 8($reg)", LdStLD,
-                    [(PPCload_toc G8RC:$reg)]>, isPPC64;
+                    [(PPCload_toc i64:$reg)]>, isPPC64;
                     
 let RST = 2, DS = 10, RA = 1 in
 def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
@@ -681,20 +706,24 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
 }
 def LDX  : XForm_1<31,  21, (outs G8RC:$rD), (ins memrr:$src),
                    "ldx $rD, $src", LdStLD,
-                   [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
-                   
-let mayLoad = 1 in
-def LDU  : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+                   [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+def LDBRX : XForm_1<31,  532, (outs G8RC:$rD), (ins memrr:$src),
+                   "ldbrx $rD, $src", LdStLoad,
+                   [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LDU  : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
                     "ldu $rD, $addr", LdStLDU,
                     []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
                     NoEncode<"$ea_result">;
 
-def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "ldux $rD, $addr", LdStLDU,
-                   []>, RegConstraint<"$addr.offreg = $ea_result">,
+                   []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">, isPPC64;
 }
+}
 
 def : Pat<(PPCload ixaddr:$src),
           (LD ixaddr:$src)>;
@@ -702,188 +731,173 @@ def : Pat<(PPCload xaddr:$src),
           (LDX xaddr:$src)>;
 
 // Support for medium and large code model.
-def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
+def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
                        "#ADDIStocHA",
-                       [(set G8RC:$rD,
-                         (PPCaddisTocHA G8RC:$reg, tglobaladdr:$disp))]>,
+                       [(set i64:$rD,
+                         (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>,
                        isPPC64;
-def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg),
                    "#LDtocL",
-                   [(set G8RC:$rD,
-                     (PPCldTocL tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
-def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
+                   [(set i64:$rD,
+                     (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
                      "#ADDItocL",
-                     [(set G8RC:$rD,
-                       (PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64;
+                     [(set i64:$rD,
+                       (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
 
 // Support for thread-local storage.
-def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
                          "#ADDISgotTprelHA",
-                         [(set G8RC:$rD,
-                           (PPCaddisGotTprelHA G8RC:$reg,
+                         [(set i64:$rD,
+                           (PPCaddisGotTprelHA i64:$reg,
                                                tglobaltlsaddr:$disp))]>,
                   isPPC64;
-def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC:$reg),
+def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg),
                         "#LDgotTprelL",
-                        [(set G8RC:$rD,
-                          (PPCldGotTprelL tglobaltlsaddr:$disp, G8RC:$reg))]>,
+                        [(set i64:$rD,
+                          (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
                  isPPC64;
-def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g),
-          (ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>;
-def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
+          (ADD8TLS $in, tglobaltlsaddr:$g)>;
+def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
                          "#ADDIStlsgdHA",
-                         [(set G8RC:$rD,
-                           (PPCaddisTlsgdHA G8RC:$reg, tglobaltlsaddr:$disp))]>,
+                         [(set i64:$rD,
+                           (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
                   isPPC64;
-def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
                        "#ADDItlsgdL",
-                       [(set G8RC:$rD,
-                         (PPCaddiTlsgdL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+                       [(set i64:$rD,
+                         (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
                  isPPC64;
 def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
                         "#GETtlsADDR",
-                        [(set G8RC:$rD,
-                          (PPCgetTlsAddr G8RC:$reg, tglobaltlsaddr:$sym))]>,
+                        [(set i64:$rD,
+                          (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
                  isPPC64;
-def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
                          "#ADDIStlsldHA",
-                         [(set G8RC:$rD,
-                           (PPCaddisTlsldHA G8RC:$reg, tglobaltlsaddr:$disp))]>,
+                         [(set i64:$rD,
+                           (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
                   isPPC64;
-def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
                        "#ADDItlsldL",
-                       [(set G8RC:$rD,
-                         (PPCaddiTlsldL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+                       [(set i64:$rD,
+                         (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
                  isPPC64;
 def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
                           "#GETtlsldADDR",
-                          [(set G8RC:$rD,
-                            (PPCgetTlsldAddr G8RC:$reg, tglobaltlsaddr:$sym))]>,
+                          [(set i64:$rD,
+                            (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
                    isPPC64;
-def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
                           "#ADDISdtprelHA",
-                          [(set G8RC:$rD,
-                            (PPCaddisDtprelHA G8RC:$reg,
+                          [(set i64:$rD,
+                            (PPCaddisDtprelHA i64:$reg,
                                               tglobaltlsaddr:$disp))]>,
                    isPPC64;
-def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
                          "#ADDIdtprelL",
-                         [(set G8RC:$rD,
-                           (PPCaddiDtprelL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+                         [(set i64:$rD,
+                           (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
                   isPPC64;
 
 let PPC970_Unit = 2 in {
+let Interpretation64Bit = 1 in {
 // Truncating stores.                       
 def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
                    "stb $rS, $src", LdStStore,
-                   [(truncstorei8 G8RC:$rS, iaddr:$src)]>;
+                   [(truncstorei8 i64:$rS, iaddr:$src)]>;
 def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
                    "sth $rS, $src", LdStStore,
-                   [(truncstorei16 G8RC:$rS, iaddr:$src)]>;
+                   [(truncstorei16 i64:$rS, iaddr:$src)]>;
 def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
                    "stw $rS, $src", LdStStore,
-                   [(truncstorei32 G8RC:$rS, iaddr:$src)]>;
+                   [(truncstorei32 i64:$rS, iaddr:$src)]>;
 def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
                    "stbx $rS, $dst", LdStStore,
-                   [(truncstorei8 G8RC:$rS, xaddr:$dst)]>, 
+                   [(truncstorei8 i64:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
 def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
                    "sthx $rS, $dst", LdStStore,
-                   [(truncstorei16 G8RC:$rS, xaddr:$dst)]>, 
+                   [(truncstorei16 i64:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
 def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
                    "stwx $rS, $dst", LdStStore,
-                   [(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
+                   [(truncstorei32 i64:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
+} // Interpretation64Bit
+
 // Normal 8-byte stores.
 def STD  : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
                     "std $rS, $dst", LdStSTD,
-                    [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+                    [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
 def STDX  : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
                    "stdx $rS, $dst", LdStSTD,
-                   [(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
+                   [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+                   PPC970_DGroup_Cracked;
+def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
+                   "stdbrx $rS, $dst", LdStStore,
+                   [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
                    PPC970_DGroup_Cracked;
 }
 
-let PPC970_Unit = 2 in {
+// Stores with Update (pre-inc).
+let PPC970_Unit = 2, mayStore = 1 in {
+let Interpretation64Bit = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+                   "stbu $rS, $dst", LdStStoreUpd, []>,
+                   RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+                   "sthu $rS, $dst", LdStStoreUpd, []>,
+                   RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+                   "stwu $rS, $dst", LdStStoreUpd, []>,
+                   RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst),
+                   "stdu $rS, $dst", LdStSTDU, []>,
+                   RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+                   isPPC64;
 
-def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
-                             symbolLo:$ptroff, ptr_rc:$ptrreg),
-                    "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
-                    [(set ptr_rc:$ea_res,
-                          (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, 
-                                         iaddroff:$ptroff))]>,
-                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
-                             symbolLo:$ptroff, ptr_rc:$ptrreg),
-                    "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
-                    [(set ptr_rc:$ea_res,
-                        (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, 
-                                        iaddroff:$ptroff))]>,
-                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
-                             symbolLo:$ptroff, ptr_rc:$ptrreg),
-                    "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
-                    [(set ptr_rc:$ea_res,
-                          (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
-                                          iaddroff:$ptroff))]>,
-                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
-                                        s16immX4:$ptroff, ptr_rc:$ptrreg),
-                    "stdu $rS, $ptroff($ptrreg)", LdStSTDU,
-                    [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, 
-                                                     iaddroff:$ptroff))]>,
-                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
-                    isPPC64;
-
-
-def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
-                              (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
-                    "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
-                    [(set ptr_rc:$ea_res,
-                       (pre_truncsti8 G8RC:$rS,
-                                      ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
-                    RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+                    "stbux $rS, $dst", LdStStoreUpd, []>,
+                    RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked;
-
-def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
-                              (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
-                    "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
-                    [(set ptr_rc:$ea_res,
-                       (pre_truncsti16 G8RC:$rS,
-                                       ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
-                    RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+                    "sthux $rS, $dst", LdStStoreUpd, []>,
+                    RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked;
-
-def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
-                              (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
-                    "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
-                    [(set ptr_rc:$ea_res,
-                       (pre_truncsti32 G8RC:$rS,
-                                       ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
-                    RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+                    "stwux $rS, $dst", LdStStoreUpd, []>,
+                    RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked;
+} // Interpretation64Bit
 
-def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
-                              (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
-                    "stdux $rS, $ptroff, $ptrreg", LdStSTDU,
-                    [(set ptr_rc:$ea_res,
-                       (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
-                    RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+                    "stdux $rS, $dst", LdStSTDU, []>,
+                    RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked, isPPC64;
-
-// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
-def STD_32  : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
-                       "std $rT, $dst", LdStSTD,
-                       [(PPCstd_32  GPRC:$rT, ixaddr:$dst)]>, isPPC64;
-def STDX_32  : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
-                       "stdx $rT, $dst", LdStSTD,
-                       [(PPCstd_32  GPRC:$rT, xaddr:$dst)]>, isPPC64,
-                       PPC970_DGroup_Cracked;
 }
 
+// Patterns to match the pre-inc stores.  We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+          (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+          (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+          (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+          (STDU $rS, iaddroff:$ptroff, $ptrreg)>;
+
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (STBUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (STHUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (STWUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (STDUX $rS, $ptrreg, $ptroff)>;
 
 
 //===----------------------------------------------------------------------===//
@@ -891,13 +905,30 @@ def STDX_32  : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
 //
 
 
-let PPC970_Unit = 3, Uses = [RM] in {  // FPU Operations.
-def FCFID  : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fcfid $frD, $frB", FPGeneral,
-                      [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64;
-def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fctidz $frD, $frB", FPGeneral,
-                      [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64;
+let PPC970_Unit = 3, neverHasSideEffects = 1,
+    Uses = [RM] in {  // FPU Operations.
+defm FCFID  : XForm_26r<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "fcfid", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
+defm FCTIDZ : XForm_26r<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "fctidz", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+
+defm FCFIDU  : XForm_26r<63, 974, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "fcfidu", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+defm FCFIDS  : XForm_26r<59, 846, (outs F4RC:$frD), (ins F8RC:$frB),
+                        "fcfids", "$frD, $frB", FPGeneral,
+                        [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+defm FCFIDUS : XForm_26r<59, 974, (outs F4RC:$frD), (ins F8RC:$frB),
+                        "fcfidus", "$frD, $frB", FPGeneral,
+                        [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+defm FCTIDUZ : XForm_26r<63, 943, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "fctiduz", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+defm FCTIWUZ : XForm_26r<63, 143, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "fctiwuz", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
 }
 
 
@@ -906,13 +937,13 @@ def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
 //
 
 // Extensions and truncates to/from 32-bit regs.
-def : Pat<(i64 (zext GPRC:$in)),
-          (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32),
+def : Pat<(i64 (zext i32:$in)),
+          (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
                   0, 32)>;
-def : Pat<(i64 (anyext GPRC:$in)),
-          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32)>;
-def : Pat<(i32 (trunc G8RC:$in)),
-          (EXTRACT_SUBREG G8RC:$in, sub_32)>;
+def : Pat<(i64 (anyext i32:$in)),
+          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>;
+def : Pat<(i32 (trunc i64:$in)),
+          (EXTRACT_SUBREG $in, sub_32)>;
 
 // Extending loads with i64 targets.
 def : Pat<(zextloadi1 iaddr:$src),
@@ -939,24 +970,24 @@ def : Pat<(extloadi32 xaddr:$src),
 // Standard shifts.  These are represented separately from the real shifts above
 // so that we can distinguish between shifts that allow 6-bit and 7-bit shift
 // amounts.
-def : Pat<(sra G8RC:$rS, GPRC:$rB),
-          (SRAD G8RC:$rS, GPRC:$rB)>;
-def : Pat<(srl G8RC:$rS, GPRC:$rB),
-          (SRD G8RC:$rS, GPRC:$rB)>;
-def : Pat<(shl G8RC:$rS, GPRC:$rB),
-          (SLD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(sra i64:$rS, i32:$rB),
+          (SRAD $rS, $rB)>;
+def : Pat<(srl i64:$rS, i32:$rB),
+          (SRD $rS, $rB)>;
+def : Pat<(shl i64:$rS, i32:$rB),
+          (SLD $rS, $rB)>;
 
 // SHL/SRL
-def : Pat<(shl G8RC:$in, (i32 imm:$imm)),
-          (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>;
-def : Pat<(srl G8RC:$in, (i32 imm:$imm)),
-          (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>;
+def : Pat<(shl i64:$in, (i32 imm:$imm)),
+          (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl i64:$in, (i32 imm:$imm)),
+          (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>;
 
 // ROTL
-def : Pat<(rotl G8RC:$in, GPRC:$sh),
-          (RLDCL G8RC:$in, GPRC:$sh, 0)>;
-def : Pat<(rotl G8RC:$in, (i32 imm:$imm)),
-          (RLDICL G8RC:$in, imm:$imm, 0)>;
+def : Pat<(rotl i64:$in, i32:$sh),
+          (RLDCL $in, $sh, 0)>;
+def : Pat<(rotl i64:$in, (i32 imm:$imm)),
+          (RLDICL $in, imm:$imm, 0)>;
 
 // Hi and Lo for Darwin Global Addresses.
 def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
@@ -967,15 +998,25 @@ def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
 def : Pat<(PPClo tjumptable:$in , 0), (LI8  tjumptable:$in)>;
 def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
 def : Pat<(PPClo tblockaddress:$in, 0), (LI8  tblockaddress:$in)>;
-def : Pat<(PPChi tglobaltlsaddr:$g, G8RC:$in),
-          (ADDIS8 G8RC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(PPClo tglobaltlsaddr:$g, G8RC:$in),
-          (ADDI8L G8RC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
-          (ADDIS8 G8RC:$in, tglobaladdr:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
-          (ADDIS8 G8RC:$in, tconstpool:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
-          (ADDIS8 G8RC:$in, tjumptable:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
-          (ADDIS8 G8RC:$in, tblockaddress:$g)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in),
+          (ADDIS8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in),
+          (ADDI8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)),
+          (ADDIS8 $in, tglobaladdr:$g)>;
+def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)),
+          (ADDIS8 $in, tconstpool:$g)>;
+def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)),
+          (ADDIS8 $in, tjumptable:$g)>;
+def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
+          (ADDIS8 $in, tblockaddress:$g)>;
+
+// Patterns to match r+r indexed loads and stores for
+// addresses without at least 4-byte alignment.
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+          (LWAX xoaddr:$src)>;
+def : Pat<(i64 (unaligned4load xoaddr:$src)),
+          (LDX xoaddr:$src)>;
+def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+          (STDX $rS, xoaddr:$dst)>;
+
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 0ed7ff2cc4f9..a5ba4c8aebef 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -161,23 +161,64 @@ def vecspltisw : PatLeaf<(build_vector), [{
 //===----------------------------------------------------------------------===//
 // Helpers for defining instructions that directly correspond to intrinsics.
 
-// VA1a_Int - A VAForm_1a intrinsic definition.
-class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
+// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
+class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
   : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
               !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
-                       [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
+                       [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
 
-// VX1_Int - A VXForm_1 intrinsic definition.
-class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
+// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+                   ValueType InTy>
+  : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+              !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+                       [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
+
+// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two
+// input types and an output type.
+class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+                   ValueType In1Ty, ValueType In2Ty>
+  : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+              !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+                       [(set OutTy:$vD,
+                         (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
+
+// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
+class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
   : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
              !strconcat(opc, " $vD, $vA, $vB"), VecFP,
-             [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
+             [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
 
-// VX2_Int - A VXForm_2 intrinsic definition.
-class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
+// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+                  ValueType InTy>
+  : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+             !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+             [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
+
+// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two
+// input types and an output type.
+class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+                  ValueType In1Ty, ValueType In2Ty>
+  : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+             !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+             [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
+
+// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
+class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
   : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
              !strconcat(opc, " $vD, $vB"), VecFP,
-             [(set VRRC:$vD, (IntID VRRC:$vB))]>;
+             [(set v4f32:$vD, (IntID v4f32:$vB))]>;
+
+// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+                  ValueType InTy>
+  : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+             !strconcat(opc, " $vD, $vB"), VecFP,
+             [(set OutTy:$vD, (IntID InTy:$vB))]>;
 
 //===----------------------------------------------------------------------===//
 // Instruction Definitions.
@@ -185,6 +226,7 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
 def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">;
 let Predicates = [HasAltivec] in {
 
+let isCodeGenOnly = 1 in {
 def DSS      : DSS_Form<822, (outs),
                         (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
                         "dss $STRM", LdStLoad /*FIXME*/, []>;
@@ -216,129 +258,136 @@ def DSTST64  : DSS_Form<374, (outs),
 def DSTSTT64 : DSS_Form<374, (outs),
                         (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
                         "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+}
 
 def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
                       "mfvscr $vD", LdStStore,
-                      [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>; 
+                      [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; 
 def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
                       "mtvscr $vB", LdStLoad,
-                      [(int_ppc_altivec_mtvscr VRRC:$vB)]>; 
+                      [(int_ppc_altivec_mtvscr v4i32:$vB)]>; 
 
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {  // Loads.
 def LVEBX: XForm_1<31,   7, (outs VRRC:$vD), (ins memrr:$src),
                    "lvebx $vD, $src", LdStLoad,
-                   [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+                   [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
 def LVEHX: XForm_1<31,  39, (outs VRRC:$vD), (ins memrr:$src),
                    "lvehx $vD, $src", LdStLoad,
-                   [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+                   [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
 def LVEWX: XForm_1<31,  71, (outs VRRC:$vD), (ins memrr:$src),
                    "lvewx $vD, $src", LdStLoad,
-                   [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+                   [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
 def LVX  : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
                    "lvx $vD, $src", LdStLoad,
-                   [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+                   [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
 def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
                    "lvxl $vD, $src", LdStLoad,
-                   [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+                   [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
 }
 
 def LVSL : XForm_1<31,   6, (outs VRRC:$vD), (ins memrr:$src),
                    "lvsl $vD, $src", LdStLoad,
-                   [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+                   [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
                    PPC970_Unit_LSU;
 def LVSR : XForm_1<31,  38, (outs VRRC:$vD), (ins memrr:$src),
                    "lvsr $vD, $src", LdStLoad,
-                   [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+                   [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
                    PPC970_Unit_LSU;
 
 let PPC970_Unit = 2 in {   // Stores.
 def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
                    "stvebx $rS, $dst", LdStStore,
-                   [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
+                   [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
 def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
                    "stvehx $rS, $dst", LdStStore,
-                   [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
+                   [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
 def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
                    "stvewx $rS, $dst", LdStStore,
-                   [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
+                   [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
 def STVX  : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
                    "stvx $rS, $dst", LdStStore,
-                   [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
+                   [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
 def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
                    "stvxl $rS, $dst", LdStStore,
-                   [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
+                   [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
 }
 
 let PPC970_Unit = 5 in {  // VALU Operations.
 // VA-Form instructions.  3-input AltiVec ops.
 def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
                        "vmaddfp $vD, $vA, $vC, $vB", VecFP,
-                       [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>;
+                       [(set v4f32:$vD,
+                        (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
+
+// FIXME: The fma+fneg pattern won't match because fneg is not legal.
 def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
                        "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
-                       [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC,
-                                                  (fneg VRRC:$vB))))]>; 
+                       [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
+                                                  (fneg v4f32:$vB))))]>; 
+
+def VMHADDSHS  : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
+                             v8i16>;
+def VMLADDUHM  : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
 
-def VMHADDSHS  : VA1a_Int<32, "vmhaddshs",  int_ppc_altivec_vmhaddshs>;
-def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
-def VMLADDUHM  : VA1a_Int<34, "vmladduhm",  int_ppc_altivec_vmladduhm>;
-def VPERM      : VA1a_Int<43, "vperm",      int_ppc_altivec_vperm>;
-def VSEL       : VA1a_Int<42, "vsel",       int_ppc_altivec_vsel>;
+def VPERM      : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
+                              v4i32, v4i32, v16i8>;
+def VSEL       : VA1a_Int_Ty<42, "vsel",  int_ppc_altivec_vsel, v4i32>;
 
 // Shuffles.
 def VSLDOI  : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
                        "vsldoi $vD, $vA, $vB, $SH", VecFP,
-                       [(set VRRC:$vD, 
-                         (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
+                       [(set v16i8:$vD, 
+                         (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
 
 // VX-Form instructions.  AltiVec arithmetic ops.
 def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vaddfp $vD, $vA, $vB", VecFP,
-                      [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>;
+                      [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
                       
 def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vaddubm $vD, $vA, $vB", VecGeneral,
-                      [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>;
+                      [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
 def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vadduhm $vD, $vA, $vB", VecGeneral,
-                      [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>;
+                      [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
 def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vadduwm $vD, $vA, $vB", VecGeneral,
-                      [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>;
+                      [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
                       
-def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>;
-def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>;
-def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>;
-def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>;
-def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>;
-def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>;
-def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>;
+def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
+def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
+def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>;
+def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>;
+def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>;
+def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
+def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
                              
                              
 def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                     "vand $vD, $vA, $vB", VecFP,
-                    [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
+                    [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
 def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                      "vandc $vD, $vA, $vB", VecFP,
-                     [(set VRRC:$vD, (and (v4i32 VRRC:$vA),
-                                          (vnot_ppc VRRC:$vB)))]>;
+                     [(set v4i32:$vD, (and v4i32:$vA,
+                                           (vnot_ppc v4i32:$vB)))]>;
 
 def VCFSX  : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
                       "vcfsx $vD, $vB, $UIMM", VecFP,
-                      [(set VRRC:$vD,
-                             (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>;
+                      [(set v4f32:$vD,
+                             (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
 def VCFUX  : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
                       "vcfux $vD, $vB, $UIMM", VecFP,
-                      [(set VRRC:$vD,
-                             (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>;
+                      [(set v4f32:$vD,
+                             (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
 def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
                       "vctsxs $vD, $vB, $UIMM", VecFP,
-                      [(set VRRC:$vD,
-                             (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>;
+                      [(set v4i32:$vD,
+                             (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
 def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
                       "vctuxs $vD, $vB, $UIMM", VecFP,
-                      [(set VRRC:$vD,
-                             (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+                      [(set v4i32:$vD,
+                             (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
 
 // Defines with the UIM field set to 0 for floating-point
 // to integer (fp_to_sint/fp_to_uint) conversions and integer
@@ -346,203 +395,237 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
 let VA = 0 in {
 def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB),
                        "vcfsx $vD, $vB, 0", VecFP,
-                       [(set VRRC:$vD,
-                             (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>;
+                       [(set v4f32:$vD,
+                             (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
 def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB),
                         "vctuxs $vD, $vB, 0", VecFP,
-                        [(set VRRC:$vD,
-                               (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>;
+                        [(set v4i32:$vD,
+                               (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
 def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB),
                        "vcfux $vD, $vB, 0", VecFP,
-                       [(set VRRC:$vD,
-                               (int_ppc_altivec_vcfux VRRC:$vB, 0))]>;
+                       [(set v4f32:$vD,
+                               (int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
 def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB),
                       "vctsxs $vD, $vB, 0", VecFP,
-                      [(set VRRC:$vD,
-                             (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>;
+                      [(set v4i32:$vD,
+                             (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
 }
-def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
-def VLOGEFP  : VX2_Int<458, "vlogefp",  int_ppc_altivec_vlogefp>;
-
-def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>;
-def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>;
-def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>;
-def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>;
-def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>;
-def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>;
-
-def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>;
-def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>;
-def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>;
-def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>;
-def VMAXUB : VX1_Int<   2, "vmaxub", int_ppc_altivec_vmaxub>;
-def VMAXUH : VX1_Int<  66, "vmaxuh", int_ppc_altivec_vmaxuh>;
-def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>;
-def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>;
-def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>;
-def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>;
-def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>;
-def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>;
-def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>;
-def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
+def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP  : VX2_Int_SP<458, "vlogefp",  int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>;
+def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>;
+def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>;
+def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>;
+def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>;
+def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>;
+
+def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>;
+def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>;
+def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>;
+def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>;
+def VMAXUB : VX1_Int_Ty<   2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>;
+def VMAXUH : VX1_Int_Ty<  66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>;
+def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>;
+def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>;
+def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>;
+def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>;
+def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>;
+def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
+def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
+def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
 
 def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vmrghb $vD, $vA, $vB", VecFP,
-                      [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
+                      [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
 def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vmrghh $vD, $vA, $vB", VecFP,
-                      [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
+                      [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
 def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vmrghw $vD, $vA, $vB", VecFP,
-                      [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
+                      [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
 def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vmrglb $vD, $vA, $vB", VecFP,
-                      [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
+                      [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
 def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vmrglh $vD, $vA, $vB", VecFP,
-                      [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
+                      [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
 def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vmrglw $vD, $vA, $vB", VecFP,
-                      [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
-
-def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
-def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
-def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
-def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
-def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
-def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
-
-def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
-def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
-def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
-def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
-def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
-def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
-def VMULOUB : VX1_Int<  8, "vmuloub", int_ppc_altivec_vmuloub>;
-def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+                      [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
+
+def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
+                            v4i32, v16i8, v4i32>;
+def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm,
+                            v4i32, v8i16, v4i32>;
+def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
+                            v4i32, v8i16, v4i32>;
+def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm,
+                            v4i32, v16i8, v4i32>;
+def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
+                            v4i32, v8i16, v4i32>;
+def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
+                            v4i32, v8i16, v4i32>;
+
+def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
+                          v8i16, v16i8>;
+def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh,
+                          v4i32, v8i16>;
+def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub,
+                          v8i16, v16i8>;
+def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh,
+                          v4i32, v8i16>;
+def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb,
+                          v8i16, v16i8>;
+def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh,
+                          v4i32, v8i16>;
+def VMULOUB : VX1_Int_Ty2<  8, "vmuloub", int_ppc_altivec_vmuloub,
+                          v8i16, v16i8>;
+def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
+                          v4i32, v8i16>;
                        
-def VREFP     : VX2_Int<266, "vrefp",     int_ppc_altivec_vrefp>;
-def VRFIM     : VX2_Int<714, "vrfim",     int_ppc_altivec_vrfim>;
-def VRFIN     : VX2_Int<522, "vrfin",     int_ppc_altivec_vrfin>;
-def VRFIP     : VX2_Int<650, "vrfip",     int_ppc_altivec_vrfip>;
-def VRFIZ     : VX2_Int<586, "vrfiz",     int_ppc_altivec_vrfiz>;
-def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+def VREFP     : VX2_Int_SP<266, "vrefp",     int_ppc_altivec_vrefp>;
+def VRFIM     : VX2_Int_SP<714, "vrfim",     int_ppc_altivec_vrfim>;
+def VRFIN     : VX2_Int_SP<522, "vrfin",     int_ppc_altivec_vrfin>;
+def VRFIP     : VX2_Int_SP<650, "vrfip",     int_ppc_altivec_vrfip>;
+def VRFIZ     : VX2_Int_SP<586, "vrfiz",     int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
 
-def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>;
+def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
 
 def VSUBFP  : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vsubfp $vD, $vA, $vB", VecGeneral,
-                      [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+                      [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
 def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vsububm $vD, $vA, $vB", VecGeneral,
-                      [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+                      [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
 def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vsubuhm $vD, $vA, $vB", VecGeneral,
-                      [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+                      [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
 def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vsubuwm $vD, $vA, $vB", VecGeneral,
-                      [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+                      [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
                       
-def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
-def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
-def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
-def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
-def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
-def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
-def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
-def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
-def VSUM4SBS: VX1_Int<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs>;
-def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>;
-def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
+def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
+def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
+def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>;
+def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>;
+def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>;
+def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
+
+def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
+def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
+
+def VSUM4SBS: VX1_Int_Ty3<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+                          v4i32, v16i8, v4i32>;
+def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
+                          v4i32, v8i16, v4i32>;
+def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
+                          v4i32, v16i8, v4i32>;
 
 def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                     "vnor $vD, $vA, $vB", VecFP,
-                    [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA),
-                                                  VRRC:$vB)))]>;
+                    [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA,
+                                                   v4i32:$vB)))]>;
 def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vor $vD, $vA, $vB", VecFP,
-                      [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
+                      [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
 def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                       "vxor $vD, $vA, $vB", VecFP,
-                      [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>;
+                      [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
+
+def VRLB   : VX1_Int_Ty<   4, "vrlb", int_ppc_altivec_vrlb, v16i8>;
+def VRLH   : VX1_Int_Ty<  68, "vrlh", int_ppc_altivec_vrlh, v8i16>;
+def VRLW   : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>;
 
-def VRLB   : VX1_Int<   4, "vrlb", int_ppc_altivec_vrlb>;
-def VRLH   : VX1_Int<  68, "vrlh", int_ppc_altivec_vrlh>;
-def VRLW   : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>;
+def VSL    : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl,  v4i32 >;
+def VSLO   : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>;
 
-def VSL    : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >;
-def VSLO   : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>;
-def VSLB   : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>;
-def VSLH   : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>;
-def VSLW   : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
+def VSLB   : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>;
+def VSLH   : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
+def VSLW   : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
 
 def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
                       "vspltb $vD, $vB, $UIMM", VecPerm,
-                      [(set VRRC:$vD,
-                        (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+                      [(set v16i8:$vD,
+                        (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
 def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
                       "vsplth $vD, $vB, $UIMM", VecPerm,
-                      [(set VRRC:$vD,
-                        (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+                      [(set v16i8:$vD,
+                        (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
 def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
                       "vspltw $vD, $vB, $UIMM", VecPerm,
-                      [(set VRRC:$vD, 
-                        (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+                      [(set v16i8:$vD, 
+                        (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
 
-def VSR    : VX1_Int< 708, "vsr"  , int_ppc_altivec_vsr>;
-def VSRO   : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
-def VSRAB  : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>;
-def VSRAH  : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>;
-def VSRAW  : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>;
-def VSRB   : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>;
-def VSRH   : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>;
-def VSRW   : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>;
+def VSR    : VX1_Int_Ty< 708, "vsr"  , int_ppc_altivec_vsr,  v4i32>;
+def VSRO   : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>;
+
+def VSRAB  : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>;
+def VSRAH  : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>;
+def VSRAW  : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>;
+def VSRB   : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>;
+def VSRH   : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>;
+def VSRW   : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
 
 
 def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
                        "vspltisb $vD, $SIMM", VecPerm,
-                       [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>;
+                       [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
 def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
                        "vspltish $vD, $SIMM", VecPerm,
-                       [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>;
+                       [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
 def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
                        "vspltisw $vD, $SIMM", VecPerm,
-                       [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>;
+                       [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
 
 // Vector Pack.
-def VPKPX   : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>;
-def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>;
-def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>;
-def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
-def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
+def VPKPX   : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
+                          v8i16, v4i32>;
+def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
+                          v16i8, v8i16>;
+def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
+                          v16i8, v8i16>;
+def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
+                          v16i8, v4i32>;
+def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
+                          v8i16, v4i32>;
 def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                        "vpkuhum $vD, $vA, $vB", VecFP,
-                       [(set VRRC:$vD,
-                         (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
-def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
+                       [(set v16i8:$vD,
+                         (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
+                          v16i8, v8i16>;
 def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
                        "vpkuwum $vD, $vA, $vB", VecFP,
-                       [(set VRRC:$vD,
-                         (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
-def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
+                       [(set v16i8:$vD,
+                         (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
+                          v8i16, v4i32>;
 
 // Vector Unpack.
-def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
-def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
-def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
-def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
-def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
-def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
+                          v4i32, v8i16>;
+def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb,
+                          v8i16, v16i8>;
+def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh,
+                          v4i32, v8i16>;
+def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx,
+                          v4i32, v8i16>;
+def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb,
+                          v8i16, v16i8>;
+def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
+                          v4i32, v8i16>;
 
 
 // Altivec Comparisons.
 
 class VCMP<bits<10> xo, string asmstr, ValueType Ty>
   : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
-              [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+              [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
 class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
   : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
-              [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+              [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
   let Defs = [CR6];
   let RC = 1;
 }
@@ -581,13 +664,14 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
 def VCMPGTUW  : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
 def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
                       
+let isCodeGenOnly = 1 in
 def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
                       "vxor $vD, $vD, $vD", VecFP,
-                      [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+                      [(set v4i32:$vD, (v4i32 immAllZerosV))]>;
 let IMM=-1 in {
 def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins),
                       "vspltisw $vD, -1", VecFP,
-                      [(set VRRC:$vD, (v4i32 immAllOnesV))]>;
+                      [(set v4i32:$vD, (v4i32 immAllOnesV))]>;
 }
 } // VALU Operations.
 
@@ -600,31 +684,31 @@ def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
 def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
 
 //  * 32-bit
-def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM),
-          (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM),
-          (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM),
-          (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
-          (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM),
+          (DST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM),
+          (DSTT 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM),
+          (DSTST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM),
+          (DSTSTT 1, imm:$STRM, $rA, $rB)>;
 
 //  * 64-bit
-def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM),
-          (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM),
-          (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM),
-          (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM),
-          (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM),
+          (DST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM),
+          (DSTT64 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM),
+          (DSTST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM),
+          (DSTSTT64 1, imm:$STRM, $rA, $rB)>;
 
 // Loads.
 def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
 
 // Stores.
-def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
-          (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
+def : Pat<(store v4i32:$rS, xoaddr:$dst),
+          (STVX $rS, xoaddr:$dst)>;
 
 // Bit conversions.
 def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
@@ -646,96 +730,99 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
 // Shuffles.
 
 // Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
-def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
-        (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
-def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
-        (VPKUWUM VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
-        (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef),
+        (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm $in))>;
+def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
+        (VPKUWUM $vA, $vA)>;
+def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
+        (VPKUHUM $vA, $vA)>;
 
 // Match vmrg*(x,x)
-def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
-        (VMRGLB VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
-        (VMRGLH VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
-        (VMRGLW VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
-        (VMRGHB VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
-        (VMRGHH VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
-        (VMRGHW VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
+        (VMRGLB $vA, $vA)>;
+def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef),
+        (VMRGLH $vA, $vA)>;
+def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef),
+        (VMRGLW $vA, $vA)>;
+def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef),
+        (VMRGHB $vA, $vA)>;
+def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
+        (VMRGHH $vA, $vA)>;
+def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
+        (VMRGHW $vA, $vA)>;
 
 // Logical Operations
-def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>;
 
-def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
-          (VNOR VRRC:$A, VRRC:$B)>;
-def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
-          (VANDC VRRC:$A, VRRC:$B)>;
+def : Pat<(vnot_ppc (or v4i32:$A, v4i32:$B)),
+          (VNOR $A, $B)>;
+def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)),
+          (VANDC $A, $B)>;
 
-def : Pat<(fmul VRRC:$vA, VRRC:$vB),
-          (VMADDFP VRRC:$vA, VRRC:$vB,
+def : Pat<(fmul v4f32:$vA, v4f32:$vB),
+          (VMADDFP $vA, $vB,
              (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>; 
 
 // Fused multiply add and multiply sub for packed float.  These are represented
 // separately from the real instructions above, for operations that must have
 // the additional precision, such as Newton-Rhapson (used by divide, sqrt)
-def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
-          (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
-def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
-          (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+          (VMADDFP $A, $B, $C)>;
+def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+          (VNMSUBFP $A, $B, $C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+          (VMADDFP $A, $B, $C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+          (VNMSUBFP $A, $B, $C)>;
 
-def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
-          (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
-def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
-          (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvperm v16i8:$vA, v16i8:$vB, v16i8:$vC),
+          (VPERM $vA, $vB, $vC)>;
 
-def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
-          (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
+def : Pat<(PPCfre v4f32:$A), (VREFP $A)>;
+def : Pat<(PPCfrsqrte v4f32:$A), (VRSQRTEFP $A)>;
 
 // Vector shifts
-def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
-          (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
-          (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
-          (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>;
-
-def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
-          (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
-          (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
-          (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>;
-
-def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
-          (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
-          (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
-          (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)),
+          (v16i8 (VSLB $vA, $vB))>;
+def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
+          (v8i16 (VSLH $vA, $vB))>;
+def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
+          (v4i32 (VSLW $vA, $vB))>;
+
+def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
+          (v16i8 (VSRB $vA, $vB))>;
+def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
+          (v8i16 (VSRH $vA, $vB))>;
+def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
+          (v4i32 (VSRW $vA, $vB))>;
+
+def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
+          (v16i8 (VSRAB $vA, $vB))>;
+def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
+          (v8i16 (VSRAH $vA, $vB))>;
+def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
+          (v4i32 (VSRAW $vA, $vB))>;
 
 // Float to integer and integer to float conversions
-def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))),
-           (VCTSXS_0 VRRC:$vA)>;
-def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))),
-           (VCTUXS_0 VRRC:$vA)>;
-def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))),
-           (VCFSX_0 VRRC:$vA)>;
-def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))),
-           (VCFUX_0 VRRC:$vA)>;
+def : Pat<(v4i32 (fp_to_sint v4f32:$vA)),
+           (VCTSXS_0 $vA)>;
+def : Pat<(v4i32 (fp_to_uint v4f32:$vA)),
+           (VCTUXS_0 $vA)>;
+def : Pat<(v4f32 (sint_to_fp v4i32:$vA)),
+           (VCFSX_0 $vA)>;
+def : Pat<(v4f32 (uint_to_fp v4i32:$vA)),
+           (VCFUX_0 $vA)>;
 
 // Floating-point rounding
-def : Pat<(v4f32 (ffloor (v4f32 VRRC:$vA))),
-          (VRFIM VRRC:$vA)>;
-def : Pat<(v4f32 (fceil (v4f32 VRRC:$vA))),
-          (VRFIP VRRC:$vA)>;
-def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))),
-          (VRFIZ VRRC:$vA)>;
-def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))),
-          (VRFIN VRRC:$vA)>;
+def : Pat<(v4f32 (ffloor v4f32:$vA)),
+          (VRFIM $vA)>;
+def : Pat<(v4f32 (fceil v4f32:$vA)),
+          (VRFIP $vA)>;
+def : Pat<(v4f32 (ftrunc v4f32:$vA)),
+          (VRFIZ $vA)>;
+def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
+          (VRFIN $vA)>;
 
 } // end HasAltivec
 
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index c3c171cd21fc..36f193be9936 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -35,6 +35,15 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
   let TSFlags{1}   = PPC970_Single;
   let TSFlags{2}   = PPC970_Cracked;
   let TSFlags{5-3} = PPC970_Unit;
+
+  // Fields used for relation models.
+  string BaseName = "";
+
+  // For cases where multiple instruction definitions really represent the
+  // same underlying instruction but with one definition for 64-bit arguments
+  // and one for 32-bit arguments, this bit breaks the degeneracy between
+  // the two forms and allows TableGen to generate mapping tables.
+  bit Interpretation64Bit = 0;
 }
 
 class PPC970_DGroup_First   { bits<1> PPC970_First = 1;  }
@@ -80,6 +89,10 @@ class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
   let TSFlags{1}   = PPC970_Single;
   let TSFlags{2}   = PPC970_Cracked;
   let TSFlags{5-3} = PPC970_Unit;
+
+  // Fields used for relation models.
+  string BaseName = "";
+  bit Interpretation64Bit = 0;
 }
 
 // 1.7.1 I-Form
@@ -120,6 +133,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
   let CR = 0;
 }
 
+class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
+              dag OOL, dag IOL, string asmstr>
+  : I<opcode, OOL, IOL, asmstr, BrB> {
+  bits<14> BD;
+
+  let Inst{6-10}  = bo;
+  let Inst{11-15} = bi;
+  let Inst{16-29} = BD;
+  let Inst{30}    = aa;
+  let Inst{31}    = lk;
+}
+
 // 1.7.4 D-Form
 class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
                  InstrItinClass itin, list<dag> pattern> 
@@ -165,7 +190,12 @@ class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
 
 class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
-  : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>;
+  : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> {
+
+  // Even though ADDICo does not really have an RC bit, provide
+  // the declaration of one here so that isDOT has something to set.
+  bit RC = 0;
+}
 
 class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
                  InstrItinClass itin, list<dag> pattern>
@@ -553,9 +583,9 @@ class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
   bits<7> BIBO;  // 2 bits of BI and 5 bits of BO.
   bits<3>  CR;
   
-  let BO = BIBO{2-6};
-  let BI{0-1} = BIBO{0-1};
-  let BI{2-4} = CR;
+  let BO = BIBO{4-0};
+  let BI{0-1} = BIBO{5-6};
+  let BI{2-4} = CR{0-2};
   let BH = 0;
 }
 
@@ -664,14 +694,13 @@ class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
 // This is probably 1.7.9, but I don't have the reference that uses this
 // numbering scheme...
 class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, 
-                      string cstr, InstrItinClass itin, list<dag>pattern>
+              InstrItinClass itin, list<dag>pattern>
   : I<opcode, OOL, IOL, asmstr, itin> {
   bits<8> FM;
   bits<5> rT;
 
   bit RC = 0;    // set by isDOT
   let Pattern = pattern;
-  let Constraints = cstr;
 
   let Inst{6} = 0;
   let Inst{7-14}  = FM;
@@ -765,16 +794,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
   bits<5> RT;
   bits<5> RA;
   bits<5> RB;
-  bits<7> BIBO;  // 2 bits of BI and 5 bits of BO (must be 12).
-  bits<3> CR;
+  bits<5> COND;
 
   let Pattern = pattern;
 
   let Inst{6-10}  = RT;
   let Inst{11-15} = RA;
   let Inst{16-20} = RB;
-  let Inst{21-23} = CR;
-  let Inst{24-25} = BIBO{6-5};
+  let Inst{21-25} = COND;
   let Inst{26-30} = xo;
   let Inst{31}    = 0;
 }
@@ -987,6 +1014,7 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
 //===----------------------------------------------------------------------===//
 class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
     : I<0, OOL, IOL, asmstr, NoItinerary> {
+  let isCodeGenOnly = 1;
   let PPC64 = 0;
   let Pattern = pattern;
   let Inst{31-0} = 0;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 7fe788093439..f38e42d2e4b1 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -18,8 +18,10 @@
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -30,6 +32,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 
+#define GET_INSTRMAP_INFO
 #define GET_INSTRINFO_CTOR
 #include "PPCGenInstrInfo.inc"
 
@@ -39,6 +42,9 @@ static cl::
 opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
             cl::desc("Disable analysis for CTR loops"));
 
+static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
+cl::desc("Disable compare instruction optimization"), cl::Hidden);
+
 PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
   : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
     TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
@@ -94,12 +100,18 @@ bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
 
 unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                            int &FrameIndex) const {
+  // Note: This list must be kept consistent with LoadRegFromStackSlot.
   switch (MI->getOpcode()) {
   default: break;
   case PPC::LD:
   case PPC::LWZ:
   case PPC::LFS:
   case PPC::LFD:
+  case PPC::RESTORE_CR:
+  case PPC::LVX:
+  case PPC::RESTORE_VRSAVE:
+    // Check for the operands added by addFrameReference (the immediate is the
+    // offset which defaults to 0).
     if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
         MI->getOperand(2).isFI()) {
       FrameIndex = MI->getOperand(2).getIndex();
@@ -112,12 +124,18 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
 
 unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                           int &FrameIndex) const {
+  // Note: This list must be kept consistent with StoreRegToStackSlot.
   switch (MI->getOpcode()) {
   default: break;
   case PPC::STD:
   case PPC::STW:
   case PPC::STFS:
   case PPC::STFD:
+  case PPC::SPILL_CR:
+  case PPC::STVX:
+  case PPC::SPILL_VRSAVE:
+    // Check for the operands added by addFrameReference (the immediate is the
+    // offset which defaults to 0).
     if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
         MI->getOperand(2).isFI()) {
       FrameIndex = MI->getOperand(2).getIndex();
@@ -135,7 +153,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
   MachineFunction &MF = *MI->getParent()->getParent();
 
   // Normal instructions can be commuted the obvious way.
-  if (MI->getOpcode() != PPC::RLWIMI)
+  if (MI->getOpcode() != PPC::RLWIMI &&
+      MI->getOpcode() != PPC::RLWIMIo)
     return TargetInstrInfo::commuteInstruction(MI, NewMI);
 
   // Cannot commute if it has a non-zero rotate count.
@@ -405,6 +424,105 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   return 2;
 }
 
+// Select analysis.
+bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+                const SmallVectorImpl<MachineOperand> &Cond,
+                unsigned TrueReg, unsigned FalseReg,
+                int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+  if (!TM.getSubtargetImpl()->hasISEL())
+    return false;
+
+  if (Cond.size() != 2)
+    return false;
+
+  // If this is really a bdnz-like condition, then it cannot be turned into a
+  // select.
+  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+    return false;
+
+  // Check register classes.
+  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC =
+    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+  if (!RC)
+    return false;
+
+  // isel is for regular integer GPRs only.
+  if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
+      !PPC::G8RCRegClass.hasSubClassEq(RC))
+    return false;
+
+  // FIXME: These numbers are for the A2, how well they work for other cores is
+  // an open question. On the A2, the isel instruction has a 2-cycle latency
+  // but single-cycle throughput. These numbers are used in combination with
+  // the MispredictPenalty setting from the active SchedMachineModel.
+  CondCycles = 1;
+  TrueCycles = 1;
+  FalseCycles = 1;
+
+  return true;
+}
+
+void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI, DebugLoc dl,
+                                unsigned DestReg,
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                unsigned TrueReg, unsigned FalseReg) const {
+  assert(Cond.size() == 2 &&
+         "PPC branch conditions have two components!");
+
+  assert(TM.getSubtargetImpl()->hasISEL() &&
+         "Cannot insert select on target without ISEL support");
+
+  // Get the register classes.
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC =
+    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+  assert(RC && "TrueReg and FalseReg must have overlapping register classes");
+  assert((PPC::GPRCRegClass.hasSubClassEq(RC) ||
+          PPC::G8RCRegClass.hasSubClassEq(RC)) &&
+         "isel is for regular integer GPRs only");
+
+  unsigned OpCode =
+    PPC::GPRCRegClass.hasSubClassEq(RC) ? PPC::ISEL : PPC::ISEL8;
+  unsigned SelectPred = Cond[0].getImm();
+
+  unsigned SubIdx;
+  bool SwapOps;
+  switch (SelectPred) {
+  default: llvm_unreachable("invalid predicate for isel");
+  case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
+  case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
+  case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
+  case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
+  case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
+  case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
+  case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
+  case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
+  }
+
+  unsigned FirstReg =  SwapOps ? FalseReg : TrueReg,
+           SecondReg = SwapOps ? TrueReg  : FalseReg;
+
+  // The first input register of isel cannot be r0. If it is a member
+  // of a register class that can be r0, then copy it first (the
+  // register allocator should eliminate the copy).
+  if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
+      MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
+    const TargetRegisterClass *FirstRC =
+      MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
+        &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
+    unsigned OldFirstReg = FirstReg;
+    FirstReg = MRI.createVirtualRegister(FirstRC);
+    BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
+      .addReg(OldFirstReg);
+  }
+
+  BuildMI(MBB, MI, dl, get(OpCode), DestReg)
+    .addReg(FirstReg).addReg(SecondReg)
+    .addReg(Cond[1].getReg(), 0, SubIdx);
+}
+
 void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I, DebugLoc DL,
                                unsigned DestReg, unsigned SrcReg,
@@ -440,40 +558,21 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
                                   int FrameIdx,
                                   const TargetRegisterClass *RC,
                                   SmallVectorImpl<MachineInstr*> &NewMIs,
-                                  bool &NonRI) const{
+                                  bool &NonRI, bool &SpillsVRS) const{
+  // Note: If additional store instructions are added here,
+  // update isStoreToStackSlot.
+
   DebugLoc DL;
   if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
-    if (SrcReg != PPC::LR) {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
-                                         .addReg(SrcReg,
-                                                 getKillRegState(isKill)),
-                                         FrameIdx));
-    } else {
-      // FIXME: this spills LR immediately to memory in one step.  To do this,
-      // we use R11, which we know cannot be used in the prolog/epilog.  This is
-      // a hack.
-      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11));
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
-                                         .addReg(PPC::R11,
-                                                 getKillRegState(isKill)),
-                                         FrameIdx));
-    }
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
   } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
-    if (SrcReg != PPC::LR8) {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
-                                         .addReg(SrcReg,
-                                                 getKillRegState(isKill)),
-                                         FrameIdx));
-    } else {
-      // FIXME: this spills LR immediately to memory in one step.  To do this,
-      // we use X11, which we know cannot be used in the prolog/epilog.  This is
-      // a hack.
-      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
-                                         .addReg(PPC::X11,
-                                                 getKillRegState(isKill)),
-                                         FrameIdx));
-    }
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
   } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
     NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
                                        .addReg(SrcReg,
@@ -522,7 +621,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
       Reg = PPC::CR7;
 
     return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
-                               &PPC::CRRCRegClass, NewMIs, NonRI);
+                               &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
 
   } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
     NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
@@ -530,6 +629,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
                                                getKillRegState(isKill)),
                                        FrameIdx));
     NonRI = true;
+  } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+    assert(TM.getSubtargetImpl()->isDarwin() &&
+           "VRSAVE only needs spill/restore on Darwin");
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
+    SpillsVRS = true;
   } else {
     llvm_unreachable("Unknown regclass!");
   }
@@ -549,10 +656,14 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
   FuncInfo->setHasSpills();
 
-  bool NonRI = false;
-  if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, NonRI))
+  bool NonRI = false, SpillsVRS = false;
+  if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
+                          NonRI, SpillsVRS))
     FuncInfo->setSpillsCR();
 
+  if (SpillsVRS)
+    FuncInfo->setSpillsVRSAVE();
+
   if (NonRI)
     FuncInfo->setHasNonRISpills();
 
@@ -573,25 +684,16 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
                                    unsigned DestReg, int FrameIdx,
                                    const TargetRegisterClass *RC,
                                    SmallVectorImpl<MachineInstr*> &NewMIs,
-                                   bool &NonRI) const{
+                                   bool &NonRI, bool &SpillsVRS) const{
+  // Note: If additional load instructions are added here,
+  // update isLoadFromStackSlot.
+
   if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
-    if (DestReg != PPC::LR) {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
-                                                 DestReg), FrameIdx));
-    } else {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
-                                                 PPC::R11), FrameIdx));
-      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
-    }
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+                                               DestReg), FrameIdx));
   } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
-    if (DestReg != PPC::LR8) {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
-                                         FrameIdx));
-    } else {
-      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
-                                                 PPC::X11), FrameIdx));
-      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
-    }
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+                                       FrameIdx));
   } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
     NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
                                        FrameIdx));
@@ -632,12 +734,20 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
       Reg = PPC::CR7;
 
     return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
-                                &PPC::CRRCRegClass, NewMIs, NonRI);
+                                &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
 
   } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
     NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
                                        FrameIdx));
     NonRI = true;
+  } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+    assert(TM.getSubtargetImpl()->isDarwin() &&
+           "VRSAVE only needs spill/restore on Darwin");
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+                                               get(PPC::RESTORE_VRSAVE),
+                                               DestReg),
+                                       FrameIdx));
+    SpillsVRS = true;
   } else {
     llvm_unreachable("Unknown regclass!");
   }
@@ -659,10 +769,14 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
   FuncInfo->setHasSpills();
 
-  bool NonRI = false;
-  if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, NonRI))
+  bool NonRI = false, SpillsVRS = false;
+  if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
+                           NonRI, SpillsVRS))
     FuncInfo->setSpillsCR();
 
+  if (SpillsVRS)
+    FuncInfo->setSpillsVRSAVE();
+
   if (NonRI)
     FuncInfo->setHasNonRISpills();
 
@@ -699,6 +813,578 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   return false;
 }
 
+bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+                             unsigned Reg, MachineRegisterInfo *MRI) const {
+  // For some instructions, it is legal to fold ZERO into the RA register field.
+  // A zero immediate should always be loaded with a single li.
+  unsigned DefOpc = DefMI->getOpcode();
+  if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
+    return false;
+  if (!DefMI->getOperand(1).isImm())
+    return false;
+  if (DefMI->getOperand(1).getImm() != 0)
+    return false;
+
+  // Note that we cannot here invert the arguments of an isel in order to fold
+  // a ZERO into what is presented as the second argument. All we have here
+  // is the condition bit, and that might come from a CR-logical bit operation.
+
+  const MCInstrDesc &UseMCID = UseMI->getDesc();
+
+  // Only fold into real machine instructions.
+  if (UseMCID.isPseudo())
+    return false;
+
+  unsigned UseIdx;
+  for (UseIdx = 0; UseIdx < UseMI->getNumOperands(); ++UseIdx)
+    if (UseMI->getOperand(UseIdx).isReg() &&
+        UseMI->getOperand(UseIdx).getReg() == Reg)
+      break;
+
+  assert(UseIdx < UseMI->getNumOperands() && "Cannot find Reg in UseMI");
+  assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
+
+  const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx];
+
+  // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
+  // register (which might also be specified as a pointer class kind).
+  if (UseInfo->isLookupPtrRegClass()) {
+    if (UseInfo->RegClass /* Kind */ != 1)
+      return false;
+  } else {
+    if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
+        UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
+      return false;
+  }
+
+  // Make sure this is not tied to an output register (or otherwise
+  // constrained). This is true for ST?UX registers, for example, which
+  // are tied to their output registers.
+  if (UseInfo->Constraints != 0)
+    return false;
+
+  unsigned ZeroReg;
+  if (UseInfo->isLookupPtrRegClass()) {
+    bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+    ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
+  } else {
+    ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
+              PPC::ZERO8 : PPC::ZERO;
+  }
+
+  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+  UseMI->getOperand(UseIdx).setReg(ZeroReg);
+
+  if (DeleteDef)
+    DefMI->eraseFromParent();
+
+  return true;
+}
+
+static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+       I != IE; ++I)
+    if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8))
+      return true;
+  return false;
+}
+
+// We should make sure that, if we're going to predicate both sides of a
+// condition (a diamond), that both sides don't define the counter register. We
+// can predicate counter-decrement-based branches, but while that predicates
+// the branching, it does not predicate the counter decrement. If we tried to
+// merge the triangle into one predicated block, we'd decrement the counter
+// twice.
+bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                     unsigned NumT, unsigned ExtraT,
+                     MachineBasicBlock &FMBB,
+                     unsigned NumF, unsigned ExtraF,
+                     const BranchProbability &Probability) const {
+  return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
+}
+
+
+bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const {
+  // The predicated branches are identified by their type, not really by the
+  // explicit presence of a predicate. Furthermore, some of them can be
+  // predicated more than once. Because if conversion won't try to predicate
+  // any instruction which already claims to be predicated (by returning true
+  // here), always return false. In doing so, we let isPredicable() be the
+  // final word on whether not the instruction can be (further) predicated.
+
+  return false;
+}
+
+bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+  if (!MI->isTerminator())
+    return false;
+
+  // Conditional branch is a special case.
+  if (MI->isBranch() && !MI->isBarrier())
+    return true;
+
+  return !isPredicated(MI);
+}
+
+bool PPCInstrInfo::PredicateInstruction(
+                     MachineInstr *MI,
+                     const SmallVectorImpl<MachineOperand> &Pred) const {
+  unsigned OpC = MI->getOpcode();
+  if (OpC == PPC::BLR) {
+    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
+      bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+      MI->setDesc(get(Pred[0].getImm() ?
+                      (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
+                      (isPPC64 ? PPC::BDZLR8  : PPC::BDZLR)));
+    } else {
+      MI->setDesc(get(PPC::BCLR));
+      MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+        .addImm(Pred[0].getImm())
+        .addReg(Pred[1].getReg());
+    }
+
+    return true;
+  } else if (OpC == PPC::B) {
+    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
+      bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+      MI->setDesc(get(Pred[0].getImm() ?
+                      (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+                      (isPPC64 ? PPC::BDZ8  : PPC::BDZ)));
+    } else {
+      MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+      MI->RemoveOperand(0);
+
+      MI->setDesc(get(PPC::BCC));
+      MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+        .addImm(Pred[0].getImm())
+        .addReg(Pred[1].getReg())
+        .addMBB(MBB);
+    }
+
+    return true;
+  } else if (OpC == PPC::BCTR  || OpC == PPC::BCTR8 ||
+             OpC == PPC::BCTRL || OpC == PPC::BCTRL8) {
+    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
+      llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
+
+    bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
+    bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+    MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
+                              (setLR ? PPC::BCCTRL  : PPC::BCCTR)));
+    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+      .addImm(Pred[0].getImm())
+      .addReg(Pred[1].getReg());
+    return true;
+  }
+
+  return false;
+}
+
+bool PPCInstrInfo::SubsumesPredicate(
+                     const SmallVectorImpl<MachineOperand> &Pred1,
+                     const SmallVectorImpl<MachineOperand> &Pred2) const {
+  assert(Pred1.size() == 2 && "Invalid PPC first predicate");
+  assert(Pred2.size() == 2 && "Invalid PPC second predicate");
+
+  if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
+    return false;
+  if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
+    return false;
+
+  PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
+  PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
+
+  if (P1 == P2)
+    return true;
+
+  // Does P1 subsume P2, e.g. GE subsumes GT.
+  if (P1 == PPC::PRED_LE &&
+      (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
+    return true;
+  if (P1 == PPC::PRED_GE &&
+      (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
+    return true;
+
+  return false;
+}
+
+bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI,
+                                    std::vector<MachineOperand> &Pred) const {
+  // Note: At the present time, the contents of Pred from this function is
+  // unused by IfConversion. This implementation follows ARM by pushing the
+  // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
+  // predicate, instructions defining CTR or CTR8 are also included as
+  // predicate-defining instructions.
+
+  const TargetRegisterClass *RCs[] =
+    { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
+      &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
+
+  bool Found = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) {
+      const TargetRegisterClass *RC = RCs[c];
+      if (MO.isReg()) {
+        if (MO.isDef() && RC->contains(MO.getReg())) {
+          Pred.push_back(MO);
+          Found = true;
+        }
+      } else if (MO.isRegMask()) {
+        for (TargetRegisterClass::iterator I = RC->begin(),
+             IE = RC->end(); I != IE; ++I)
+          if (MO.clobbersPhysReg(*I)) {
+            Pred.push_back(MO);
+            Found = true;
+          }
+      }
+    }
+  }
+
+  return Found;
+}
+
+bool PPCInstrInfo::isPredicable(MachineInstr *MI) const {
+  unsigned OpC = MI->getOpcode();
+  switch (OpC) {
+  default:
+    return false;
+  case PPC::B:
+  case PPC::BLR:
+  case PPC::BCTR:
+  case PPC::BCTR8:
+  case PPC::BCTRL:
+  case PPC::BCTRL8:
+    return true;
+  }
+}
+
+bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI,
+                                  unsigned &SrcReg, unsigned &SrcReg2,
+                                  int &Mask, int &Value) const {
+  unsigned Opc = MI->getOpcode();
+
+  switch (Opc) {
+  default: return false;
+  case PPC::CMPWI:
+  case PPC::CMPLWI:
+  case PPC::CMPDI:
+  case PPC::CMPLDI:
+    SrcReg = MI->getOperand(1).getReg();
+    SrcReg2 = 0;
+    Value = MI->getOperand(2).getImm();
+    Mask = 0xFFFF;
+    return true;
+  case PPC::CMPW:
+  case PPC::CMPLW:
+  case PPC::CMPD:
+  case PPC::CMPLD:
+  case PPC::FCMPUS:
+  case PPC::FCMPUD:
+    SrcReg = MI->getOperand(1).getReg();
+    SrcReg2 = MI->getOperand(2).getReg();
+    return true;
+  }
+}
+
+/// Assume the flags are set by MI(a,b), return the condition code if we modify
+/// the instructions such that flags are set by MI(b,a).
+PPC::Predicate static getSwappedPredicate(PPC::Predicate Opcode) {
+  switch (Opcode) {
+  case PPC::PRED_EQ: return PPC::PRED_EQ;
+  case PPC::PRED_NE: return PPC::PRED_NE;
+  case PPC::PRED_LT: return PPC::PRED_GT;
+  case PPC::PRED_GE: return PPC::PRED_LE;
+  case PPC::PRED_GT: return PPC::PRED_LT;
+  case PPC::PRED_LE: return PPC::PRED_GE;
+  case PPC::PRED_NU: return PPC::PRED_NU;
+  case PPC::PRED_UN: return PPC::PRED_UN;
+  }
+  llvm_unreachable("Unknown PPC branch opcode!");
+}
+ 
+bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
+                                        unsigned SrcReg, unsigned SrcReg2,
+                                        int Mask, int Value,
+                                        const MachineRegisterInfo *MRI) const {
+  if (DisableCmpOpt)
+    return false;
+
+  int OpC = CmpInstr->getOpcode();
+  unsigned CRReg = CmpInstr->getOperand(0).getReg();
+  bool isFP = OpC == PPC::FCMPUS || OpC == PPC::FCMPUD;
+  unsigned CRRecReg = isFP ? PPC::CR1 : PPC::CR0;
+
+  // The record forms set the condition register based on a signed comparison
+  // with zero (so says the ISA manual). This is not as straightforward as it
+  // seems, however, because this is always a 64-bit comparison on PPC64, even
+  // for instructions that are 32-bit in nature (like slw for example).
+  // So, on PPC32, for unsigned comparisons, we can use the record forms only
+  // for equality checks (as those don't depend on the sign). On PPC64,
+  // we are restricted to equality for unsigned 64-bit comparisons and for
+  // signed 32-bit comparisons the applicability is more restricted.
+  bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+  bool is32BitSignedCompare   = OpC ==  PPC::CMPWI || OpC == PPC::CMPW;
+  bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
+  bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
+
+  // Get the unique definition of SrcReg.
+  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+  if (!MI) return false;
+  int MIOpC = MI->getOpcode();
+
+  bool equalityOnly = false;
+  bool noSub = false;
+  if (isPPC64) {
+    if (is32BitSignedCompare) {
+      // We can perform this optimization only if MI is sign-extending.
+      if (MIOpC == PPC::SRAW  || MIOpC == PPC::SRAWo ||
+          MIOpC == PPC::SRAWI || MIOpC == PPC::SRAWIo ||
+          MIOpC == PPC::EXTSB || MIOpC == PPC::EXTSBo ||
+          MIOpC == PPC::EXTSH || MIOpC == PPC::EXTSHo ||
+          MIOpC == PPC::EXTSW || MIOpC == PPC::EXTSWo) {
+        noSub = true;
+      } else
+        return false;
+    } else if (is32BitUnsignedCompare) {
+      // We can perform this optimization, equality only, if MI is
+      // zero-extending.
+      if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo ||
+          MIOpC == PPC::SLW    || MIOpC == PPC::SLWo ||
+          MIOpC == PPC::SRW    || MIOpC == PPC::SRWo) {
+        noSub = true;
+        equalityOnly = true;
+      } else
+        return false;
+    } else if (!isFP)
+      equalityOnly = is64BitUnsignedCompare;
+  } else if (!isFP)
+    equalityOnly = is32BitUnsignedCompare;
+
+  if (equalityOnly) {
+    // We need to check the uses of the condition register in order to reject
+    // non-equality comparisons.
+    for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
+         IE = MRI->use_end(); I != IE; ++I) {
+      MachineInstr *UseMI = &*I;
+      if (UseMI->getOpcode() == PPC::BCC) {
+        unsigned Pred = UseMI->getOperand(0).getImm();
+        if (Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE)
+          continue;
+
+        return false;
+      } else if (UseMI->getOpcode() == PPC::ISEL ||
+                 UseMI->getOpcode() == PPC::ISEL8) {
+        unsigned SubIdx = UseMI->getOperand(3).getSubReg();
+        if (SubIdx == PPC::sub_eq)
+          continue;
+
+        return false;
+      } else
+        return false;
+    }
+  }
+
+  // Get ready to iterate backward from CmpInstr.
+  MachineBasicBlock::iterator I = CmpInstr, E = MI,
+                              B = CmpInstr->getParent()->begin();
+
+  // Scan forward to find the first use of the compare.
+  for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end();
+       I != EL; ++I) {
+    bool FoundUse = false;
+    for (MachineRegisterInfo::use_iterator J = MRI->use_begin(CRReg),
+         JE = MRI->use_end(); J != JE; ++J)
+      if (&*J == &*I) {
+        FoundUse = true;
+        break;
+      }
+
+    if (FoundUse)
+      break;
+  }
+
+  // Early exit if we're at the beginning of the BB.
+  if (I == B) return false;
+
+  // There are two possible candidates which can be changed to set CR[01].
+  // One is MI, the other is a SUB instruction.
+  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+  MachineInstr *Sub = NULL;
+  if (SrcReg2 != 0)
+    // MI is not a candidate for CMPrr.
+    MI = NULL;
+  // FIXME: Conservatively refuse to convert an instruction which isn't in the
+  // same BB as the comparison. This is to allow the check below to avoid calls
+  // (and other explicit clobbers); instead we should really check for these
+  // more explicitly (in at least a few predecessors).
+  else if (MI->getParent() != CmpInstr->getParent() || Value != 0) {
+    // PPC does not have a record-form SUBri.
+    return false;
+  }
+
+  // Search for Sub.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  --I;
+  for (; I != E && !noSub; --I) {
+    const MachineInstr &Instr = *I;
+    unsigned IOpC = Instr.getOpcode();
+
+    if (&*I != CmpInstr && (
+        Instr.modifiesRegister(CRRecReg, TRI) ||
+        Instr.readsRegister(CRRecReg, TRI)))
+      // This instruction modifies or uses the record condition register after
+      // the one we want to change. While we could do this transformation, it
+      // would likely not be profitable. This transformation removes one
+      // instruction, and so even forcing RA to generate one move probably
+      // makes it unprofitable.
+      return false;
+
+    // Check whether CmpInstr can be made redundant by the current instruction.
+    if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
+         OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
+        (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
+        ((Instr.getOperand(1).getReg() == SrcReg &&
+          Instr.getOperand(2).getReg() == SrcReg2) ||
+        (Instr.getOperand(1).getReg() == SrcReg2 &&
+         Instr.getOperand(2).getReg() == SrcReg))) {
+      Sub = &*I;
+      break;
+    }
+
+    if (isFP && (IOpC == PPC::FSUB || IOpC == PPC::FSUBS) &&
+        ((Instr.getOperand(1).getReg() == SrcReg &&
+          Instr.getOperand(2).getReg() == SrcReg2) ||
+        (Instr.getOperand(1).getReg() == SrcReg2 &&
+         Instr.getOperand(2).getReg() == SrcReg))) {
+      Sub = &*I;
+      break;
+    }
+
+    if (I == B)
+      // The 'and' is below the comparison instruction.
+      return false;
+  }
+
+  // Return false if no candidates exist.
+  if (!MI && !Sub)
+    return false;
+
+  // The single candidate is called MI.
+  if (!MI) MI = Sub;
+
+  int NewOpC = -1;
+  MIOpC = MI->getOpcode();
+  if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
+    NewOpC = MIOpC;
+  else {
+    NewOpC = PPC::getRecordFormOpcode(MIOpC);
+    if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
+      NewOpC = MIOpC;
+  }
+
+  // FIXME: On the non-embedded POWER architectures, only some of the record
+  // forms are fast, and we should use only the fast ones.
+
+  // The defining instruction has a record form (or is already a record
+  // form). It is possible, however, that we'll need to reverse the condition
+  // code of the users.
+  if (NewOpC == -1)
+    return false;
+
+  SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
+  SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
+
+  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
+  // needs to be updated to be based on SUB.  Push the condition code
+  // operands to OperandsToUpdate.  If it is safe to remove CmpInstr, the
+  // condition code of these operands will be modified.
+  bool ShouldSwap = false;
+  if (Sub) {
+    ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+      Sub->getOperand(2).getReg() == SrcReg;
+
+    // The operands to subf are the opposite of sub, so only in the fixed-point
+    // case, invert the order.
+    if (!isFP)
+      ShouldSwap = !ShouldSwap;
+  }
+
+  if (ShouldSwap)
+    for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
+         IE = MRI->use_end(); I != IE; ++I) {
+      MachineInstr *UseMI = &*I;
+      if (UseMI->getOpcode() == PPC::BCC) {
+        PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
+        assert((!equalityOnly ||
+                Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) &&
+               "Invalid predicate for equality-only optimization");
+        PredsToUpdate.push_back(std::make_pair(&((*I).getOperand(0)),
+                                getSwappedPredicate(Pred)));
+      } else if (UseMI->getOpcode() == PPC::ISEL ||
+                 UseMI->getOpcode() == PPC::ISEL8) {
+        unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
+        assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
+               "Invalid CR bit for equality-only optimization");
+
+        if (NewSubReg == PPC::sub_lt)
+          NewSubReg = PPC::sub_gt;
+        else if (NewSubReg == PPC::sub_gt)
+          NewSubReg = PPC::sub_lt;
+
+        SubRegsToUpdate.push_back(std::make_pair(&((*I).getOperand(3)),
+                                                 NewSubReg));
+      } else // We need to abort on a user we don't understand.
+        return false;
+    }
+
+  // Create a new virtual register to hold the value of the CR set by the
+  // record-form instruction. If the instruction was not previously in
+  // record form, then set the kill flag on the CR.
+  CmpInstr->eraseFromParent();
+
+  MachineBasicBlock::iterator MII = MI;
+  BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(),
+          get(TargetOpcode::COPY), CRReg)
+    .addReg(CRRecReg, MIOpC != NewOpC ? RegState::Kill : 0);
+
+  if (MIOpC != NewOpC) {
+    // We need to be careful here: we're replacing one instruction with
+    // another, and we need to make sure that we get all of the right
+    // implicit uses and defs. On the other hand, the caller may be holding
+    // an iterator to this instruction, and so we can't delete it (this is
+    // specifically the case if this is the instruction directly after the
+    // compare).
+
+    const MCInstrDesc &NewDesc = get(NewOpC);
+    MI->setDesc(NewDesc);
+
+    if (NewDesc.ImplicitDefs)
+      for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs();
+           *ImpDefs; ++ImpDefs)
+        if (!MI->definesRegister(*ImpDefs))
+          MI->addOperand(*MI->getParent()->getParent(),
+                         MachineOperand::CreateReg(*ImpDefs, true, true));
+    if (NewDesc.ImplicitUses)
+      for (const uint16_t *ImpUses = NewDesc.getImplicitUses();
+           *ImpUses; ++ImpUses)
+        if (!MI->readsRegister(*ImpUses))
+          MI->addOperand(*MI->getParent()->getParent(),
+                         MachineOperand::CreateReg(*ImpUses, false, true));
+  }
+
+  // Modify the condition code of operands in OperandsToUpdate.
+  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+  for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
+    PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
+
+  for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
+    SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
+
+  return true;
+}
+
 /// GetInstSize - Return the number of bytes of code the specified
 /// instruction may be.  This returns the maximum number of bytes.
 ///
@@ -714,10 +1400,159 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   case PPC::GC_LABEL:
   case PPC::DBG_VALUE:
     return 0;
-  case PPC::BL8_NOP_ELF:
-  case PPC::BLA8_NOP_ELF:
+  case PPC::BL8_NOP:
+  case PPC::BLA8_NOP:
     return 8;
   default:
     return 4; // PowerPC instructions are all 4 bytes
   }
 }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-early-ret"
+STATISTIC(NumBCLR, "Number of early conditional returns");
+STATISTIC(NumBLR,  "Number of early returns");
+
+namespace llvm {
+  void initializePPCEarlyReturnPass(PassRegistry&);
+}
+
+namespace {
+  // PPCEarlyReturn pass - For simple functions without epilogue code, move
+  // returns up, and create conditional returns, to avoid unnecessary
+  // branch-to-blr sequences.
+  struct PPCEarlyReturn : public MachineFunctionPass {
+    static char ID;
+    PPCEarlyReturn() : MachineFunctionPass(ID) {
+      initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
+    }
+
+    const PPCTargetMachine *TM;
+    const PPCInstrInfo *TII;
+
+protected:
+    bool processBlock(MachineBasicBlock &ReturnMBB) {
+      bool Changed = false;
+
+      MachineBasicBlock::iterator I = ReturnMBB.begin();
+      I = ReturnMBB.SkipPHIsAndLabels(I);
+
+      // The block must be essentially empty except for the blr.
+      if (I == ReturnMBB.end() || I->getOpcode() != PPC::BLR ||
+          I != ReturnMBB.getLastNonDebugInstr())
+        return Changed;
+
+      SmallVector<MachineBasicBlock*, 8> PredToRemove;
+      for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
+           PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
+        bool OtherReference = false, BlockChanged = false;
+        for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
+          if (J->getOpcode() == PPC::B) {
+            if (J->getOperand(0).getMBB() == &ReturnMBB) {
+              // This is an unconditional branch to the return. Replace the
+	      // branch with a blr.
+              BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR));
+              MachineBasicBlock::iterator K = J--;
+              K->eraseFromParent();
+              BlockChanged = true;
+              ++NumBLR;
+              continue;
+            }
+          } else if (J->getOpcode() == PPC::BCC) {
+            if (J->getOperand(2).getMBB() == &ReturnMBB) {
+              // This is a conditional branch to the return. Replace the branch
+              // with a bclr.
+              BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR))
+                .addImm(J->getOperand(0).getImm())
+                .addReg(J->getOperand(1).getReg());
+              MachineBasicBlock::iterator K = J--;
+              K->eraseFromParent();
+              BlockChanged = true;
+              ++NumBCLR;
+              continue;
+            }
+          } else if (J->isBranch()) {
+            if (J->isIndirectBranch()) {
+              if (ReturnMBB.hasAddressTaken())
+                OtherReference = true;
+            } else
+              for (unsigned i = 0; i < J->getNumOperands(); ++i)
+                if (J->getOperand(i).isMBB() &&
+                    J->getOperand(i).getMBB() == &ReturnMBB)
+                  OtherReference = true;
+          } else if (!J->isTerminator() && !J->isDebugValue())
+            break;
+
+          if (J == (*PI)->begin())
+            break;
+
+          --J;
+        }
+
+        if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
+          OtherReference = true;
+
+	// Predecessors are stored in a vector and can't be removed here.
+        if (!OtherReference && BlockChanged) {
+          PredToRemove.push_back(*PI);
+        }
+
+        if (BlockChanged)
+          Changed = true;
+      }
+
+      for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
+        PredToRemove[i]->removeSuccessor(&ReturnMBB);
+
+      if (Changed && !ReturnMBB.hasAddressTaken()) {
+        // We now might be able to merge this blr-only block into its
+        // by-layout predecessor.
+        if (ReturnMBB.pred_size() == 1 &&
+            (*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) {
+          // Move the blr into the preceding block.
+          MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
+          PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
+          PrevMBB.removeSuccessor(&ReturnMBB);
+        }
+
+        if (ReturnMBB.pred_empty())
+          ReturnMBB.eraseFromParent();
+      }
+
+      return Changed;
+    }
+
+public:
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+      TII = TM->getInstrInfo();
+
+      bool Changed = false;
+
+      // If the function does not have at least two blocks, then there is
+      // nothing to do.
+      if (MF.size() < 2)
+        return Changed;
+
+      for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+        MachineBasicBlock &B = *I++; 
+        if (processBlock(B))
+          Changed = true;
+      }
+
+      return Changed;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
+                "PowerPC Early-Return Creation", false, false)
+
+char PPCEarlyReturn::ID = 0;
+FunctionPass*
+llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
+
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 5d4ae915b451..34a1a73a1804 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -72,12 +72,12 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                            unsigned SrcReg, bool isKill, int FrameIdx,
                            const TargetRegisterClass *RC,
                            SmallVectorImpl<MachineInstr*> &NewMIs,
-                           bool &NonRI) const;
+                           bool &NonRI, bool &SpillsVRS) const;
   bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
                             unsigned DestReg, int FrameIdx,
                             const TargetRegisterClass *RC,
                             SmallVectorImpl<MachineInstr*> &NewMIs,
-                            bool &NonRI) const;
+                            bool &NonRI, bool &SpillsVRS) const;
 public:
   explicit PPCInstrInfo(PPCTargetMachine &TM);
 
@@ -120,6 +120,17 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                                 MachineBasicBlock *FBB,
                                 const SmallVectorImpl<MachineOperand> &Cond,
                                 DebugLoc DL) const;
+
+  // Select analysis.
+  virtual bool canInsertSelect(const MachineBasicBlock&,
+                               const SmallVectorImpl<MachineOperand> &Cond,
+                               unsigned, unsigned, int&, int&, int&) const;
+  virtual void insertSelect(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI, DebugLoc DL,
+                            unsigned DstReg,
+                            const SmallVectorImpl<MachineOperand> &Cond,
+                            unsigned TrueReg, unsigned FalseReg) const;
+
   virtual void copyPhysReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I, DebugLoc DL,
                            unsigned DestReg, unsigned SrcReg,
@@ -146,6 +157,66 @@ class PPCInstrInfo : public PPCGenInstrInfo {
   virtual
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
 
+  virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+                             unsigned Reg, MachineRegisterInfo *MRI) const;
+
+  // If conversion by predication (only supported by some branch instructions).
+  // All of the profitability checks always return true; it is always
+  // profitable to use the predicated branches.
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+                                   unsigned NumCycles, unsigned ExtraPredCycles,
+                                   const BranchProbability &Probability) const {
+    return true;
+  }
+
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                                   unsigned NumT, unsigned ExtraT,
+                                   MachineBasicBlock &FMBB,
+                                   unsigned NumF, unsigned ExtraF,
+                                   const BranchProbability &Probability) const;
+
+  virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+                                         unsigned NumCycles,
+                                         const BranchProbability
+                                         &Probability) const {
+    return true;
+  }
+
+  virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+                                         MachineBasicBlock &FMBB) const {
+    return false;
+  }
+
+  // Predication support.
+  bool isPredicated(const MachineInstr *MI) const;
+
+  virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+
+  virtual
+  bool PredicateInstruction(MachineInstr *MI,
+                            const SmallVectorImpl<MachineOperand> &Pred) const;
+
+  virtual
+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+  virtual bool DefinesPredicate(MachineInstr *MI,
+                                std::vector<MachineOperand> &Pred) const;
+
+  virtual bool isPredicable(MachineInstr *MI) const;
+
+  // Comparison optimization.
+
+
+  virtual bool analyzeCompare(const MachineInstr *MI,
+                              unsigned &SrcReg, unsigned &SrcReg2,
+                              int &Mask, int &Value) const;
+
+  virtual bool optimizeCompareInstr(MachineInstr *CmpInstr,
+                                    unsigned SrcReg, unsigned SrcReg2,
+                                    int Mask, int Value,
+                                    const MachineRegisterInfo *MRI) const;
+
   /// GetInstSize - Return the number of bytes of code the specified
   /// instruction may be.  This returns the maximum number of bytes.
   ///
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 460e94342dc8..d3d96ea877ca 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -20,6 +20,10 @@ include "PPCInstrFormats.td"
 def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
   SDTCisVT<0, f64>, SDTCisPtrTy<1>
 ]>;
+def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
+  SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+
 def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
 def SDT_PPCCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                          SDTCisVT<1, i32> ]>;
@@ -36,10 +40,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
 ]>;
 
 def SDT_PPClbrx : SDTypeProfile<1, 2, [
-  SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+  SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
 ]>;
 def SDT_PPCstbrx : SDTypeProfile<0, 3, [
-  SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+  SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
 ]>;
 
 def SDT_PPClarx : SDTypeProfile<1, 1, [
@@ -53,32 +57,36 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
   SDTCisPtrTy<0>, SDTCisVT<1, i32>
 ]>;
 
-def SDT_PPCnop : SDTypeProfile<0, 0, []>;
 
 //===----------------------------------------------------------------------===//
 // PowerPC specific DAG Nodes.
 //
 
-def PPCfcfid  : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfre    : SDNode<"PPCISD::FRE",     SDTFPUnaryOp, []>;
+def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
+
+def PPCfcfid  : SDNode<"PPCISD::FCFID",   SDTFPUnaryOp, []>;
+def PPCfcfidu : SDNode<"PPCISD::FCFIDU",  SDTFPUnaryOp, []>;
+def PPCfcfids : SDNode<"PPCISD::FCFIDS",  SDTFPRoundOp, []>;
+def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>;
 def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
 def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
+def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
 def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
                        [SDNPHasChain, SDNPMayStore]>;
+def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
+                       [SDNPHasChain, SDNPMayLoad]>;
+def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
+                       [SDNPHasChain, SDNPMayLoad]>;
+
+// Extract FPSCR (not modeled at the DAG level).
+def PPCmffs   : SDNode<"PPCISD::MFFS",
+                       SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
+
+// Perform FADD in round-to-zero mode.
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
 
-// This sequence is used for long double->int conversions.  It changes the
-// bits in the FPSCR which is not modelled.  
-def PPCmffs   : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
-                        [SDNPOutGlue]>;
-def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
-                       [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
-                       [SDNPInGlue, SDNPOutGlue]>;
-def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
-                       [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsf  : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, 
-                       [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
-                        SDTCisVT<3, f64>]>,
-                       [SDNPInGlue]>;
 
 def PPCfsel   : SDNode<"PPCISD::FSEL",  
    // Type constraint for fsel.
@@ -113,10 +121,6 @@ def PPCsrl        : SDNode<"PPCISD::SRL"       , SDTIntShiftOp>;
 def PPCsra        : SDNode<"PPCISD::SRA"       , SDTIntShiftOp>;
 def PPCshl        : SDNode<"PPCISD::SHL"       , SDTIntShiftOp>;
 
-def PPCextsw_32   : SDNode<"PPCISD::EXTSW_32"  , SDTIntUnaryOp>;
-def PPCstd_32     : SDNode<"PPCISD::STD_32"    , SDTStore,
-                           [SDNPHasChain, SDNPMayStore]>;
-
 // These are target-independent nodes, but have target-specific formats.
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
                            [SDNPHasChain, SDNPOutGlue]>;
@@ -124,16 +128,12 @@ def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_PPCCallSeqEnd,
                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
 def SDT_PPCCall   : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
-def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
-                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
-                             SDNPVariadic]>;
-def PPCcall_SVR4  : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
-                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
-                            SDNPVariadic]>;
-def PPCcall_nop_SVR4  : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall,
-                               [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
-                                SDNPVariadic]>;
-def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
+def PPCcall  : SDNode<"PPCISD::CALL", SDT_PPCCall,
+                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                       SDNPVariadic]>;
+def PPCcall_nop  : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
+                          [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                           SDNPVariadic]>;
 def PPCload   : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
@@ -144,13 +144,9 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
                              SDNPInGlue, SDNPOutGlue]>;
 def PPCmtctr      : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def PPCbctrl_Darwin  : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
-                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
-                               SDNPVariadic]>;
-
-def PPCbctrl_SVR4  : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
-                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
-                             SDNPVariadic]>;
+def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
+                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                       SDNPVariadic]>;
 
 def retflag       : SDNode<"PPCISD::RET_FLAG", SDTNone,
                            [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -158,6 +154,14 @@ def retflag       : SDNode<"PPCISD::RET_FLAG", SDTNone,
 def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
                         [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
 
+def PPCeh_sjlj_setjmp  : SDNode<"PPCISD::EH_SJLJ_SETJMP",
+                                SDTypeProfile<1, 1, [SDTCisInt<0>,
+                                                     SDTCisPtrTy<1>]>,
+                                [SDNPHasChain, SDNPSideEffect]>;
+def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
+                                SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+                                [SDNPHasChain, SDNPSideEffect]>;
+
 def PPCvcmp       : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
 def PPCvcmp_o     : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
 
@@ -278,15 +282,44 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
   return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
 }], HI16>;
 
+// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// restricted memrix (offset/4) constants are alignment sensitive. If these
+// offsets are hidden behind TOC entries than the values of the lower-order
+// bits cannot be checked directly. As a result, we need to also incorporate
+// an alignment check into the relevant patterns.
+
+def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+                            (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4pre_store : PatFrag<
+                          (ops node:$val, node:$base, node:$offset),
+                          (pre_store node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+                              (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
 
 //===----------------------------------------------------------------------===//
 // PowerPC Flag Definitions.
 
 class isPPC64 { bit PPC64 = 1; }
-class isDOT   {
-  list<Register> Defs = [CR0];
-  bit RC  = 1;
-}
+class isDOT   { bit RC = 1; }
 
 class RegConstraint<string C> {
   string Constraints = C;
@@ -314,9 +347,6 @@ def s16imm  : Operand<i32> {
 def u16imm  : Operand<i32> {
   let PrintMethod = "printU16ImmOperand";
 }
-def s16immX4  : Operand<i32> {   // Multiply imm by 4 before printing.
-  let PrintMethod = "printS16X4ImmOperand";
-}
 def directbrtarget : Operand<OtherVT> {
   let PrintMethod = "printBranchOperand";
   let EncoderMethod = "getDirectBrEncoding";
@@ -344,26 +374,37 @@ def crbitm: Operand<i8> {
   let EncoderMethod = "get_crbitm_encoding";
 }
 // Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def ptr_rc_nor0 : PointerLikeRegClass<1>;
+
+def dispRI : Operand<iPTR>;
+def dispRIX : Operand<iPTR>;
+
 def memri : Operand<iPTR> {
   let PrintMethod = "printMemRegImm";
-  let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
+  let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
   let EncoderMethod = "getMemRIEncoding";
 }
 def memrr : Operand<iPTR> {
   let PrintMethod = "printMemRegReg";
-  let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg);
+  let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg);
 }
 def memrix : Operand<iPTR> {   // memri where the imm is shifted 2 bits.
   let PrintMethod = "printMemRegImmShifted";
-  let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
+  let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
   let EncoderMethod = "getMemRIXEncoding";
 }
 
-// PowerPC Predicate operand.  20 = (0<<5)|20 = always, CR0 is a dummy reg
-// that doesn't matter.
-def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
-                                     (ops (i32 20), (i32 zero_reg))> {
+// A single-register address. This is used with the SjLj
+// pseudo-instructions.
+def memr : Operand<iPTR> {
+  let MIOperandInfo = (ops ptr_rc:$ptrreg);
+}
+
+// PowerPC Predicate operand.
+def pred : Operand<OtherVT> {
   let PrintMethod = "printPredicateOperand";
+  let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg);
 }
 
 // Define PowerPC specific addressing mode.
@@ -372,9 +413,12 @@ def xaddr  : ComplexPattern<iPTR, 2, "SelectAddrIdx",    [], []>;
 def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
 def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
 
+// The address in a single register. This is used with the SjLj
+// pseudo-instructions.
+def addr   : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
+
 /// This is just the offset part of iaddr, used for preinc.
 def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
-def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
 
 //===----------------------------------------------------------------------===//
 // PowerPC Instruction Predicate Definitions.
@@ -382,6 +426,238 @@ def In32BitMode  : Predicate<"!PPCSubTarget.isPPC64()">;
 def In64BitMode  : Predicate<"PPCSubTarget.isPPC64()">;
 def IsBookE  : Predicate<"PPCSubTarget.isBookE()">;
 
+//===----------------------------------------------------------------------===//
+// PowerPC Multiclass Definitions.
+
+multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : XForm_6<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CR0] in
+    def o    : XForm_6<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                     string asmbase, string asmstr, InstrItinClass itin,
+                     list<dag> pattern> {
+  let BaseName = asmbase in {
+    let Defs = [CARRY] in
+    def NAME : XForm_6<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CARRY, CR0] in
+    def o    : XForm_6<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass XForm_10r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : XForm_10<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CR0] in
+    def o    : XForm_10<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                      string asmbase, string asmstr, InstrItinClass itin,
+                      list<dag> pattern> {
+  let BaseName = asmbase in {
+    let Defs = [CARRY] in
+    def NAME : XForm_10<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CARRY, CR0] in
+    def o    : XForm_10<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass XForm_11r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : XForm_11<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CR0] in
+    def o    : XForm_11<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CR0] in
+    def o    : XOForm_1<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+                      string asmbase, string asmstr, InstrItinClass itin,
+                      list<dag> pattern> {
+  let BaseName = asmbase in {
+    let Defs = [CARRY] in
+    def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CARRY, CR0] in
+    def o    : XOForm_1<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CR0] in
+    def o    : XOForm_3<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+                      string asmbase, string asmstr, InstrItinClass itin,
+                      list<dag> pattern> {
+  let BaseName = asmbase in {
+    let Defs = [CARRY] in
+    def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CARRY, CR0] in
+    def o    : XOForm_3<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : MForm_2<opcode, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CR0] in
+    def o    : MForm_2<opcode, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass MDForm_1r<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : MDForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CR0] in
+    def o    : MDForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass XSForm_1rc<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
+                      string asmbase, string asmstr, InstrItinClass itin,
+                      list<dag> pattern> {
+  let BaseName = asmbase in {
+    let Defs = [CARRY] in
+    def NAME : XSForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CARRY, CR0] in
+    def o    : XSForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : XForm_26<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CR1] in
+    def o    : XForm_26<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : AForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CR1] in
+    def o    : AForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass AForm_2r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : AForm_2<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CR1] in
+    def o    : AForm_2<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
+multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : AForm_3<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;
+    let Defs = [CR1] in
+    def o    : AForm_3<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // PowerPC Instruction Definitions.
 
@@ -401,17 +677,22 @@ def UPDATE_VRSAVE    : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
 
 let Defs = [R1], Uses = [R1] in
 def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC",
-                       [(set GPRC:$result,
-                             (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+                       [(set i32:$result,
+                             (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
                          
 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
 // instruction selection into a branch sequence.
 let usesCustomInserter = 1,    // Expanded after instruction selection.
     PPC970_Single = 1 in {
-  def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+  // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
+  // because either operand might become the first operand in an isel, and
+  // that operand cannot be r0.
+  def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond,
+                              GPRC_NOR0:$T, GPRC_NOR0:$F,
                               i32imm:$BROPC), "#SELECT_CC_I4",
                               []>;
-  def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+  def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond,
+                              G8RC_NOX0:$T, G8RC_NOX0:$F,
                               i32imm:$BROPC), "#SELECT_CC_I8",
                               []>;
   def SELECT_CC_F4  : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
@@ -438,12 +719,16 @@ def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
                      "#RESTORE_CR", []>;
 
 let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
-  let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in
-    def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
-                          "b${p:cc}lr ${p:reg}", BrB, 
-                          [(retflag)]>;
-  let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
+  let isReturn = 1, Uses = [LR, RM] in
+    def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB,
+                           [(retflag)]>;
+  let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
     def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+
+    let isCodeGenOnly = 1 in
+    def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+                            "b${cond:cc}ctr ${cond:reg}", BrB, []>;
+  }
 }
 
 let Defs = [LR] in
@@ -460,10 +745,21 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
   // BCC represents an arbitrary conditional branch on a predicate.
   // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
   // a two-value operand where a dag node expects two operands. :(
-  let isCodeGenOnly = 1 in
+  let isCodeGenOnly = 1 in {
     def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
                     "b${cond:cc} ${cond:reg}, $dst"
                     /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+    let isReturn = 1, Uses = [LR, RM] in
+    def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
+                           "b${cond:cc}lr ${cond:reg}", BrB, []>;
+
+    let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
+      def BDZLR  : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+                             "bdzlr", BrB, []>;
+      def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+                             "bdnzlr", BrB, []>;
+    }
+  }
 
   let Defs = [CTR], Uses = [CTR] in {
     def BDZ  : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
@@ -473,46 +769,33 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
   }
 }
 
-// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
-  // Convenient aliases for call instructions
-  let Uses = [RM] in {
-    def BL_Darwin  : IForm<18, 0, 1,
-                           (outs), (ins calltarget:$func), 
-                           "bl $func", BrB, []>;  // See Pat patterns below.
-    def BLA_Darwin : IForm<18, 1, 1, 
-                          (outs), (ins aaddr:$func),
-                          "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
-  }
-  let Uses = [CTR, RM] in {
-    def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, 
-                                  (outs), (ins),
-                                  "bctrl", BrB,
-                                  [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
+// The unconditional BCL used by the SjLj setjmp code.
+let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in {
+  let Defs = [LR], Uses = [RM] in {
+    def BCLalways  : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
+                            "bcl 20, 31, $dst">;
   }
 }
 
-// SVR4 ABI Calls.
 let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
   // Convenient aliases for call instructions
   let Uses = [RM] in {
-    def BL_SVR4  : IForm<18, 0, 1,
-                        (outs), (ins calltarget:$func), 
-                        "bl $func", BrB, []>;  // See Pat patterns below.
-    def BLA_SVR4 : IForm<18, 1, 1,
-                        (outs), (ins aaddr:$func),
-                        "bla $func", BrB,
-                        [(PPCcall_SVR4 (i32 imm:$func))]>;
+    def BL  : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+                    "bl $func", BrB, []>;  // See Pat patterns below.
+    def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+                    "bla $func", BrB, [(PPCcall (i32 imm:$func))]>;
   }
   let Uses = [CTR, RM] in {
-    def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
-                                (outs), (ins),
-                                "bctrl", BrB,
-                                [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
+    def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+                             "bctrl", BrB, [(PPCbctrl)]>,
+                Requires<[In32BitMode]>;
+
+    let isCodeGenOnly = 1 in
+    def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+                             "b${cond:cc}ctrl ${cond:reg}", BrB, []>;
   }
 }
 
-
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
 def TCRETURNdi :Pseudo< (outs),
                         (ins calltarget:$dst, i32imm:$offset),
@@ -531,6 +814,8 @@ def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
                  []>;
 
 
+let isCodeGenOnly = 1 in {
+
 let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
     isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM]  in
 def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
@@ -544,6 +829,7 @@ def TAILB   : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
                   "b $dst", BrB,
                   []>;
 
+}
 
 let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
     isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
@@ -551,6 +837,22 @@ def TAILBA   : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
                   "ba $dst", BrB,
                   []>;
 
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+  def EH_SjLj_SetJmp32  : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+                            "#EH_SJLJ_SETJMP32",
+                            [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+                          Requires<[In32BitMode]>;
+  let isTerminator = 1 in
+  def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+                            "#EH_SJLJ_LONGJMP32",
+                            [(PPCeh_sjlj_longjmp addr:$buf)]>,
+                          Requires<[In32BitMode]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+  def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+                        "#EH_SjLj_Setup\t$dst", []>;
+}
 
 // DCB* instructions.
 def DCBA   : DCB_Form<758, 0, (outs), (ins memrr:$dst),
@@ -586,93 +888,90 @@ let usesCustomInserter = 1 in {
   let Defs = [CR0] in {
     def ATOMIC_LOAD_ADD_I8 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8",
-      [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_SUB_I8 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8",
-      [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_AND_I8 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8",
-      [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_OR_I8 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8",
-      [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_XOR_I8 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8",
-      [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_NAND_I8 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8",
-      [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_ADD_I16 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16",
-      [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_SUB_I16 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16",
-      [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_AND_I16 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16",
-      [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_OR_I16 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16",
-      [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_XOR_I16 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16",
-      [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_NAND_I16 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16",
-      [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_ADD_I32 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32",
-      [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_SUB_I32 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32",
-      [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_AND_I32 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32",
-      [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_OR_I32 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32",
-      [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_XOR_I32 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32",
-      [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_NAND_I32 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32",
-      [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
+      [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
 
     def ATOMIC_CMP_SWAP_I8 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8",
-      [(set GPRC:$dst, 
-                    (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+      [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
     def ATOMIC_CMP_SWAP_I16 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
-      [(set GPRC:$dst, 
-                    (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+      [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
     def ATOMIC_CMP_SWAP_I32 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
-      [(set GPRC:$dst, 
-                    (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+      [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
 
     def ATOMIC_SWAP_I8 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8",
-      [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
+      [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
     def ATOMIC_SWAP_I16 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16",
-      [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
+      [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
     def ATOMIC_SWAP_I32 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32",
-      [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
+      [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
   }
 }
 
 // Instructions to support atomic operations
 def LWARX : XForm_1<31,  20, (outs GPRC:$rD), (ins memrr:$src),
                    "lwarx $rD, $src", LdStLWARX,
-                   [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
+                   [(set i32:$rD, (PPClarx xoaddr:$src))]>;
 
 let Defs = [CR0] in
 def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
                    "stwcx. $rS, $dst", LdStSTWCX,
-                   [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+                   [(PPCstcx i32:$rS, xoaddr:$dst)]>,
                    isDOT;
 
 let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
@@ -686,94 +985,94 @@ def TRAP  : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {
 def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
                   "lbz $rD, $src", LdStLoad,
-                  [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+                  [(set i32:$rD, (zextloadi8 iaddr:$src))]>;
 def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
                   "lha $rD, $src", LdStLHA,
-                  [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+                  [(set i32:$rD, (sextloadi16 iaddr:$src))]>,
                   PPC970_DGroup_Cracked;
 def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
                   "lhz $rD, $src", LdStLoad,
-                  [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+                  [(set i32:$rD, (zextloadi16 iaddr:$src))]>;
 def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
                   "lwz $rD, $src", LdStLoad,
-                  [(set GPRC:$rD, (load iaddr:$src))]>;
+                  [(set i32:$rD, (load iaddr:$src))]>;
 
 def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
                   "lfs $rD, $src", LdStLFD,
-                  [(set F4RC:$rD, (load iaddr:$src))]>;
+                  [(set f32:$rD, (load iaddr:$src))]>;
 def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
                   "lfd $rD, $src", LdStLFD,
-                  [(set F8RC:$rD, (load iaddr:$src))]>;
+                  [(set f64:$rD, (load iaddr:$src))]>;
 
 
 // Unindexed (r+i) Loads with Update (preinc).
-let mayLoad = 1 in {
-def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                    "lbzu $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                    "lhau $rD, $addr", LdStLHAU,
                    []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                    "lhzu $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                    "lwzu $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                   "lfsu $rD, $addr", LdStLFDU,
                   []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                   "lfdu $rD, $addr", LdStLFDU,
                   []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
 
 // Indexed (r+r) Loads with Update (preinc).
-def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lbzux $rD, $addr", LdStLoadUpd,
-                   []>, RegConstraint<"$addr.offreg = $ea_result">,
+                   []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lhaux $rD, $addr", LdStLHAU,
-                   []>, RegConstraint<"$addr.offreg = $ea_result">,
+                   []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lhzux $rD, $addr", LdStLoadUpd,
-                   []>, RegConstraint<"$addr.offreg = $ea_result">,
+                   []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lwzux $rD, $addr", LdStLoadUpd,
-                   []>, RegConstraint<"$addr.offreg = $ea_result">,
+                   []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
+def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lfsux $rD, $addr", LdStLFDU,
-                   []>, RegConstraint<"$addr.offreg = $ea_result">,
+                   []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
+def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lfdux $rD, $addr", LdStLFDU,
-                   []>, RegConstraint<"$addr.offreg = $ea_result">,
+                   []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 }
 }
@@ -783,32 +1082,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {
 def LBZX : XForm_1<31,  87, (outs GPRC:$rD), (ins memrr:$src),
                    "lbzx $rD, $src", LdStLoad,
-                   [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+                   [(set i32:$rD, (zextloadi8 xaddr:$src))]>;
 def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
                    "lhax $rD, $src", LdStLHA,
-                   [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+                   [(set i32:$rD, (sextloadi16 xaddr:$src))]>,
                    PPC970_DGroup_Cracked;
 def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
                    "lhzx $rD, $src", LdStLoad,
-                   [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+                   [(set i32:$rD, (zextloadi16 xaddr:$src))]>;
 def LWZX : XForm_1<31,  23, (outs GPRC:$rD), (ins memrr:$src),
                    "lwzx $rD, $src", LdStLoad,
-                   [(set GPRC:$rD, (load xaddr:$src))]>;
+                   [(set i32:$rD, (load xaddr:$src))]>;
                    
                    
 def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
                    "lhbrx $rD, $src", LdStLoad,
-                   [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
+                   [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
 def LWBRX : XForm_1<31,  534, (outs GPRC:$rD), (ins memrr:$src),
                    "lwbrx $rD, $src", LdStLoad,
-                   [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
+                   [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
 
 def LFSX   : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
                       "lfsx $frD, $src", LdStLFD,
-                      [(set F4RC:$frD, (load xaddr:$src))]>;
+                      [(set f32:$frD, (load xaddr:$src))]>;
 def LFDX   : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
                       "lfdx $frD, $src", LdStLFD,
-                      [(set F8RC:$frD, (load xaddr:$src))]>;
+                      [(set f64:$frD, (load xaddr:$src))]>;
+
+def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src),
+                      "lfiwax $frD, $src", LdStLFD,
+                      [(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
+def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src),
+                      "lfiwzx $frD, $src", LdStLFD,
+                      [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -819,137 +1125,128 @@ def LFDX   : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
 let PPC970_Unit = 2 in {
 def STB  : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
                    "stb $rS, $src", LdStStore,
-                   [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+                   [(truncstorei8 i32:$rS, iaddr:$src)]>;
 def STH  : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
                    "sth $rS, $src", LdStStore,
-                   [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+                   [(truncstorei16 i32:$rS, iaddr:$src)]>;
 def STW  : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
                    "stw $rS, $src", LdStStore,
-                   [(store GPRC:$rS, iaddr:$src)]>;
+                   [(store i32:$rS, iaddr:$src)]>;
 def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
                    "stfs $rS, $dst", LdStSTFD,
-                   [(store F4RC:$rS, iaddr:$dst)]>;
+                   [(store f32:$rS, iaddr:$dst)]>;
 def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
                    "stfd $rS, $dst", LdStSTFD,
-                   [(store F8RC:$rS, iaddr:$dst)]>;
+                   [(store f64:$rS, iaddr:$dst)]>;
 }
 
 // Unindexed (r+i) Stores with Update (preinc).
-let PPC970_Unit = 2 in {
-def STBU  : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
-                             symbolLo:$ptroff, ptr_rc:$ptrreg),
-                    "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
-                    [(set ptr_rc:$ea_res,
-                          (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, 
-                                         iaddroff:$ptroff))]>,
-                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU  : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
-                             symbolLo:$ptroff, ptr_rc:$ptrreg),
-                    "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
-                    [(set ptr_rc:$ea_res,
-                        (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, 
-                                        iaddroff:$ptroff))]>,
-                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STWU  : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
-                             symbolLo:$ptroff, ptr_rc:$ptrreg),
-                    "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
-                    [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, 
-                                                     iaddroff:$ptroff))]>,
-                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
-                             symbolLo:$ptroff, ptr_rc:$ptrreg),
-                    "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU,
-                    [(set ptr_rc:$ea_res, (pre_store F4RC:$rS,  ptr_rc:$ptrreg, 
-                                          iaddroff:$ptroff))]>,
-                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
-                             symbolLo:$ptroff, ptr_rc:$ptrreg),
-                    "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU,
-                    [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, 
-                                          iaddroff:$ptroff))]>,
-                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU  : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+                    "stbu $rS, $dst", LdStStoreUpd, []>,
+                    RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU  : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+                    "sthu $rS, $dst", LdStStoreUpd, []>,
+                    RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU  : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+                    "stwu $rS, $dst", LdStStoreUpd, []>,
+                    RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst),
+                    "stfsu $rS, $dst", LdStSTFDU, []>,
+                    RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst),
+                    "stfdu $rS, $dst", LdStSTFDU, []>,
+                    RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
 }
 
+// Patterns to match the pre-inc stores.  We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+          (STBU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+          (STHU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+          (STWU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+          (STFSU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+          (STFDU $rS, iaddroff:$ptroff, $ptrreg)>;
 
 // Indexed (r+r) Stores.
-//
 let PPC970_Unit = 2 in {
 def STBX  : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
                    "stbx $rS, $dst", LdStStore,
-                   [(truncstorei8 GPRC:$rS, xaddr:$dst)]>, 
+                   [(truncstorei8 i32:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
 def STHX  : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
                    "sthx $rS, $dst", LdStStore,
-                   [(truncstorei16 GPRC:$rS, xaddr:$dst)]>, 
+                   [(truncstorei16 i32:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
 def STWX  : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
                    "stwx $rS, $dst", LdStStore,
-                   [(store GPRC:$rS, xaddr:$dst)]>,
+                   [(store i32:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
  
-def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
-                             (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
-                   "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
-                   [(set ptr_rc:$ea_res,
-                      (pre_truncsti8 GPRC:$rS,
-                                     ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
-                   RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
-                   PPC970_DGroup_Cracked;
- 
-def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
-                             (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
-                   "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
-                   [(set ptr_rc:$ea_res,
-                      (pre_truncsti16 GPRC:$rS,
-                                      ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
-                   RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
-                   PPC970_DGroup_Cracked;
-                 
-def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
-                             (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
-                   "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
-                   [(set ptr_rc:$ea_res,
-                      (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
-                   RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
-                   PPC970_DGroup_Cracked;
-
-def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
-                              (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
-                    "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU,
-                    [(set ptr_rc:$ea_res,
-                       (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
-                    RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
-                    PPC970_DGroup_Cracked;
-
-def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
-                              (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
-                    "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU,
-                    [(set ptr_rc:$ea_res,
-                       (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
-                    RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
-                    PPC970_DGroup_Cracked;
-
 def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
                    "sthbrx $rS, $dst", LdStStore,
-                   [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, 
+                   [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
                    PPC970_DGroup_Cracked;
 def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
                    "stwbrx $rS, $dst", LdStStore,
-                   [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
+                   [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
                    PPC970_DGroup_Cracked;
 
 def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
                      "stfiwx $frS, $dst", LdStSTFD,
-                     [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+                     [(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
                      
 def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
                      "stfsx $frS, $dst", LdStSTFD,
-                     [(store F4RC:$frS, xaddr:$dst)]>;
+                     [(store f32:$frS, xaddr:$dst)]>;
 def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
                      "stfdx $frS, $dst", LdStSTFD,
-                     [(store F8RC:$frS, xaddr:$dst)]>;
+                     [(store f64:$frS, xaddr:$dst)]>;
 }
 
+// Indexed (r+r) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+                    "stbux $rS, $dst", LdStStoreUpd, []>,
+                    RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+                    PPC970_DGroup_Cracked;
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+                    "sthux $rS, $dst", LdStStoreUpd, []>,
+                    RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+                    PPC970_DGroup_Cracked;
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+                    "stwux $rS, $dst", LdStStoreUpd, []>,
+                    RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+                    PPC970_DGroup_Cracked;
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst),
+                    "stfsux $rS, $dst", LdStSTFDU, []>,
+                    RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+                    PPC970_DGroup_Cracked;
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst),
+                    "stfdux $rS, $dst", LdStSTFDU, []>,
+                    RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+                    PPC970_DGroup_Cracked;
+}
+
+// Patterns to match the pre-inc stores.  We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (STBUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (STHUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (STWUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (STFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (STFDUX $rS, $ptrreg, $ptroff)>;
+
 def SYNC : XForm_24_sync<31, 598, (outs), (ins),
                         "sync", LdStSync,
                         [(int_ppc_sync)]>;
@@ -959,157 +1256,206 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
 //
 
 let PPC970_Unit = 1 in {  // FXU Operations.
-def ADDI   : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+def ADDI   : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm),
                      "addi $rD, $rA, $imm", IntSimple,
-                     [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
-def ADDIL  : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm),
-                     "addi $rD, $rA, $imm", IntSimple,
-                     [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
-let Defs = [CARRY] in {
+                     [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>;
+let BaseName = "addic" in {
+let Defs = [CARRY] in
 def ADDIC  : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
                      "addic $rD, $rA, $imm", IntGeneral,
-                     [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
-                     PPC970_DGroup_Cracked;
+                     [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>,
+                     RecFormRel, PPC970_DGroup_Cracked;
+let Defs = [CARRY, CR0] in
 def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
                      "addic. $rD, $rA, $imm", IntGeneral,
-                     []>;
+                     []>, isDOT, RecFormRel;
 }
-def ADDIS  : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+def ADDIS  : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm),
                      "addis $rD, $rA, $imm", IntSimple,
-                     [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
-def LA     : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+                     [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
+let isCodeGenOnly = 1 in
+def LA     : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym),
                      "la $rD, $sym($rA)", IntGeneral,
-                     [(set GPRC:$rD, (add GPRC:$rA,
+                     [(set i32:$rD, (add i32:$rA,
                                           (PPClo tglobaladdr:$sym, 0)))]>;
 def MULLI  : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
                      "mulli $rD, $rA, $imm", IntMulLI,
-                     [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
-let Defs = [CARRY] in {
+                     [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in
 def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
                      "subfic $rD, $rA, $imm", IntGeneral,
-                     [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
-}
+                     [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>;
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
   def LI  : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
                        "li $rD, $imm", IntSimple,
-                       [(set GPRC:$rD, immSExt16:$imm)]>;
+                       [(set i32:$rD, immSExt16:$imm)]>;
   def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
                        "lis $rD, $imm", IntSimple,
-                       [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+                       [(set i32:$rD, imm16ShiftedSExt:$imm)]>;
 }
 }
 
 let PPC970_Unit = 1 in {  // FXU Operations.
+let Defs = [CR0] in {
 def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
                     "andi. $dst, $src1, $src2", IntGeneral,
-                    [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+                    [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
                     isDOT;
 def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
                     "andis. $dst, $src1, $src2", IntGeneral,
-                    [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+                    [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
                     isDOT;
+}
 def ORI   : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
                     "ori $dst, $src1, $src2", IntSimple,
-                    [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+                    [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
 def ORIS  : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
                     "oris $dst, $src1, $src2", IntSimple,
-                    [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+                    [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
 def XORI  : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
                     "xori $dst, $src1, $src2", IntSimple,
-                    [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+                    [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
 def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
                     "xoris $dst, $src1, $src2", IntSimple,
-                    [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+                    [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
 def NOP   : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
                          []>;
-def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
-                        "cmpwi $crD, $rA, $imm", IntCompare>;
-def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
-                         "cmplwi $dst, $src1, $src2", IntCompare>;
+let isCompare = 1, neverHasSideEffects = 1 in {
+  def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
+                          "cmpwi $crD, $rA, $imm", IntCompare>;
+  def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+                           "cmplwi $dst, $src1, $src2", IntCompare>;
+}
 }
 
+let PPC970_Unit = 1, neverHasSideEffects = 1 in {  // FXU Operations.
+defm NAND : XForm_6r<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                     "nand", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
+defm AND  : XForm_6r<31,  28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                     "and", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
+defm ANDC : XForm_6r<31,  60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                     "andc", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
+defm OR   : XForm_6r<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                     "or", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
+defm NOR  : XForm_6r<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                     "nor", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
+defm ORC  : XForm_6r<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                     "orc", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
+defm EQV  : XForm_6r<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                     "eqv", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
+defm XOR  : XForm_6r<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                     "xor", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
+defm SLW  : XForm_6r<31,  24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                     "slw", "$rA, $rS, $rB", IntGeneral,
+                     [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
+defm SRW  : XForm_6r<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                     "srw", "$rA, $rS, $rB", IntGeneral,
+                     [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
+defm SRAW : XForm_6rc<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                      "sraw", "$rA, $rS, $rB", IntShift,
+                      [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
+}
 
 let PPC970_Unit = 1 in {  // FXU Operations.
-def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "nand $rA, $rS, $rB", IntSimple,
-                   [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
-def AND  : XForm_6<31,  28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "and $rA, $rS, $rB", IntSimple,
-                   [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
-def ANDC : XForm_6<31,  60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "andc $rA, $rS, $rB", IntSimple,
-                   [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
-def OR   : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "or $rA, $rS, $rB", IntSimple,
-                   [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
-def NOR  : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "nor $rA, $rS, $rB", IntSimple,
-                   [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
-def ORC  : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "orc $rA, $rS, $rB", IntSimple,
-                   [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
-def EQV  : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "eqv $rA, $rS, $rB", IntSimple,
-                   [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
-def XOR  : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "xor $rA, $rS, $rB", IntSimple,
-                   [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
-def SLW  : XForm_6<31,  24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "slw $rA, $rS, $rB", IntGeneral,
-                   [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
-def SRW  : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "srw $rA, $rS, $rB", IntGeneral,
-                   [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
-let Defs = [CARRY] in {
-def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "sraw $rA, $rS, $rB", IntShift,
-                   [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+let neverHasSideEffects = 1 in {
+defm SRAWI : XForm_10rc<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH), 
+                        "srawi", "$rA, $rS, $SH", IntShift,
+                        [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
+defm CNTLZW : XForm_11r<31,  26, (outs GPRC:$rA), (ins GPRC:$rS),
+                        "cntlzw", "$rA, $rS", IntGeneral,
+                        [(set i32:$rA, (ctlz i32:$rS))]>;
+defm EXTSB  : XForm_11r<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
+                        "extsb", "$rA, $rS", IntSimple,
+                        [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
+defm EXTSH  : XForm_11r<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
+                        "extsh", "$rA, $rS", IntSimple,
+                        [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
 }
+let isCompare = 1, neverHasSideEffects = 1 in {
+  def CMPW   : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+                            "cmpw $crD, $rA, $rB", IntCompare>;
+  def CMPLW  : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+                            "cmplw $crD, $rA, $rB", IntCompare>;
 }
-
-let PPC970_Unit = 1 in {  // FXU Operations.
-let Defs = [CARRY] in {
-def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH), 
-                     "srawi $rA, $rS, $SH", IntShift,
-                     [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
-}
-def CNTLZW : XForm_11<31,  26, (outs GPRC:$rA), (ins GPRC:$rS),
-                      "cntlzw $rA, $rS", IntGeneral,
-                      [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
-def EXTSB  : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
-                      "extsb $rA, $rS", IntSimple,
-                      [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
-def EXTSH  : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
-                      "extsh $rA, $rS", IntSimple,
-                      [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
-
-def CMPW   : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
-                          "cmpw $crD, $rA, $rB", IntCompare>;
-def CMPLW  : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
-                          "cmplw $crD, $rA, $rB", IntCompare>;
 }
 let PPC970_Unit = 3 in {  // FPU Operations.
 //def FCMPO  : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
 //                      "fcmpo $crD, $fA, $fB", FPCompare>;
-def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB),
-                      "fcmpu $crD, $fA, $fB", FPCompare>;
-def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
-                      "fcmpu $crD, $fA, $fB", FPCompare>;
+let isCompare = 1, neverHasSideEffects = 1 in {
+  def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB),
+                        "fcmpu $crD, $fA, $fB", FPCompare>;
+  def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
+                        "fcmpu $crD, $fA, $fB", FPCompare>;
+}
 
 let Uses = [RM] in {
-  def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
-                        "fctiwz $frD, $frB", FPGeneral,
-                        [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
-  def FRSP   : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
-                        "frsp $frD, $frB", FPGeneral,
-                        [(set F4RC:$frD, (fround F8RC:$frB))]>;
-  def FSQRT  : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
-                        "fsqrt $frD, $frB", FPSqrt,
-                        [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
-  def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
-                        "fsqrts $frD, $frB", FPSqrt,
-                        [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+  let neverHasSideEffects = 1 in {
+  defm FCTIWZ : XForm_26r<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
+                          "fctiwz", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+
+  defm FRSP   : XForm_26r<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
+                          "frsp", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (fround f64:$frB))]>;
+
+  // The frin -> nearbyint mapping is valid only in fast-math mode.
+  let Interpretation64Bit = 1 in
+  defm FRIND  : XForm_26r<63, 392, (outs F8RC:$frD), (ins F8RC:$frB),
+                          "frin", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (fnearbyint f64:$frB))]>;
+  defm FRINS  : XForm_26r<63, 392, (outs F4RC:$frD), (ins F4RC:$frB),
+                          "frin", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (fnearbyint f32:$frB))]>;
+  }
+
+  // These pseudos expand to rint but also set FE_INEXACT when the result does
+  // not equal the argument.
+  let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
+    def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB),
+                            "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
+    def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB),
+                            "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
+  }
+
+  let neverHasSideEffects = 1 in {
+  let Interpretation64Bit = 1 in
+  defm FRIPD  : XForm_26r<63, 456, (outs F8RC:$frD), (ins F8RC:$frB),
+                          "frip", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (fceil f64:$frB))]>;
+  defm FRIPS  : XForm_26r<63, 456, (outs F4RC:$frD), (ins F4RC:$frB),
+                          "frip", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (fceil f32:$frB))]>;
+  let Interpretation64Bit = 1 in
+  defm FRIZD  : XForm_26r<63, 424, (outs F8RC:$frD), (ins F8RC:$frB),
+                          "friz", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (ftrunc f64:$frB))]>;
+  defm FRIZS  : XForm_26r<63, 424, (outs F4RC:$frD), (ins F4RC:$frB),
+                          "friz", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (ftrunc f32:$frB))]>;
+  let Interpretation64Bit = 1 in
+  defm FRIMD  : XForm_26r<63, 488, (outs F8RC:$frD), (ins F8RC:$frB),
+                          "frim", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (ffloor f64:$frB))]>;
+  defm FRIMS  : XForm_26r<63, 488, (outs F4RC:$frD), (ins F4RC:$frB),
+                          "frim", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (ffloor f32:$frB))]>;
+
+  defm FSQRT  : XForm_26r<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
+                          "fsqrt", "$frD, $frB", FPSqrt,
+                          [(set f64:$frD, (fsqrt f64:$frB))]>;
+  defm FSQRTS : XForm_26r<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
+                          "fsqrts", "$frD, $frB", FPSqrt,
+                          [(set f32:$frD, (fsqrt f32:$frB))]>;
+  }
   }
 }
 
@@ -1117,36 +1463,54 @@ let Uses = [RM] in {
 /// often coalesced away and we don't want the dispatch group builder to think
 /// that they will fill slots (which could cause the load of a LSU reject to
 /// sneak into a d-group with a store).
-def FMR   : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
-                     "fmr $frD, $frB", FPGeneral,
-                     []>,  // (set F4RC:$frD, F4RC:$frB)
-                     PPC970_Unit_Pseudo;
+let neverHasSideEffects = 1 in
+defm FMR   : XForm_26r<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
+                       "fmr", "$frD, $frB", FPGeneral,
+                       []>,  // (set f32:$frD, f32:$frB)
+                       PPC970_Unit_Pseudo;
 
-let PPC970_Unit = 3 in {  // FPU Operations.
+let PPC970_Unit = 3, neverHasSideEffects = 1 in {  // FPU Operations.
 // These are artificially split into two different forms, for 4/8 byte FP.
-def FABSS  : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
-                      "fabs $frD, $frB", FPGeneral,
-                      [(set F4RC:$frD, (fabs F4RC:$frB))]>;
-def FABSD  : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fabs $frD, $frB", FPGeneral,
-                      [(set F8RC:$frD, (fabs F8RC:$frB))]>;
-def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
-                      "fnabs $frD, $frB", FPGeneral,
-                      [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
-def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fnabs $frD, $frB", FPGeneral,
-                      [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
-def FNEGS  : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
-                      "fneg $frD, $frB", FPGeneral,
-                      [(set F4RC:$frD, (fneg F4RC:$frB))]>;
-def FNEGD  : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fneg $frD, $frB", FPGeneral,
-                      [(set F8RC:$frD, (fneg F8RC:$frB))]>;
-}
-                      
+defm FABSS  : XForm_26r<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
+                        "fabs", "$frD, $frB", FPGeneral,
+                        [(set f32:$frD, (fabs f32:$frB))]>;
+let Interpretation64Bit = 1 in
+defm FABSD  : XForm_26r<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "fabs", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (fabs f64:$frB))]>;
+defm FNABSS : XForm_26r<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
+                        "fnabs", "$frD, $frB", FPGeneral,
+                        [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
+let Interpretation64Bit = 1 in
+defm FNABSD : XForm_26r<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "fnabs", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
+defm FNEGS  : XForm_26r<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
+                        "fneg", "$frD, $frB", FPGeneral,
+                        [(set f32:$frD, (fneg f32:$frB))]>;
+let Interpretation64Bit = 1 in
+defm FNEGD  : XForm_26r<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "fneg", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (fneg f64:$frB))]>;
+
+// Reciprocal estimates.
+defm FRE      : XForm_26r<63, 24, (outs F8RC:$frD), (ins F8RC:$frB),
+                          "fre", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (PPCfre f64:$frB))]>;
+defm FRES     : XForm_26r<59, 24, (outs F4RC:$frD), (ins F4RC:$frB),
+                          "fres", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (PPCfre f32:$frB))]>;
+defm FRSQRTE  : XForm_26r<63, 26, (outs F8RC:$frD), (ins F8RC:$frB),
+                          "frsqrte", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
+defm FRSQRTES : XForm_26r<59, 26, (outs F4RC:$frD), (ins F4RC:$frB),
+                          "frsqrtes", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
+}
 
 // XL-Form instructions.  condition register logical ops.
 //
+let neverHasSideEffects = 1 in
 def MCRF   : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA),
                       "mcrf $BF, $BFA", BrMCR>,
              PPC970_DGroup_First, PPC970_Unit_CRU;
@@ -1161,6 +1525,7 @@ def CROR  : XLForm_1<19, 449, (outs CRBITRC:$CRD),
                       "cror $CRD, $CRA, $CRB", BrCR,
                       []>;
 
+let isCodeGenOnly = 1 in {
 def CRSET  : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
               "creqv $dst, $dst, $dst", BrCR,
               []>;
@@ -1178,6 +1543,7 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
               "crxor 6, 6, 6", BrCR,
               [(PPCcr6unset)]>;
 }
+}
 
 // XFX-Form instructions.  Instructions that deal with SPRs.
 //
@@ -1186,7 +1552,7 @@ def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
                           "mfctr $rT", SprMFSPR>,
             PPC970_DGroup_First, PPC970_Unit_FXU;
 }
-let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in {
+let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
 def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
                           "mtctr $rS", SprMTSPR>,
             PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -1213,6 +1579,30 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
                              "mfspr $rT, 256", IntGeneral>,
                PPC970_DGroup_First, PPC970_Unit_FXU;
 
+let isCodeGenOnly = 1 in {
+  def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
+                                (outs VRSAVERC:$reg), (ins GPRC:$rS),
+                                "mtspr 256, $rS", IntGeneral>,
+                  PPC970_DGroup_Single, PPC970_Unit_FXU;
+  def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT),
+                                (ins VRSAVERC:$reg),
+                                "mfspr $rT, 256", IntGeneral>,
+                  PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
+// so we'll need to scavenge a register for it.
+let mayStore = 1 in
+def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+                     "#SPILL_VRSAVE", []>;
+
+// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+                     "#RESTORE_VRSAVE", []>;
+
+let neverHasSideEffects = 1 in {
 def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
                       "mtcrf $FXM, $rS", BrMCRX>,
             PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1227,215 +1617,205 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
 // instruction to keep the register allocator from becoming confused.
 //
 // FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
+let isCodeGenOnly = 1 in
 def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
                        "#MFCRpseud", SprMFCR>,
             PPC970_MicroCode, PPC970_Unit_CRU;
-            
-def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
-                     "mfcr $rT", SprMFCR>,
-                     PPC970_MicroCode, PPC970_Unit_CRU;
 
 def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
                        "mfocrf $rT, $FXM", SprMFCR>,
             PPC970_DGroup_First, PPC970_Unit_CRU;
+} // neverHasSideEffects = 1
+
+let neverHasSideEffects = 1 in
+def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
+                     "mfcr $rT", SprMFCR>,
+                     PPC970_MicroCode, PPC970_Unit_CRU;
 
-// Instructions to manipulate FPSCR.  Only long double handling uses these.
-// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+// Pseudo instruction to perform FADD in round-to-zero mode.
+let usesCustomInserter = 1, Uses = [RM] in {
+  def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "",
+                      [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+}
 
+// The above pseudo gets expanded to make use of the following instructions
+// to manipulate FPSCR.  Note that FPSCR is not modeled at the DAG level.
 let Uses = [RM], Defs = [RM] in { 
   def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
-                         "mtfsb0 $FM", IntMTFSB0,
-                        [(PPCmtfsb0 (i32 imm:$FM))]>,
+                        "mtfsb0 $FM", IntMTFSB0, []>,
                PPC970_DGroup_Single, PPC970_Unit_FPU;
   def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
-                         "mtfsb1 $FM", IntMTFSB0,
-                        [(PPCmtfsb1 (i32 imm:$FM))]>,
+                        "mtfsb1 $FM", IntMTFSB0, []>,
                PPC970_DGroup_Single, PPC970_Unit_FPU;
-  // MTFSF does not actually produce an FP result.  We pretend it copies
-  // input reg B to the output.  If we didn't do this it would look like the
-  // instruction had no outputs (because we aren't modelling the FPSCR) and
-  // it would be deleted.
-  def MTFSF  : XFLForm<63, 711, (outs F8RC:$FRA),
-                                (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
-                         "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
-                         [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM), 
-                                                     F8RC:$rT, F8RC:$FRB))]>,
+  def MTFSF  : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT),
+                       "mtfsf $FM, $rT", IntMTFSB0, []>,
                PPC970_DGroup_Single, PPC970_Unit_FPU;
 }
 let Uses = [RM] in {
   def MFFS   : XForm_42<63, 583, (outs F8RC:$rT), (ins), 
                          "mffs $rT", IntMFFS,
-                         [(set F8RC:$rT, (PPCmffs))]>,
-               PPC970_DGroup_Single, PPC970_Unit_FPU;
-  def FADDrtz: AForm_2<63, 21,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
-                      "fadd $FRT, $FRA, $FRB", FPAddSub,
-                      [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+                         [(set f64:$rT, (PPCmffs))]>,
                PPC970_DGroup_Single, PPC970_Unit_FPU;
 }
 
 
-let PPC970_Unit = 1 in {  // FXU Operations.
-
+let PPC970_Unit = 1, neverHasSideEffects = 1 in {  // FXU Operations.
 // XO-Form instructions.  Arithmetic instructions that can set overflow bit
 //
-def ADD4  : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "add $rT, $rA, $rB", IntSimple,
-                     [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
-let Defs = [CARRY] in {
-def ADDC  : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "addc $rT, $rA, $rB", IntGeneral,
-                     [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
-                     PPC970_DGroup_Cracked;
-}
-def DIVW  : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "divw $rT, $rA, $rB", IntDivW,
-                     [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
-                     PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "divwu $rT, $rA, $rB", IntDivW,
-                     [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
-                     PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "mulhw $rT, $rA, $rB", IntMulHW,
-                     [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
-def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "mulhwu $rT, $rA, $rB", IntMulHWU,
-                     [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
-def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "mullw $rT, $rA, $rB", IntMulHW,
-                     [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
-def SUBF  : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "subf $rT, $rA, $rB", IntGeneral,
-                     [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
-let Defs = [CARRY] in {
-def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "subfc $rT, $rA, $rB", IntGeneral,
-                     [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
-                     PPC970_DGroup_Cracked;
-}
-def NEG    : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
-                      "neg $rT, $rA", IntSimple,
-                      [(set GPRC:$rT, (ineg GPRC:$rA))]>;
-let Uses = [CARRY], Defs = [CARRY] in {
-def ADDE  : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                      "adde $rT, $rA, $rB", IntGeneral,
-                      [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
-def ADDME  : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
-                      "addme $rT, $rA", IntGeneral,
-                      [(set GPRC:$rT, (adde GPRC:$rA, -1))]>;
-def ADDZE  : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
-                      "addze $rT, $rA", IntGeneral,
-                      [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
-def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                      "subfe $rT, $rA, $rB", IntGeneral,
-                      [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
-def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
-                      "subfme $rT, $rA", IntGeneral,
-                      [(set GPRC:$rT, (sube -1, GPRC:$rA))]>;
-def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
-                      "subfze $rT, $rA", IntGeneral,
-                      [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+defm ADD4  : XOForm_1r<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                       "add", "$rT, $rA, $rB", IntSimple,
+                       [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
+defm ADDC  : XOForm_1rc<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                        "addc", "$rT, $rA, $rB", IntGeneral,
+                        [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
+                        PPC970_DGroup_Cracked;
+defm DIVW  : XOForm_1r<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                       "divw", "$rT, $rA, $rB", IntDivW,
+                       [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
+                       PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVWU : XOForm_1r<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                       "divwu", "$rT, $rA, $rB", IntDivW,
+                       [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
+                       PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm MULHW : XOForm_1r<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                       "mulhw", "$rT, $rA, $rB", IntMulHW,
+                       [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
+defm MULHWU : XOForm_1r<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                       "mulhwu", "$rT, $rA, $rB", IntMulHWU,
+                       [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
+defm MULLW : XOForm_1r<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                       "mullw", "$rT, $rA, $rB", IntMulHW,
+                       [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
+defm SUBF  : XOForm_1r<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                       "subf", "$rT, $rA, $rB", IntGeneral,
+                       [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
+defm SUBFC : XOForm_1rc<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                        "subfc", "$rT, $rA, $rB", IntGeneral,
+                        [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
+                        PPC970_DGroup_Cracked;
+defm NEG    : XOForm_3r<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+                        "neg", "$rT, $rA", IntSimple,
+                        [(set i32:$rT, (ineg i32:$rA))]>;
+let Uses = [CARRY] in {
+defm ADDE  : XOForm_1rc<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                        "adde", "$rT, $rA, $rB", IntGeneral,
+                        [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
+defm ADDME  : XOForm_3rc<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+                         "addme", "$rT, $rA", IntGeneral,
+                         [(set i32:$rT, (adde i32:$rA, -1))]>;
+defm ADDZE  : XOForm_3rc<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+                         "addze", "$rT, $rA", IntGeneral,
+                         [(set i32:$rT, (adde i32:$rA, 0))]>;
+defm SUBFE : XOForm_1rc<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                        "subfe", "$rT, $rA, $rB", IntGeneral,
+                        [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
+defm SUBFME : XOForm_3rc<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+                         "subfme", "$rT, $rA", IntGeneral,
+                         [(set i32:$rT, (sube -1, i32:$rA))]>;
+defm SUBFZE : XOForm_3rc<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+                         "subfze", "$rT, $rA", IntGeneral,
+                         [(set i32:$rT, (sube 0, i32:$rA))]>;
 }
 }
 
 // A-Form instructions.  Most of the instructions executed in the FPU are of
 // this type.
 //
-let PPC970_Unit = 3 in {  // FPU Operations.
+let PPC970_Unit = 3, neverHasSideEffects = 1 in {  // FPU Operations.
 let Uses = [RM] in {
-  def FMADD : AForm_1<63, 29, 
+  defm FMADD : AForm_1r<63, 29, 
                       (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
-                      "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
-                      [(set F8RC:$FRT,
-                            (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>;
-  def FMADDS : AForm_1<59, 29,
+                      "fmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
+                      [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
+  defm FMADDS : AForm_1r<59, 29,
                       (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
-                      "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                      [(set F4RC:$FRT,
-                            (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>;
-  def FMSUB : AForm_1<63, 28,
+                      "fmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+                      [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
+  defm FMSUB : AForm_1r<63, 28,
                       (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
-                      "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
-                      [(set F8RC:$FRT,
-                            (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>;
-  def FMSUBS : AForm_1<59, 28,
+                      "fmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
+                      [(set f64:$FRT,
+                            (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
+  defm FMSUBS : AForm_1r<59, 28,
                       (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
-                      "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                      [(set F4RC:$FRT,
-                            (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>;
-  def FNMADD : AForm_1<63, 31,
+                      "fmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+                      [(set f32:$FRT,
+                            (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
+  defm FNMADD : AForm_1r<63, 31,
                       (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
-                      "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
-                      [(set F8RC:$FRT,
-                            (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>;
-  def FNMADDS : AForm_1<59, 31,
+                      "fnmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
+                      [(set f64:$FRT,
+                            (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
+  defm FNMADDS : AForm_1r<59, 31,
                       (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
-                      "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                      [(set F4RC:$FRT,
-                            (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>;
-  def FNMSUB : AForm_1<63, 30,
+                      "fnmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+                      [(set f32:$FRT,
+                            (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
+  defm FNMSUB : AForm_1r<63, 30,
                       (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
-                      "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
-                      [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC,
-                                                  (fneg F8RC:$FRB))))]>;
-  def FNMSUBS : AForm_1<59, 30,
+                      "fnmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
+                      [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+                                                 (fneg f64:$FRB))))]>;
+  defm FNMSUBS : AForm_1r<59, 30,
                       (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
-                      "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                      [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC,
-                                                  (fneg F4RC:$FRB))))]>;
+                      "fnmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+                      [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+                                                 (fneg f32:$FRB))))]>;
 }
 // FSEL is artificially split into 4 and 8-byte forms for the result.  To avoid
 // having 4 of these, force the comparison to always be an 8-byte double (code
 // should use an FMRSD if the input comparison value really wants to be a float)
 // and 4/8 byte forms for the result and operand type..
-def FSELD : AForm_1<63, 23,
-                    (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
-                    "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                    [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
-def FSELS : AForm_1<63, 23,
-                     (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
-                     "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                    [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
+let Interpretation64Bit = 1 in
+defm FSELD : AForm_1r<63, 23,
+                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+                      "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+                      [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
+defm FSELS : AForm_1r<63, 23,
+                      (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+                      "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+                      [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
 let Uses = [RM] in {
-  def FADD  : AForm_2<63, 21,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
-                      "fadd $FRT, $FRA, $FRB", FPAddSub,
-                      [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
-  def FADDS : AForm_2<59, 21,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
-                      "fadds $FRT, $FRA, $FRB", FPGeneral,
-                      [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
-  def FDIV  : AForm_2<63, 18,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
-                      "fdiv $FRT, $FRA, $FRB", FPDivD,
-                      [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
-  def FDIVS : AForm_2<59, 18,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
-                      "fdivs $FRT, $FRA, $FRB", FPDivS,
-                      [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
-  def FMUL  : AForm_3<63, 25,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
-                      "fmul $FRT, $FRA, $FRC", FPFused,
-                      [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>;
-  def FMULS : AForm_3<59, 25,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
-                      "fmuls $FRT, $FRA, $FRC", FPGeneral,
-                      [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>;
-  def FSUB  : AForm_2<63, 20,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
-                      "fsub $FRT, $FRA, $FRB", FPAddSub,
-                      [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
-  def FSUBS : AForm_2<59, 20,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
-                      "fsubs $FRT, $FRA, $FRB", FPGeneral,
-                      [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+  defm FADD  : AForm_2r<63, 21,
+                        (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+                        "fadd", "$FRT, $FRA, $FRB", FPAddSub,
+                        [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
+  defm FADDS : AForm_2r<59, 21,
+                        (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+                        "fadds", "$FRT, $FRA, $FRB", FPGeneral,
+                        [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
+  defm FDIV  : AForm_2r<63, 18,
+                        (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+                        "fdiv", "$FRT, $FRA, $FRB", FPDivD,
+                        [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
+  defm FDIVS : AForm_2r<59, 18,
+                        (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+                        "fdivs", "$FRT, $FRA, $FRB", FPDivS,
+                        [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
+  defm FMUL  : AForm_3r<63, 25,
+                        (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
+                        "fmul", "$FRT, $FRA, $FRC", FPFused,
+                        [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
+  defm FMULS : AForm_3r<59, 25,
+                        (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
+                        "fmuls", "$FRT, $FRA, $FRC", FPGeneral,
+                        [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
+  defm FSUB  : AForm_2r<63, 20,
+                        (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+                        "fsub", "$FRT, $FRA, $FRB", FPAddSub,
+                        [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
+  defm FSUBS : AForm_2r<59, 20,
+                        (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+                        "fsubs", "$FRT, $FRA, $FRB", FPGeneral,
+                        [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
   }
 }
 
+let neverHasSideEffects = 1 in {
 let PPC970_Unit = 1 in {  // FXU Operations.
+  let isSelect = 1 in
   def ISEL  : AForm_4<31, 15,
-                     (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
+                     (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond),
                      "isel $rT, $rA, $rB, $cond", IntGeneral,
                      []>;
 }
@@ -1445,26 +1825,29 @@ let PPC970_Unit = 1 in {  // FXU Operations.
 //
 let isCommutable = 1 in {
 // RLWIMI can be commuted if the rotate amount is zero.
-def RLWIMI : MForm_2<20,
-                     (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB, 
-                      u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate,
-                      []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
-                      NoEncode<"$rSi">;
+defm RLWIMI : MForm_2r<20, (outs GPRC:$rA),
+                       (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB, 
+                       u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IntRotate,
+                       []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
+                       NoEncode<"$rSi">;
 }
+let BaseName = "rlwinm" in {
 def RLWINM : MForm_2<21,
                      (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
                      "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
-                     []>;
+                     []>, RecFormRel;
+let Defs = [CR0] in
 def RLWINMo : MForm_2<21,
-                     (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
-                     "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
-                     []>, isDOT, PPC970_DGroup_Cracked;
-def RLWNM  : MForm_2<23,
-                     (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
-                     "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral,
-                     []>;
+                      (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+                      "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+                      []>, isDOT, RecFormRel, PPC970_DGroup_Cracked;
 }
-
+defm RLWNM  : MForm_2r<23, (outs GPRC:$rA),
+                       (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
+                       "rlwnm", "$rA, $rS, $rB, $MB, $ME", IntGeneral,
+                       []>;
+}
+} // neverHasSideEffects = 1
 
 //===----------------------------------------------------------------------===//
 // PowerPC Instruction Patterns
@@ -1475,47 +1858,43 @@ def : Pat<(i32 imm:$imm),
           (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
 
 // Implement the 'not' operation with the NOR instruction.
-def NOT : Pat<(not GPRC:$in),
-              (NOR GPRC:$in, GPRC:$in)>;
+def NOT : Pat<(not i32:$in),
+              (NOR $in, $in)>;
 
 // ADD an arbitrary immediate.
-def : Pat<(add GPRC:$in, imm:$imm),
-          (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+def : Pat<(add i32:$in, imm:$imm),
+          (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
 // OR an arbitrary immediate.
-def : Pat<(or GPRC:$in, imm:$imm),
-          (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+def : Pat<(or i32:$in, imm:$imm),
+          (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
 // XOR an arbitrary immediate.
-def : Pat<(xor GPRC:$in, imm:$imm),
-          (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+def : Pat<(xor i32:$in, imm:$imm),
+          (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
 // SUBFIC
-def : Pat<(sub  immSExt16:$imm, GPRC:$in),
-          (SUBFIC GPRC:$in, imm:$imm)>;
+def : Pat<(sub immSExt16:$imm, i32:$in),
+          (SUBFIC $in, imm:$imm)>;
 
 // SHL/SRL
-def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
-          (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
-def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
-          (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
+def : Pat<(shl i32:$in, (i32 imm:$imm)),
+          (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl i32:$in, (i32 imm:$imm)),
+          (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>;
 
 // ROTL
-def : Pat<(rotl GPRC:$in, GPRC:$sh),
-          (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
-def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
-          (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+def : Pat<(rotl i32:$in, i32:$sh),
+          (RLWNM $in, $sh, 0, 31)>;
+def : Pat<(rotl i32:$in, (i32 imm:$imm)),
+          (RLWINM $in, imm:$imm, 0, 31)>;
 
 // RLWNM
-def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
-          (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
+def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
+          (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
 
 // Calls
-def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)),
-          (BL_Darwin tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)),
-          (BL_Darwin texternalsym:$dst)>;
-def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)),
-          (BL_SVR4 tglobaladdr:$dst)>;
-def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)),
-          (BL_SVR4 texternalsym:$dst)>;
+def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
+          (BL tglobaladdr:$dst)>;
+def : Pat<(PPCcall (i32 texternalsym:$dst)),
+          (BL texternalsym:$dst)>;
 
 
 def : Pat<(PPCtc_return (i32 tglobaladdr:$dst),  imm:$imm),
@@ -1538,28 +1917,28 @@ def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
 def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
 def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
 def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
-def : Pat<(PPChi tglobaltlsaddr:$g, GPRC:$in),
-          (ADDIS GPRC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(PPClo tglobaltlsaddr:$g, GPRC:$in),
-          (ADDIL GPRC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
-          (ADDIS GPRC:$in, tglobaladdr:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
-          (ADDIS GPRC:$in, tconstpool:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
-          (ADDIS GPRC:$in, tjumptable:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
-          (ADDIS GPRC:$in, tblockaddress:$g)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in),
+          (ADDIS $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in),
+          (ADDI $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)),
+          (ADDIS $in, tglobaladdr:$g)>;
+def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)),
+          (ADDIS $in, tconstpool:$g)>;
+def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)),
+          (ADDIS $in, tjumptable:$g)>;
+def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
+          (ADDIS $in, tblockaddress:$g)>;
 
 // Standard shifts.  These are represented separately from the real shifts above
 // so that we can distinguish between shifts that allow 5-bit and 6-bit shift
 // amounts.
-def : Pat<(sra GPRC:$rS, GPRC:$rB),
-          (SRAW GPRC:$rS, GPRC:$rB)>;
-def : Pat<(srl GPRC:$rS, GPRC:$rB),
-          (SRW GPRC:$rS, GPRC:$rB)>;
-def : Pat<(shl GPRC:$rS, GPRC:$rB),
-          (SLW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(sra i32:$rS, i32:$rB),
+          (SRAW $rS, $rB)>;
+def : Pat<(srl i32:$rS, i32:$rB),
+          (SRW $rS, $rB)>;
+def : Pat<(shl i32:$rS, i32:$rB),
+          (SLW $rS, $rB)>;
 
 def : Pat<(zextloadi1 iaddr:$src),
           (LBZ iaddr:$src)>;
@@ -1582,8 +1961,8 @@ def : Pat<(f64 (extloadf32 iaddr:$src)),
 def : Pat<(f64 (extloadf32 xaddr:$src)),
           (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
 
-def : Pat<(f64 (fextend F4RC:$src)),
-          (COPY_TO_REGCLASS F4RC:$src, F8RC)>;
+def : Pat<(f64 (fextend f32:$src)),
+          (COPY_TO_REGCLASS $src, F8RC)>;
 
 // Memory barriers
 def : Pat<(membarrier (i32 imm /*ll*/),
@@ -1595,5 +1974,15 @@ def : Pat<(membarrier (i32 imm /*ll*/),
 
 def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
 
+// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+          (FNMSUB $A, $C, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+          (FNMSUB $A, $C, $B)>;
+def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
+          (FNMSUBS $A, $C, $B)>;
+def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
+          (FNMSUBS $A, $C, $B)>;
+
 include "PPCInstrAltivec.td"
 include "PPCInstr64Bit.td"
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index b1636a20b83f..40d1f3a5fc27 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -47,6 +47,9 @@ class PPCFunctionInfo : public MachineFunctionInfo {
   /// SpillsCR - Indicates whether CR is spilled in the current function.
   bool SpillsCR;
 
+  /// Indicates whether VRSAVE is spilled in the current function.
+  bool SpillsVRSAVE;
+
   /// LRStoreRequired - The bool indicates whether there is some explicit use of
   /// the LR/LR8 stack slot that is not obvious from scanning the code.  This
   /// requires that the code generator produce a store of LR to the stack on
@@ -81,6 +84,11 @@ class PPCFunctionInfo : public MachineFunctionInfo {
   /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
   int CRSpillFrameIndex;
 
+  /// If any of CR[2-4] need to be saved in the prologue and restored in the
+  /// epilogue then they are added to this array. This is used for the
+  /// 64-bit SVR4 ABI.
+  SmallVector<unsigned, 3> MustSaveCRs;
+
 public:
   explicit PPCFunctionInfo(MachineFunction &MF) 
     : FramePointerSaveIndex(0),
@@ -88,6 +96,7 @@ class PPCFunctionInfo : public MachineFunctionInfo {
       HasSpills(false),
       HasNonRISpills(false),
       SpillsCR(false),
+      SpillsVRSAVE(false),
       LRStoreRequired(false),
       MinReservedArea(0),
       TailCallSPDelta(0),
@@ -127,6 +136,9 @@ class PPCFunctionInfo : public MachineFunctionInfo {
   void setSpillsCR()       { SpillsCR = true; }
   bool isCRSpilled() const { return SpillsCR; }
 
+  void setSpillsVRSAVE()       { SpillsVRSAVE = true; }
+  bool isVRSAVESpilled() const { return SpillsVRSAVE; }
+
   void setLRStoreRequired() { LRStoreRequired = true; }
   bool isLRStoreRequired() const { return LRStoreRequired; }
 
@@ -147,6 +159,10 @@ class PPCFunctionInfo : public MachineFunctionInfo {
 
   int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
   void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
+
+  const SmallVector<unsigned, 3> &
+    getMustSaveCRs() const { return MustSaveCRs; }
+  void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
 };
 
 } // end of namespace llvm
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index e2c7221be376..2be6324fd7be 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -68,7 +68,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
   ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
   ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
   ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
-  ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
+  ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
 }
 
 /// getPointerRegClass - Return the register class to use to hold pointers.
@@ -76,6 +76,14 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
 const TargetRegisterClass *
 PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
                                                                        const {
+  // Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value
+  // when it checks for ZERO folding.
+  if (Kind == 1) {
+    if (Subtarget.isPPC64())
+      return &PPC::G8RC_NOX0RegClass;
+    return &PPC::GPRC_NOR0RegClass;
+  }
+
   if (Subtarget.isPPC64())
     return &PPC::G8RCRegClass;
   return &PPC::GPRCRegClass;
@@ -99,12 +107,35 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
   return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
 }
 
+const uint32_t*
+PPCRegisterInfo::getNoPreservedMask() const {
+  // The naming here is inverted: The CSR_NoRegs_Altivec has the
+  // Altivec registers masked so that they're not saved and restored around
+  // instructions with this preserved mask.
+
+  if (!Subtarget.hasAltivec())
+    return CSR_NoRegs_Altivec_RegMask;
+
+  if (Subtarget.isDarwin())
+    return CSR_NoRegs_Darwin_RegMask;
+  return CSR_NoRegs_RegMask;
+}
+
 BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   const PPCFrameLowering *PPCFI =
     static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
 
-  Reserved.set(PPC::R0);
+  // The ZERO register is not really a register, but the representation of r0
+  // when used in instructions that treat r0 as the constant 0.
+  Reserved.set(PPC::ZERO);
+  Reserved.set(PPC::ZERO8);
+
+  // The FP register is also not really a register, but is the representation
+  // of the frame pointer register used by ISD::FRAMEADDR.
+  Reserved.set(PPC::FP);
+  Reserved.set(PPC::FP8);
+
   Reserved.set(PPC::R1);
   Reserved.set(PPC::LR);
   Reserved.set(PPC::LR8);
@@ -117,16 +148,14 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   }
   
   // On PPC64, r13 is the thread pointer. Never allocate this register.
-  // Note that this is over conservative, as it also prevents allocation of R31
-  // when the FP is not needed.
   if (Subtarget.isPPC64()) {
     Reserved.set(PPC::R13);
-    Reserved.set(PPC::R31);
 
-    Reserved.set(PPC::X0);
     Reserved.set(PPC::X1);
     Reserved.set(PPC::X13);
-    Reserved.set(PPC::X31);
+
+    if (PPCFI->needsFP(MF))
+      Reserved.set(PPC::X31);
 
     // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
     if (Subtarget.isSVR4ABI()) {
@@ -149,6 +178,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
   switch (RC->getID()) {
   default:
     return 0;
+  case PPC::G8RC_NOX0RegClassID:
+  case PPC::GPRC_NOR0RegClassID: 
   case PPC::G8RCRegClassID:
   case PPC::GPRCRegClassID: {
     unsigned FP = TFI->hasFP(MF) ? 1 : 0;
@@ -174,8 +205,7 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
 ///   stwxu  R0, SP, Rnegsize   ; add and update the SP with the negated size
 ///   addi   Rnew, SP, \#maxCalFrameSize ; get the top of the allocation
 ///
-void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
-                                        int SPAdj, RegScavenger *RS) const {
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
   // Get the instruction.
   MachineInstr &MI = *II;
   // Get the instruction's basic block.
@@ -271,22 +301,19 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
 ///   stw rA, FI               ; Store rA to the frame.
 ///
 void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
-                                      unsigned FrameIndex, int SPAdj,
-                                      RegScavenger *RS) const {
+                                      unsigned FrameIndex) const {
   // Get the instruction.
   MachineInstr &MI = *II;       // ; SPILL_CR <SrcReg>, <offset>
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
   DebugLoc dl = MI.getDebugLoc();
 
-  // FIXME: Once LLVM supports creating virtual registers here, or the register
-  // scavenger can return multiple registers, stop using reserved registers
-  // here.
-  (void) SPAdj;
-  (void) RS;
-
   bool LP64 = Subtarget.isPPC64();
-  unsigned Reg = LP64 ? PPC::X0 : PPC::R0;
+  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+  unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
   unsigned SrcReg = MI.getOperand(0).getReg();
 
   // We need to store the CR in the low 4-bits of the saved value. First, issue
@@ -296,16 +323,20 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
     
   // If the saved register wasn't CR0, shift the bits left so that they are in
   // CR0's slot.
-  if (SrcReg != PPC::CR0)
+  if (SrcReg != PPC::CR0) {
+    unsigned Reg1 = Reg;
+    Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
     // rlwinm rA, rA, ShiftBits, 0, 31.
     BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
-      .addReg(Reg, RegState::Kill)
-      .addImm(getPPCRegisterNumbering(SrcReg) * 4)
+      .addReg(Reg1, RegState::Kill)
+      .addImm(getEncodingValue(SrcReg) * 4)
       .addImm(0)
       .addImm(31);
+  }
 
   addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
-                    .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+                    .addReg(Reg, RegState::Kill),
                     FrameIndex);
 
   // Discard the pseudo instruction.
@@ -313,22 +344,19 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
 }
 
 void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
-                                      unsigned FrameIndex, int SPAdj,
-                                      RegScavenger *RS) const {
+                                      unsigned FrameIndex) const {
   // Get the instruction.
   MachineInstr &MI = *II;       // ; <DestReg> = RESTORE_CR <offset>
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
   DebugLoc dl = MI.getDebugLoc();
 
-  // FIXME: Once LLVM supports creating virtual registers here, or the register
-  // scavenger can return multiple registers, stop using reserved registers
-  // here.
-  (void) SPAdj;
-  (void) RS;
-
   bool LP64 = Subtarget.isPPC64();
-  unsigned Reg = LP64 ? PPC::X0 : PPC::R0;
+  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+  unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
   unsigned DestReg = MI.getOperand(0).getReg();
   assert(MI.definesRegister(DestReg) &&
     "RESTORE_CR does not define its destination");
@@ -339,15 +367,67 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
   // If the reloaded register isn't CR0, shift the bits right so that they are
   // in the right CR's slot.
   if (DestReg != PPC::CR0) {
-    unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+    unsigned Reg1 = Reg;
+    Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+    unsigned ShiftBits = getEncodingValue(DestReg)*4;
     // rlwinm r11, r11, 32-ShiftBits, 0, 31.
     BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
-             .addReg(Reg).addImm(32-ShiftBits).addImm(0)
+             .addReg(Reg1, RegState::Kill).addImm(32-ShiftBits).addImm(0)
              .addImm(31);
   }
 
   BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
-             .addReg(Reg);
+             .addReg(Reg, RegState::Kill);
+
+  // Discard the pseudo instruction.
+  MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+                                          unsigned FrameIndex) const {
+  // Get the instruction.
+  MachineInstr &MI = *II;       // ; SPILL_VRSAVE <SrcReg>, <offset>
+  // Get the instruction's basic block.
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  DebugLoc dl = MI.getDebugLoc();
+
+  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+  unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+  unsigned SrcReg = MI.getOperand(0).getReg();
+
+  BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
+          .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+    
+  addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+                    .addReg(Reg, RegState::Kill),
+                    FrameIndex);
+
+  // Discard the pseudo instruction.
+  MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
+                                         unsigned FrameIndex) const {
+  // Get the instruction.
+  MachineInstr &MI = *II;       // ; <DestReg> = RESTORE_VRSAVE <offset>
+  // Get the instruction's basic block.
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  DebugLoc dl = MI.getDebugLoc();
+
+  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+  unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+  unsigned DestReg = MI.getOperand(0).getReg();
+  assert(MI.definesRegister(DestReg) &&
+    "RESTORE_VRSAVE does not define its destination");
+
+  addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
+                              Reg), FrameIndex);
+
+  BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
+             .addReg(Reg, RegState::Kill);
 
   // Discard the pseudo instruction.
   MBB.erase(II);
@@ -374,6 +454,33 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
   return false;
 }
 
+// Figure out if the offset in the instruction is shifted right two bits. This
+// is true for instructions like "STD", which the machine implicitly adds two
+// low zeros to.
+static bool usesIXAddr(const MachineInstr &MI) {
+  unsigned OpC = MI.getOpcode();
+
+  switch (OpC) {
+  default:
+    return false;
+  case PPC::LWA:
+  case PPC::LD:
+  case PPC::STD:
+    return true;
+  }
+}
+
+// Return the OffsetOperandNo given the FIOperandNum (and the instruction).
+static unsigned getOffsetONFromFION(const MachineInstr &MI,
+                                    unsigned FIOperandNum) {
+  // Take into account whether it's an add or mem instruction
+  unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
+  if (MI.isInlineAsm())
+    OffsetOperandNo = FIOperandNum-1;
+
+  return OffsetOperandNo;
+}
+
 void
 PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                      int SPAdj, unsigned FIOperandNum,
@@ -391,10 +498,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
   DebugLoc dl = MI.getDebugLoc();
 
-  // Take into account whether it's an add or mem instruction
-  unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
-  if (MI.isInlineAsm())
-    OffsetOperandNo = FIOperandNum-1;
+  unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
 
   // Get the frame index.
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
@@ -409,16 +513,22 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // Special case for dynamic alloca.
   if (FPSI && FrameIndex == FPSI &&
       (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
-    lowerDynamicAlloc(II, SPAdj, RS);
+    lowerDynamicAlloc(II);
     return;
   }
 
-  // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
+  // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
   if (OpC == PPC::SPILL_CR) {
-    lowerCRSpilling(II, FrameIndex, SPAdj, RS);
+    lowerCRSpilling(II, FrameIndex);
     return;
   } else if (OpC == PPC::RESTORE_CR) {
-    lowerCRRestore(II, FrameIndex, SPAdj, RS);
+    lowerCRRestore(II, FrameIndex);
+    return;
+  } else if (OpC == PPC::SPILL_VRSAVE) {
+    lowerVRSAVESpilling(II, FrameIndex);
+    return;
+  } else if (OpC == PPC::RESTORE_VRSAVE) {
+    lowerVRSAVERestore(II, FrameIndex);
     return;
   }
 
@@ -430,36 +540,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                                 (is64Bit ? PPC::X1 : PPC::R1),
                                               false);
 
-  // Figure out if the offset in the instruction is shifted right two bits. This
-  // is true for instructions like "STD", which the machine implicitly adds two
-  // low zeros to.
-  bool isIXAddr = false;
-  switch (OpC) {
-  case PPC::LWA:
-  case PPC::LD:
-  case PPC::STD:
-  case PPC::STD_32:
-    isIXAddr = true;
-    break;
-  }
+  // Figure out if the offset in the instruction is shifted right two bits.
+  bool isIXAddr = usesIXAddr(MI);
 
-  bool noImmForm = false;
-  switch (OpC) {
-  case PPC::LVEBX:
-  case PPC::LVEHX:
-  case PPC::LVEWX:
-  case PPC::LVX:
-  case PPC::LVXL:
-  case PPC::LVSL:
-  case PPC::LVSR:
-  case PPC::STVEBX:
-  case PPC::STVEHX:
-  case PPC::STVEWX:
-  case PPC::STVX:
-  case PPC::STVXL:
-    noImmForm = true;
-    break;
-  }
+  // If the instruction is not present in ImmToIdxMap, then it has no immediate
+  // form (and must be r+r).
+  bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC);
 
   // Now add the frame object offset to the offset from r1.
   int Offset = MFI->getObjectOffset(FrameIndex);
@@ -497,13 +583,15 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
-  unsigned SReg = MF.getRegInfo().createVirtualRegister(is64Bit ? G8RC : GPRC);
+  const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
+  unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC),
+           SReg = MF.getRegInfo().createVirtualRegister(RC);
 
   // Insert a set of rA with the full offset value before the ld, st, or add
-  BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
+  BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
     .addImm(Offset >> 16);
   BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
-    .addReg(SReg, RegState::Kill)
+    .addReg(SRegHi, RegState::Kill)
     .addImm(Offset);
 
   // Convert into indexed form of the instruction:
@@ -545,3 +633,124 @@ unsigned PPCRegisterInfo::getEHExceptionRegister() const {
 unsigned PPCRegisterInfo::getEHHandlerRegister() const {
   return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
 }
+
+/// Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool PPCRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+  assert(Offset < 0 && "Local offset must be negative");
+
+  unsigned FIOperandNum = 0;
+  while (!MI->getOperand(FIOperandNum).isFI()) {
+    ++FIOperandNum;
+    assert(FIOperandNum < MI->getNumOperands() &&
+           "Instr doesn't have FrameIndex operand!");
+  }
+
+  unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
+
+  if (!usesIXAddr(*MI))
+    Offset += MI->getOperand(OffsetOperandNo).getImm();
+  else
+    Offset += MI->getOperand(OffsetOperandNo).getImm() << 2;
+
+  // It's the load/store FI references that cause issues, as it can be difficult
+  // to materialize the offset if it won't fit in the literal field. Estimate
+  // based on the size of the local frame and some conservative assumptions
+  // about the rest of the stack frame (note, this is pre-regalloc, so
+  // we don't know everything for certain yet) whether this offset is likely
+  // to be out of range of the immediate. Return true if so.
+
+  // We only generate virtual base registers for loads and stores that have
+  // an r+i form. Return false for everything else.
+  unsigned OpC = MI->getOpcode();
+  if (!ImmToIdxMap.count(OpC))
+    return false;
+
+  // Don't generate a new virtual base register just to add zero to it.
+  if ((OpC == PPC::ADDI || OpC == PPC::ADDI8) &&
+      MI->getOperand(2).getImm() == 0)
+    return false;
+
+  MachineBasicBlock &MBB = *MI->getParent();
+  MachineFunction &MF = *MBB.getParent();
+
+  const PPCFrameLowering *PPCFI =
+    static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+  unsigned StackEst =
+    PPCFI->determineFrameLayout(MF, false, true);
+
+  // If we likely don't need a stack frame, then we probably don't need a
+  // virtual base register either.
+  if (!StackEst)
+    return false;
+
+  // Estimate an offset from the stack pointer.
+  // The incoming offset is relating to the SP at the start of the function,
+  // but when we access the local it'll be relative to the SP after local
+  // allocation, so adjust our SP-relative offset by that allocation size.
+  Offset += StackEst;
+
+  // The frame pointer will point to the end of the stack, so estimate the
+  // offset as the difference between the object offset and the FP location.
+  return !isFrameOffsetLegal(MI, Offset);
+}
+
+/// Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
+void PPCRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+                             unsigned BaseReg, int FrameIdx,
+                             int64_t Offset) const {
+  unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
+
+  MachineBasicBlock::iterator Ins = MBB->begin();
+  DebugLoc DL;                  // Defaults to "unknown"
+  if (Ins != MBB->end())
+    DL = Ins->getDebugLoc();
+
+  const MCInstrDesc &MCID = TII.get(ADDriOpc);
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const MachineFunction &MF = *MBB->getParent();
+  MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
+
+  BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+    .addFrameIndex(FrameIdx).addImm(Offset);
+}
+
+void
+PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+                                   unsigned BaseReg, int64_t Offset) const {
+  MachineInstr &MI = *I;
+
+  unsigned FIOperandNum = 0;
+  while (!MI.getOperand(FIOperandNum).isFI()) {
+    ++FIOperandNum;
+    assert(FIOperandNum < MI.getNumOperands() &&
+           "Instr doesn't have FrameIndex operand!");
+  }
+
+  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
+  unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
+
+  bool isIXAddr = usesIXAddr(MI);
+  if (!isIXAddr)
+    Offset += MI.getOperand(OffsetOperandNo).getImm();
+  else
+    Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
+
+  // Figure out if the offset in the instruction is shifted right two bits.
+  if (isIXAddr)
+    Offset >>= 2;    // The actual encoded value has the low two bits zero.
+
+  MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+}
+
+bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+                                         int64_t Offset) const {
+  return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
+         (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
+}
+
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 5f89f630ed97..7a48b4b9799e 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -15,8 +15,8 @@
 #ifndef POWERPC32_REGISTERINFO_H
 #define POWERPC32_REGISTERINFO_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "PPC.h"
-#include <map>
 
 #define GET_REGINFO_HEADER
 #include "PPCGenRegisterInfo.inc"
@@ -27,7 +27,7 @@ class TargetInstrInfo;
 class Type;
 
 class PPCRegisterInfo : public PPCGenRegisterInfo {
-  std::map<unsigned, unsigned> ImmToIdxMap;
+  DenseMap<unsigned, unsigned> ImmToIdxMap;
   const PPCSubtarget &Subtarget;
   const TargetInstrInfo &TII;
 public:
@@ -44,6 +44,7 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
   /// Code Generation virtual methods...
   const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
   const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
+  const uint32_t *getNoPreservedMask() const;
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
@@ -60,18 +61,35 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
     return true;
   }
 
-  void lowerDynamicAlloc(MachineBasicBlock::iterator II,
-                         int SPAdj, RegScavenger *RS) const;
-  void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
-                       int SPAdj, RegScavenger *RS) const;
-  void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
-                       int SPAdj, RegScavenger *RS) const;
+  virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+    return true;
+  }
+
+  void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
+  void lowerCRSpilling(MachineBasicBlock::iterator II,
+                       unsigned FrameIndex) const;
+  void lowerCRRestore(MachineBasicBlock::iterator II,
+                      unsigned FrameIndex) const;
+  void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+                           unsigned FrameIndex) const;
+  void lowerVRSAVERestore(MachineBasicBlock::iterator II,
+                          unsigned FrameIndex) const;
+
   bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
 			    int &FrameIdx) const;
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = NULL) const;
 
+  // Support for virtual base registers.
+  bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+  void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+                                    unsigned BaseReg, int FrameIdx,
+                                    int64_t Offset) const;
+  void resolveFrameIndex(MachineBasicBlock::iterator I,
+                         unsigned BaseReg, int64_t Offset) const;
+  bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+
   // Debug information queries.
   unsigned getFrameRegister(const MachineFunction &MF) const;
 
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 8ee9b1ec9f78..57a25f5143fa 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -27,40 +27,40 @@ class PPCReg<string n> : Register<n> {
 
 // GPR - One of the 32 32-bit general-purpose registers
 class GPR<bits<5> num, string n> : PPCReg<n> {
-  field bits<5> Num = num;
+  let HWEncoding{4-0} = num;
 }
 
 // GP8 - One of the 32 64-bit general-purpose registers
 class GP8<GPR SubReg, string n> : PPCReg<n> {
-  field bits<5> Num = SubReg.Num;
+  let HWEncoding = SubReg.HWEncoding;
   let SubRegs = [SubReg];
   let SubRegIndices = [sub_32];
 }
 
 // SPR - One of the 32-bit special-purpose registers
 class SPR<bits<10> num, string n> : PPCReg<n> {
-  field bits<10> Num = num;
+  let HWEncoding{9-0} = num;
 }
 
 // FPR - One of the 32 64-bit floating-point registers
 class FPR<bits<5> num, string n> : PPCReg<n> {
-  field bits<5> Num = num;
+  let HWEncoding{4-0} = num;
 }
 
 // VR - One of the 32 128-bit vector registers
 class VR<bits<5> num, string n> : PPCReg<n> {
-  field bits<5> Num = num;
+  let HWEncoding{4-0} = num;
 }
 
 // CR - One of the 8 4-bit condition registers
 class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
-  field bits<3> Num = num;
+  let HWEncoding{2-0} = num;
   let SubRegs = subregs;
 }
 
 // CRBIT - One of the 32 1-bit condition register fields
 class CRBIT<bits<5> num, string n> : PPCReg<n> {
-  field bits<5> Num = num;
+  let HWEncoding{4-0} = num;
 }
 
 // General-purpose registers
@@ -86,6 +86,14 @@ foreach Index = 0-31 in {
                 DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
 }
 
+// The reprsentation of r0 when treated as the constant 0.
+def ZERO  : GPR<0, "0">;
+def ZERO8 : GP8<ZERO, "0">;
+
+// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP   : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
+def FP8  : GP8<FP, "**FRAME POINTER**">;
+
 // Condition register bits
 def CR0LT : CRBIT< 0, "0">;
 def CR0GT : CRBIT< 1, "1">;
@@ -164,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">;
 // then nonvolatiles in reverse order since stmw/lmw save from rN to r31
 def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
                                                 (sequence "R%u", 30, 13),
-                                                R31, R0, R1, LR)>;
+                                                R31, R0, R1, FP)>;
 
 def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
                                                 (sequence "X%u", 30, 14),
-                                                X31, X13, X0, X1, LR8)>;
+                                                X31, X13, X0, X1, FP8)>;
+
+// For some instructions r0 is special (representing the value 0 instead of
+// the value in the r0 register), and we use these register subclasses to
+// prevent r0 from being allocated for use by those instructions.
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
 
 // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
 // ABI the size of the Floating-point register save area is determined by the
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index ba63b5cd8faf..8d5838e54e15 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -749,3 +749,18 @@ def PPCA2Itineraries : ProcessorItineraries<
                               [15, 7],
                               [FPR_Bypass, FPR_Bypass]>
 ]>;
+
+// ===---------------------------------------------------------------------===//
+// A2 machine model for scheduling and other instruction cost heuristics.
+
+def PPCA2Model : SchedMachineModel {
+  let IssueWidth = 1;  // 2 micro-ops are dispatched per cycle.
+  let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+  let LoadLatency = 6; // Optimistic load latency assuming bypass.
+                       // This is overriden by OperandCycles if the
+                       // Itineraries are queried instead.
+  let MispredictPenalty = 13;
+
+  let Itineraries = PPCA2Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index 7c02ea099c14..c64998d52a0c 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -92,3 +92,18 @@ def G5Itineraries : ProcessorItineraries<
   InstrItinData<VecVSL      , [InstrStage<2, [VIU1]>]>,
   InstrItinData<VecVSR      , [InstrStage<3, [VPU]>]>
 ]>;
+
+// ===---------------------------------------------------------------------===//
+// e5500 machine model for scheduling and other instruction cost heuristics.
+
+def G5Model : SchedMachineModel {
+  let IssueWidth = 4;  // 4 (non-branch) instructions are dispatched per cycle.
+  let MinLatency = 0;  // Out-of-order dispatch.
+  let LoadLatency = 3; // Optimistic load latency assuming bypass.
+                       // This is overriden by OperandCycles if the
+                       // Itineraries are queried instead.
+  let MispredictPenalty = 16;
+
+  let Itineraries = G5Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 18e4c07942b9..a8f2b3f47d1b 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -38,8 +38,18 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
   , HasAltivec(false)
   , HasQPX(false)
   , HasFSQRT(false)
+  , HasFRE(false)
+  , HasFRES(false)
+  , HasFRSQRTE(false)
+  , HasFRSQRTES(false)
+  , HasRecipPrec(false)
   , HasSTFIWX(false)
+  , HasLFIWAX(false)
+  , HasFPRND(false)
+  , HasFPCVT(false)
   , HasISEL(false)
+  , HasPOPCNTD(false)
+  , HasLDBRX(false)
   , IsBookE(false)
   , HasLazyResolverStubs(false)
   , IsJITCodeModel(false)
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 15885bd2dfb2..65b4d211fc6a 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -77,8 +77,15 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   bool HasAltivec;
   bool HasQPX;
   bool HasFSQRT;
+  bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
+  bool HasRecipPrec;
   bool HasSTFIWX;
+  bool HasLFIWAX;
+  bool HasFPRND;
+  bool HasFPCVT;
   bool HasISEL;
+  bool HasPOPCNTD;
+  bool HasLDBRX;
   bool IsBookE;
   bool HasLazyResolverStubs;
   bool IsJITCodeModel;
@@ -154,11 +161,21 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
 
   // Specific obvious features.
   bool hasFSQRT() const { return HasFSQRT; }
+  bool hasFRE() const { return HasFRE; }
+  bool hasFRES() const { return HasFRES; }
+  bool hasFRSQRTE() const { return HasFRSQRTE; }
+  bool hasFRSQRTES() const { return HasFRSQRTES; }
+  bool hasRecipPrec() const { return HasRecipPrec; }
   bool hasSTFIWX() const { return HasSTFIWX; }
+  bool hasLFIWAX() const { return HasLFIWAX; }
+  bool hasFPRND() const { return HasFPRND; }
+  bool hasFPCVT() const { return HasFPCVT; }
   bool hasAltivec() const { return HasAltivec; }
   bool hasQPX() const { return HasQPX; }
   bool hasMFOCRF() const { return HasMFOCRF; }
   bool hasISEL() const { return HasISEL; }
+  bool hasPOPCNTD() const { return HasPOPCNTD; }
+  bool hasLDBRX() const { return HasLDBRX; }
   bool isBookE() const { return IsBookE; }
 
   const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index fe851c1b6fb8..14dc794195da 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -86,8 +86,14 @@ class PPCPassConfig : public TargetPassConfig {
     return getTM<PPCTargetMachine>();
   }
 
+  const PPCSubtarget &getPPCSubtarget() const {
+    return *getPPCTargetMachine().getSubtargetImpl();
+  }
+
   virtual bool addPreRegAlloc();
+  virtual bool addILPOpts();
   virtual bool addInstSelector();
+  virtual bool addPreSched2();
   virtual bool addPreEmitPass();
 };
 } // namespace
@@ -103,13 +109,31 @@ bool PPCPassConfig::addPreRegAlloc() {
   return false;
 }
 
+bool PPCPassConfig::addILPOpts() {
+  if (getPPCSubtarget().hasISEL()) {
+    addPass(&EarlyIfConverterID);
+    return true;
+  }
+
+  return false;
+}
+
 bool PPCPassConfig::addInstSelector() {
   // Install an instruction selector.
   addPass(createPPCISelDag(getPPCTargetMachine()));
   return false;
 }
 
+bool PPCPassConfig::addPreSched2() {
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(&IfConverterID);
+
+  return true;
+}
+
 bool PPCPassConfig::addPreEmitPass() {
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createPPCEarlyReturnPass());
   // Must run branch selection immediately preceding the asm printer.
   addPass(createPPCBranchSelectionPass());
   return false;
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 5e9ad347d332..2504ba70c25a 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -86,7 +86,9 @@ class PPCTTI : public ImmutablePass, public TargetTransformInfo {
   virtual unsigned getNumberOfRegisters(bool Vector) const;
   virtual unsigned getRegisterBitWidth(bool Vector) const;
   virtual unsigned getMaximumUnrollFactor() const;
-  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                          OperandValueKind,
+                                          OperandValueKind) const;
   virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                                   int Index, Type *SubTp) const;
   virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -122,9 +124,8 @@ llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
 
 PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
-  // FIXME: PPC currently does not have custom popcnt lowering even though
-  // there is hardware support. Once this is fixed, update this function
-  // to reflect the real capabilities of the hardware.
+  if (ST->hasPOPCNTD() && TyWidth <= 64)
+    return PSK_FastHardware;
   return PSK_Software;
 }
 
@@ -167,11 +168,14 @@ unsigned PPCTTI::getMaximumUnrollFactor() const {
   return 2;
 }
 
-unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                        OperandValueKind Op1Info,
+                                        OperandValueKind Op2Info) const {
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 
   // Fallback to the default implementation.
-  return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty);
+  return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+                                                     Op2Info);
 }
 
 unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index b6763aa73802..514f8407c972 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -1,7 +1,6 @@
 //===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
 
 TODO:
-* gpr0 allocation
 * lmw/stmw pass a la arm load store optimizer for prolog/epilog
 
 ===-------------------------------------------------------------------------===
@@ -127,25 +126,6 @@ produced this with bdnz, the loop would be a single dispatch group.
 
 ===-------------------------------------------------------------------------===
 
-Compile:
-
-void foo(int *P) {
- if (P)  *P = 0;
-}
-
-into:
-
-_foo:
-        cmpwi cr0,r3,0
-        beqlr cr0
-        li r0,0
-        stw r0,0(r3)
-        blr
-
-This is effectively a simple form of predication.
-
-===-------------------------------------------------------------------------===
-
 Lump the constant pool for each function into ONE pic object, and reference
 pieces of it as offsets from the start.  For functions like this (contrived
 to have lots of constants obviously):
@@ -204,12 +184,6 @@ http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
 
 ===-------------------------------------------------------------------------===
 
-Implement Newton-Rhapson method for improving estimate instructions to the
-correct accuracy, and implementing divide as multiply by reciprocal when it has
-more than one use.  Itanium would want this too.
-
-===-------------------------------------------------------------------------===
-
 Compile offsets from allocas:
 
 int *%test() {
@@ -536,20 +510,6 @@ void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
 
 ===-------------------------------------------------------------------------===
 
-Complete the signed i32 to FP conversion code using 64-bit registers
-transformation, good for PI.  See PPCISelLowering.cpp, this comment:
-
-     // FIXME: disable this lowered code.  This generates 64-bit register values,
-     // and we don't model the fact that the top part is clobbered by calls.  We
-     // need to flag these together so that the value isn't live across a call.
-     //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
-
-Also, if the registers are spilled to the stack, we have to ensure that all
-64-bits of them are save/restored, otherwise we will miscompile the code.  It
-sounds like we need to get the 64-bit register classes going.
-
-===-------------------------------------------------------------------------===
-
 %struct.B = type { i8, [3 x i8] }
 
 define void @bar(%struct.B* %b) {
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index e099a9fc3194..0b01433cc926 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,6 +23,8 @@ class AMDGPUTargetMachine;
 // R600 Passes
 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
 
 // SI Passes
 FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index f6001445f4b3..d8a380de839c 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -19,9 +19,14 @@
 
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPU.h"
+#include "SIDefines.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 
@@ -50,15 +55,51 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   if (OutStreamer.hasRawTextSupport()) {
     OutStreamer.EmitRawText("@" + MF.getName() + ":");
   }
-  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+
+  const MCSectionELF *ConfigSection = getObjFileLowering().getContext()
+                                              .getELFSection(".AMDGPU.config",
+                                              ELF::SHT_NULL, 0,
+                                              SectionKind::getReadOnly());
+  OutStreamer.SwitchSection(ConfigSection);
   if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-    EmitProgramInfo(MF);
+    EmitProgramInfoSI(MF);
+  } else {
+    EmitProgramInfoR600(MF);
   }
+  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
   EmitFunctionBody();
   return false;
 }
 
-void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
+void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
+  unsigned MaxGPR = 0;
+  const R600RegisterInfo * RI =
+                static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+                                                    I != E; ++I) {
+      MachineInstr &MI = *I;
+      unsigned numOperands = MI.getNumOperands();
+      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+        MachineOperand & MO = MI.getOperand(op_idx);
+        if (!MO.isReg())
+          continue;
+        unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
+
+        // Register with value > 127 aren't GPR
+        if (HWReg > 127)
+          continue;
+        MaxGPR = std::max(MaxGPR, HWReg);
+      }
+    }
+  }
+  OutStreamer.EmitIntValue(MaxGPR + 1, 4);
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
   unsigned MaxSGPR = 0;
   unsigned MaxVGPR = 0;
   bool VCCUsed = false;
@@ -107,6 +148,9 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
         } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
           isSGPR = false;
           width = 2;
+        } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 3;
         } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
           isSGPR = true;
           width = 4;
@@ -139,7 +183,19 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
     MaxSGPR += 2;
   }
   SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
-  OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
-  OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
-  OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
+  unsigned RsrcReg;
+  switch (MFI->ShaderType) {
+  default: // Fall through
+  case ShaderType::COMPUTE:  RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
+  case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
+  case ShaderType::PIXEL:    RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
+  case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
+  }
+
+  OutStreamer.EmitIntValue(RsrcReg, 4);
+  OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);
+  if (MFI->ShaderType == ShaderType::PIXEL) {
+    OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
+    OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
+  }
 }
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index 3812282b1798..f425ef419463 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -33,7 +33,8 @@ class AMDGPUAsmPrinter : public AsmPrinter {
 
   /// \brief Emit register usage information so that the GPU driver
   /// can correctly setup the GPU state.
-  void EmitProgramInfo(MachineFunction &MF);
+  void EmitProgramInfoR600(MachineFunction &MF);
+  void EmitProgramInfoSI(MachineFunction &MF);
 
   /// Implemented in AMDGPUMCInstLower.cpp
   virtual void EmitInstruction(const MachineInstr *MI);
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
index 45ae37ef0c7f..9c30515a3420 100644
--- a/lib/Target/R600/AMDGPUCallingConv.td
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -32,8 +32,14 @@ def CC_SI : CallingConv<[
     VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
     VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
     VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
-  ]>>>
+  ]>>>,
 
+  // This is the default for i64 values.
+  // XXX: We should change this once clang understands the CC_AMDGPU.
+  CCIfType<[i64], CCAssignToRegWithShadow<
+   [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+   [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+  >>
 ]>;
 
 def CC_AMDGPU : CallingConv<[
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 5995b6f5e80a..a266df535d56 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -60,6 +60,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::MUL, MVT::i64, Expand);
+
   setOperationAction(ISD::UDIV, MVT::i32, Expand);
   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
   setOperationAction(ISD::UREM, MVT::i32, Expand);
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index f31b6466bd46..c2a79ea99959 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -116,6 +116,7 @@ enum {
   BRANCH_COND,
   // End AMDIL ISD Opcodes
   BITALIGN,
+  BUFFER_STORE,
   DWORDADDR,
   FRACT,
   FMAX,
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index e740348717c7..4b37a53687d8 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -94,6 +94,7 @@ class Constants {
 int TWO_PI = 0x40c90fdb;
 int PI = 0x40490fdb;
 int TWO_PI_INV = 0x3e22f983;
+int FP_UINT_MAX_PLUS_1 = 0x4f800000;	// 1 << 32 in floating point encoding
 }
 def CONST : Constants;
 
@@ -260,6 +261,26 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
   (vt rc:$addr)
 >;
 
+// BFI_INT patterns
+
+multiclass BFIPatterns <Instruction BFI_INT> {
+
+  // Definition from ISA doc:
+  // (y & x) | (z & ~x)
+  def : Pat <
+    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
+    (BFI_INT $x, $y, $z)
+  >;
+
+  // SHA-256 Ch function
+  // z ^ (x & (y ^ z))
+  def : Pat <
+    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
+    (BFI_INT $x, $y, $z)
+  >;
+
+}
+
 include "R600Instructions.td"
 
 include "SIInstrInfo.td"
diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp
new file mode 100644
index 000000000000..0223ec8e4f3f
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -0,0 +1,22 @@
+#include "AMDGPUMachineFunction.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+
+const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType";
+
+AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
+    MachineFunctionInfo() {
+  AttributeSet Set = MF.getFunction()->getAttributes();
+  Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
+                                 ShaderTypeAttribute);
+
+  if (A.isStringAttribute()) {
+    StringRef Str = A.getValueAsString();
+    if (Str.getAsInteger(0, ShaderType))
+      llvm_unreachable("Can't parse shader type!");
+  }
+}
+
+}
diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
new file mode 100644
index 000000000000..21c8c51dae45
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMachineFunction.h
@@ -0,0 +1,29 @@
+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUMACHINEFUNCTION_H
+#define AMDGPUMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class AMDGPUMachineFunction : public MachineFunctionInfo {
+private:
+  static const char *ShaderTypeAttribute;
+public:
+  AMDGPUMachineFunction(const MachineFunction &MF);
+  unsigned ShaderType;
+};
+
+}
+#endif // AMDGPUMACHINEFUNCTION_H
diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
index b723433c161c..dea43b874c6f 100644
--- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp
+++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
@@ -17,6 +17,7 @@
 
 #include "AMDGPU.h"
 #include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Analysis/RegionPass.h"
@@ -40,13 +41,14 @@ typedef SmallVector<BBValuePair, 2> BBValueVector;
 
 typedef SmallPtrSet<BasicBlock *, 8> BBSet;
 
-typedef DenseMap<PHINode *, BBValueVector> PhiMap;
+typedef MapVector<PHINode *, BBValueVector> PhiMap;
+typedef MapVector<BasicBlock *, BBVector> BB2BBVecMap;
+
 typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap;
 typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
 typedef DenseMap<BasicBlock *, Value *> BBPredicates;
 typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
 typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap;
-typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap;
 
 // The name for newly created blocks.
 
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 01857475440b..e7ea876e2abb 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -151,7 +151,9 @@ bool AMDGPUPassConfig::addPreEmitPass() {
   if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
     addPass(createAMDGPUCFGPreparationPass(*TM));
     addPass(createAMDGPUCFGStructurizerPass(*TM));
+    addPass(createR600EmitClauseMarkers(*TM));
     addPass(createR600ExpandSpecialInstrsPass(*TM));
+    addPass(createR600ControlFlowFinalizer(*TM));
     addPass(&FinalizeMachineBundlesID);
   } else {
     addPass(createSILowerControlFlowPass(*TM));
diff --git a/lib/Target/R600/AMDILDeviceInfo.cpp b/lib/Target/R600/AMDILDeviceInfo.cpp
index 9605fbe63340..19792b73e2d9 100644
--- a/lib/Target/R600/AMDILDeviceInfo.cpp
+++ b/lib/Target/R600/AMDILDeviceInfo.cpp
@@ -79,7 +79,9 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
           " on 32bit pointers!");
 #endif
     return new AMDGPUNIDevice(ptr);
-  } else if (deviceName == "SI") {
+  } else if (deviceName == "SI" ||
+             deviceName == "tahiti" || deviceName == "pitcairn" ||
+             deviceName == "verde"  || deviceName == "oland") {
     return new AMDGPUSIDevice(ptr);
   } else {
 #if DEBUG
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index fa8f62de9c0a..ba75a44ea1f1 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -191,6 +191,29 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
         RegSeqArgs, 2 * N->getNumOperands() + 1);
   }
+  case ISD::BUILD_PAIR: {
+    SDValue RC, SubReg0, SubReg1;
+    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+      break;
+    }
+    if (N->getValueType(0) == MVT::i128) {
+      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
+      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
+      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
+    } else if (N->getValueType(0) == MVT::i64) {
+      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
+      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
+      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+    } else {
+      llvm_unreachable("Unhandled value type for BUILD_PAIR");
+    }
+    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
+                            N->getOperand(1), SubReg1 };
+    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+                                  N->getDebugLoc(), N->getValueType(0), Ops);
+  }
+
   case ISD::ConstantFP:
   case ISD::Constant: {
     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 63c59e1cb5bb..8efba5846bef 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -27,6 +27,7 @@ add_llvm_target(R600CodeGen
   AMDGPUFrameLowering.cpp
   AMDGPUIndirectAddressing.cpp
   AMDGPUMCInstLower.cpp
+  AMDGPUMachineFunction.cpp
   AMDGPUSubtarget.cpp
   AMDGPUStructurizeCFG.cpp
   AMDGPUTargetMachine.cpp
@@ -34,6 +35,8 @@ add_llvm_target(R600CodeGen
   AMDGPUConvertToISA.cpp
   AMDGPUInstrInfo.cpp
   AMDGPURegisterInfo.cpp
+  R600ControlFlowFinalizer.cpp
+  R600EmitClauseMarkers.cpp
   R600ExpandSpecialInstrs.cpp
   R600InstrInfo.cpp
   R600ISelLowering.cpp
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index 98fca432670d..a3397f3a4204 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -44,7 +44,6 @@ class AMDGPUAsmBackend : public MCAsmBackend {
   AMDGPUAsmBackend(const Target &T)
     : MCAsmBackend() {}
 
-  virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
   virtual unsigned getNumFixupKinds() const { return 0; };
   virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
                           uint64_t Value) const;
@@ -71,16 +70,6 @@ void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
   }
 }
 
-MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
-                                           StringRef CPU) {
-  return new AMDGPUAsmBackend(T);
-}
-
-AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
-                                                        raw_ostream &OS) const {
-  return new AMDGPUMCObjectWriter(OS);
-}
-
 void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                                   unsigned DataSize, uint64_t Value) const {
 
@@ -88,3 +77,21 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
   assert(Fixup.getKind() == FK_PCRel_4);
   *Dst = (Value - 4) / 4;
 }
+
+//===----------------------------------------------------------------------===//
+// ELFAMDGPUAsmBackend class
+//===----------------------------------------------------------------------===//
+
+class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
+public:
+  ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return createAMDGPUELFObjectWriter(OS);
+  }
+};
+
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
+                                           StringRef CPU) {
+  return new ELFAMDGPUAsmBackend(T);
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
new file mode 100644
index 000000000000..48fac9fa7747
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -0,0 +1,39 @@
+//===-- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+  AMDGPUELFObjectWriter();
+protected:
+  virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+                                bool IsPCRel, bool IsRelocWithSymbol,
+                                int64_t Addend) const {
+    llvm_unreachable("Not implemented");
+  }
+
+};
+
+
+} // End anonymous namespace
+
+AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
+  : MCELFObjectTargetWriter(false, 0, 0, false) { }
+
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) {
+  MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
+  return createELFObjectWriter(MOTW, OS, true);
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 4d3d3e7945e0..b7cdd7c8cde9 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -68,8 +68,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() {
   //===--- Dwarf Emission Directives -----------------------------------===//
   HasLEB128 = true;
   SupportsDebugInformation = true;
-  ExceptionsType = ExceptionHandling::None;
-  DwarfUsesInlineInfoSection = false;
   DwarfSectionOffsetDirective = ".offset";
 
 }
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 072ee49b6311..45d009c2a04f 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -88,7 +88,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
                                     MCCodeEmitter *_Emitter,
                                     bool RelaxAll,
                                     bool NoExecStack) {
-  return createPureStreamer(Ctx, MAB, _OS, _Emitter);
+  return createELFStreamer(Ctx, MAB, _OS, _Emitter, false, false);
 }
 
 extern "C" void LLVMInitializeR600TargetMC() {
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index 363a4af3f3a4..09d0d5b61cd8 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -23,9 +23,11 @@ class MCAsmBackend;
 class MCCodeEmitter;
 class MCContext;
 class MCInstrInfo;
+class MCObjectWriter;
 class MCRegisterInfo;
 class MCSubtargetInfo;
 class Target;
+class raw_ostream;
 
 extern Target TheAMDGPUTarget;
 
@@ -41,6 +43,8 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
 
 MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT,
                                      StringRef CPU);
+
+MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS);
 } // End llvm namespace
 
 #define GET_REGINFO_ENUM
diff --git a/lib/Target/R600/MCTargetDesc/CMakeLists.txt b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
index 37e714c2e7b8..3ccdf420601d 100644
--- a/lib/Target/R600/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
@@ -1,6 +1,7 @@
 
 add_llvm_library(LLVMR600Desc
   AMDGPUAsmBackend.cpp
+  AMDGPUELFObjectWriter.cpp
   AMDGPUMCTargetDesc.cpp
   AMDGPUMCAsmInfo.cpp
   R600MCCodeEmitter.cpp
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index d20716000d3c..416d71064d63 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -66,8 +66,6 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
   void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
                     raw_ostream &OS) const;
   void EmitDst(const MCInst &MI, raw_ostream &OS) const;
-  void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
-                    raw_ostream &OS) const;
   void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
 
   void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
@@ -103,7 +101,8 @@ enum InstrTypes {
   INSTR_FC,
   INSTR_NATIVE,
   INSTR_VTX,
-  INSTR_EXPORT
+  INSTR_EXPORT,
+  INSTR_CFALU
 };
 
 enum FCInstr {
@@ -140,9 +139,7 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
 
 void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                                        SmallVectorImpl<MCFixup> &Fixups) const {
-  if (isTexOp(MI.getOpcode())) {
-    EmitTexInstr(MI, Fixups, OS);
-  } else if (isFCOp(MI.getOpcode())){
+  if (isFCOp(MI.getOpcode())){
     EmitFCInstr(MI, OS);
   } else if (MI.getOpcode() == AMDGPU::RETURN ||
     MI.getOpcode() == AMDGPU::BUNDLE ||
@@ -150,6 +147,10 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     return;
   } else {
     switch(MI.getOpcode()) {
+    case AMDGPU::STACK_SIZE: {
+      EmitByte(MI.getOperand(0).getImm(), OS);
+      break;
+    }
     case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
     case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
       uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
@@ -175,16 +176,117 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
       Emit(InstWord2, OS);
       break;
     }
+    case AMDGPU::TEX_LD:
+    case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+    case AMDGPU::TEX_SAMPLE:
+    case AMDGPU::TEX_SAMPLE_C:
+    case AMDGPU::TEX_SAMPLE_L:
+    case AMDGPU::TEX_SAMPLE_C_L:
+    case AMDGPU::TEX_SAMPLE_LB:
+    case AMDGPU::TEX_SAMPLE_C_LB:
+    case AMDGPU::TEX_SAMPLE_G:
+    case AMDGPU::TEX_SAMPLE_C_G:
+    case AMDGPU::TEX_GET_GRADIENTS_H:
+    case AMDGPU::TEX_GET_GRADIENTS_V:
+    case AMDGPU::TEX_SET_GRADIENTS_H:
+    case AMDGPU::TEX_SET_GRADIENTS_V: {
+      unsigned Opcode = MI.getOpcode();
+      bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
+      unsigned OpOffset = HasOffsets ? 3 : 0;
+      int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
+      int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
+
+      uint32_t SrcSelect[4] = {0, 1, 2, 3};
+      uint32_t Offsets[3] = {0, 0, 0};
+      uint64_t CoordType[4] = {1, 1, 1, 1};
+
+      if (HasOffsets)
+        for (unsigned i = 0; i < 3; i++) {
+          int SignedOffset = MI.getOperand(i + 2).getImm();
+          Offsets[i] = (SignedOffset & 0x1F);
+        }
+          
+
+      if (TextureType == TEXTURE_RECT ||
+          TextureType == TEXTURE_SHADOWRECT) {
+        CoordType[ELEMENT_X] = 0;
+        CoordType[ELEMENT_Y] = 0;
+      }
+
+      if (TextureType == TEXTURE_1D_ARRAY ||
+          TextureType == TEXTURE_SHADOW1D_ARRAY) {
+        if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
+            Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
+          CoordType[ELEMENT_Y] = 0;
+        } else {
+          CoordType[ELEMENT_Z] = 0;
+          SrcSelect[ELEMENT_Z] = ELEMENT_Y;
+        }
+      } else if (TextureType == TEXTURE_2D_ARRAY ||
+          TextureType == TEXTURE_SHADOW2D_ARRAY) {
+        CoordType[ELEMENT_Z] = 0;
+      }
+
+
+      if ((TextureType == TEXTURE_SHADOW1D ||
+          TextureType == TEXTURE_SHADOW2D ||
+          TextureType == TEXTURE_SHADOWRECT ||
+          TextureType == TEXTURE_SHADOW1D_ARRAY) &&
+          Opcode != AMDGPU::TEX_SAMPLE_C_L &&
+          Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
+        SrcSelect[ELEMENT_W] = ELEMENT_Z;
+      }
+
+      uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
+          CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
+          CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
+      uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
+          SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
+          SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
+          Offsets[2] << 10;
+
+      EmitByte(INSTR_TEX, OS);
+      Emit(Word01, OS);
+      Emit(Word2, OS);
+      break;
+    }
+    case AMDGPU::CF_ALU:
+    case AMDGPU::CF_ALU_PUSH_BEFORE: {
+      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+      EmitByte(INSTR_CFALU, OS);
+      Emit(Inst, OS);
+      break;
+    }
+    case AMDGPU::CF_TC_EG:
+    case AMDGPU::CF_VC_EG:
+    case AMDGPU::CF_CALL_FS_EG:
+    case AMDGPU::CF_TC_R600:
+    case AMDGPU::CF_VC_R600:
+    case AMDGPU::CF_CALL_FS_R600:
+      return;
+    case AMDGPU::WHILE_LOOP_EG:
+    case AMDGPU::END_LOOP_EG:
+    case AMDGPU::LOOP_BREAK_EG:
+    case AMDGPU::CF_CONTINUE_EG:
+    case AMDGPU::CF_JUMP_EG:
+    case AMDGPU::CF_ELSE_EG:
+    case AMDGPU::POP_EG:
+    case AMDGPU::WHILE_LOOP_R600:
+    case AMDGPU::END_LOOP_R600:
+    case AMDGPU::LOOP_BREAK_R600:
+    case AMDGPU::CF_CONTINUE_R600:
+    case AMDGPU::CF_JUMP_R600:
+    case AMDGPU::CF_ELSE_R600:
+    case AMDGPU::POP_R600:
     case AMDGPU::EG_ExportSwz:
     case AMDGPU::R600_ExportSwz:
     case AMDGPU::EG_ExportBuf:
     case AMDGPU::R600_ExportBuf: {
       uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
-      EmitByte(INSTR_EXPORT, OS);
+      EmitByte(INSTR_NATIVE, OS);
       Emit(Inst, OS);
       break;
     }
-
     default:
       EmitALUInstr(MI, Fixups, OS);
       break;
@@ -334,99 +436,6 @@ void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
   Emit(InlineConstant.i, OS);
 }
 
-void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
-                                     SmallVectorImpl<MCFixup> &Fixups,
-                                     raw_ostream &OS) const {
-
-  unsigned Opcode = MI.getOpcode();
-  bool hasOffsets = (Opcode == AMDGPU::TEX_LD);
-  unsigned OpOffset = hasOffsets ? 3 : 0;
-  int64_t Resource = MI.getOperand(OpOffset + 2).getImm();
-  int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
-  int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
-  unsigned srcSelect[4] = {0, 1, 2, 3};
-
-  // Emit instruction type
-  EmitByte(1, OS);
-
-  // Emit instruction
-  EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
-
-  // Emit resource id
-  EmitByte(Resource, OS);
-
-  // Emit source register
-  EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
-
-  // XXX: Emit src isRelativeAddress
-  EmitByte(0, OS);
-
-  // Emit destination register
-  EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
-
-  // XXX: Emit dst isRealtiveAddress
-  EmitByte(0, OS);
-
-  // XXX: Emit dst select
-  EmitByte(0, OS); // X
-  EmitByte(1, OS); // Y
-  EmitByte(2, OS); // Z
-  EmitByte(3, OS); // W
-
-  // XXX: Emit lod bias
-  EmitByte(0, OS);
-
-  // XXX: Emit coord types
-  unsigned coordType[4] = {1, 1, 1, 1};
-
-  if (TextureType == TEXTURE_RECT
-      || TextureType == TEXTURE_SHADOWRECT) {
-    coordType[ELEMENT_X] = 0;
-    coordType[ELEMENT_Y] = 0;
-  }
-
-  if (TextureType == TEXTURE_1D_ARRAY
-      || TextureType == TEXTURE_SHADOW1D_ARRAY) {
-    if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
-      coordType[ELEMENT_Y] = 0;
-    } else {
-      coordType[ELEMENT_Z] = 0;
-      srcSelect[ELEMENT_Z] = ELEMENT_Y;
-    }
-  } else if (TextureType == TEXTURE_2D_ARRAY
-             || TextureType == TEXTURE_SHADOW2D_ARRAY) {
-    coordType[ELEMENT_Z] = 0;
-  }
-
-  for (unsigned i = 0; i < 4; i++) {
-    EmitByte(coordType[i], OS);
-  }
-
-  // XXX: Emit offsets
-  if (hasOffsets)
-	  for (unsigned i = 2; i < 5; i++)
-		  EmitByte(MI.getOperand(i).getImm()<<1, OS);
-  else
-	  EmitNullBytes(3, OS);
-
-  // Emit sampler id
-  EmitByte(Sampler, OS);
-
-  // XXX:Emit source select
-  if ((TextureType == TEXTURE_SHADOW1D
-      || TextureType == TEXTURE_SHADOW2D
-      || TextureType == TEXTURE_SHADOWRECT
-      || TextureType == TEXTURE_SHADOW1D_ARRAY)
-      && Opcode != AMDGPU::TEX_SAMPLE_C_L
-      && Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
-    srcSelect[ELEMENT_W] = ELEMENT_Z;
-  }
-
-  for (unsigned i = 0; i < 4; i++) {
-    EmitByte(srcSelect[i], OS);
-  }
-}
-
 void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
 
   // Emit instruction type
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index e27abccbe13f..5af83209a0d5 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -39,8 +39,6 @@ class SIMCCodeEmitter : public  AMDGPUMCCodeEmitter {
   void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
   const MCInstrInfo &MCII;
   const MCRegisterInfo &MRI;
-  const MCSubtargetInfo &STI;
-  MCContext &Ctx;
 
   /// \brief Can this operand also contain immediate values?
   bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
@@ -51,7 +49,7 @@ class SIMCCodeEmitter : public  AMDGPUMCCodeEmitter {
 public:
   SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
                   const MCSubtargetInfo &sti, MCContext &ctx)
-    : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+    : MCII(mcii), MRI(mri) { }
 
   ~SIMCCodeEmitter() { }
 
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index 868810c613b3..b9229d499d18 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -15,6 +15,7 @@ class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Featur
 : Processor<Name, itin, Features>;
 def : Proc<"",           R600_EG_Itin, [FeatureR600ALUInst]>;
 def : Proc<"r600",       R600_EG_Itin, [FeatureR600ALUInst]>;
+def : Proc<"rv670",      R600_EG_Itin, [FeatureR600ALUInst, FeatureFP64]>;
 def : Proc<"rv710",      R600_EG_Itin, []>;
 def : Proc<"rv730",      R600_EG_Itin, []>;
 def : Proc<"rv770",      R600_EG_Itin, [FeatureFP64]>;
@@ -26,5 +27,8 @@ def : Proc<"barts",      R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"turks",      R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"caicos",     R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"cayman",     R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
-
+def : Proc<"SI",         SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"tahiti",     SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"pitcairn",   SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"verde",      SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"oland",      SI_Itin, [Feature64BitPtr, FeatureFP64]>;
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
new file mode 100644
index 000000000000..bc1ca58b863e
--- /dev/null
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -0,0 +1,337 @@
+//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass compute turns all control flow pseudo instructions into native one
+/// computing their address on the fly ; it also sets STACK_SIZE info.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "r600cf"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600ControlFlowFinalizer : public MachineFunctionPass {
+
+private:
+  enum ControlFlowInstruction {
+    CF_TC,
+    CF_CALL_FS,
+    CF_WHILE_LOOP,
+    CF_END_LOOP,
+    CF_LOOP_BREAK,
+    CF_LOOP_CONTINUE,
+    CF_JUMP,
+    CF_ELSE,
+    CF_POP
+  };
+
+  static char ID;
+  const R600InstrInfo *TII;
+  unsigned MaxFetchInst;
+  const AMDGPUSubtarget &ST;
+
+  bool isFetch(const MachineInstr *MI) const {
+    switch (MI->getOpcode()) {
+    case AMDGPU::TEX_VTX_CONSTBUF:
+    case AMDGPU::TEX_VTX_TEXBUF:
+    case AMDGPU::TEX_LD:
+    case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+    case AMDGPU::TEX_GET_GRADIENTS_H:
+    case AMDGPU::TEX_GET_GRADIENTS_V:
+    case AMDGPU::TEX_SET_GRADIENTS_H:
+    case AMDGPU::TEX_SET_GRADIENTS_V:
+    case AMDGPU::TEX_SAMPLE:
+    case AMDGPU::TEX_SAMPLE_C:
+    case AMDGPU::TEX_SAMPLE_L:
+    case AMDGPU::TEX_SAMPLE_C_L:
+    case AMDGPU::TEX_SAMPLE_LB:
+    case AMDGPU::TEX_SAMPLE_C_LB:
+    case AMDGPU::TEX_SAMPLE_G:
+    case AMDGPU::TEX_SAMPLE_C_G:
+    case AMDGPU::TXD:
+    case AMDGPU::TXD_SHADOW:
+    case AMDGPU::VTX_READ_GLOBAL_8_eg:
+    case AMDGPU::VTX_READ_GLOBAL_32_eg:
+    case AMDGPU::VTX_READ_GLOBAL_128_eg:
+    case AMDGPU::VTX_READ_PARAM_8_eg:
+    case AMDGPU::VTX_READ_PARAM_16_eg:
+    case AMDGPU::VTX_READ_PARAM_32_eg:
+    case AMDGPU::VTX_READ_PARAM_128_eg:
+     return true;
+    default:
+      return false;
+    }
+  }
+
+  bool IsTrivialInst(MachineInstr *MI) const {
+    switch (MI->getOpcode()) {
+    case AMDGPU::KILL:
+    case AMDGPU::RETURN:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
+    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) {
+      switch (CFI) {
+      case CF_TC:
+        return TII->get(AMDGPU::CF_TC_R600);
+      case CF_CALL_FS:
+        return TII->get(AMDGPU::CF_CALL_FS_R600);
+      case CF_WHILE_LOOP:
+        return TII->get(AMDGPU::WHILE_LOOP_R600);
+      case CF_END_LOOP:
+        return TII->get(AMDGPU::END_LOOP_R600);
+      case CF_LOOP_BREAK:
+        return TII->get(AMDGPU::LOOP_BREAK_R600);
+      case CF_LOOP_CONTINUE:
+        return TII->get(AMDGPU::CF_CONTINUE_R600);
+      case CF_JUMP:
+        return TII->get(AMDGPU::CF_JUMP_R600);
+      case CF_ELSE:
+        return TII->get(AMDGPU::CF_ELSE_R600);
+      case CF_POP:
+        return TII->get(AMDGPU::POP_R600);
+      }
+    } else {
+      switch (CFI) {
+      case CF_TC:
+        return TII->get(AMDGPU::CF_TC_EG);
+      case CF_CALL_FS:
+        return TII->get(AMDGPU::CF_CALL_FS_EG);
+      case CF_WHILE_LOOP:
+        return TII->get(AMDGPU::WHILE_LOOP_EG);
+      case CF_END_LOOP:
+        return TII->get(AMDGPU::END_LOOP_EG);
+      case CF_LOOP_BREAK:
+        return TII->get(AMDGPU::LOOP_BREAK_EG);
+      case CF_LOOP_CONTINUE:
+        return TII->get(AMDGPU::CF_CONTINUE_EG);
+      case CF_JUMP:
+        return TII->get(AMDGPU::CF_JUMP_EG);
+      case CF_ELSE:
+        return TII->get(AMDGPU::CF_ELSE_EG);
+      case CF_POP:
+        return TII->get(AMDGPU::POP_EG);
+      }
+    }
+  }
+
+  MachineBasicBlock::iterator
+  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+      unsigned CfAddress) const {
+    MachineBasicBlock::iterator ClauseHead = I;
+    unsigned AluInstCount = 0;
+    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+      if (IsTrivialInst(I))
+        continue;
+      if (!isFetch(I))
+        break;
+      AluInstCount ++;
+      if (AluInstCount > MaxFetchInst)
+        break;
+    }
+    BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+        getHWInstrDesc(CF_TC))
+        .addImm(CfAddress) // ADDR
+        .addImm(AluInstCount); // COUNT
+    return I;
+  }
+  void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
+    MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
+  }
+  void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
+      const {
+    for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
+        It != E; ++It) {
+      MachineInstr *MI = *It;
+      CounterPropagateAddr(MI, Addr);
+    }
+  }
+
+public:
+  R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
+    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
+    ST(tm.getSubtarget<AMDGPUSubtarget>()) {
+      const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
+      if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
+        MaxFetchInst = 8;
+      else
+        MaxFetchInst = 16;
+  }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF) {
+    unsigned MaxStack = 0;
+    unsigned CurrentStack = 0;
+    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
+        ++MB) {
+      MachineBasicBlock &MBB = *MB;
+      unsigned CfCount = 0;
+      std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
+      std::vector<MachineInstr * > IfThenElseStack;
+      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+      if (MFI->ShaderType == 1) {
+        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+            getHWInstrDesc(CF_CALL_FS));
+        CfCount++;
+      }
+      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+          I != E;) {
+        if (isFetch(I)) {
+          DEBUG(dbgs() << CfCount << ":"; I->dump(););
+          I = MakeFetchClause(MBB, I, 0);
+          CfCount++;
+          continue;
+        }
+
+        MachineBasicBlock::iterator MI = I;
+        I++;
+        switch (MI->getOpcode()) {
+        case AMDGPU::CF_ALU_PUSH_BEFORE:
+          CurrentStack++;
+          MaxStack = std::max(MaxStack, CurrentStack);
+        case AMDGPU::CF_ALU:
+        case AMDGPU::EG_ExportBuf:
+        case AMDGPU::EG_ExportSwz:
+        case AMDGPU::R600_ExportBuf:
+        case AMDGPU::R600_ExportSwz:
+        case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+        case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
+          DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+          CfCount++;
+          break;
+        case AMDGPU::WHILELOOP: {
+          CurrentStack++;
+          MaxStack = std::max(MaxStack, CurrentStack);
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_WHILE_LOOP))
+              .addImm(1);
+          std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
+              std::set<MachineInstr *>());
+          Pair.second.insert(MIb);
+          LoopStack.push_back(Pair);
+          MI->eraseFromParent();
+          CfCount++;
+          break;
+        }
+        case AMDGPU::ENDLOOP: {
+          CurrentStack--;
+          std::pair<unsigned, std::set<MachineInstr *> > Pair =
+              LoopStack.back();
+          LoopStack.pop_back();
+          CounterPropagateAddr(Pair.second, CfCount);
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
+              .addImm(Pair.first + 1);
+          MI->eraseFromParent();
+          CfCount++;
+          break;
+        }
+        case AMDGPU::IF_PREDICATE_SET: {
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_JUMP))
+              .addImm(0)
+              .addImm(0);
+          IfThenElseStack.push_back(MIb);
+          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+          MI->eraseFromParent();
+          CfCount++;
+          break;
+        }
+        case AMDGPU::ELSE: {
+          MachineInstr * JumpInst = IfThenElseStack.back();
+          IfThenElseStack.pop_back();
+          CounterPropagateAddr(JumpInst, CfCount);
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_ELSE))
+              .addImm(0)
+              .addImm(1);
+          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+          IfThenElseStack.push_back(MIb);
+          MI->eraseFromParent();
+          CfCount++;
+          break;
+        }
+        case AMDGPU::ENDIF: {
+          CurrentStack--;
+          MachineInstr *IfOrElseInst = IfThenElseStack.back();
+          IfThenElseStack.pop_back();
+          CounterPropagateAddr(IfOrElseInst, CfCount + 1);
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_POP))
+              .addImm(CfCount + 1)
+              .addImm(1);
+          (void)MIb;
+          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+          MI->eraseFromParent();
+          CfCount++;
+          break;
+        }
+        case AMDGPU::PREDICATED_BREAK: {
+          CurrentStack--;
+          CfCount += 3;
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
+              .addImm(CfCount)
+              .addImm(1);
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_LOOP_BREAK))
+              .addImm(0);
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
+              .addImm(CfCount)
+              .addImm(1);
+          LoopStack.back().second.insert(MIb);
+          MI->eraseFromParent();
+          break;
+        }
+        case AMDGPU::CONTINUE: {
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_LOOP_CONTINUE))
+              .addImm(0);
+          LoopStack.back().second.insert(MIb);
+          MI->eraseFromParent();
+          CfCount++;
+          break;
+        }
+        default:
+          break;
+        }
+      }
+      BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+          TII->get(AMDGPU::STACK_SIZE))
+          .addImm(MaxStack);
+    }
+
+    return false;
+  }
+
+  const char *getPassName() const {
+    return "R600 Control Flow Finalizer Pass";
+  }
+};
+
+char R600ControlFlowFinalizer::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
+  return new R600ControlFlowFinalizer(TM);
+}
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
new file mode 100644
index 000000000000..3fdc678b9ef1
--- /dev/null
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -0,0 +1,255 @@
+//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold
+/// 128 Alu instructions ; these instructions can access up to 4 prefetched
+/// 4 lines of 16 registers from constant buffers. Such ALU clauses are
+/// initiated by CF_ALU instructions.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600EmitClauseMarkersPass : public MachineFunctionPass {
+
+private:
+  static char ID;
+  const R600InstrInfo *TII;
+
+  unsigned OccupiedDwords(MachineInstr *MI) const {
+    switch (MI->getOpcode()) {
+    case AMDGPU::INTERP_PAIR_XY:
+    case AMDGPU::INTERP_PAIR_ZW:
+    case AMDGPU::INTERP_VEC_LOAD:
+    case AMDGPU::DOT4_eg_pseudo:
+    case AMDGPU::DOT4_r600_pseudo:
+      return 4;
+    case AMDGPU::KILL:
+      return 0;
+    default:
+      break;
+    }
+
+    if(TII->isVector(*MI) ||
+        TII->isCubeOp(MI->getOpcode()) ||
+        TII->isReductionOp(MI->getOpcode()))
+      return 4;
+
+    unsigned NumLiteral = 0;
+    for (MachineInstr::mop_iterator It = MI->operands_begin(),
+        E = MI->operands_end(); It != E; ++It) {
+      MachineOperand &MO = *It;
+      if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+        ++NumLiteral;
+    }
+    return 1 + NumLiteral;
+  }
+
+  bool isALU(const MachineInstr *MI) const {
+    if (TII->isALUInstr(MI->getOpcode()))
+      return true;
+    if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()))
+      return true;
+    switch (MI->getOpcode()) {
+    case AMDGPU::PRED_X:
+    case AMDGPU::INTERP_PAIR_XY:
+    case AMDGPU::INTERP_PAIR_ZW:
+    case AMDGPU::INTERP_VEC_LOAD:
+    case AMDGPU::COPY:
+    case AMDGPU::DOT4_eg_pseudo:
+    case AMDGPU::DOT4_r600_pseudo:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  bool IsTrivialInst(MachineInstr *MI) const {
+    switch (MI->getOpcode()) {
+    case AMDGPU::KILL:
+    case AMDGPU::RETURN:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  // Register Idx, then Const value
+  std::vector<std::pair<unsigned, unsigned> > ExtractConstRead(MachineInstr *MI)
+      const {
+    const R600Operands::Ops OpTable[3][2] = {
+      {R600Operands::SRC0, R600Operands::SRC0_SEL},
+      {R600Operands::SRC1, R600Operands::SRC1_SEL},
+      {R600Operands::SRC2, R600Operands::SRC2_SEL},
+    };
+    std::vector<std::pair<unsigned, unsigned> > Result;
+
+    if (!TII->isALUInstr(MI->getOpcode()))
+      return Result;
+    for (unsigned j = 0; j < 3; j++) {
+      int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+      if (SrcIdx < 0)
+        break;
+      if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+        unsigned Const = MI->getOperand(
+            TII->getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+        Result.push_back(std::pair<unsigned, unsigned>(SrcIdx, Const));
+      }
+    }
+    return Result;
+  }
+
+  std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
+    // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
+    // (See also R600ISelLowering.cpp)
+    // ConstIndex value is in [0, 4095];
+    return std::pair<unsigned, unsigned>(
+        ((Sel >> 2) - 512) >> 12, // KC_BANK
+        // Line Number of ConstIndex
+        // A line contains 16 constant registers however KCX bank can lock
+        // two line at the same time ; thus we want to get an even line number.
+        // Line number can be retrieved with (>>4), using (>>5) <<1 generates
+        // an even number.
+        ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
+  }
+
+  bool SubstituteKCacheBank(MachineInstr *MI,
+      std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const {
+    std::vector<std::pair<unsigned, unsigned> > UsedKCache;
+    std::vector<std::pair<unsigned, unsigned> > Consts = ExtractConstRead(MI);
+    assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const");
+    for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+      unsigned Sel = Consts[i].second;
+      unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
+      unsigned KCacheIndex = Index * 4 + Chan;
+      const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
+      if (CachedConsts.empty()) {
+        CachedConsts.push_back(BankLine);
+        UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+        continue;
+      }
+      if (CachedConsts[0] == BankLine) {
+        UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+        continue;
+      }
+      if (CachedConsts.size() == 1) {
+        CachedConsts.push_back(BankLine);
+        UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+        continue;
+      }
+      if (CachedConsts[1] == BankLine) {
+        UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+        continue;
+      }
+      return false;
+    }
+
+    for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+      switch(UsedKCache[i].first) {
+      case 0:
+        MI->getOperand(Consts[i].first).setReg(
+            AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[i].second));
+        break;
+      case 1:
+        MI->getOperand(Consts[i].first).setReg(
+            AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[i].second));
+        break;
+      default:
+        llvm_unreachable("Wrong Cache Line");
+      }
+    }
+    return true;
+  }
+
+  MachineBasicBlock::iterator
+  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+    MachineBasicBlock::iterator ClauseHead = I;
+    std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
+    bool PushBeforeModifier = false;
+    unsigned AluInstCount = 0;
+    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+      if (IsTrivialInst(I))
+        continue;
+      if (!isALU(I))
+        break;
+      if (AluInstCount > TII->getMaxAlusPerClause())
+        break;
+      if (I->getOpcode() == AMDGPU::PRED_X) {
+        if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH)
+          PushBeforeModifier = true;
+        AluInstCount ++;
+        continue;
+      }
+      if (I->getOpcode() == AMDGPU::KILLGT) {
+        I++;
+        break;
+      }
+      if (TII->isALUInstr(I->getOpcode()) &&
+          !SubstituteKCacheBank(I, KCacheBanks))
+        break;
+      AluInstCount += OccupiedDwords(I);
+    }
+    unsigned Opcode = PushBeforeModifier ?
+        AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
+    BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
+        .addImm(0) // ADDR
+        .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
+        .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
+        .addImm(KCacheBanks.empty()?0:2) // KM0
+        .addImm((KCacheBanks.size() < 2)?0:2) // KM1
+        .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
+        .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
+        .addImm(AluInstCount); // COUNT
+    return I;
+  }
+
+public:
+  R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID),
+    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF) {
+    for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                    BB != BB_E; ++BB) {
+      MachineBasicBlock &MBB = *BB;
+      MachineBasicBlock::iterator I = MBB.begin();
+      if (I->getOpcode() == AMDGPU::CF_ALU)
+        continue; // BB was already parsed
+      for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
+        if (isALU(I))
+          I = MakeALUClause(MBB, I);
+        else
+          ++I;
+      }
+    }
+    return false;
+  }
+
+  const char *getPassName() const {
+    return "R600 Emit Clause Markers Pass";
+  }
+};
+
+char R600EmitClauseMarkersPass::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) {
+  return new R600EmitClauseMarkersPass(TM);
+}
+
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index a73691dd3c2b..53e6e51dd2b1 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -28,7 +28,6 @@ using namespace llvm;
 R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
     AMDGPUTargetLowering(TM),
     TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
-  setOperationAction(ISD::MUL, MVT::i64, Expand);
   addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
   addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
   addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
@@ -58,7 +57,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
-  setOperationAction(ISD::FPOW, MVT::f32, Custom);
 
   setOperationAction(ISD::ROTL, MVT::i32, Custom);
 
@@ -316,7 +314,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
   case ISD::SELECT: return LowerSELECT(Op, DAG);
   case ISD::STORE: return LowerSTORE(Op, DAG);
   case ISD::LOAD: return LowerLOAD(Op, DAG);
-  case ISD::FPOW: return LowerFPOW(Op, DAG);
   case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
   case ISD::INTRINSIC_VOID: {
     SDValue Chain = Op.getOperand(0);
@@ -918,15 +915,6 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
   return DAG.getMergeValues(Ops, 2, DL);
 }
 
-SDValue R600TargetLowering::LowerFPOW(SDValue Op,
-    SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
-  EVT VT = Op.getValueType();
-  SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
-  SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
-  return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
-}
-
 /// XXX Only kernel functions are supported, so we can assume for now that
 /// every function is a kernel function, but in the future we should use
 /// separate calling conventions for kernel and non-kernel functions.
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index 5cb4b912a18d..2c09acb9af30 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -59,7 +59,6 @@ class R600TargetLowering : public AMDGPUTargetLowering {
   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 08650980fd5a..b232188a2641 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -645,6 +645,9 @@ const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
   return &AMDGPU::IndirectRegRegClass;
 }
 
+unsigned R600InstrInfo::getMaxAlusPerClause() const {
+  return 115;
+}
 
 MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                   MachineBasicBlock::iterator I,
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index bf9569e6598a..dbae90013d22 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -145,6 +145,7 @@ namespace llvm {
 
   virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
 
+  unsigned getMaxAlusPerClause() const;
 
   ///buildDefaultInstruction - This function returns a MachineInstr with
   /// all the instruction modifiers initialized to their default values.
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 8c50d54cf593..361fc9816b45 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -234,6 +234,80 @@ class VTX_WORD1_GPR {
   let Word1{31}    = SRF_MODE_ALL;
 }
 
+class TEX_WORD0 {
+  field bits<32> Word0;
+
+  bits<5> TEX_INST;
+  bits<2> INST_MOD;
+  bits<1> FETCH_WHOLE_QUAD;
+  bits<8> RESOURCE_ID;
+  bits<7> SRC_GPR;
+  bits<1> SRC_REL;
+  bits<1> ALT_CONST;
+  bits<2> RESOURCE_INDEX_MODE;
+  bits<2> SAMPLER_INDEX_MODE;
+
+  let Word0{4-0} = TEX_INST;
+  let Word0{6-5} = INST_MOD;
+  let Word0{7} = FETCH_WHOLE_QUAD;
+  let Word0{15-8} = RESOURCE_ID;
+  let Word0{22-16} = SRC_GPR;
+  let Word0{23} = SRC_REL;
+  let Word0{24} = ALT_CONST;
+  let Word0{26-25} = RESOURCE_INDEX_MODE;
+  let Word0{28-27} = SAMPLER_INDEX_MODE;
+}
+
+class TEX_WORD1 {
+  field bits<32> Word1;
+
+  bits<7> DST_GPR;
+  bits<1> DST_REL;
+  bits<3> DST_SEL_X;
+  bits<3> DST_SEL_Y;
+  bits<3> DST_SEL_Z;
+  bits<3> DST_SEL_W;
+  bits<7> LOD_BIAS;
+  bits<1> COORD_TYPE_X;
+  bits<1> COORD_TYPE_Y;
+  bits<1> COORD_TYPE_Z;
+  bits<1> COORD_TYPE_W;
+
+  let Word1{6-0} = DST_GPR;
+  let Word1{7} = DST_REL;
+  let Word1{11-9} = DST_SEL_X;
+  let Word1{14-12} = DST_SEL_Y;
+  let Word1{17-15} = DST_SEL_Z;
+  let Word1{20-18} = DST_SEL_W;
+  let Word1{27-21} = LOD_BIAS;
+  let Word1{28} = COORD_TYPE_X;
+  let Word1{29} = COORD_TYPE_Y;
+  let Word1{30} = COORD_TYPE_Z;
+  let Word1{31} = COORD_TYPE_W;
+}
+
+class TEX_WORD2 {
+  field bits<32> Word2;
+
+  bits<5> OFFSET_X;
+  bits<5> OFFSET_Y;
+  bits<5> OFFSET_Z;
+  bits<5> SAMPLER_ID;
+  bits<3> SRC_SEL_X;
+  bits<3> SRC_SEL_Y;
+  bits<3> SRC_SEL_Z;
+  bits<3> SRC_SEL_W;
+
+  let Word2{4-0} = OFFSET_X;
+  let Word2{9-5} = OFFSET_Y;
+  let Word2{14-10} = OFFSET_Z;
+  let Word2{19-15} = SAMPLER_ID;
+  let Word2{22-20} = SRC_SEL_X;
+  let Word2{25-23} = SRC_SEL_Y;
+  let Word2{28-26} = SRC_SEL_Z;
+  let Word2{31-29} = SRC_SEL_W;
+}
+
 /*
 XXX: R600 subtarget uses a slightly different encoding than the other
 subtargets.  We currently handle this in R600MCCodeEmitter, but we may
@@ -277,9 +351,9 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
               (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                    R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                    LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
-              !strconcat(opName,
+              !strconcat("  ", opName,
                    "$clamp $dst$write$dst_rel$omod, "
-                   "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
+                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                    "$literal $pred_sel$last"),
               pattern,
               itin>,
@@ -318,10 +392,10 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
                R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
                LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
-          !strconcat(opName,
+          !strconcat("  ", opName,
                 "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
-                "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
-                "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
+                "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+                "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
                 "$literal $pred_sel$last"),
           pattern,
           itin>,
@@ -356,10 +430,10 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
                R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
                R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
                LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
-          !strconcat(opName, "$clamp $dst$dst_rel, "
-                             "$src0_neg$src0$src0_sel$src0_rel, "
-                             "$src1_neg$src1$src1_sel$src1_rel, "
-                             "$src2_neg$src2$src2_sel$src2_rel, "
+          !strconcat("  ", opName, "$clamp $dst$dst_rel, "
+                             "$src0_neg$src0$src0_rel, "
+                             "$src1_neg$src1$src1_rel, "
+                             "$src2_neg$src2$src2_rel, "
                              "$literal $pred_sel$last"),
           pattern,
           itin>,
@@ -386,12 +460,32 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
 class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
   InstR600 <inst,
-          (outs R600_Reg128:$dst),
-          (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
-          !strconcat(opName, "$dst, $src0, $resourceId, $samplerId, $textureTarget"),
+          (outs R600_Reg128:$DST_GPR),
+          (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
+          !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
           pattern,
-          itin>{
-    let Inst {10-0} = inst;
+          itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
+    let Inst{31-0} = Word0;
+    let Inst{63-32} = Word1;
+
+    let TEX_INST = inst{4-0};
+    let SRC_REL = 0;
+    let DST_REL = 0;
+    let DST_SEL_X = 0;
+    let DST_SEL_Y = 1;
+    let DST_SEL_Z = 2;
+    let DST_SEL_W = 3;
+    let LOD_BIAS = 0;
+
+    let INST_MOD = 0;
+    let FETCH_WHOLE_QUAD = 0;
+    let ALT_CONST = 0;
+    let SAMPLER_INDEX_MODE = 0;
+
+    let COORD_TYPE_X = 0;
+    let COORD_TYPE_Y = 0;
+    let COORD_TYPE_Z = 0;
+    let COORD_TYPE_W = 0;
   }
 
 } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
@@ -644,7 +738,9 @@ multiclass SteamOutputExportPattern<Instruction ExportInst,
       4095, imm:$mask, buf3inst, 0)>;
 }
 
-let usesCustomInserter = 1 in {
+// Export Instructions should not be duplicated by TailDuplication pass
+// (which assumes that duplicable instruction are affected by exec mask)
+let usesCustomInserter = 1, isNotDuplicable = 1 in {
 
 class ExportSwzInst : InstR600ISA<(
     outs),
@@ -671,6 +767,173 @@ class ExportBufInst : InstR600ISA<(
   let Inst{63-32} = Word1;
 }
 
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions
+//===----------------------------------------------------------------------===//
+
+class CF_ALU_WORD0 {
+  field bits<32> Word0;
+
+  bits<22> ADDR;
+  bits<4> KCACHE_BANK0;
+  bits<4> KCACHE_BANK1;
+  bits<2> KCACHE_MODE0;
+
+  let Word0{21-0} = ADDR;
+  let Word0{25-22} = KCACHE_BANK0;
+  let Word0{29-26} = KCACHE_BANK1;
+  let Word0{31-30} = KCACHE_MODE0;
+}
+
+class CF_ALU_WORD1 {
+  field bits<32> Word1;
+
+  bits<2> KCACHE_MODE1;
+  bits<8> KCACHE_ADDR0;
+  bits<8> KCACHE_ADDR1;
+  bits<7> COUNT;
+  bits<1> ALT_CONST;
+  bits<4> CF_INST;
+  bits<1> WHOLE_QUAD_MODE;
+  bits<1> BARRIER;
+
+  let Word1{1-0} = KCACHE_MODE1;
+  let Word1{9-2} = KCACHE_ADDR0;
+  let Word1{17-10} = KCACHE_ADDR1;
+  let Word1{24-18} = COUNT;
+  let Word1{25} = ALT_CONST;
+  let Word1{29-26} = CF_INST;
+  let Word1{30} = WHOLE_QUAD_MODE;
+  let Word1{31} = BARRIER;
+}
+
+class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
+(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1,
+i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
+!strconcat(OpName, " $COUNT, @$ADDR, "
+"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]"
+", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"),
+[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
+  field bits<64> Inst;
+
+  let CF_INST = inst;
+  let ALT_CONST = 0;
+  let WHOLE_QUAD_MODE = 0;
+  let BARRIER = 1;
+
+  let Inst{31-0} = Word0;
+  let Inst{63-32} = Word1;
+}
+
+class CF_WORD0_R600 {
+  field bits<32> Word0;
+
+  bits<32> ADDR;
+
+  let Word0 = ADDR;
+}
+
+class CF_WORD1_R600 {
+  field bits<32> Word1;
+
+  bits<3> POP_COUNT;
+  bits<5> CF_CONST;
+  bits<2> COND;
+  bits<3> COUNT;
+  bits<6> CALL_COUNT;
+  bits<1> COUNT_3;
+  bits<1> END_OF_PROGRAM;
+  bits<1> VALID_PIXEL_MODE;
+  bits<7> CF_INST;
+  bits<1> WHOLE_QUAD_MODE;
+  bits<1> BARRIER;
+
+  let Word1{2-0} = POP_COUNT;
+  let Word1{7-3} = CF_CONST;
+  let Word1{9-8} = COND;
+  let Word1{12-10} = COUNT;
+  let Word1{18-13} = CALL_COUNT;
+  let Word1{19} = COUNT_3;
+  let Word1{21} = END_OF_PROGRAM;
+  let Word1{22} = VALID_PIXEL_MODE;
+  let Word1{29-23} = CF_INST;
+  let Word1{30} = WHOLE_QUAD_MODE;
+  let Word1{31} = BARRIER;
+}
+
+class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
+  field bits<64> Inst;
+
+  let CF_INST = inst;
+  let BARRIER = 1;
+  let CF_CONST = 0;
+  let VALID_PIXEL_MODE = 0;
+  let COND = 0;
+  let CALL_COUNT = 0;
+  let COUNT_3 = 0;
+  let END_OF_PROGRAM = 0;
+  let WHOLE_QUAD_MODE = 0;
+
+  let Inst{31-0} = Word0;
+  let Inst{63-32} = Word1;
+}
+
+class CF_WORD0_EG {
+  field bits<32> Word0;
+
+  bits<24> ADDR;
+  bits<3> JUMPTABLE_SEL;
+
+  let Word0{23-0} = ADDR;
+  let Word0{26-24} = JUMPTABLE_SEL;
+}
+
+class CF_WORD1_EG {
+  field bits<32> Word1;
+
+  bits<3> POP_COUNT;
+  bits<5> CF_CONST;
+  bits<2> COND;
+  bits<6> COUNT;
+  bits<1> VALID_PIXEL_MODE;
+  bits<8> CF_INST;
+  bits<1> BARRIER;
+
+  let Word1{2-0} = POP_COUNT;
+  let Word1{7-3} = CF_CONST;
+  let Word1{9-8} = COND;
+  let Word1{15-10} = COUNT;
+  let Word1{20} = VALID_PIXEL_MODE;
+  let Word1{29-22} = CF_INST;
+  let Word1{31} = BARRIER;
+}
+
+class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
+  field bits<64> Inst;
+
+  let CF_INST = inst;
+  let BARRIER = 1;
+  let JUMPTABLE_SEL = 0;
+  let CF_CONST = 0;
+  let VALID_PIXEL_MODE = 0;
+  let COND = 0;
+
+  let Inst{31-0} = Word0;
+  let Inst{63-32} = Word1;
+}
+
+def CF_ALU : ALU_CLAUSE<8, "ALU">;
+def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
+
+def STACK_SIZE : AMDGPUInst <(outs),
+(ins i32imm:$num), "nstack $num", [] > {
+  field bits<8> Inst;
+  bits<8> num;
+  let Inst = num;
+}
+
 let Predicates = [isR600toCayman] in {
 
 //===----------------------------------------------------------------------===//
@@ -867,25 +1130,33 @@ def CNDGT_INT : R600_3OP <
 
 def TEX_LD : R600_TEX <
   0x03, "TEX_LD",
-  [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR,
+      imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
+      imm:$SAMPLER_ID, imm:$textureTarget))]
 > {
-let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget";
-let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget);
+let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z,"
+    "$RESOURCE_ID, $SAMPLER_ID, $textureTarget";
+let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
+    i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
+    i32imm:$textureTarget);
 }
 
 def TEX_GET_TEXTURE_RESINFO : R600_TEX <
   0x04, "TEX_GET_TEXTURE_RESINFO",
-  [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR,
+      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
 def TEX_GET_GRADIENTS_H : R600_TEX <
   0x07, "TEX_GET_GRADIENTS_H",
-  [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR,
+      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
 def TEX_GET_GRADIENTS_V : R600_TEX <
   0x08, "TEX_GET_GRADIENTS_V",
-  [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR,
+      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
 def TEX_SET_GRADIENTS_H : R600_TEX <
@@ -900,32 +1171,38 @@ def TEX_SET_GRADIENTS_V : R600_TEX <
 
 def TEX_SAMPLE : R600_TEX <
   0x10, "TEX_SAMPLE",
-  [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
 def TEX_SAMPLE_C : R600_TEX <
   0x18, "TEX_SAMPLE_C",
-  [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+      imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
 >;
 
 def TEX_SAMPLE_L : R600_TEX <
   0x11, "TEX_SAMPLE_L",
-  [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
 def TEX_SAMPLE_C_L : R600_TEX <
   0x19, "TEX_SAMPLE_C_L",
-  [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+      imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
 >;
 
 def TEX_SAMPLE_LB : R600_TEX <
   0x12, "TEX_SAMPLE_LB",
-  [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
 def TEX_SAMPLE_C_LB : R600_TEX <
   0x1A, "TEX_SAMPLE_C_LB",
-  [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+      imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
 >;
 
 def TEX_SAMPLE_G : R600_TEX <
@@ -1141,13 +1418,14 @@ let Predicates = [isR600] in {
   def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
 
   defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
+  def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>;
   def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
 
   def : Pat<(fsqrt R600_Reg32:$src),
     (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
 
   def R600_ExportSwz : ExportSwzInst {
-    let Word1{20-17} = 1; // BURST_COUNT
+    let Word1{20-17} = 0; // BURST_COUNT
     let Word1{21} = eop;
     let Word1{22} = 1; // VALID_PIXEL_MODE
     let Word1{30-23} = inst;
@@ -1156,13 +1434,59 @@ let Predicates = [isR600] in {
   defm : ExportPattern<R600_ExportSwz, 39>;
 
   def R600_ExportBuf : ExportBufInst {
-    let Word1{20-17} = 1; // BURST_COUNT
+    let Word1{20-17} = 0; // BURST_COUNT
     let Word1{21} = eop;
     let Word1{22} = 1; // VALID_PIXEL_MODE
     let Word1{30-23} = inst;
     let Word1{31} = 1; // BARRIER
   }
   defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
+
+  def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+  "TEX $COUNT @$ADDR"> {
+    let POP_COUNT = 0;
+  }
+  def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+  "VTX $COUNT @$ADDR"> {
+    let POP_COUNT = 0;
+  }
+  def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
+  "LOOP_START_DX10 @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
+  "LOOP_BREAK @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
+  "CONTINUE @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "JUMP @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+  def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "ELSE @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+  def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
+    let ADDR = 0;
+    let COUNT = 0;
+    let POP_COUNT = 0;
+  }
+  def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "POP @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+
 }
 
 // Helper pattern for normalizing inputs to triginomic instructions for R700+
@@ -1212,6 +1536,7 @@ def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
 def SIN_eg : SIN_Common<0x8D>;
 def COS_eg : COS_Common<0x8E>;
 
+def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>;
 def : SIN_PAT <SIN_eg>;
 def : COS_PAT <COS_eg>;
 def : Pat<(fsqrt R600_Reg32:$src),
@@ -1245,6 +1570,9 @@ let Predicates = [isEGorCayman] in {
     VecALU
   >;
 
+  def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", []>;
+  defm : BFIPatterns <BFI_INT_eg>;
+
   def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
     [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
                                           R600_Reg32:$src2))],
@@ -1299,7 +1627,7 @@ let hasSideEffects = 1 in {
     (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
 
   def EG_ExportSwz : ExportSwzInst {
-    let Word1{19-16} = 1; // BURST_COUNT
+    let Word1{19-16} = 0; // BURST_COUNT
     let Word1{20} = 1; // VALID_PIXEL_MODE
     let Word1{21} = eop;
     let Word1{29-22} = inst;
@@ -1309,7 +1637,7 @@ let hasSideEffects = 1 in {
   defm : ExportPattern<EG_ExportSwz, 83>;
 
   def EG_ExportBuf : ExportBufInst {
-    let Word1{19-16} = 1; // BURST_COUNT
+    let Word1{19-16} = 0; // BURST_COUNT
     let Word1{20} = 1; // VALID_PIXEL_MODE
     let Word1{21} = eop;
     let Word1{29-22} = inst;
@@ -1318,6 +1646,52 @@ let hasSideEffects = 1 in {
   }
   defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
 
+  def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+  "TEX $COUNT @$ADDR"> {
+    let POP_COUNT = 0;
+  }
+  def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+  "VTX $COUNT @$ADDR"> {
+    let POP_COUNT = 0;
+  }
+  def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
+  "LOOP_START_DX10 @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
+  "LOOP_BREAK @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
+  "CONTINUE @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "JUMP @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+  def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "ELSE @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+  def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
+    let ADDR = 0;
+    let COUNT = 0;
+    let POP_COUNT = 0;
+  }
+  def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "POP @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+
+
 //===----------------------------------------------------------------------===//
 // Memory read/write instructions
 //===----------------------------------------------------------------------===//
@@ -1540,23 +1914,25 @@ def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
 def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
 def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
 def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
-def LOG_IEEE_ : LOG_IEEE_Common<0x83>;
+def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
 def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
 def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
 def SIN_cm : SIN_Common<0x8D>;
 def COS_cm : COS_Common<0x8E>;
 } // End isVector = 1
 
+def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>;
 def : SIN_PAT <SIN_cm>;
 def : COS_PAT <COS_cm>;
 
 defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
 
 // RECIP_UINT emulation for Cayman
+// The multiplication scales from [0,1] to the unsigned integer range
 def : Pat <
   (AMDGPUurecip R600_Reg32:$src0),
   (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
-                            (MOV_IMM_I32 0x4f800000)))
+                            (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
 >;
 
 
diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp
index b07a585f0831..018b40363363 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.cpp
+++ b/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -13,5 +13,6 @@
 using namespace llvm;
 
 R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
-  : MachineFunctionInfo() {
-  }
+  : AMDGPUMachineFunction(MF) { }
+
+
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index 13a46b8eec0a..99c1f91b09b1 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -14,14 +14,13 @@
 #define R600MACHINEFUNCTIONINFO_H
 
 #include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "AMDGPUMachineFunction.h"
 #include <vector>
 
 namespace llvm {
 
-class R600MachineFunctionInfo : public MachineFunctionInfo {
-
+class R600MachineFunctionInfo : public AMDGPUMachineFunction {
 public:
   R600MachineFunctionInfo(const MachineFunction &MF);
   SmallVector<unsigned, 4> LiveOuts;
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 9074364bb3a2..a777142a9e70 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -37,7 +37,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
   CurInstKind = IDOther;
   CurEmitted = 0;
   OccupedSlotsMask = 15;
-  InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
+  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
 
 
   const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index ce5994ca3680..03f49761ea40 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -43,6 +43,37 @@ foreach Index = 0-127 in {
                                    Index>;
 }
 
+// KCACHE_BANK0
+foreach Index = 159-128 in {
+  foreach Chan = [ "X", "Y", "Z", "W" ] in {
+    // 32-bit Temporary Registers
+    def KC0_#Index#_#Chan : R600RegWithChan <"KC0["#Index#"-128]."#Chan, Index, Chan>;
+  }
+  // 128-bit Temporary Registers
+  def KC0_#Index#_XYZW : R600Reg_128 <"KC0["#Index#"-128].XYZW",
+                                 [!cast<Register>("KC0_"#Index#"_X"),
+                                  !cast<Register>("KC0_"#Index#"_Y"),
+                                  !cast<Register>("KC0_"#Index#"_Z"),
+                                  !cast<Register>("KC0_"#Index#"_W")],
+                                 Index>;
+}
+
+// KCACHE_BANK1
+foreach Index = 191-160 in {
+  foreach Chan = [ "X", "Y", "Z", "W" ] in {
+    // 32-bit Temporary Registers
+    def KC1_#Index#_#Chan : R600RegWithChan <"KC1["#Index#"-160]."#Chan, Index, Chan>;
+  }
+  // 128-bit Temporary Registers
+  def KC1_#Index#_XYZW : R600Reg_128 <"KC1["#Index#"-160].XYZW",
+                                 [!cast<Register>("KC1_"#Index#"_X"),
+                                  !cast<Register>("KC1_"#Index#"_Y"),
+                                  !cast<Register>("KC1_"#Index#"_Z"),
+                                  !cast<Register>("KC1_"#Index#"_W")],
+                                 Index>;
+}
+
+
 // Array Base Register holding input in FS
 foreach Index = 448-480 in {
   def ArrayBase#Index :  R600Reg<"ARRAY_BASE", Index>;
@@ -80,6 +111,38 @@ def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X",
 
 } // End isAllocatable = 0
 
+def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+                              (add (sequence "KC0_%u_X", 128, 159))>;
+
+def R600_KC0_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+                              (add (sequence "KC0_%u_Y", 128, 159))>;
+
+def R600_KC0_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+                              (add (sequence "KC0_%u_Z", 128, 159))>;
+
+def R600_KC0_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+                              (add (sequence "KC0_%u_W", 128, 159))>;
+
+def R600_KC0 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                                   (interleave R600_KC0_X, R600_KC0_Y,
+                                               R600_KC0_Z, R600_KC0_W)>;
+
+def R600_KC1_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+                              (add (sequence "KC1_%u_X", 160, 191))>;
+
+def R600_KC1_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+                              (add (sequence "KC1_%u_Y", 160, 191))>;
+
+def R600_KC1_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+                              (add (sequence "KC1_%u_Z", 160, 191))>;
+
+def R600_KC1_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+                              (add (sequence "KC1_%u_W", 160, 191))>;
+
+def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                                   (interleave R600_KC1_X, R600_KC1_Y,
+                                               R600_KC1_Z, R600_KC1_W)>;
+
 def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
                                    (add (sequence "T%u_X", 0, 127), AR_X)>;
 
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
new file mode 100644
index 000000000000..716b093fc69d
--- /dev/null
+++ b/lib/Target/R600/SIDefines.h
@@ -0,0 +1,22 @@
+//===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef SIDEFINES_H_
+#define SIDEFINES_H_
+
+#define R_00B028_SPI_SHADER_PGM_RSRC1_PS                                0x00B028
+#define R_00B128_SPI_SHADER_PGM_RSRC1_VS                                0x00B128
+#define R_00B228_SPI_SHADER_PGM_RSRC1_GS                                0x00B228
+#define R_00B848_COMPUTE_PGM_RSRC1                                      0x00B848
+#define   S_00B028_VGPRS(x)                                           (((x) & 0x3F) << 0)
+#define   S_00B028_SGPRS(x)                                           (((x) & 0x0F) << 6)
+#define R_0286CC_SPI_PS_INPUT_ENA                                       0x0286CC
+
+#endif // SIDEFINES_H_
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 93f8c38a3acb..1a07affc195a 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -49,6 +49,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
 
   addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
   addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+  addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass);
 
   addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
   addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
@@ -70,11 +71,15 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
 
   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+  setOperationAction(ISD::STORE, MVT::i32, Custom);
+  setOperationAction(ISD::STORE, MVT::i64, Custom);
+
   setTargetDAGCombine(ISD::SELECT_CC);
 
   setTargetDAGCombine(ISD::SETCC);
 
-  setSchedulingPreference(Sched::Source);
+  setSchedulingPreference(Sched::RegPressure);
 }
 
 SDValue SITargetLowering::LowerFormalArguments(
@@ -208,28 +213,15 @@ SDValue SITargetLowering::LowerFormalArguments(
 
 MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     MachineInstr * MI, MachineBasicBlock * BB) const {
-  MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
-  MachineBasicBlock::iterator I = MI;
 
   switch (MI->getOpcode()) {
   default:
     return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
   case AMDGPU::BRANCH: return BB;
-  case AMDGPU::SI_WQM:
-    LowerSI_WQM(MI, *BB, I, MRI);
-    break;
   }
   return BB;
 }
 
-void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
-    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
-  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
-          .addReg(AMDGPU::EXEC);
-
-  MI->eraseFromParent();
-}
-
 EVT SITargetLowering::getSetCCResultType(EVT VT) const {
   return MVT::i1;
 }
@@ -247,6 +239,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+  case ISD::STORE: return LowerSTORE(Op, DAG);
   }
   return SDValue();
 }
@@ -345,6 +338,32 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
   return Chain;
 }
 
+#define RSRC_DATA_FORMAT 0xf00000000000
+
+SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+  SDValue Chain = Op.getOperand(0);
+  SDValue Value = Op.getOperand(1);
+  SDValue VirtualAddress = Op.getOperand(2);
+  DebugLoc DL = Op.getDebugLoc();
+
+  if (StoreNode->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) {
+    return SDValue();
+  }
+
+  SDValue SrcSrc = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
+                               DAG.getConstant(0, MVT::i64),
+			       DAG.getConstant(RSRC_DATA_FORMAT, MVT::i64));
+
+  SDValue Ops[2];
+  Ops[0] = DAG.getNode(AMDGPUISD::BUFFER_STORE, DL, MVT::Other, Chain,
+                       Value, SrcSrc, VirtualAddress);
+  Ops[1] = Chain;
+
+  return DAG.getMergeValues(Ops, 2, DL);
+
+}
+
 SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
@@ -437,9 +456,12 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
     float F;
   } Imm;
 
-  if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N))
+  if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
+    if (Node->getZExtValue() >> 32) {
+        return -1;
+    }
     Imm.I = Node->getSExtValue();
-  else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
+  } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
     Imm.F = Node->getValueAPF().convertToFloat();
   else
     return -1; // It isn't an immediate
@@ -497,22 +519,23 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op,
   MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 
   SDNode *Node = Op.getNode();
 
-  int OpClass;
+  const TargetRegisterClass *OpClass;
   if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) {
     const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode());
-    OpClass = Desc.OpInfo[Op.getResNo()].RegClass;
+    int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
+    if (OpClassID == -1)
+      OpClass = getRegClassFor(Op.getSimpleValueType());
+    else
+      OpClass = TRI->getRegClass(OpClassID);
 
   } else if (Node->getOpcode() == ISD::CopyFromReg) {
     RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode());
-    OpClass = MRI.getRegClass(Reg->getReg())->getID();
+    OpClass = MRI.getRegClass(Reg->getReg());
 
   } else
     return false;
 
-  if (OpClass == -1)
-    return false;
-
-  return TRI->getRegClass(RegClass)->hasSubClassEq(TRI->getRegClass(OpClass));
+  return TRI->getRegClass(RegClass)->hasSubClassEq(OpClass);
 }
 
 /// \brief Make sure that we don't exeed the number of allowed scalars
@@ -546,8 +569,9 @@ void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
   Operand = SDValue(Node, 0);
 }
 
-SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
-                                          SelectionDAG &DAG) const {
+/// \brief Try to fold the Nodes operands into the Node
+SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
+                                       SelectionDAG &DAG) const {
 
   // Original encoding (either e32 or e64)
   int Opcode = Node->getMachineOpcode();
@@ -556,6 +580,13 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
   unsigned NumDefs = Desc->getNumDefs();
   unsigned NumOps = Desc->getNumOperands();
 
+  // Commuted opcode if available
+  int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
+  const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);
+
+  assert(!DescRev || DescRev->getNumDefs() == NumDefs);
+  assert(!DescRev || DescRev->getNumOperands() == NumOps);
+
   // e64 version if available, -1 otherwise
   int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
   const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
@@ -608,41 +639,54 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
 
     // Is this a VSrc or SSrc operand ?
     unsigned RegClass = Desc->OpInfo[Op].RegClass;
-    if (!isVSrc(RegClass) && !isSSrc(RegClass)) {
+    if (isVSrc(RegClass) || isSSrc(RegClass)) {
+      // Try to fold the immediates
+      if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
+        // Folding didn't worked, make sure we don't hit the SReg limit
+        ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
+      }
+      continue;
+    }
+
+    if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
 
-      if (i == 1 && Desc->isCommutable() &&
-          fitsRegClass(DAG, Ops[0], RegClass) &&
-          foldImm(Ops[1], Immediate, ScalarSlotUsed)) {
+      unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
+      assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
 
-        assert(isVSrc(Desc->OpInfo[NumDefs].RegClass) ||
-               isSSrc(Desc->OpInfo[NumDefs].RegClass));
+      // Test if it makes sense to swap operands
+      if (foldImm(Ops[1], Immediate, ScalarSlotUsed) ||
+          (!fitsRegClass(DAG, Ops[1], RegClass) &&
+           fitsRegClass(DAG, Ops[1], OtherRegClass))) {
 
         // Swap commutable operands
         SDValue Tmp = Ops[1];
         Ops[1] = Ops[0];
         Ops[0] = Tmp;
 
-      } else if (DescE64 && !Immediate) {
-        // Test if it makes sense to switch to e64 encoding
-
-        RegClass = DescE64->OpInfo[Op].RegClass;
-        int32_t TmpImm = -1;
-        if ((isVSrc(RegClass) || isSSrc(RegClass)) &&
-            foldImm(Ops[i], TmpImm, ScalarSlotUsed)) {
-
-          Immediate = -1;
-          Promote2e64 = true;
-          Desc = DescE64;
-          DescE64 = 0;
-        }
+        Desc = DescRev;
+        DescRev = 0;
+        continue;
       }
-      continue;
     }
 
-    // Try to fold the immediates
-    if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
-      // Folding didn't worked, make sure we don't hit the SReg limit
-      ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
+    if (DescE64 && !Immediate) {
+
+      // Test if it makes sense to switch to e64 encoding
+      unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
+      if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
+        continue;
+
+      int32_t TmpImm = -1;
+      if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) ||
+          (!fitsRegClass(DAG, Ops[i], RegClass) &&
+           fitsRegClass(DAG, Ops[1], OtherRegClass))) {
+
+        // Switch to e64 encoding
+        Immediate = -1;
+        Promote2e64 = true;
+        Desc = DescE64;
+        DescE64 = 0;
+      }
     }
   }
 
@@ -656,10 +700,118 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
   for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
     Ops.push_back(Node->getOperand(i));
 
-  // Either create a complete new or update the current instruction
-  if (Promote2e64)
-    return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(),
-                              Node->getVTList(), Ops.data(), Ops.size());
-  else
-    return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+  // Create a complete new instruction
+  return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(),
+                            Node->getVTList(), Ops);
+}
+
+/// \brief Helper function for adjustWritemask
+unsigned SubIdx2Lane(unsigned Idx) {
+  switch (Idx) {
+  default: return 0;
+  case AMDGPU::sub0: return 0;
+  case AMDGPU::sub1: return 1;
+  case AMDGPU::sub2: return 2;
+  case AMDGPU::sub3: return 3;
+  }
+}
+
+/// \brief Adjust the writemask of MIMG instructions
+void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
+                                       SelectionDAG &DAG) const {
+  SDNode *Users[4] = { };
+  unsigned Writemask = 0, Lane = 0;
+
+  // Try to figure out the used register components
+  for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
+       I != E; ++I) {
+
+    // Abort if we can't understand the usage
+    if (!I->isMachineOpcode() ||
+        I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
+      return;
+
+    Lane = SubIdx2Lane(I->getConstantOperandVal(1));
+
+    // Abort if we have more than one user per component
+    if (Users[Lane])
+      return;
+
+    Users[Lane] = *I;
+    Writemask |= 1 << Lane;
+  }
+
+  // Abort if all components are used
+  if (Writemask == 0xf)
+    return;
+
+  // Adjust the writemask in the node
+  std::vector<SDValue> Ops;
+  Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32));
+  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
+    Ops.push_back(Node->getOperand(i));
+  Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+
+  // If we only got one lane, replace it with a copy
+  if (Writemask == (1U << Lane)) {
+    SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
+    SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+                                      DebugLoc(), MVT::f32,
+                                      SDValue(Node, 0), RC);
+    DAG.ReplaceAllUsesWith(Users[Lane], Copy);
+    return;
+  }
+
+  // Update the users of the node with the new indices
+  for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
+
+    SDNode *User = Users[i];
+    if (!User)
+      continue;
+
+    SDValue Op = DAG.getTargetConstant(Idx, MVT::i32);
+    DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
+
+    switch (Idx) {
+    default: break;
+    case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
+    case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
+    case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
+    }
+  }
+}
+
+/// \brief Fold the instructions after slecting them
+SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
+                                          SelectionDAG &DAG) const {
+
+  if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1)
+    adjustWritemask(Node, DAG);
+
+  return foldOperands(Node, DAG);
+}
+
+/// \brief Assign the register class depending on the number of
+/// bits set in the writemask
+void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+                                                     SDNode *Node) const {
+  if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
+    return;
+
+  unsigned VReg = MI->getOperand(0).getReg();
+  unsigned Writemask = MI->getOperand(1).getImm();
+  unsigned BitsSet = 0;
+  for (unsigned i = 0; i < 4; ++i)
+    BitsSet += Writemask & (1 << i) ? 1 : 0;
+
+  const TargetRegisterClass *RC;
+  switch (BitsSet) {
+  default: return;
+  case 1:  RC = &AMDGPU::VReg_32RegClass; break;
+  case 2:  RC = &AMDGPU::VReg_64RegClass; break;
+  case 3:  RC = &AMDGPU::VReg_96RegClass; break;
+  }
+
+  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  MRI.setRegClass(VReg, RC);
 }
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index d65622526f20..de637bea37b9 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -24,9 +24,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
   const SIInstrInfo * TII;
   const TargetRegisterInfo * TRI;
 
-  void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
-              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
-
+  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
 
@@ -36,6 +34,9 @@ class SITargetLowering : public AMDGPUTargetLowering {
   void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, 
                        unsigned RegClass, bool &ScalarSlotUsed) const;
 
+  SDNode *foldOperands(MachineSDNode *N, SelectionDAG &DAG) const;
+  void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
+
 public:
   SITargetLowering(TargetMachine &tm);
 
@@ -52,6 +53,8 @@ class SITargetLowering : public AMDGPUTargetLowering {
   virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
   virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
+  virtual void AdjustInstrPostInstrSelection(MachineInstr *MI,
+                                             SDNode *Node) const;
 
   int32_t analyzeImmediate(const SDNode *N) const;
 };
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 3891ddb2dbe2..f737ddd28077 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -284,33 +284,33 @@ let Uses = [EXEC] in {
 class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
     Enc64<outs, ins, asm, pattern> {
 
-  bits<8> VDATA;
-  bits<12> OFFSET;
-  bits<1> OFFEN;
-  bits<1> IDXEN;
-  bits<1> GLC;
-  bits<1> ADDR64;
-  bits<1> LDS;
-  bits<8> VADDR;
-  bits<7> SRSRC;
-  bits<1> SLC;
-  bits<1> TFE;
-  bits<8> SOFFSET;
-
-  let Inst{11-0} = OFFSET;
-  let Inst{12} = OFFEN;
-  let Inst{13} = IDXEN;
-  let Inst{14} = GLC;
-  let Inst{15} = ADDR64;
-  let Inst{16} = LDS;
+  bits<12> offset;
+  bits<1> offen;
+  bits<1> idxen;
+  bits<1> glc;
+  bits<1> addr64;
+  bits<1> lds;
+  bits<8> vaddr;
+  bits<8> vdata;
+  bits<7> srsrc;
+  bits<1> slc;
+  bits<1> tfe;
+  bits<8> soffset;
+
+  let Inst{11-0} = offset;
+  let Inst{12} = offen;
+  let Inst{13} = idxen;
+  let Inst{14} = glc;
+  let Inst{15} = addr64;
+  let Inst{16} = lds;
   let Inst{24-18} = op;
   let Inst{31-26} = 0x38; //encoding
-  let Inst{39-32} = VADDR;
-  let Inst{47-40} = VDATA;
-  let Inst{52-48} = SRSRC{6-2};
-  let Inst{54} = SLC;
-  let Inst{55} = TFE;
-  let Inst{63-56} = SOFFSET;
+  let Inst{39-32} = vaddr;
+  let Inst{47-40} = vdata;
+  let Inst{52-48} = srsrc{6-2};
+  let Inst{54} = slc;
+  let Inst{55} = tfe;
+  let Inst{63-56} = soffset;
 
   let VM_CNT = 1;
   let EXP_CNT = 1;
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index de2373b11a75..9a04c609b6c9 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -58,6 +58,10 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
   };
 
+  const int16_t Sub0_2[] = {
+    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
+  };
+
   const int16_t Sub0_1[] = {
     AMDGPU::sub0, AMDGPU::sub1, 0
   };
@@ -65,6 +69,26 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   unsigned Opcode;
   const int16_t *SubIndices;
 
+  if (AMDGPU::M0 == DestReg) {
+    // Check if M0 isn't already set to this value
+    for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
+      I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
+
+      if (!I->definesRegister(AMDGPU::M0))
+        continue;
+
+      unsigned Opc = I->getOpcode();
+      if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
+        break;
+
+      if (!I->readsRegister(SrcReg))
+        break;
+
+      // The copy isn't necessary
+      return;
+    }
+  }
+
   if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
     assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
     BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
@@ -105,6 +129,11 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opcode = AMDGPU::V_MOV_B32_e32;
     SubIndices = Sub0_1;
 
+  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
+    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
+    Opcode = AMDGPU::V_MOV_B32_e32;
+    SubIndices = Sub0_2;
+
   } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
     assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
 	   AMDGPU::SReg_128RegClass.contains(SrcReg));
@@ -138,6 +167,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   }
 }
 
+unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+
+  int NewOpc;
+
+  // Try to map original to commuted opcode
+  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
+    return NewOpc;
+
+  // Try to map commuted to original opcode
+  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
+    return NewOpc;
+
+  return Opcode;
+}
+
 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                               bool NewMI) const {
 
@@ -145,7 +189,12 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
       !MI->getOperand(2).isReg())
     return 0;
 
-  return TargetInstrInfo::commuteInstruction(MI, NewMI);
+  MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+
+  if (MI)
+    MI->setDesc(get(commuteOpcode(MI->getOpcode())));
+
+  return MI;
 }
 
 MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 5789af5d2116..87eff4d6c95c 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -35,6 +35,8 @@ class SIInstrInfo : public AMDGPUInstrInfo {
                            unsigned DestReg, unsigned SrcReg,
                            bool KillSrc) const;
 
+  unsigned commuteOpcode(unsigned Opcode) const;
+
   virtual MachineInstr *commuteInstruction(MachineInstr *MI,
                                            bool NewMI=false) const;
 
@@ -76,6 +78,9 @@ class SIInstrInfo : public AMDGPUInstrInfo {
 namespace AMDGPU {
 
   int getVOPe64(uint16_t Opcode);
+  int getCommuteRev(uint16_t Opcode);
+  int getCommuteOrig(uint16_t Opcode);
+  int isMIMG(uint16_t Opcode);
 
 } // End namespace AMDGPU
 
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 2f10c388ad55..aafc331f25e7 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -26,6 +26,10 @@ def HI32 : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
 }]>;
 
+def SIbuffer_store : SDNode<"AMDGPUISD::BUFFER_STORE",
+                           SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+                           [SDNPHasChain, SDNPMayStore]>;
+
 def IMM8bitDWORD : ImmLeaf <
   i32, [{
     return (Imm & ~0x3FC) == 0;
@@ -138,6 +142,11 @@ class VOP <string opName> {
   string OpName = opName;
 }
 
+class VOP2_REV <string revOp, bit isOrig> {
+  string RevOp = revOp;
+  bit IsOrig = isOrig;
+}
+
 multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
                         string opName, list<dag> pattern> {
 
@@ -166,11 +175,11 @@ multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
   : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
 
 multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
-                        string opName, list<dag> pattern> {
+                        string opName, list<dag> pattern, string revOp> {
   def _e32 : VOP2 <
     op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
     opName#"_e32 $dst, $src0, $src1", pattern
-  >, VOP <opName>;
+  >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
 
   def _e64 : VOP3 <
     {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
@@ -179,23 +188,26 @@ multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
          i32imm:$abs, i32imm:$clamp,
          i32imm:$omod, i32imm:$neg),
     opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
-  >, VOP <opName> {
+  >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
     let SRC2 = SIOperand.ZERO;
   }
 }
 
-multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern>
-  : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
+                    string revOp = opName>
+  : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
 
-multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern>
-  : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
+                    string revOp = opName>
+  : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
 
-multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> {
+multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
+                     string revOp = opName> {
 
   def _e32 : VOP2 <
     op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
     opName#"_e32 $dst, $src0, $src1", pattern
-  >, VOP <opName>;
+  >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
 
   def _e64 : VOP3b <
     {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
@@ -204,7 +216,7 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> {
          i32imm:$abs, i32imm:$clamp,
          i32imm:$omod, i32imm:$neg),
     opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
-  >, VOP <opName> {
+  >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
     let SRC2 = SIOperand.ZERO;
     /* the VOP2 variant puts the carry out into VCC, the VOP3 variant
        can write it into any SGPR. We currently don't use the carry out,
@@ -247,14 +259,14 @@ multiclass VOPC_64 <bits<8> op, string opName,
 class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
   op, (outs VReg_32:$dst),
   (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
-   i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+   InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
   opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
 >, VOP <opName>;
 
 class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
   op, (outs VReg_64:$dst),
   (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2,
-   i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+   InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
   opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
 >, VOP <opName>;
 
@@ -277,17 +289,39 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
 
 class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
   op,
-  (outs regClass:$dst),
+  (outs regClass:$vdata),
   (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
        i1imm:$lds, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc,
        i1imm:$tfe, SSrc_32:$soffset),
-  asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, "
+  asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, "
      #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset",
   []> {
   let mayLoad = 1;
   let mayStore = 0;
 }
 
+class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
+                         ValueType VT> :
+    MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr),
+          name#" $vdata, $srsrc + $vaddr",
+          [(SIbuffer_store (VT vdataClass:$vdata), (i128 SReg_128:$srsrc),
+                                                    (i64 VReg_64:$vaddr))]> {
+
+  let mayLoad = 0;
+  let mayStore = 1;
+
+  // Encoding
+  let offset = 0;
+  let offen = 0;
+  let idxen = 0;
+  let glc = 0;
+  let addr64 = 1;
+  let lds = 0;
+  let slc = 0;
+  let tfe = 0;
+  let soffset = 128; // ZERO
+}
+
 class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
   op,
   (outs regClass:$dst),
@@ -305,13 +339,14 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
   op,
   (outs VReg_128:$vdata),
   (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
-       i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr,
+       i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
        SReg_256:$srsrc, SReg_128:$ssamp),
   asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
      #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
   []> {
   let mayLoad = 1;
   let mayStore = 0;
+  let hasPostISelHook = 1;
 }
 
 //===----------------------------------------------------------------------===//
@@ -327,4 +362,31 @@ def getVOPe64 : InstrMapping {
   let ValueCols = [["8"]];
 }
 
+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+  let FilterClass = "VOP2_REV";
+  let RowFields = ["RevOp"];
+  let ColFields = ["IsOrig"];
+  let KeyCol = ["1"];
+  let ValueCols = [["0"]];
+}
+
+// Maps an commuted opcode to its original version
+def getCommuteOrig : InstrMapping {
+  let FilterClass = "VOP2_REV";
+  let RowFields = ["RevOp"];
+  let ColFields = ["IsOrig"];
+  let KeyCol = ["0"];
+  let ValueCols = [["1"]];
+}
+
+// Test if the supplied opcode is an MIMG instruction
+def isMIMG : InstrMapping {
+  let FilterClass = "MIMG_Load_Helper";
+  let RowFields = ["Inst"];
+  let ColFields = ["Size"];
+  let KeyCol = ["8"];
+  let ValueCols = [["8"]];
+}
+
 include "SIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 05b04a92b312..9faf89b51f1a 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -408,8 +408,14 @@ def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2",
 def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
 //def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
 //def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
-//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
-//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
+
+def BUFFER_STORE_DWORD : MUBUF_Store_Helper <
+  0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32
+>;
+
+def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
+  0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, i64
+>;
 //def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
 //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
 //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
@@ -596,8 +602,8 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
 defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
   [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
 >;
-//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
-//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
+defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
 defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
   [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
 >;
@@ -804,13 +810,13 @@ let isCommutable = 1 in {
 defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
   [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))]
 >;
-} // End isCommutable = 1
 
 defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
   [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))]
 >;
+defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
+} // End isCommutable = 1
 
-defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>;
 defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
 
 let isCommutable = 1 in {
@@ -848,18 +854,20 @@ defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
 defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
 defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
 
-} // End isCommutable = 1
+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
+  [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
+
+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
+  [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
 
-defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
-defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
-defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
-defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
 defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
   [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
 >;
-defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
-
-let isCommutable = 1 in {
+defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
 
 defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
   [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))]
@@ -880,25 +888,24 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
 //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
 //defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
 //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
-let Defs = [VCC] in { // Carry-out goes to VCC
 
-let isCommutable = 1 in {
+let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
 defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
   [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
 >;
-} // End isCommutable = 1
 
 defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
   [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
 >;
+defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
 
-defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>;
 let Uses = [VCC] in { // Carry-out comes from VCC
 defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
 defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
-defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", []>;
+defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
 } // End Uses = [VCC]
-} // End Defs = [VCC]
+} // End isCommutable = 1, Defs = [VCC]
+
 defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
 ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
 ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
@@ -941,6 +948,7 @@ def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
 def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
 def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
 def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
+defm : BFIPatterns <V_BFI_B32>;
 def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
 def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
 //def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
@@ -971,14 +979,31 @@ def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
 def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
 def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
 def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
+
+let isCommutable = 1 in {
+
 def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
 def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
 def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
+def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+
+} // isCommutable = 1
+
 def : Pat <
   (mul VSrc_32:$src0, VReg_32:$src1),
-  (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+  (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0))
 >;
-def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+
+def : Pat <
+  (mulhu VSrc_32:$src0, VReg_32:$src1),
+  (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0))
+>;
+
+def : Pat <
+  (mulhs VSrc_32:$src0, VReg_32:$src1),
+  (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0))
+>;
+
 def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
 def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
 def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
@@ -1067,17 +1092,6 @@ def LOAD_CONST : AMDGPUShaderInst <
   [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
 >;
 
-let usesCustomInserter = 1 in {
-
-def SI_WQM : InstSI <
-  (outs),
-  (ins),
-  "SI_WQM",
-  [(int_SI_wqm)]
->;
-
-} // end usesCustomInserter 
-
 // SI Psuedo instructions. These are used by the CFG structurizer pass
 // and should be lowered to ISA instructions prior to codegen.
 
@@ -1206,55 +1220,48 @@ def : Pat <
 
 /* int_SI_sample for simple 1D texture lookup */
 def : Pat <
-  (int_SI_sample imm:$writemask, (v1i32 VReg_32:$addr),
-                 SReg_256:$rsrc, SReg_128:$sampler, imm),
-  (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0,
-                (i32 (COPY_TO_REGCLASS VReg_32:$addr, VReg_32)),
+  (int_SI_sample VReg_32:$addr, SReg_256:$rsrc, SReg_128:$sampler, imm),
+  (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr,
                 SReg_256:$rsrc, SReg_128:$sampler)
 >;
 
 class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
                     ValueType addr_type> : Pat <
-    (name imm:$writemask, (addr_type addr_class:$addr),
+    (name (addr_type addr_class:$addr),
           SReg_256:$rsrc, SReg_128:$sampler, imm),
-    (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0,
-          (EXTRACT_SUBREG addr_class:$addr, sub0),
+    (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
           SReg_256:$rsrc, SReg_128:$sampler)
 >;
 
 class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
                         ValueType addr_type> : Pat <
-    (name imm:$writemask, (addr_type addr_class:$addr),
+    (name (addr_type addr_class:$addr),
           SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
-    (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0,
-          (EXTRACT_SUBREG addr_class:$addr, sub0),
+    (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr,
           SReg_256:$rsrc, SReg_128:$sampler)
 >;
 
 class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
                          ValueType addr_type> : Pat <
-    (name imm:$writemask, (addr_type addr_class:$addr),
+    (name (addr_type addr_class:$addr),
           SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY),
-    (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0,
-          (EXTRACT_SUBREG addr_class:$addr, sub0),
+    (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
           SReg_256:$rsrc, SReg_128:$sampler)
 >;
 
 class SampleShadowPattern<Intrinsic name, MIMG opcode,
                           RegisterClass addr_class, ValueType addr_type> : Pat <
-    (name imm:$writemask, (addr_type addr_class:$addr),
+    (name (addr_type addr_class:$addr),
           SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW),
-    (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0,
-          (EXTRACT_SUBREG addr_class:$addr, sub0),
+    (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
           SReg_256:$rsrc, SReg_128:$sampler)
 >;
 
 class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
                                RegisterClass addr_class, ValueType addr_type> : Pat <
-    (name imm:$writemask, (addr_type addr_class:$addr),
+    (name (addr_type addr_class:$addr),
           SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY),
-    (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0,
-          (EXTRACT_SUBREG addr_class:$addr, sub0),
+    (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
           SReg_256:$rsrc, SReg_128:$sampler)
 >;
 
@@ -1442,8 +1449,7 @@ def : Pat <
 /********** ================== **********/
 
 /* llvm.AMDGPU.pow */
-/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */
-def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>;
+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
 
 def : Pat <
   (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1),
@@ -1470,20 +1476,20 @@ def : Pat <
   (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
     (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
                   (EXTRACT_SUBREG VReg_128:$src, sub1),
-                  (EXTRACT_SUBREG VReg_128:$src, sub2),
-                  0, 0, 0, 0), sub0),
+                  (EXTRACT_SUBREG VReg_128:$src, sub2)),
+                   sub0),
     (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
                   (EXTRACT_SUBREG VReg_128:$src, sub1),
-                  (EXTRACT_SUBREG VReg_128:$src, sub2),
-                  0, 0, 0, 0), sub1),
+                  (EXTRACT_SUBREG VReg_128:$src, sub2)),
+                   sub1),
     (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
                   (EXTRACT_SUBREG VReg_128:$src, sub1),
-                  (EXTRACT_SUBREG VReg_128:$src, sub2),
-                  0, 0, 0, 0), sub2),
+                  (EXTRACT_SUBREG VReg_128:$src, sub2)),
+                   sub2),
     (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
                   (EXTRACT_SUBREG VReg_128:$src, sub1),
-                  (EXTRACT_SUBREG VReg_128:$src, sub2),
-                  0, 0, 0, 0), sub3)
+                  (EXTRACT_SUBREG VReg_128:$src, sub2)),
+                   sub3)
 >;
 
 def : Pat <
@@ -1509,13 +1515,20 @@ def : Pat <
   (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
 >;
 
+// The multiplication scales from [0,1] to the unsigned integer range
+def : Pat <
+  (AMDGPUurecip i32:$src0),
+  (V_CVT_U32_F32_e32
+    (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
+                   (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
+>;
+
 /********** ================== **********/
 /**********   VOP3 Patterns    **********/
 /********** ================== **********/
 
 def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
-           (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
-            0, 0, 0, 0)>;
+           (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2)>;
 
 /********** ================== **********/
 /**********   SMRD Patterns    **********/
@@ -1590,4 +1603,18 @@ defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
 defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
 defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
 
+/********** =============== **********/
+/**********   Conditions    **********/
+/********** =============== **********/
+
+def : Pat<
+  (i1 (setcc f32:$src0, f32:$src1, SETO)),
+  (V_CMP_O_F32_e64 f32:$src0, f32:$src1)
+>;
+
+def : Pat<
+  (i1 (setcc f32:$src0, f32:$src1, SETUO)),
+  (V_CMP_U_F32_e64 f32:$src0, f32:$src1)
+>;
+
 } // End isSI predicate
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 33bb8157b436..16d9d812af99 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -16,11 +16,10 @@ let TargetPrefix = "SI", isTarget = 1 in {
 
   def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
   def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
-  def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>;
-  def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
-  def int_SI_wqm : Intrinsic <[], [], []>;
+  def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
 
-  class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>;
+  class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
 
   def int_SI_sample : Sample;
   def int_SI_sampleb : Sample;
@@ -28,8 +27,8 @@ let TargetPrefix = "SI", isTarget = 1 in {
 
   /* Interpolation Intrinsics */
 
-  def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
-  def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrReadMem]>;
+  def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>;
 
   /* Control flow Intrinsics */
 
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 9a027e77eb67..2b60eb9fb375 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -197,7 +197,8 @@ void SILowerControlFlowPass::Else(MachineInstr &MI) {
   unsigned Dst = MI.getOperand(0).getReg();
   unsigned Src = MI.getOperand(1).getReg();
 
-  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
+  BuildMI(MBB, MBB.getFirstNonPHI(), DL,
+          TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
           .addReg(Src); // Saved EXEC
 
   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
@@ -409,6 +410,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
 bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
 
   bool HaveKill = false;
+  bool NeedWQM = false;
   unsigned Depth = 0;
 
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -478,9 +480,22 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
         case AMDGPU::SI_INDIRECT_DST_V16:
           IndirectDst(MI);
           break;
+
+        case AMDGPU::V_INTERP_P1_F32:
+        case AMDGPU::V_INTERP_P2_F32:
+        case AMDGPU::V_INTERP_MOV_F32:
+          NeedWQM = true;
+          break;
+
       }
     }
   }
 
+  if (NeedWQM) {
+    MachineBasicBlock &MBB = MF.front();
+    BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
+            AMDGPU::EXEC).addReg(AMDGPU::EXEC);
+  }
+
   return true;
 }
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index 1a4e4cbbbb01..ee0e30755f01 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,25 +10,9 @@
 
 
 #include "SIMachineFunctionInfo.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Function.h"
 
 using namespace llvm;
 
-const char *SIMachineFunctionInfo::ShaderTypeAttribute = "ShaderType";
-
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
-  : MachineFunctionInfo(),
-    ShaderType(0),
-    PSInputAddr(0) {
-
-  AttributeSet Set = MF.getFunction()->getAttributes();
-  Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
-                                 ShaderTypeAttribute);
-
-  if (A.isStringAttribute()) {
-    StringRef Str = A.getValueAsString();
-    if (Str.getAsInteger(0, ShaderType))
-      llvm_unreachable("Can't parse shader type!");
-  }
-}
+  : AMDGPUMachineFunction(MF),
+    PSInputAddr(0) { }
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index 91a809b124a5..6da9f7f9a14d 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -15,18 +15,15 @@
 #ifndef SIMACHINEFUNCTIONINFO_H_
 #define SIMACHINEFUNCTIONINFO_H_
 
-#include "llvm/CodeGen/MachineFunction.h"
+#include "AMDGPUMachineFunction.h"
 
 namespace llvm {
 
 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
 /// tells the hardware which interpolation parameters to load.
-class SIMachineFunctionInfo : public MachineFunctionInfo {
+class SIMachineFunctionInfo : public AMDGPUMachineFunction {
 public:
-  static const char *ShaderTypeAttribute;
-
   SIMachineFunctionInfo(const MachineFunction &MF);
-  unsigned ShaderType;
   unsigned PSInputAddr;
 };
 
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 88275c523f77..99278ae8dceb 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -30,6 +30,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
+unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+                                             MachineFunction &MF) const {
+  return RC->getNumRegs();
+}
+
 const TargetRegisterClass *
 SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
   switch (rc->getID()) {
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index 40171e4450e7..caec22841345 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -31,6 +31,9 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
 
   virtual BitVector getReservedRegs(const MachineFunction &MF) const;
 
+  virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                                       MachineFunction &MF) const;
+
   /// \param RC is an AMDIL reg class.
   ///
   /// \returns the SI register class that is equivalent to \p RC.
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 4f14931a9c48..244d4c00348d 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -94,6 +94,12 @@ def VGPR_64 : RegisterTuples<[sub0, sub1],
                              [(add (trunc VGPR_32, 255)),
                               (add (shl VGPR_32, 1))]>;
 
+// VGPR 96-bit registers
+def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
+                             [(add (trunc VGPR_32, 254)),
+                              (add (shl VGPR_32, 1)),
+                              (add (shl VGPR_32, 2))]>;
+
 // VGPR 128-bit registers
 def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
                               [(add (trunc VGPR_32, 253)),
@@ -151,7 +157,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
   (add SGPR_64, VCCReg, EXECReg)
 >;
 
-def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>;
 
 def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
 
@@ -162,6 +168,10 @@ def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
 
 def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
 
+def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
+  let Size = 96;
+}
+
 def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
 
 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
new file mode 100644
index 000000000000..aac0e8d74a85
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
@@ -0,0 +1,62 @@
+//===-- SparcBaseInfo.h - Top level definitions for Sparc ---- --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions
+// for the Sparc target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core code gen
+// types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCBASEINFO_H
+#define SPARCBASEINFO_H
+
+namespace llvm {
+
+/// SPII - This namespace holds target specific flags for instruction info.
+namespace SPII {
+
+/// Target Operand Flags. Sparc specific TargetFlags for MachineOperands and
+/// SDNodes.
+enum TOF {
+  MO_NO_FLAG,
+
+  // Extract the low 10 bits of an address.
+  // Assembler: %lo(addr)
+  MO_LO,
+
+  // Extract bits 31-10 of an address. Only for sethi.
+  // Assembler: %hi(addr) or %lm(addr)
+  MO_HI,
+
+  // Extract bits 43-22 of an adress. Only for sethi.
+  // Assembler: %h44(addr)
+  MO_H44,
+
+  // Extract bits 21-12 of an address.
+  // Assembler: %m44(addr)
+  MO_M44,
+
+  // Extract bits 11-0 of an address.
+  // Assembler: %l44(addr)
+  MO_L44,
+
+  // Extract bits 63-42 of an address. Only for sethi.
+  // Assembler: %hh(addr)
+  MO_HH,
+
+  // Extract bits 41-32 of an address.
+  // Assembler: %hm(addr)
+  MO_HM
+};
+
+} // end namespace SPII
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 7fdb0c39285a..1c64e1b0c4c0 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -50,14 +50,42 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
   return X;
 }
 
+// Code models. Some only make sense for 64-bit code.
+//
+// SunCC  Reloc   CodeModel  Constraints
+// abs32  Static  Small      text+data+bss linked below 2^32 bytes
+// abs44  Static  Medium     text+data+bss linked below 2^44 bytes
+// abs64  Static  Large      text smaller than 2^31 bytes
+// pic13  PIC_    Small      GOT < 2^13 bytes
+// pic32  PIC_    Medium     GOT < 2^32 bytes
+//
+// All code models require that the text segment is smaller than 2GB.
+
 static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                                CodeModel::Model CM,
                                                CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
+
+  // The default 32-bit code model is abs32/pic32.
+  if (CM == CodeModel::Default)
+    CM = RM == Reloc::PIC_ ? CodeModel::Medium : CodeModel::Small;
+
   X->InitMCCodeGenInfo(RM, CM, OL);
   return X;
 }
 
+static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+                                                 CodeModel::Model CM,
+                                                 CodeGenOpt::Level OL) {
+  MCCodeGenInfo *X = new MCCodeGenInfo();
+
+  // The default 64-bit code model is abs44/pic32.
+  if (CM == CodeModel::Default)
+    CM = CodeModel::Medium;
+
+  X->InitMCCodeGenInfo(RM, CM, OL);
+  return X;
+}
 extern "C" void LLVMInitializeSparcTargetMC() {
   // Register the MC asm info.
   RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
@@ -67,7 +95,7 @@ extern "C" void LLVMInitializeSparcTargetMC() {
   TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget,
                                        createSparcMCCodeGenInfo);
   TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target,
-                                       createSparcMCCodeGenInfo);
+                                       createSparcV9MCCodeGenInfo);
 
   // Register the MC instruction info.
   TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index e14b3cbf161d..108eb9047903 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -16,6 +16,7 @@
 #include "Sparc.h"
 #include "SparcInstrInfo.h"
 #include "SparcTargetMachine.h"
+#include "MCTargetDesc/SparcBaseInfo.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -72,15 +73,39 @@ namespace {
 void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
                                    raw_ostream &O) {
   const MachineOperand &MO = MI->getOperand (opNum);
-  bool CloseParen = false;
-  if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
-    O << "%hi(";
-    CloseParen = true;
-  } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
-             !MO.isReg() && !MO.isImm()) {
-    O << "%lo(";
-    CloseParen = true;
+  unsigned TF = MO.getTargetFlags();
+#ifndef NDEBUG
+  // Verify the target flags.
+  if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
+    if (MI->getOpcode() == SP::CALL)
+      assert(TF == SPII::MO_NO_FLAG &&
+             "Cannot handle target flags on call address");
+    else if (MI->getOpcode() == SP::SETHIi)
+      assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH) &&
+             "Invalid target flags for address operand on sethi");
+    else
+      assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44 ||
+              TF == SPII::MO_HM) &&
+             "Invalid target flags for small address operand");
   }
+#endif
+
+  bool CloseParen = true;
+  switch (TF) {
+  default:
+      llvm_unreachable("Unknown target flags on operand");
+  case SPII::MO_NO_FLAG:
+    CloseParen = false;
+    break;
+  case SPII::MO_LO:  O << "%lo(";  break;
+  case SPII::MO_HI:  O << "%hi(";  break;
+  case SPII::MO_H44: O << "%h44("; break;
+  case SPII::MO_M44: O << "%m44("; break;
+  case SPII::MO_L44: O << "%l44("; break;
+  case SPII::MO_HH:  O << "%hh(";  break;
+  case SPII::MO_HM:  O << "%hm(";  break;
+  }
+
   switch (MO.getType()) {
   case MachineOperand::MO_Register:
     O << "%" << StringRef(getRegisterName(MO.getReg())).lower();
@@ -127,14 +152,7 @@ void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
     return;   // don't print "+0"
 
   O << "+";
-  if (MI->getOperand(opNum+1).isGlobal() ||
-      MI->getOperand(opNum+1).isCPI()) {
-    O << "%lo(";
-    printOperand(MI, opNum+1, O);
-    O << ")";
-  } else {
-    printOperand(MI, opNum+1, O);
-  }
+  printOperand(MI, opNum+1, O);
 }
 
 bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index d4712208126f..54784e018834 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -12,17 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// Return Value Calling Conventions
+// SPARC v8 32-bit.
 //===----------------------------------------------------------------------===//
 
-// Sparc 32-bit C return-value convention.
-def RetCC_Sparc32 : CallingConv<[
-  CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
-  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
-  CCIfType<[f64], CCAssignToReg<[D0, D1]>>
-]>;
-
-// Sparc 32-bit C Calling convention.
 def CC_Sparc32 : CallingConv<[
   //Custom assign SRet to [sp+64].
   CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
@@ -34,3 +26,94 @@ def CC_Sparc32 : CallingConv<[
   // Alternatively, they are assigned to the stack in 4-byte aligned units.
   CCAssignToStack<4, 4>
 ]>;
+
+def RetCC_Sparc32 : CallingConv<[
+  CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// SPARC v9 64-bit.
+//===----------------------------------------------------------------------===//
+//
+// The 64-bit ABI conceptually assigns all function arguments to a parameter
+// array starting at [%fp+BIAS+128] in the callee's stack frame. All arguments
+// occupy a multiple of 8 bytes in the array. Integer arguments are extended to
+// 64 bits by the caller. Floats are right-aligned in their 8-byte slot, the
+// first 4 bytes in the slot are undefined.
+//
+// The integer registers %i0 to %i5 shadow the first 48 bytes of the parameter
+// array at fixed offsets. Integer arguments are promoted to registers when
+// possible.
+//
+// The floating point registers %f0 to %f31 shadow the first 128 bytes of the
+// parameter array at fixed offsets. Float and double parameters are promoted
+// to these registers when possible.
+//
+// Structs up to 16 bytes in size are passed by value. They are right-aligned
+// in one or two 8-byte slots in the parameter array. Struct members are
+// promoted to both floating point and integer registers when possible. A
+// struct containing two floats would thus be passed in %f0 and %f1, while two
+// float function arguments would occupy 8 bytes each, and be passed in %f1 and
+// %f3.
+//
+// When a struct { int, float } is passed by value, the int goes in the high
+// bits of an integer register while the float goes in a floating point
+// register.
+//
+// The difference is encoded in LLVM IR using the inreg atttribute on function
+// arguments:
+//
+//   C:   void f(float, float);
+//   IR:  declare void f(float %f1, float %f3)
+//
+//   C:   void f(struct { float f0, f1; });
+//   IR:  declare void f(float inreg %f0, float inreg %f1)
+//
+//   C:   void f(int, float);
+//   IR:  declare void f(int signext %i0, float %f3)
+//
+//   C:   void f(struct { int i0high; float f1; });
+//   IR:  declare void f(i32 inreg %i0high, float inreg %f1)
+//
+// Two ints in a struct are simply coerced to i64:
+//
+//   C:   void f(struct { int i0high, i0low; });
+//   IR:  declare void f(i64 %i0.coerced)
+//
+// The frontend and backend divide the task of producing ABI compliant code for
+// C functions. The C frontend will:
+//
+//  - Annotate integer arguments with zeroext or signext attributes.
+//
+//  - Split structs into one or two 64-bit sized chunks, or 32-bit chunks with
+//    inreg attributes.
+//
+//  - Pass structs larger than 16 bytes indirectly with an explicit pointer
+//    argument. The byval attribute is not used.
+//
+// The backend will:
+//
+//  - Assign all arguments to 64-bit aligned stack slots, 32-bits for inreg.
+//
+//  - Promote to integer or floating point registers depending on type.
+//
+// Function return values are passed exactly like function arguments, except a
+// struct up to 32 bytes in size can be returned in registers.
+
+// Function arguments AND return values.
+def CC_Sparc64 : CallingConv<[
+  // The frontend uses the inreg flag to indicate i32 and float arguments from
+  // structs. These arguments are not promoted to 64 bits, but they can still
+  // be assigned to integer and float registers.
+  CCIfInReg<CCIfType<[i32, f32], CCCustom<"CC_Sparc64_Half">>>,
+
+  // All integers are promoted to i64 by the caller.
+  CCIfType<[i32], CCPromoteToType<i64>>,
+
+  // Custom assignment is required because stack space is reserved for all
+  // arguments whether they are passed in registers or not.
+  CCCustom<"CC_Sparc64_Full">
+]>;
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index a0dae6e9480c..7874240f5983 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -37,18 +37,27 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
   // Get the number of bytes to allocate from the FrameInfo
   int NumBytes = (int) MFI->getStackSize();
 
-  // Emit the correct save instruction based on the number of bytes in
-  // the frame. Minimum stack frame size according to V8 ABI is:
-  //   16 words for register window spill
-  //    1 word for address of returned aggregate-value
-  // +  6 words for passing parameters on the stack
-  // ----------
-  //   23 words * 4 bytes per word = 92 bytes
-  NumBytes += 92;
+  if (SubTarget.is64Bit()) {
+    // All 64-bit stack frames must be 16-byte aligned, and must reserve space
+    // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128.
+    NumBytes += 128;
+    // Frames with calls must also reserve space for 6 outgoing arguments
+    // whether they are used or not. LowerCall_64 takes care of that.
+    assert(NumBytes % 16 == 0 && "Stack size not 16-byte aligned");
+  } else {
+    // Emit the correct save instruction based on the number of bytes in
+    // the frame. Minimum stack frame size according to V8 ABI is:
+    //   16 words for register window spill
+    //    1 word for address of returned aggregate-value
+    // +  6 words for passing parameters on the stack
+    // ----------
+    //   23 words * 4 bytes per word = 92 bytes
+    NumBytes += 92;
 
-  // Round up to next doubleword boundary -- a double-word boundary
-  // is required by the ABI.
-  NumBytes = (NumBytes + 7) & ~7;
+    // Round up to next doubleword boundary -- a double-word boundary
+    // is required by the ABI.
+    NumBytes = RoundUpToAlignment(NumBytes, 8);
+  }
   NumBytes = -NumBytes;
 
   if (NumBytes >= -4096) {
@@ -70,15 +79,18 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
 void SparcFrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  MachineInstr &MI = *I;
-  DebugLoc dl = MI.getDebugLoc();
-  int Size = MI.getOperand(0).getImm();
-  if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
-    Size = -Size;
-  const SparcInstrInfo &TII =
-    *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
-  if (Size)
-    BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+  if (!hasReservedCallFrame(MF)) {
+    MachineInstr &MI = *I;
+    DebugLoc DL = MI.getDebugLoc();
+    int Size = MI.getOperand(0).getImm();
+    if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+      Size = -Size;
+    const SparcInstrInfo &TII =
+      *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+    if (Size)
+      BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6)
+        .addImm(Size);
+  }
   MBB.erase(I);
 }
 
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 464233e7da35..c37566201613 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -22,10 +22,12 @@ namespace llvm {
   class SparcSubtarget;
 
 class SparcFrameLowering : public TargetFrameLowering {
+  const SparcSubtarget &SubTarget;
 public:
-  explicit SparcFrameLowering(const SparcSubtarget &/*sti*/)
-    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {
-  }
+  explicit SparcFrameLowering(const SparcSubtarget &ST)
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+                          ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8),
+      SubTarget(ST) {}
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
   /// the function.
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 5fa545d30160..a709685cd08c 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -73,7 +73,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
 bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
                                      SDValue &Base, SDValue &Offset) {
   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
-    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI.getPointerTy());
     Offset = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
   }
@@ -87,7 +87,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
         if (FrameIndexSDNode *FIN =
                 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
           // Constant offset from frame ref.
-          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
+                                             TLI.getPointerTy());
         } else {
           Base = Addr.getOperand(0);
         }
@@ -130,7 +131,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
   }
 
   R1 = Addr;
-  R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+  R2 = CurDAG->getRegister(SP::G0, TLI.getPointerTy());
   return true;
 }
 
@@ -146,6 +147,9 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
 
   case ISD::SDIV:
   case ISD::UDIV: {
+    // sdivx / udivx handle 64-bit divides.
+    if (N->getValueType(0) == MVT::i64)
+      break;
     // FIXME: should use a custom expander to expose the SRA to the dag.
     SDValue DivLHS = N->getOperand(0);
     SDValue DivRHS = N->getOperand(1);
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 28ac02a613ee..49d68c704c1f 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "SparcISelLowering.h"
 #include "SparcMachineFunctionInfo.h"
 #include "SparcTargetMachine.h"
+#include "MCTargetDesc/SparcBaseInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -74,25 +75,117 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
   return true;
 }
 
+// Allocate a full-sized argument for the 64-bit ABI.
+static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
+                            MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                            ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  assert((LocVT == MVT::f32 || LocVT.getSizeInBits() == 64) &&
+         "Can't handle non-64 bits locations");
+
+  // Stack space is allocated for all arguments starting from [%fp+BIAS+128].
+  unsigned Offset = State.AllocateStack(8, 8);
+  unsigned Reg = 0;
+
+  if (LocVT == MVT::i64 && Offset < 6*8)
+    // Promote integers to %i0-%i5.
+    Reg = SP::I0 + Offset/8;
+  else if (LocVT == MVT::f64 && Offset < 16*8)
+    // Promote doubles to %d0-%d30. (Which LLVM calls D0-D15).
+    Reg = SP::D0 + Offset/8;
+  else if (LocVT == MVT::f32 && Offset < 16*8)
+    // Promote floats to %f1, %f3, ...
+    Reg = SP::F1 + Offset/4;
+
+  // Promote to register when possible, otherwise use the stack slot.
+  if (Reg) {
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+    return true;
+  }
+
+  // This argument goes on the stack in an 8-byte slot.
+  // When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to
+  // the right-aligned float. The first 4 bytes of the stack slot are undefined.
+  if (LocVT == MVT::f32)
+    Offset += 4;
+
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return true;
+}
+
+// Allocate a half-sized argument for the 64-bit ABI.
+//
+// This is used when passing { float, int } structs by value in registers.
+static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT,
+                            MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                            ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations");
+  unsigned Offset = State.AllocateStack(4, 4);
+
+  if (LocVT == MVT::f32 && Offset < 16*8) {
+    // Promote floats to %f0-%f31.
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, SP::F0 + Offset/4,
+                                     LocVT, LocInfo));
+    return true;
+  }
+
+  if (LocVT == MVT::i32 && Offset < 6*8) {
+    // Promote integers to %i0-%i5, using half the register.
+    unsigned Reg = SP::I0 + Offset/8;
+    LocVT = MVT::i64;
+    LocInfo = CCValAssign::AExt;
+
+    // Set the Custom bit if this i32 goes in the high bits of a register.
+    if (Offset % 8 == 0)
+      State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg,
+                                             LocVT, LocInfo));
+    else
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+    return true;
+  }
+
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return true;
+}
+
 #include "SparcGenCallingConv.inc"
 
+// The calling conventions in SparcCallingConv.td are described in terms of the
+// callee's register window. This function translates registers to the
+// corresponding caller window %o register.
+static unsigned toCallerWindow(unsigned Reg) {
+  assert(SP::I0 + 7 == SP::I7 && SP::O0 + 7 == SP::O7 && "Unexpected enum");
+  if (Reg >= SP::I0 && Reg <= SP::I7)
+    return Reg - SP::I0 + SP::O0;
+  return Reg;
+}
+
 SDValue
 SparcTargetLowering::LowerReturn(SDValue Chain,
-                                 CallingConv::ID CallConv, bool isVarArg,
+                                 CallingConv::ID CallConv, bool IsVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  const SmallVectorImpl<SDValue> &OutVals,
-                                 DebugLoc dl, SelectionDAG &DAG) const {
+                                 DebugLoc DL, SelectionDAG &DAG) const {
+  if (Subtarget->is64Bit())
+    return LowerReturn_64(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
+  return LowerReturn_32(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
+}
 
+SDValue
+SparcTargetLowering::LowerReturn_32(SDValue Chain,
+                                    CallingConv::ID CallConv, bool IsVarArg,
+                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
+                                    DebugLoc DL, SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
 
   // CCValAssign - represent the assignment of the return value to locations.
   SmallVector<CCValAssign, 16> RVLocs;
 
   // CCState - Info about the registers and stack slot.
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
                  DAG.getTarget(), RVLocs, *DAG.getContext());
 
-  // Analize return values.
+  // Analyze return values.
   CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
 
   SDValue Flag;
@@ -105,7 +198,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
 
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(),
                              OutVals[i], Flag);
 
     // Guarantee that all emitted copies are stuck together with flags.
@@ -120,8 +213,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
     unsigned Reg = SFI->getSRetReturnReg();
     if (!Reg)
       llvm_unreachable("sret virtual register not created in the entry block");
-    SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
-    Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
+    SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
+    Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag);
     Flag = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy()));
     RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
@@ -134,22 +227,114 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
   if (Flag.getNode())
     RetOps.push_back(Flag);
 
-  return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other,
+  return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other,
                      &RetOps[0], RetOps.size());
 }
 
-/// LowerFormalArguments - V8 uses a very simple ABI, where all values are
-/// passed in either one or two GPRs, including FP values.  TODO: we should
-/// pass FP values in FP registers for fastcc functions.
+// Lower return values for the 64-bit ABI.
+// Return values are passed the exactly the same way as function arguments.
 SDValue
-SparcTargetLowering::LowerFormalArguments(SDValue Chain,
-                                          CallingConv::ID CallConv, bool isVarArg,
-                                          const SmallVectorImpl<ISD::InputArg>
-                                            &Ins,
-                                          DebugLoc dl, SelectionDAG &DAG,
-                                          SmallVectorImpl<SDValue> &InVals)
-                                            const {
+SparcTargetLowering::LowerReturn_64(SDValue Chain,
+                                    CallingConv::ID CallConv, bool IsVarArg,
+                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
+                                    DebugLoc DL, SelectionDAG &DAG) const {
+  // CCValAssign - represent the assignment of the return value to locations.
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slot.
+  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+                 DAG.getTarget(), RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+  CCInfo.AnalyzeReturn(Outs, CC_Sparc64);
+
+  SDValue Flag;
+  SmallVector<SDValue, 4> RetOps(1, Chain);
+
+  // The second operand on the return instruction is the return address offset.
+  // The return address is always %i7+8 with the 64-bit ABI.
+  RetOps.push_back(DAG.getConstant(8, MVT::i32));
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    SDValue OutVal = OutVals[i];
+
+    // Integer return values must be sign or zero extended by the callee.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::SExt:
+      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    case CCValAssign::ZExt:
+      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    case CCValAssign::AExt:
+      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
+    default:
+      break;
+    }
+
+    // The custom bit on an i32 return value indicates that it should be passed
+    // in the high bits of the register.
+    if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
+      OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal,
+                           DAG.getConstant(32, MVT::i32));
+
+      // The next value may go in the low bits of the same register.
+      // Handle both at once.
+      if (i+1 < RVLocs.size() && RVLocs[i+1].getLocReg() == VA.getLocReg()) {
+        SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, OutVals[i+1]);
+        OutVal = DAG.getNode(ISD::OR, DL, MVT::i64, OutVal, NV);
+        // Skip the next value, it's already done.
+        ++i;
+      }
+    }
+
+    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
 
+    // Guarantee that all emitted copies are stuck together with flags.
+    Flag = Chain.getValue(1);
+    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+  }
+
+  RetOps[0] = Chain;  // Update chain.
+
+  // Add the flag if we have it.
+  if (Flag.getNode())
+    RetOps.push_back(Flag);
+
+  return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other,
+                     &RetOps[0], RetOps.size());
+}
+
+SDValue SparcTargetLowering::
+LowerFormalArguments(SDValue Chain,
+                     CallingConv::ID CallConv,
+                     bool IsVarArg,
+                     const SmallVectorImpl<ISD::InputArg> &Ins,
+                     DebugLoc DL,
+                     SelectionDAG &DAG,
+                     SmallVectorImpl<SDValue> &InVals) const {
+  if (Subtarget->is64Bit())
+    return LowerFormalArguments_64(Chain, CallConv, IsVarArg, Ins,
+                                   DL, DAG, InVals);
+  return LowerFormalArguments_32(Chain, CallConv, IsVarArg, Ins,
+                                 DL, DAG, InVals);
+}
+
+/// LowerFormalArguments32 - V8 uses a very simple ABI, where all values are
+/// passed in either one or two GPRs, including FP values.  TODO: we should
+/// pass FP values in FP registers for fastcc functions.
+SDValue SparcTargetLowering::
+LowerFormalArguments_32(SDValue Chain,
+                        CallingConv::ID CallConv,
+                        bool isVarArg,
+                        const SmallVectorImpl<ISD::InputArg> &Ins,
+                        DebugLoc dl,
+                        SelectionDAG &DAG,
+                        SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MachineRegisterInfo &RegInfo = MF.getRegInfo();
   SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
@@ -341,9 +526,94 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
   return Chain;
 }
 
+// Lower formal arguments for the 64 bit ABI.
+SDValue SparcTargetLowering::
+LowerFormalArguments_64(SDValue Chain,
+                        CallingConv::ID CallConv,
+                        bool IsVarArg,
+                        const SmallVectorImpl<ISD::InputArg> &Ins,
+                        DebugLoc DL,
+                        SelectionDAG &DAG,
+                        SmallVectorImpl<SDValue> &InVals) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Analyze arguments according to CC_Sparc64.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64);
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    if (VA.isRegLoc()) {
+      // This argument is passed in a register.
+      // All integer register arguments are promoted by the caller to i64.
+
+      // Create a virtual register for the promoted live-in value.
+      unsigned VReg = MF.addLiveIn(VA.getLocReg(),
+                                   getRegClassFor(VA.getLocVT()));
+      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
+
+      // Get the high bits for i32 struct elements.
+      if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+        Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
+                          DAG.getConstant(32, MVT::i32));
+
+      // The caller promoted the argument, so insert an Assert?ext SDNode so we
+      // won't promote the value again in this function.
+      switch (VA.getLocInfo()) {
+      case CCValAssign::SExt:
+        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
+                          DAG.getValueType(VA.getValVT()));
+        break;
+      case CCValAssign::ZExt:
+        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
+                          DAG.getValueType(VA.getValVT()));
+        break;
+      default:
+        break;
+      }
+
+      // Truncate the register down to the argument type.
+      if (VA.isExtInLoc())
+        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
+
+      InVals.push_back(Arg);
+      continue;
+    }
+
+    // The registers are exhausted. This argument was passed on the stack.
+    assert(VA.isMemLoc());
+    // The CC_Sparc64_Full/Half functions compute stack offsets relative to the
+    // beginning of the arguments area at %fp+BIAS+128.
+    unsigned Offset = VA.getLocMemOffset() + 128;
+    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
+    // Adjust offset for extended arguments, SPARC is big-endian.
+    // The caller will have written the full slot with extended bytes, but we
+    // prefer our own extending loads.
+    if (VA.isExtInLoc())
+      Offset += 8 - ValSize;
+    int FI = MF.getFrameInfo()->CreateFixedObject(ValSize, Offset, true);
+    InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain,
+                                 DAG.getFrameIndex(FI, getPointerTy()),
+                                 MachinePointerInfo::getFixedStack(FI),
+                                 false, false, false, 0));
+  }
+  return Chain;
+}
+
 SDValue
 SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                SmallVectorImpl<SDValue> &InVals) const {
+  if (Subtarget->is64Bit())
+    return LowerCall_64(CLI, InVals);
+  return LowerCall_32(CLI, InVals);
+}
+
+// Lower a call for the 32-bit ABI.
+SDValue
+SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
+                                  SmallVectorImpl<SDValue> &InVals) const {
   SelectionDAG &DAG                     = CLI.DAG;
   DebugLoc &dl                          = CLI.DL;
   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
@@ -546,11 +816,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    unsigned Reg = RegsToPass[i].first;
-    // Remap I0->I7 -> O0->O7.
-    if (Reg >= SP::I0 && Reg <= SP::I7)
-      Reg = Reg-SP::I0+SP::O0;
-
+    unsigned Reg = toCallerWindow(RegsToPass[i].first);
     Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
     InFlag = Chain.getValue(1);
   }
@@ -572,13 +838,9 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   Ops.push_back(Callee);
   if (hasStructRetAttr)
     Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
-  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    unsigned Reg = RegsToPass[i].first;
-    if (Reg >= SP::I0 && Reg <= SP::I7)
-      Reg = Reg-SP::I0+SP::O0;
-
-    Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
-  }
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first),
+                                  RegsToPass[i].second.getValueType()));
   if (InFlag.getNode())
     Ops.push_back(InFlag);
 
@@ -598,13 +860,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
-    unsigned Reg = RVLocs[i].getLocReg();
-
-    // Remap I0->I7 -> O0->O7.
-    if (Reg >= SP::I0 && Reg <= SP::I7)
-      Reg = Reg-SP::I0+SP::O0;
-
-    Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+    Chain = DAG.getCopyFromReg(Chain, dl, toCallerWindow(RVLocs[i].getLocReg()),
                                RVLocs[i].getValVT(), InFlag).getValue(1);
     InFlag = Chain.getValue(2);
     InVals.push_back(Chain.getValue(0));
@@ -637,6 +893,212 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
   return getDataLayout()->getTypeAllocSize(ElementTy);
 }
 
+// Lower a call for the 64-bit ABI.
+SDValue
+SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
+                                  SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  DebugLoc DL = CLI.DL;
+  SDValue Chain = CLI.Chain;
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+                 DAG.getTarget(), ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
+
+  // Get the size of the outgoing arguments stack space requirement.
+  // The stack offset computed by CC_Sparc64 includes all arguments.
+  // Called functions expect 6 argument words to exist in the stack frame, used
+  // or not.
+  unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset());
+
+  // Keep stack frames 16-byte aligned.
+  ArgsSize = RoundUpToAlignment(ArgsSize, 16);
+
+  // Adjust the stack pointer to make room for the arguments.
+  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
+  // with more than 6 arguments.
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+
+  // Collect the set of registers to pass to the function and their values.
+  // This will be emitted as a sequence of CopyToReg nodes glued to the call
+  // instruction.
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+  // Collect chains from all the memory opeations that copy arguments to the
+  // stack. They must follow the stack pointer adjustment above and precede the
+  // call instruction itself.
+  SmallVector<SDValue, 8> MemOpChains;
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    const CCValAssign &VA = ArgLocs[i];
+    SDValue Arg = CLI.OutVals[i];
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown location info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
+      break;
+    }
+
+    if (VA.isRegLoc()) {
+      // The custom bit on an i32 return value indicates that it should be
+      // passed in the high bits of the register.
+      if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
+        Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg,
+                          DAG.getConstant(32, MVT::i32));
+
+        // The next value may go in the low bits of the same register.
+        // Handle both at once.
+        if (i+1 < ArgLocs.size() && ArgLocs[i+1].isRegLoc() &&
+            ArgLocs[i+1].getLocReg() == VA.getLocReg()) {
+          SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
+                                   CLI.OutVals[i+1]);
+          Arg = DAG.getNode(ISD::OR, DL, MVT::i64, Arg, NV);
+          // Skip the next value, it's already done.
+          ++i;
+        }
+      }
+      RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg));
+      continue;
+    }
+
+    assert(VA.isMemLoc());
+
+    // Create a store off the stack pointer for this argument.
+    SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy());
+    // The argument area starts at %fp+BIAS+128 in the callee frame,
+    // %sp+BIAS+128 in ours.
+    SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() +
+                                           Subtarget->getStackPointerBias() +
+                                           128);
+    PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+    MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff,
+                                       MachinePointerInfo(),
+                                       false, false, 0));
+  }
+
+  // Emit all stores, make sure they occur before the call.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of CopyToReg nodes glued together with token chain and
+  // glue operands which copy the outgoing args into registers. The InGlue is
+  // necessary since all emitted instructions must be stuck together in order
+  // to pass the live physical registers.
+  SDValue InGlue;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, DL,
+                             RegsToPass[i].first, RegsToPass[i].second, InGlue);
+    InGlue = Chain.getValue(1);
+  }
+
+  // If the callee is a GlobalAddress node (quite common, every direct call is)
+  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+  // Likewise ExternalSymbol -> TargetExternalSymbol.
+  SDValue Callee = CLI.Callee;
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy());
+  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
+
+  // Build the operands for the call instruction itself.
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  // Make sure the CopyToReg nodes are glued to the call instruction which
+  // consumes the registers.
+  if (InGlue.getNode())
+    Ops.push_back(InGlue);
+
+  // Now the call itself.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
+  InGlue = Chain.getValue(1);
+
+  // Revert the stack pointer immediately after the call.
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+                             DAG.getIntPtrConstant(0, true), InGlue);
+  InGlue = Chain.getValue(1);
+
+  // Now extract the return values. This is more or less the same as
+  // LowerFormalArguments_64.
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+                 DAG.getTarget(), RVLocs, *DAG.getContext());
+  RVInfo.AnalyzeCallResult(CLI.Ins, CC_Sparc64);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    unsigned Reg = toCallerWindow(VA.getLocReg());
+
+    // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
+    // reside in the same register in the high and low bits. Reuse the
+    // CopyFromReg previous node to avoid duplicate copies.
+    SDValue RV;
+    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
+      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
+        RV = Chain.getValue(0);
+
+    // But usually we'll create a new CopyFromReg for a different register.
+    if (!RV.getNode()) {
+      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
+      Chain = RV.getValue(1);
+      InGlue = Chain.getValue(2);
+    }
+
+    // Get the high bits for i32 struct elements.
+    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+      RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
+                       DAG.getConstant(32, MVT::i32));
+
+    // The callee promoted the return value, so insert an Assert?ext SDNode so
+    // we won't promote the value again in this function.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::SExt:
+      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    case CCValAssign::ZExt:
+      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    default:
+      break;
+    }
+
+    // Truncate the register down to the return value type.
+    if (VA.isExtInLoc())
+      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
+
+    InVals.push_back(RV);
+  }
+
+  return Chain;
+}
+
 //===----------------------------------------------------------------------===//
 // TargetLowering Implementation
 //===----------------------------------------------------------------------===//
@@ -689,11 +1151,14 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
 
 SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
   : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+  Subtarget = &TM.getSubtarget<SparcSubtarget>();
 
   // Set up the register classes.
   addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
   addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
   addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
+  if (Subtarget->is64Bit())
+    addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
 
   // Turn FP extload into load/fextend
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
@@ -703,9 +1168,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 
   // Custom legalize GlobalAddress nodes into LO/HI parts.
-  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
-  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
-  setOperationAction(ISD::ConstantPool , MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom);
+  setOperationAction(ISD::GlobalTLSAddress, getPointerTy(), Custom);
+  setOperationAction(ISD::ConstantPool, getPointerTy(), Custom);
 
   // Sparc doesn't have sext_inreg, replace them with shl/sra
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -749,6 +1214,11 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
 
+  if (Subtarget->is64Bit()) {
+    setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+  }
+
   // FIXME: There are instructions available for ATOMIC_FENCE
   // on SparcV8 and later.
   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
@@ -818,8 +1288,10 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case SPISD::CMPICC:     return "SPISD::CMPICC";
   case SPISD::CMPFCC:     return "SPISD::CMPFCC";
   case SPISD::BRICC:      return "SPISD::BRICC";
+  case SPISD::BRXCC:      return "SPISD::BRXCC";
   case SPISD::BRFCC:      return "SPISD::BRFCC";
   case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
+  case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC";
   case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
   case SPISD::Hi:         return "SPISD::Hi";
   case SPISD::Lo:         return "SPISD::Lo";
@@ -846,6 +1318,7 @@ void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
   switch (Op.getOpcode()) {
   default: break;
   case SPISD::SELECT_ICC:
+  case SPISD::SELECT_XCC:
   case SPISD::SELECT_FCC:
     DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
     DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
@@ -866,7 +1339,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
   if (isa<ConstantSDNode>(RHS) &&
       cast<ConstantSDNode>(RHS)->isNullValue() &&
       CC == ISD::SETNE &&
-      ((LHS.getOpcode() == SPISD::SELECT_ICC &&
+      (((LHS.getOpcode() == SPISD::SELECT_ICC ||
+         LHS.getOpcode() == SPISD::SELECT_XCC) &&
         LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
        (LHS.getOpcode() == SPISD::SELECT_FCC &&
         LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
@@ -881,46 +1355,89 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
   }
 }
 
+// Convert to a target node and set target flags.
+SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF,
+                                             SelectionDAG &DAG) const {
+  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
+    return DAG.getTargetGlobalAddress(GA->getGlobal(),
+                                      GA->getDebugLoc(),
+                                      GA->getValueType(0),
+                                      GA->getOffset(), TF);
+
+  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
+    return DAG.getTargetConstantPool(CP->getConstVal(),
+                                     CP->getValueType(0),
+                                     CP->getAlignment(),
+                                     CP->getOffset(), TF);
+
+  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
+    return DAG.getTargetExternalSymbol(ES->getSymbol(),
+                                       ES->getValueType(0), TF);
+
+  llvm_unreachable("Unhandled address SDNode");
+}
+
+// Split Op into high and low parts according to HiTF and LoTF.
+// Return an ADD node combining the parts.
+SDValue SparcTargetLowering::makeHiLoPair(SDValue Op,
+                                          unsigned HiTF, unsigned LoTF,
+                                          SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  SDValue Hi = DAG.getNode(SPISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
+  SDValue Lo = DAG.getNode(SPISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
+  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
+}
+
+// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
+// or ExternalSymbol SDNode.
+SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = getPointerTy();
+
+  // Handle PIC mode first.
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    // This is the pic32 code model, the GOT is known to be smaller than 4GB.
+    SDValue HiLo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+    SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT);
+    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo);
+    return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr,
+                       MachinePointerInfo::getGOT(), false, false, false, 0);
+  }
+
+  // This is one of the absolute code models.
+  switch(getTargetMachine().getCodeModel()) {
+  default:
+    llvm_unreachable("Unsupported absolute code model");
+  case CodeModel::Small:
+    // abs32.
+    return makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+  case CodeModel::Medium: {
+    // abs44.
+    SDValue H44 = makeHiLoPair(Op, SPII::MO_H44, SPII::MO_M44, DAG);
+    H44 = DAG.getNode(ISD::SHL, DL, VT, H44, DAG.getConstant(12, MVT::i32));
+    SDValue L44 = withTargetFlags(Op, SPII::MO_L44, DAG);
+    L44 = DAG.getNode(SPISD::Lo, DL, VT, L44);
+    return DAG.getNode(ISD::ADD, DL, VT, H44, L44);
+  }
+  case CodeModel::Large: {
+    // abs64.
+    SDValue Hi = makeHiLoPair(Op, SPII::MO_HH, SPII::MO_HM, DAG);
+    Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(32, MVT::i32));
+    SDValue Lo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+    return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
+  }
+  }
+}
+
 SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
-  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  // FIXME there isn't really any debug info here
-  DebugLoc dl = Op.getDebugLoc();
-  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
-  SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
-  SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
-
-  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
-    return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-
-  SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
-                                   getPointerTy());
-  SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-  SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
-                                GlobalBase, RelAddr);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     AbsAddr, MachinePointerInfo(), false, false, false, 0);
+  return makeAddress(Op, DAG);
 }
 
 SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
                                                SelectionDAG &DAG) const {
-  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
-  // FIXME there isn't really any debug info here
-  DebugLoc dl = Op.getDebugLoc();
-  const Constant *C = N->getConstVal();
-  SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
-  SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
-  SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
-  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
-    return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-
-  SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
-                                   getPointerTy());
-  SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-  SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
-                                GlobalBase, RelAddr);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     AbsAddr, MachinePointerInfo(), false, false, false, 0);
+  return makeAddress(Op, DAG);
 }
 
 static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
@@ -954,12 +1471,13 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
 
   // Get the condition flag.
   SDValue CompareFlag;
-  if (LHS.getValueType() == MVT::i32) {
-    EVT VTs[] = { MVT::i32, MVT::Glue };
+  if (LHS.getValueType().isInteger()) {
+    EVT VTs[] = { LHS.getValueType(), MVT::Glue };
     SDValue Ops[2] = { LHS, RHS };
     CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
     if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
-    Opc = SPISD::BRICC;
+    // 32-bit compares use the icc flags, 64-bit uses the xcc flags.
+    Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC;
   } else {
     CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
     if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
@@ -983,12 +1501,13 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
   LookThroughSetCC(LHS, RHS, CC, SPCC);
 
   SDValue CompareFlag;
-  if (LHS.getValueType() == MVT::i32) {
+  if (LHS.getValueType().isInteger()) {
     // subcc returns a value
     EVT VTs[] = { LHS.getValueType(), MVT::Glue };
     SDValue Ops[2] = { LHS, RHS };
     CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
-    Opc = SPISD::SELECT_ICC;
+    Opc = LHS.getValueType() == MVT::i32 ?
+          SPISD::SELECT_ICC : SPISD::SELECT_XCC;
     if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
   } else {
     CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 09148ea54027..fd706bebd415 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -19,14 +19,18 @@
 #include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
+  class SparcSubtarget;
+
   namespace SPISD {
     enum {
       FIRST_NUMBER = ISD::BUILTIN_OP_END,
-      CMPICC,      // Compare two GPR operands, set icc.
+      CMPICC,      // Compare two GPR operands, set icc+xcc.
       CMPFCC,      // Compare two FP operands, set fcc.
       BRICC,       // Branch to dest on icc condition
+      BRXCC,       // Branch to dest on xcc condition (64-bit only).
       BRFCC,       // Branch to dest on fcc condition
       SELECT_ICC,  // Select between two values using the current ICC flags.
+      SELECT_XCC,  // Select between two values using the current XCC flags.
       SELECT_FCC,  // Select between two values using the current FCC flags.
 
       Hi, Lo,      // Hi/Lo operations, typically on a global address.
@@ -42,6 +46,7 @@ namespace llvm {
   }
 
   class SparcTargetLowering : public TargetLowering {
+    const SparcSubtarget *Subtarget;
   public:
     SparcTargetLowering(TargetMachine &TM);
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -66,6 +71,7 @@ namespace llvm {
     getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
 
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+    virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
 
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
@@ -74,10 +80,26 @@ namespace llvm {
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            DebugLoc dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerFormalArguments_32(SDValue Chain,
+                                    CallingConv::ID CallConv,
+                                    bool isVarArg,
+                                    const SmallVectorImpl<ISD::InputArg> &Ins,
+                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerFormalArguments_64(SDValue Chain,
+                                    CallingConv::ID CallConv,
+                                    bool isVarArg,
+                                    const SmallVectorImpl<ISD::InputArg> &Ins,
+                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SmallVectorImpl<SDValue> &InVals) const;
 
     virtual SDValue
       LowerCall(TargetLowering::CallLoweringInfo &CLI,
                 SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
+                         SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
+                         SmallVectorImpl<SDValue> &InVals) const;
 
     virtual SDValue
       LowerReturn(SDValue Chain,
@@ -85,11 +107,25 @@ namespace llvm {
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                   const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl, SelectionDAG &DAG) const;
+    SDValue LowerReturn_32(SDValue Chain,
+                           CallingConv::ID CallConv, bool IsVarArg,
+                           const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           const SmallVectorImpl<SDValue> &OutVals,
+                           DebugLoc DL, SelectionDAG &DAG) const;
+    SDValue LowerReturn_64(SDValue Chain,
+                           CallingConv::ID CallConv, bool IsVarArg,
+                           const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           const SmallVectorImpl<SDValue> &OutVals,
+                           DebugLoc DL, SelectionDAG &DAG) const;
 
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
 
     unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
+    SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
+    SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
+                         SelectionDAG &DAG) const;
+    SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
   };
 } // end namespace llvm
 
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td
new file mode 100644
index 000000000000..91805f9f11b5
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -0,0 +1,333 @@
+//===-- SparcInstr64Bit.td - 64-bit instructions for Sparc Target ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction definitions and patterns needed for 64-bit
+// code generation on SPARC v9.
+//
+// Some SPARC v9 instructions are defined in SparcInstrInfo.td because they can
+// also be used in 32-bit code running on a SPARC v9 CPU.
+//
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+// The same integer registers are used for i32 and i64 values.
+// When registers hold i32 values, the high bits are don't care.
+// This give us free trunc and anyext.
+def : Pat<(i64 (anyext i32:$val)), (COPY_TO_REGCLASS $val, I64Regs)>;
+def : Pat<(i32 (trunc i64:$val)), (COPY_TO_REGCLASS $val, IntRegs)>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Shift Instructions.
+//===----------------------------------------------------------------------===//
+//
+// The 32-bit shift instructions are still available. The left shift srl
+// instructions shift all 64 bits, but it only accepts a 5-bit shift amount.
+//
+// The srl instructions only shift the low 32 bits and clear the high 32 bits.
+// Finally, sra shifts the low 32 bits and sign-extends to 64 bits.
+
+let Predicates = [Is64Bit] in {
+
+def : Pat<(i64 (zext i32:$val)), (SRLri $val, 0)>;
+def : Pat<(i64 (sext i32:$val)), (SRAri $val, 0)>;
+
+def : Pat<(i64 (and i64:$val, 0xffffffff)), (SRLri $val, 0)>;
+def : Pat<(i64 (sext_inreg i64:$val, i32)), (SRAri $val, 0)>;
+
+defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>;
+defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, I64Regs>;
+defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Immediates.
+//===----------------------------------------------------------------------===//
+//
+// All 32-bit immediates can be materialized with sethi+or, but 64-bit
+// immediates may require more code. There may be a point where it is
+// preferable to use a constant pool load instead, depending on the
+// microarchitecture.
+
+// The %g0 register is constant 0.
+// This is useful for stx %g0, [...], for example.
+def : Pat<(i64 0), (i64 G0)>, Requires<[Is64Bit]>;
+
+// Single-instruction patterns.
+
+// The ALU instructions want their simm13 operands as i32 immediates.
+def as_i32imm : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
+}]>;
+def : Pat<(i64 simm13:$val), (ORri (i64 G0), (as_i32imm $val))>;
+def : Pat<(i64 SETHIimm:$val), (SETHIi (HI22 $val))>;
+
+// Double-instruction patterns.
+
+// All unsigned i32 immediates can be handled by sethi+or.
+def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
+def : Pat<(i64 uimm32:$val), (ORri (SETHIi (HI22 $val)), (LO10 $val))>,
+      Requires<[Is64Bit]>;
+
+// All negative i33 immediates can be handled by sethi+xor.
+def nimm33 : PatLeaf<(imm), [{
+  int64_t Imm = N->getSExtValue();
+  return Imm < 0 && isInt<33>(Imm);
+}]>;
+// Bits 10-31 inverted. Same as assembler's %hix.
+def HIX22 : SDNodeXForm<imm, [{
+  uint64_t Val = (~N->getZExtValue() >> 10) & ((1u << 22) - 1);
+  return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+// Bits 0-9 with ones in bits 10-31. Same as assembler's %lox.
+def LOX10 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(~(~N->getZExtValue() & 0x3ff), MVT::i32);
+}]>;
+def : Pat<(i64 nimm33:$val), (XORri (SETHIi (HIX22 $val)), (LOX10 $val))>,
+      Requires<[Is64Bit]>;
+
+// More possible patterns:
+//
+//   (sllx sethi, n)
+//   (sllx simm13, n)
+//
+// 3 instrs:
+//
+//   (xor (sllx sethi), simm13)
+//   (sllx (xor sethi, simm13))
+//
+// 4 instrs:
+//
+//   (or sethi, (sllx sethi))
+//   (xnor sethi, (sllx sethi))
+//
+// 5 instrs:
+//
+//   (or (sllx sethi), (or sethi, simm13))
+//   (xnor (sllx sethi), (or sethi, simm13))
+//   (or (sllx sethi), (sllx sethi))
+//   (xnor (sllx sethi), (sllx sethi))
+//
+// Worst case is 6 instrs:
+//
+//   (or (sllx (or sethi, simmm13)), (or sethi, simm13))
+
+// Bits 42-63, same as assembler's %hh.
+def HH22 : SDNodeXForm<imm, [{
+  uint64_t Val = (N->getZExtValue() >> 42) & ((1u << 22) - 1);
+  return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+// Bits 32-41, same as assembler's %hm.
+def HM10 : SDNodeXForm<imm, [{
+  uint64_t Val = (N->getZExtValue() >> 32) & ((1u << 10) - 1);
+  return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+def : Pat<(i64 imm:$val),
+          (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i32 32)),
+                (ORri (SETHIi (HI22 $val)), (LO10 $val)))>,
+      Requires<[Is64Bit]>;
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Integer Arithmetic and Logic.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+
+// Register-register instructions.
+
+def : Pat<(and i64:$a, i64:$b), (ANDrr $a, $b)>;
+def : Pat<(or  i64:$a, i64:$b), (ORrr  $a, $b)>;
+def : Pat<(xor i64:$a, i64:$b), (XORrr $a, $b)>;
+
+def : Pat<(and i64:$a, (not i64:$b)), (ANDNrr $a, $b)>;
+def : Pat<(or  i64:$a, (not i64:$b)), (ORNrr  $a, $b)>;
+def : Pat<(xor i64:$a, (not i64:$b)), (XNORrr $a, $b)>;
+
+def : Pat<(add i64:$a, i64:$b), (ADDrr $a, $b)>;
+def : Pat<(sub i64:$a, i64:$b), (SUBrr $a, $b)>;
+
+// Add/sub with carry were renamed to addc/subc in SPARC v9.
+def : Pat<(adde i64:$a, i64:$b), (ADDXrr $a, $b)>;
+def : Pat<(sube i64:$a, i64:$b), (SUBXrr $a, $b)>;
+
+def : Pat<(addc i64:$a, i64:$b), (ADDCCrr $a, $b)>;
+def : Pat<(subc i64:$a, i64:$b), (SUBCCrr $a, $b)>;
+
+def : Pat<(SPcmpicc i64:$a, i64:$b), (SUBCCrr $a, $b)>;
+
+// Register-immediate instructions.
+
+def : Pat<(and i64:$a, (i64 simm13:$b)), (ANDri $a, (as_i32imm $b))>;
+def : Pat<(or  i64:$a, (i64 simm13:$b)), (ORri  $a, (as_i32imm $b))>;
+def : Pat<(xor i64:$a, (i64 simm13:$b)), (XORri $a, (as_i32imm $b))>;
+
+def : Pat<(add i64:$a, (i64 simm13:$b)), (ADDri $a, (as_i32imm $b))>;
+def : Pat<(sub i64:$a, (i64 simm13:$b)), (SUBri $a, (as_i32imm $b))>;
+
+def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (SUBCCri $a, (as_i32imm $b))>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Integer Multiply and Divide.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+
+def MULXrr : F3_1<2, 0b001001,
+                  (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+                  "mulx $rs1, $rs2, $rd",
+                  [(set i64:$rd, (mul i64:$rs1, i64:$rs2))]>;
+def MULXri : F3_2<2, 0b001001,
+                  (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+                  "mulx $rs1, $i, $rd",
+                  [(set i64:$rd, (mul i64:$rs1, (i64 simm13:$i)))]>;
+
+// Division can trap.
+let hasSideEffects = 1 in {
+def SDIVXrr : F3_1<2, 0b101101,
+                   (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+                   "sdivx $rs1, $rs2, $rd",
+                   [(set i64:$rd, (sdiv i64:$rs1, i64:$rs2))]>;
+def SDIVXri : F3_2<2, 0b101101,
+                   (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+                   "sdivx $rs1, $i, $rd",
+                   [(set i64:$rd, (sdiv i64:$rs1, (i64 simm13:$i)))]>;
+
+def UDIVXrr : F3_1<2, 0b001101,
+                   (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+                   "udivx $rs1, $rs2, $rd",
+                   [(set i64:$rd, (udiv i64:$rs1, i64:$rs2))]>;
+def UDIVXri : F3_2<2, 0b001101,
+                   (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+                   "udivx $rs1, $i, $rd",
+                   [(set i64:$rd, (udiv i64:$rs1, (i64 simm13:$i)))]>;
+} // hasSideEffects = 1
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Loads and Stores.
+//===----------------------------------------------------------------------===//
+//
+// All the 32-bit loads and stores are available. The extending loads are sign
+// or zero-extending to 64 bits. The LDrr and LDri instructions load 32 bits
+// zero-extended to i64. Their mnemonic is lduw in SPARC v9 (Load Unsigned
+// Word).
+//
+// SPARC v9 adds 64-bit loads as well as a sign-extending ldsw i32 loads.
+
+let Predicates = [Is64Bit] in {
+
+// 64-bit loads.
+def LDXrr  : F3_1<3, 0b001011,
+                  (outs I64Regs:$dst), (ins MEMrr:$addr),
+                  "ldx [$addr], $dst",
+                  [(set i64:$dst, (load ADDRrr:$addr))]>;
+def LDXri  : F3_2<3, 0b001011,
+                  (outs I64Regs:$dst), (ins MEMri:$addr),
+                  "ldx [$addr], $dst",
+                  [(set i64:$dst, (load ADDRri:$addr))]>;
+
+// Extending loads to i64.
+def : Pat<(i64 (zextloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi8 ADDRrr:$addr)),  (LDUBrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi8 ADDRri:$addr)),  (LDUBri ADDRri:$addr)>;
+def : Pat<(i64 (sextloadi8 ADDRrr:$addr)), (LDSBrr ADDRrr:$addr)>;
+def : Pat<(i64 (sextloadi8 ADDRri:$addr)), (LDSBri ADDRri:$addr)>;
+
+def : Pat<(i64 (zextloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi16 ADDRrr:$addr)),  (LDUHrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi16 ADDRri:$addr)),  (LDUHri ADDRri:$addr)>;
+def : Pat<(i64 (sextloadi16 ADDRrr:$addr)), (LDSHrr ADDRrr:$addr)>;
+def : Pat<(i64 (sextloadi16 ADDRri:$addr)), (LDSHri ADDRri:$addr)>;
+
+def : Pat<(i64 (zextloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi32 ADDRrr:$addr)),  (LDrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi32 ADDRri:$addr)),  (LDri ADDRri:$addr)>;
+
+// Sign-extending load of i32 into i64 is a new SPARC v9 instruction.
+def LDSWrr : F3_1<3, 0b001011,
+                 (outs I64Regs:$dst), (ins MEMrr:$addr),
+                 "ldsw [$addr], $dst",
+                 [(set i64:$dst, (sextloadi32 ADDRrr:$addr))]>;
+def LDSWri : F3_2<3, 0b001011,
+                 (outs I64Regs:$dst), (ins MEMri:$addr),
+                 "ldsw [$addr], $dst",
+                 [(set i64:$dst, (sextloadi32 ADDRri:$addr))]>;
+
+// 64-bit stores.
+def STXrr  : F3_1<3, 0b001110,
+                 (outs), (ins MEMrr:$addr, I64Regs:$src),
+                 "stx $src, [$addr]",
+                 [(store i64:$src, ADDRrr:$addr)]>;
+def STXri  : F3_2<3, 0b001110,
+                 (outs), (ins MEMri:$addr, I64Regs:$src),
+                 "stx $src, [$addr]",
+                 [(store i64:$src, ADDRri:$addr)]>;
+
+// Truncating stores from i64 are identical to the i32 stores.
+def : Pat<(truncstorei8  i64:$src, ADDRrr:$addr), (STBrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei8  i64:$src, ADDRri:$addr), (STBri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRrr:$addr), (STHrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRri:$addr), (STHri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRrr:$addr), (STrr  ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRri:$addr), (STri  ADDRri:$addr, $src)>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Conditionals.
+//===----------------------------------------------------------------------===//
+//
+// Flag-setting instructions like subcc and addcc set both icc and xcc flags.
+// The icc flags correspond to the 32-bit result, and the xcc are for the
+// full 64-bit result.
+//
+// We reuse CMPICC SDNodes for compares, but use new BRXCC branch nodes for
+// 64-bit compares. See LowerBR_CC.
+
+let Predicates = [Is64Bit] in {
+
+let Uses = [ICC] in
+def BPXCC : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+                     "bp$cc %xcc, $dst",
+                     [(SPbrxcc bb:$dst, imm:$cc)]>;
+
+// Conditional moves on %xcc.
+let Uses = [ICC], Constraints = "$f = $rd" in {
+def MOVXCCrr : Pseudo<(outs IntRegs:$rd),
+                      (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond),
+                      "mov$cond %xcc, $rs2, $rd",
+                      [(set i32:$rd,
+                       (SPselectxcc i32:$rs2, i32:$f, imm:$cond))]>;
+def MOVXCCri : Pseudo<(outs IntRegs:$rd),
+                      (ins i32imm:$i, IntRegs:$f, CCOp:$cond),
+                      "mov$cond %xcc, $i, $rd",
+                      [(set i32:$rd,
+                       (SPselecticc simm11:$i, i32:$f, imm:$cond))]>;
+} // Uses, Constraints
+
+def : Pat<(SPselectxcc i64:$t, i64:$f, imm:$cond),
+          (MOVXCCrr $t, $f, imm:$cond)>;
+def : Pat<(SPselectxcc (i64 simm11:$t), i64:$f, imm:$cond),
+          (MOVXCCri (as_i32imm $t), $f, imm:$cond)>;
+
+} // Predicates = [Is64Bit]
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
index dce331228b8f..e7fde085beea 100644
--- a/lib/Target/Sparc/SparcInstrFormats.td
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -111,4 +111,41 @@ class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
   let Inst{4-0}  = rs2;
 }
 
+// Shift by register rs2.
+class F3_Sr<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
+            string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+  bit x = xVal;           // 1 for 64-bit shifts.
+  bits<5> rs2;
+
+  let op         = opVal;
+  let op3        = op3val;
+
+  let Inst{13}   = 0;     // i field = 0
+  let Inst{12}   = x;     // extended registers.
+  let Inst{4-0}  = rs2;
+}
 
+// Shift by immediate.
+class F3_Si<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
+            string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+  bit x = xVal;           // 1 for 64-bit shifts.
+  bits<6> shcnt;          // shcnt32 / shcnt64.
+
+  let op         = opVal;
+  let op3        = op3val;
+
+  let Inst{13}   = 1;     // i field = 1
+  let Inst{12}   = x;     // extended registers.
+  let Inst{5-0}  = shcnt;
+}
+
+// Define rr and ri shift instructions with patterns.
+multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode,
+                ValueType VT, RegisterClass RC> {
+  def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, IntRegs:$rs2),
+                 !strconcat(OpcStr, " $rs, $rs2, $rd"),
+                 [(set VT:$rd, (OpNode VT:$rs, i32:$rs2))]>;
+  def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, i32imm:$shcnt),
+                 !strconcat(OpcStr, " $rs, $shcnt, $rd"),
+                 [(set VT:$rd, (OpNode VT:$rs, (i32 imm:$shcnt)))]>;
+}
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 90b698d507e7..07e07f1db303 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -21,6 +21,12 @@ include "SparcInstrFormats.td"
 // Feature predicates.
 //===----------------------------------------------------------------------===//
 
+// True when generating 32-bit code.
+def Is32Bit : Predicate<"!Subtarget.is64Bit()">;
+
+// True when generating 64-bit code. This also implies HasV9.
+def Is64Bit : Predicate<"Subtarget.is64Bit()">;
+
 // HasV9 - This predicate is true when the target processor supports V9
 // instructions.  Note that the machine may be running in 32-bit mode.
 def HasV9   : Predicate<"Subtarget.isV9()">;
@@ -63,17 +69,17 @@ def SETHIimm : PatLeaf<(imm), [{
 }], HI22>;
 
 // Addressing modes.
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRrr : ComplexPattern<iPTR, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [frameindex], []>;
 
 // Address operands
-def MEMrr : Operand<i32> {
+def MEMrr : Operand<iPTR> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops IntRegs, IntRegs);
+  let MIOperandInfo = (ops ptr_rc, ptr_rc);
 }
-def MEMri : Operand<i32> {
+def MEMri : Operand<iPTR> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops IntRegs, i32imm);
+  let MIOperandInfo = (ops ptr_rc, i32imm);
 }
 
 // Branch targets have OtherVT type.
@@ -98,6 +104,7 @@ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
 def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>;
 def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
 def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrxcc : SDNode<"SPISD::BRXCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
 def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
 
 def SPhi    : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
@@ -107,6 +114,7 @@ def SPftoi  : SDNode<"SPISD::FTOI", SDTSPFTOI>;
 def SPitof  : SDNode<"SPISD::ITOF", SDTSPITOF>;
 
 def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>;
 def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
 
 //  These are target-independent nodes, but have target-specific formats.
@@ -182,11 +190,11 @@ multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
   def rr  : F3_1<2, Op3Val, 
                  (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                  !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+                 [(set i32:$dst, (OpNode i32:$b, i32:$c))]>;
   def ri  : F3_2<2, Op3Val,
                  (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
                  !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+                 [(set i32:$dst, (OpNode i32:$b, (i32 simm13:$c)))]>;
 }
 
 /// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
@@ -243,10 +251,10 @@ let Predicates = [HasNoV9] in {  // Only emit these in V8 mode.
                       "!FpMOVD $src, $dst", []>;
   def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
                       "!FpNEGD $src, $dst",
-                      [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+                      [(set f64:$dst, (fneg f64:$src))]>;
   def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
                       "!FpABSD $src, $dst",
-                      [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+                      [(set f64:$dst, (fabs f64:$src))]>;
 }
 
 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
@@ -257,19 +265,16 @@ let Uses = [ICC], usesCustomInserter = 1 in {
   def SELECT_CC_Int_ICC
    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
             "; SELECT_CC_Int_ICC PSEUDO!",
-            [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
-                                             imm:$Cond))]>;
+            [(set i32:$dst, (SPselecticc i32:$T, i32:$F, imm:$Cond))]>;
   def SELECT_CC_FP_ICC
    : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_FP_ICC PSEUDO!",
-            [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
-                                            imm:$Cond))]>;
+            [(set f32:$dst, (SPselecticc f32:$T, f32:$F, imm:$Cond))]>;
 
   def SELECT_CC_DFP_ICC
    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_DFP_ICC PSEUDO!",
-            [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
-                                             imm:$Cond))]>;
+            [(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>;
 }
 
 let usesCustomInserter = 1, Uses = [FCC] in {
@@ -277,19 +282,16 @@ let usesCustomInserter = 1, Uses = [FCC] in {
   def SELECT_CC_Int_FCC
    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
             "; SELECT_CC_Int_FCC PSEUDO!",
-            [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
-                                             imm:$Cond))]>;
+            [(set i32:$dst, (SPselectfcc i32:$T, i32:$F, imm:$Cond))]>;
 
   def SELECT_CC_FP_FCC
    : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_FP_FCC PSEUDO!",
-            [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
-                                            imm:$Cond))]>;
+            [(set f32:$dst, (SPselectfcc f32:$T, f32:$F, imm:$Cond))]>;
   def SELECT_CC_DFP_FCC
    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_DFP_FCC PSEUDO!",
-            [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
-                                             imm:$Cond))]>;
+            [(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>;
 }
 
 
@@ -309,111 +311,111 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
 def LDSBrr : F3_1<3, 0b001001,
                   (outs IntRegs:$dst), (ins MEMrr:$addr),
                   "ldsb [$addr], $dst",
-                  [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>;
+                  [(set i32:$dst, (sextloadi8 ADDRrr:$addr))]>;
 def LDSBri : F3_2<3, 0b001001,
                   (outs IntRegs:$dst), (ins MEMri:$addr),
                   "ldsb [$addr], $dst",
-                  [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>;
+                  [(set i32:$dst, (sextloadi8 ADDRri:$addr))]>;
 def LDSHrr : F3_1<3, 0b001010,
                   (outs IntRegs:$dst), (ins MEMrr:$addr),
                   "ldsh [$addr], $dst",
-                  [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>;
+                  [(set i32:$dst, (sextloadi16 ADDRrr:$addr))]>;
 def LDSHri : F3_2<3, 0b001010,
                   (outs IntRegs:$dst), (ins MEMri:$addr),
                   "ldsh [$addr], $dst",
-                  [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>;
+                  [(set i32:$dst, (sextloadi16 ADDRri:$addr))]>;
 def LDUBrr : F3_1<3, 0b000001,
                   (outs IntRegs:$dst), (ins MEMrr:$addr),
                   "ldub [$addr], $dst",
-                  [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>;
+                  [(set i32:$dst, (zextloadi8 ADDRrr:$addr))]>;
 def LDUBri : F3_2<3, 0b000001,
                   (outs IntRegs:$dst), (ins MEMri:$addr),
                   "ldub [$addr], $dst",
-                  [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>;
+                  [(set i32:$dst, (zextloadi8 ADDRri:$addr))]>;
 def LDUHrr : F3_1<3, 0b000010,
                   (outs IntRegs:$dst), (ins MEMrr:$addr),
                   "lduh [$addr], $dst",
-                  [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>;
+                  [(set i32:$dst, (zextloadi16 ADDRrr:$addr))]>;
 def LDUHri : F3_2<3, 0b000010,
                   (outs IntRegs:$dst), (ins MEMri:$addr),
                   "lduh [$addr], $dst",
-                  [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>;
+                  [(set i32:$dst, (zextloadi16 ADDRri:$addr))]>;
 def LDrr   : F3_1<3, 0b000000,
                   (outs IntRegs:$dst), (ins MEMrr:$addr),
                   "ld [$addr], $dst",
-                  [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+                  [(set i32:$dst, (load ADDRrr:$addr))]>;
 def LDri   : F3_2<3, 0b000000,
                   (outs IntRegs:$dst), (ins MEMri:$addr),
                   "ld [$addr], $dst",
-                  [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+                  [(set i32:$dst, (load ADDRri:$addr))]>;
 
 // Section B.2 - Load Floating-point Instructions, p. 92
 def LDFrr  : F3_1<3, 0b100000,
                   (outs FPRegs:$dst), (ins MEMrr:$addr),
                   "ld [$addr], $dst",
-                  [(set FPRegs:$dst, (load ADDRrr:$addr))]>;
+                  [(set f32:$dst, (load ADDRrr:$addr))]>;
 def LDFri  : F3_2<3, 0b100000,
                   (outs FPRegs:$dst), (ins MEMri:$addr),
                   "ld [$addr], $dst",
-                  [(set FPRegs:$dst, (load ADDRri:$addr))]>;
+                  [(set f32:$dst, (load ADDRri:$addr))]>;
 def LDDFrr : F3_1<3, 0b100011,
                   (outs DFPRegs:$dst), (ins MEMrr:$addr),
                   "ldd [$addr], $dst",
-                  [(set DFPRegs:$dst, (load ADDRrr:$addr))]>;
+                  [(set f64:$dst, (load ADDRrr:$addr))]>;
 def LDDFri : F3_2<3, 0b100011,
                   (outs DFPRegs:$dst), (ins MEMri:$addr),
                   "ldd [$addr], $dst",
-                  [(set DFPRegs:$dst, (load ADDRri:$addr))]>;
+                  [(set f64:$dst, (load ADDRri:$addr))]>;
 
 // Section B.4 - Store Integer Instructions, p. 95
 def STBrr : F3_1<3, 0b000101,
                  (outs), (ins MEMrr:$addr, IntRegs:$src),
                  "stb $src, [$addr]",
-                 [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>;
+                 [(truncstorei8 i32:$src, ADDRrr:$addr)]>;
 def STBri : F3_2<3, 0b000101,
                  (outs), (ins MEMri:$addr, IntRegs:$src),
                  "stb $src, [$addr]",
-                 [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>;
+                 [(truncstorei8 i32:$src, ADDRri:$addr)]>;
 def STHrr : F3_1<3, 0b000110,
                  (outs), (ins MEMrr:$addr, IntRegs:$src),
                  "sth $src, [$addr]",
-                 [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>;
+                 [(truncstorei16 i32:$src, ADDRrr:$addr)]>;
 def STHri : F3_2<3, 0b000110,
                  (outs), (ins MEMri:$addr, IntRegs:$src),
                  "sth $src, [$addr]",
-                 [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>;
+                 [(truncstorei16 i32:$src, ADDRri:$addr)]>;
 def STrr  : F3_1<3, 0b000100,
                  (outs), (ins MEMrr:$addr, IntRegs:$src),
                  "st $src, [$addr]",
-                 [(store IntRegs:$src, ADDRrr:$addr)]>;
+                 [(store i32:$src, ADDRrr:$addr)]>;
 def STri  : F3_2<3, 0b000100,
                  (outs), (ins MEMri:$addr, IntRegs:$src),
                  "st $src, [$addr]",
-                 [(store IntRegs:$src, ADDRri:$addr)]>;
+                 [(store i32:$src, ADDRri:$addr)]>;
 
 // Section B.5 - Store Floating-point Instructions, p. 97
 def STFrr   : F3_1<3, 0b100100,
                    (outs), (ins MEMrr:$addr, FPRegs:$src),
                    "st $src, [$addr]",
-                   [(store FPRegs:$src, ADDRrr:$addr)]>;
+                   [(store f32:$src, ADDRrr:$addr)]>;
 def STFri   : F3_2<3, 0b100100,
                    (outs), (ins MEMri:$addr, FPRegs:$src),
                    "st $src, [$addr]",
-                   [(store FPRegs:$src, ADDRri:$addr)]>;
+                   [(store f32:$src, ADDRri:$addr)]>;
 def STDFrr  : F3_1<3, 0b100111,
                    (outs), (ins MEMrr:$addr, DFPRegs:$src),
                    "std  $src, [$addr]",
-                   [(store DFPRegs:$src, ADDRrr:$addr)]>;
+                   [(store f64:$src, ADDRrr:$addr)]>;
 def STDFri  : F3_2<3, 0b100111,
                    (outs), (ins MEMri:$addr, DFPRegs:$src),
                    "std $src, [$addr]",
-                   [(store DFPRegs:$src, ADDRri:$addr)]>;
+                   [(store f64:$src, ADDRri:$addr)]>;
 
 // Section B.9 - SETHI Instruction, p. 104
 def SETHIi: F2_1<0b100,
                  (outs IntRegs:$dst), (ins i32imm:$src),
                  "sethi $src, $dst",
-                 [(set IntRegs:$dst, SETHIimm:$src)]>;
+                 [(set i32:$dst, SETHIimm:$src)]>;
 
 // Section B.10 - NOP Instruction, p. 105
 // (It's a special case of SETHI)
@@ -426,7 +428,7 @@ defm AND    : F3_12<"and", 0b000001, and>;
 def ANDNrr  : F3_1<2, 0b000101,
                    (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                    "andn $b, $c, $dst",
-                   [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>;
+                   [(set i32:$dst, (and i32:$b, (not i32:$c)))]>;
 def ANDNri  : F3_2<2, 0b000101,
                    (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
                    "andn $b, $c, $dst", []>;
@@ -436,7 +438,7 @@ defm OR     : F3_12<"or", 0b000010, or>;
 def ORNrr   : F3_1<2, 0b000110,
                    (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                    "orn $b, $c, $dst",
-                   [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>;
+                   [(set i32:$dst, (or i32:$b, (not i32:$c)))]>;
 def ORNri   : F3_2<2, 0b000110,
                    (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
                    "orn $b, $c, $dst", []>;
@@ -445,7 +447,7 @@ defm XOR    : F3_12<"xor", 0b000011, xor>;
 def XNORrr  : F3_1<2, 0b000111,
                    (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                    "xnor $b, $c, $dst",
-                   [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+                   [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>;
 def XNORri  : F3_2<2, 0b000111,
                    (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
                    "xnor $b, $c, $dst", []>;
@@ -462,7 +464,7 @@ defm ADD   : F3_12<"add", 0b000000, add>;
 def LEA_ADDri   : F3_2<2, 0b000000,
                    (outs IntRegs:$dst), (ins MEMri:$addr),
                    "add ${addr:arith}, $dst",
-                   [(set IntRegs:$dst, ADDRri:$addr)]>;
+                   [(set i32:$dst, ADDRri:$addr)]>;
 
 let Defs = [ICC] in                   
   defm ADDCC  : F3_12<"addcc", 0b010000, addc>;
@@ -603,11 +605,11 @@ def FDTOI : F3_3<2, 0b110100, 0b011010010,
 def FSTOD : F3_3<2, 0b110100, 0b011001001, 
                  (outs DFPRegs:$dst), (ins FPRegs:$src),
                  "fstod $src, $dst",
-                 [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+                 [(set f64:$dst, (fextend f32:$src))]>;
 def FDTOS : F3_3<2, 0b110100, 0b011000110,
                  (outs FPRegs:$dst), (ins DFPRegs:$src),
                  "fdtos $src, $dst",
-                 [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+                 [(set f32:$dst, (fround f64:$src))]>;
 
 // Floating-point Move Instructions, p. 144
 def FMOVS : F3_3<2, 0b110100, 0b000000001,
@@ -616,22 +618,22 @@ def FMOVS : F3_3<2, 0b110100, 0b000000001,
 def FNEGS : F3_3<2, 0b110100, 0b000000101, 
                  (outs FPRegs:$dst), (ins FPRegs:$src),
                  "fnegs $src, $dst",
-                 [(set FPRegs:$dst, (fneg FPRegs:$src))]>;
+                 [(set f32:$dst, (fneg f32:$src))]>;
 def FABSS : F3_3<2, 0b110100, 0b000001001, 
                  (outs FPRegs:$dst), (ins FPRegs:$src),
                  "fabss $src, $dst",
-                 [(set FPRegs:$dst, (fabs FPRegs:$src))]>;
+                 [(set f32:$dst, (fabs f32:$src))]>;
 
 
 // Floating-point Square Root Instructions, p.145
 def FSQRTS : F3_3<2, 0b110100, 0b000101001, 
                   (outs FPRegs:$dst), (ins FPRegs:$src),
                   "fsqrts $src, $dst",
-                  [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>;
+                  [(set f32:$dst, (fsqrt f32:$src))]>;
 def FSQRTD : F3_3<2, 0b110100, 0b000101010, 
                   (outs DFPRegs:$dst), (ins DFPRegs:$src),
                   "fsqrtd $src, $dst",
-                  [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>;
+                  [(set f64:$dst, (fsqrt f64:$src))]>;
 
 
 
@@ -639,42 +641,42 @@ def FSQRTD : F3_3<2, 0b110100, 0b000101010,
 def FADDS  : F3_3<2, 0b110100, 0b001000001,
                   (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
                   "fadds $src1, $src2, $dst",
-                  [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>;
+                  [(set f32:$dst, (fadd f32:$src1, f32:$src2))]>;
 def FADDD  : F3_3<2, 0b110100, 0b001000010,
                   (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
                   "faddd $src1, $src2, $dst",
-                  [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>;
+                  [(set f64:$dst, (fadd f64:$src1, f64:$src2))]>;
 def FSUBS  : F3_3<2, 0b110100, 0b001000101,
                   (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
                   "fsubs $src1, $src2, $dst",
-                  [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>;
+                  [(set f32:$dst, (fsub f32:$src1, f32:$src2))]>;
 def FSUBD  : F3_3<2, 0b110100, 0b001000110,
                   (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
                   "fsubd $src1, $src2, $dst",
-                  [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>;
+                  [(set f64:$dst, (fsub f64:$src1, f64:$src2))]>;
 
 // Floating-point Multiply and Divide Instructions, p. 147
 def FMULS  : F3_3<2, 0b110100, 0b001001001,
                   (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
                   "fmuls $src1, $src2, $dst",
-                  [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>;
+                  [(set f32:$dst, (fmul f32:$src1, f32:$src2))]>;
 def FMULD  : F3_3<2, 0b110100, 0b001001010,
                   (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
                   "fmuld $src1, $src2, $dst",
-                  [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>;
+                  [(set f64:$dst, (fmul f64:$src1, f64:$src2))]>;
 def FSMULD : F3_3<2, 0b110100, 0b001101001,
                   (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
                   "fsmuld $src1, $src2, $dst",
-                  [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1),
-                                            (fextend FPRegs:$src2)))]>;
+                  [(set f64:$dst, (fmul (fextend f32:$src1),
+                                        (fextend f32:$src2)))]>;
 def FDIVS  : F3_3<2, 0b110100, 0b001001101,
                  (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
                  "fdivs $src1, $src2, $dst",
-                 [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>;
+                 [(set f32:$dst, (fdiv f32:$src1, f32:$src2))]>;
 def FDIVD  : F3_3<2, 0b110100, 0b001001110,
                  (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
                  "fdivd $src1, $src2, $dst",
-                 [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>;
+                 [(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>;
 
 // Floating-point Compare Instructions, p. 148
 // Note: the 2nd template arg is different for these guys.
@@ -685,11 +687,11 @@ let Defs = [FCC] in {
   def FCMPS  : F3_3<2, 0b110101, 0b001010001,
                    (outs), (ins FPRegs:$src1, FPRegs:$src2),
                    "fcmps $src1, $src2\n\tnop",
-                   [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+                   [(SPcmpfcc f32:$src1, f32:$src2)]>;
   def FCMPD  : F3_3<2, 0b110101, 0b001010010,
                    (outs), (ins DFPRegs:$src1, DFPRegs:$src2),
                    "fcmpd $src1, $src2\n\tnop",
-                   [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+                   [(SPcmpfcc f64:$src1, f64:$src2)]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -704,52 +706,45 @@ let Predicates = [HasV9], Constraints = "$T = $dst" in {
     def MOVICCrr
       : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
                "mov$cc %icc, $F, $dst",
-               [(set IntRegs:$dst,
-                           (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+               [(set i32:$dst, (SPselecticc i32:$F, i32:$T, imm:$cc))]>;
     def MOVICCri
       : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
                "mov$cc %icc, $F, $dst",
-               [(set IntRegs:$dst,
-                            (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+               [(set i32:$dst, (SPselecticc simm11:$F, i32:$T, imm:$cc))]>;
   }
 
   let Uses = [FCC] in {
     def MOVFCCrr
       : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
                "mov$cc %fcc0, $F, $dst",
-               [(set IntRegs:$dst,
-                           (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+               [(set i32:$dst, (SPselectfcc i32:$F, i32:$T, imm:$cc))]>;
     def MOVFCCri
       : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
                "mov$cc %fcc0, $F, $dst",
-               [(set IntRegs:$dst,
-                            (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+               [(set i32:$dst, (SPselectfcc simm11:$F, i32:$T, imm:$cc))]>;
   }
 
   let Uses = [ICC] in {
     def FMOVS_ICC
       : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
                "fmovs$cc %icc, $F, $dst",
-               [(set FPRegs:$dst,
-                           (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+               [(set f32:$dst,
+                           (SPselecticc f32:$F, f32:$T, imm:$cc))]>;
     def FMOVD_ICC
       : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
                "fmovd$cc %icc, $F, $dst",
-               [(set DFPRegs:$dst,
-                           (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+               [(set f64:$dst, (SPselecticc f64:$F, f64:$T, imm:$cc))]>;
   }
 
   let Uses = [FCC] in {
     def FMOVS_FCC
       : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
                "fmovs$cc %fcc0, $F, $dst",
-               [(set FPRegs:$dst,
-                           (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+               [(set f32:$dst, (SPselectfcc f32:$F, f32:$T, imm:$cc))]>;
     def FMOVD_FCC
       : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
                "fmovd$cc %fcc0, $F, $dst",
-               [(set DFPRegs:$dst,
-                           (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+               [(set f64:$dst, (SPselectfcc f64:$F, f64:$T, imm:$cc))]>;
   }
 
 }
@@ -762,11 +757,11 @@ let Predicates = [HasV9] in {
   def FNEGD : F3_3<2, 0b110100, 0b000000110, 
                    (outs DFPRegs:$dst), (ins DFPRegs:$src),
                    "fnegd $src, $dst",
-                   [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+                   [(set f64:$dst, (fneg f64:$src))]>;
   def FABSD : F3_3<2, 0b110100, 0b000001010, 
                    (outs DFPRegs:$dst), (ins DFPRegs:$src),
                    "fabsd $src, $dst",
-                   [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+                   [(set f64:$dst, (fabs f64:$src))]>;
 }
 
 // POPCrr - This does a ctpop of a 64-bit register.  As such, we have to clear
@@ -774,8 +769,8 @@ let Predicates = [HasV9] in {
 def POPCrr : F3_1<2, 0b101110, 
                   (outs IntRegs:$dst), (ins IntRegs:$src),
                   "popc $src, $dst", []>, Requires<[HasV9]>;
-def : Pat<(ctpop IntRegs:$src),
-          (POPCrr (SLLri IntRegs:$src, 0))>;
+def : Pat<(ctpop i32:$src),
+          (POPCrr (SLLri $src, 0))>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
@@ -783,28 +778,26 @@ def : Pat<(ctpop IntRegs:$src),
 
 // Small immediates.
 def : Pat<(i32 simm13:$val),
-          (ORri G0, imm:$val)>;
+          (ORri (i32 G0), imm:$val)>;
 // Arbitrary immediates.
 def : Pat<(i32 imm:$val),
           (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
 
 // subc
-def : Pat<(subc IntRegs:$b, IntRegs:$c),
-          (SUBCCrr IntRegs:$b, IntRegs:$c)>;
-def : Pat<(subc IntRegs:$b, simm13:$val),
-          (SUBCCri IntRegs:$b, imm:$val)>;
+def : Pat<(subc i32:$b, i32:$c),
+          (SUBCCrr $b, $c)>;
+def : Pat<(subc i32:$b, simm13:$val),
+          (SUBCCri $b, imm:$val)>;
 
 // Global addresses, constant pool entries
 def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
-def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>;
 def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
-def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>;
 
 // Add reg, lo.  This is used when taking the addr of a global/constpool entry.
-def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)),
-          (ADDri IntRegs:$r, tglobaladdr:$in)>;
-def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)),
-          (ADDri IntRegs:$r, tconstpool:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDri $r, tglobaladdr:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)),  (ADDri $r, tconstpool:$in)>;
 
 // Calls: 
 def : Pat<(call tglobaladdr:$dst),
@@ -823,3 +816,5 @@ def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>;
 // zextload bool -> zextload byte
 def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
 def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+
+include "SparcInstr64Bit.td"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 25e90b7af57c..3af4c614cd44 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -56,6 +56,12 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
+const TargetRegisterClass*
+SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+                                      unsigned Kind) const {
+  return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
+}
+
 void
 SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                        int SPAdj, unsigned FIOperandNum,
@@ -68,8 +74,9 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   // Addressable stack objects are accessed using neg. offsets from %fp
   MachineFunction &MF = *MI.getParent()->getParent();
-  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
-               MI.getOperand(FIOperandNum + 1).getImm();
+  int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+                   MI.getOperand(FIOperandNum + 1).getImm() +
+                   Subtarget.getStackPointerBias();
 
   // Replace frame index with a frame pointer reference.
   if (Offset >= -4096 && Offset <= 4095) {
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index b53a1ed095ab..f91df5398953 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -36,6 +36,9 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
+  const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
+                                                unsigned Kind) const;
+
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = NULL) const;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index 81bff6c51c9d..497e7c5d5612 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -43,7 +43,7 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
 }
 
 // Control Registers
-def ICC : SparcCtrlReg<"ICC">;
+def ICC : SparcCtrlReg<"ICC">; // This represents icc and xcc in 64-bit code.
 def FCC : SparcCtrlReg<"FCC">;
 
 // Y register
@@ -140,7 +140,10 @@ def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
 // FIXME: the register order should be defined in terms of the preferred
 // allocation order...
 //
-def IntRegs : RegisterClass<"SP", [i32], 32,
+// This register class should not be used to hold i64 values, use the I64Regs
+// register class for that. The i64 type is included here to allow i64 patterns
+// using the integer instructions.
+def IntRegs : RegisterClass<"SP", [i32, i64], 32,
                             (add L0, L1, L2, L3, L4, L5, L6,
                                  L7, I0, I1, I2, I3, I4, I5,
                                  O0, O1, O2, O3, O4, O5, O7,
@@ -155,6 +158,13 @@ def IntRegs : RegisterClass<"SP", [i32], 32,
                                  G5, G6, G7 // reserved for kernel
                                  )>;
 
+// Register class for 64-bit mode, with a 64-bit spill slot size.
+// These are the same as the 32-bit registers, so TableGen will consider this
+// to be a sub-class of IntRegs. That works out because requiring a 64-bit
+// spill slot is a stricter constraint than only requiring a 32-bit spill slot.
+def I64Regs : RegisterClass<"SP", [i64], 64, (add IntRegs)>;
+
+// Floating point register classes.
 def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
 
 def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index a81931b34aa2..b94dd110ea9f 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -52,6 +52,12 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
     }
     return std::string(p);
   }
+
+  /// The 64-bit ABI uses biased stack and frame pointers, so the stack frame
+  /// of the current function is the area from [%sp+BIAS] to [%fp+BIAS].
+  int64_t getStackPointerBias() const {
+    return is64Bit() ? 2047 : 0;
+  }
 };
 
 } // end namespace llvm
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index 79f74bd66127..11a5d7a68450 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -149,8 +149,8 @@ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
   return wrap(unwrap(T)->getDataLayout());
 }
 
-LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
-  char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
+static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
+  formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) {
   TargetMachine* TM = unwrap(T);
   Module* Mod = unwrap(M);
 
@@ -176,14 +176,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
       ft = TargetMachine::CGFT_ObjectFile;
       break;
   }
-  raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary);
-  formatted_raw_ostream destf(dest);
-  if (!error.empty()) {
-    *ErrorMessage = strdup(error.c_str());
-    return true;
-  }
-
-  if (TM->addPassesToEmitFile(pass, destf, ft)) {
+  if (TM->addPassesToEmitFile(pass, OS, ft)) {
     error = "TargetMachine can't emit a file of this type";
     *ErrorMessage = strdup(error.c_str());
     return true;
@@ -191,7 +184,35 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
 
   pass.run(*Mod);
 
-  destf.flush();
-  dest.flush();
+  OS.flush();
   return false;
 }
+
+LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
+  char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
+  std::string error;
+  raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary);
+  formatted_raw_ostream destf(dest);
+  if (!error.empty()) {
+    *ErrorMessage = strdup(error.c_str());
+    return true;
+  }
+  bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage);
+  dest.flush();
+  return Result;
+}
+
+LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T,
+  LLVMModuleRef M, LLVMCodeGenFileType codegen, char** ErrorMessage,
+  LLVMMemoryBufferRef *OutMemBuf) {
+  std::string CodeString;
+  raw_string_ostream OStream(CodeString);
+  formatted_raw_ostream Out(OStream);
+  bool Result = LLVMTargetMachineEmit(T, M, Out, codegen, ErrorMessage);
+  OStream.flush();
+
+  std::string &Data = OStream.str();
+  *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.c_str(),
+                                                     Data.length(), "");
+  return Result;
+}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 4ed5534a62ef..395fd97f96aa 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -13,6 +13,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
@@ -32,11 +33,440 @@ using namespace llvm;
 namespace {
 struct X86Operand;
 
+static const char OpPrecedence[] = {
+  0, // IC_PLUS
+  0, // IC_MINUS
+  1, // IC_MULTIPLY
+  1, // IC_DIVIDE
+  2, // IC_RPAREN
+  3, // IC_LPAREN
+  0, // IC_IMM
+  0  // IC_REGISTER
+};
+
 class X86AsmParser : public MCTargetAsmParser {
   MCSubtargetInfo &STI;
   MCAsmParser &Parser;
   ParseInstructionInfo *InstInfo;
 private:
+  enum InfixCalculatorTok {
+    IC_PLUS = 0,
+    IC_MINUS,
+    IC_MULTIPLY,
+    IC_DIVIDE,
+    IC_RPAREN,
+    IC_LPAREN,
+    IC_IMM,
+    IC_REGISTER
+  };
+
+  class InfixCalculator {
+    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
+    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
+    SmallVector<ICToken, 4> PostfixStack;
+    
+  public:
+    int64_t popOperand() {
+      assert (!PostfixStack.empty() && "Poped an empty stack!");
+      ICToken Op = PostfixStack.pop_back_val();
+      assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
+              && "Expected and immediate or register!");
+      return Op.second;
+    }
+    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
+      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
+              "Unexpected operand!");
+      PostfixStack.push_back(std::make_pair(Op, Val));
+    }
+    
+    void popOperator() { InfixOperatorStack.pop_back_val(); }
+    void pushOperator(InfixCalculatorTok Op) {
+      // Push the new operator if the stack is empty.
+      if (InfixOperatorStack.empty()) {
+        InfixOperatorStack.push_back(Op);
+        return;
+      }
+      
+      // Push the new operator if it has a higher precedence than the operator
+      // on the top of the stack or the operator on the top of the stack is a
+      // left parentheses.
+      unsigned Idx = InfixOperatorStack.size() - 1;
+      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
+      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
+        InfixOperatorStack.push_back(Op);
+        return;
+      }
+      
+      // The operator on the top of the stack has higher precedence than the
+      // new operator.
+      unsigned ParenCount = 0;
+      while (1) {
+        // Nothing to process.
+        if (InfixOperatorStack.empty())
+          break;
+        
+        Idx = InfixOperatorStack.size() - 1;
+        StackOp = InfixOperatorStack[Idx];
+        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
+          break;
+        
+        // If we have an even parentheses count and we see a left parentheses,
+        // then stop processing.
+        if (!ParenCount && StackOp == IC_LPAREN)
+          break;
+        
+        if (StackOp == IC_RPAREN) {
+          ++ParenCount;
+          InfixOperatorStack.pop_back_val();
+        } else if (StackOp == IC_LPAREN) {
+          --ParenCount;
+          InfixOperatorStack.pop_back_val();
+        } else {
+          InfixOperatorStack.pop_back_val();
+          PostfixStack.push_back(std::make_pair(StackOp, 0));
+        }
+      }
+      // Push the new operator.
+      InfixOperatorStack.push_back(Op);
+    }
+    int64_t execute() {
+      // Push any remaining operators onto the postfix stack.
+      while (!InfixOperatorStack.empty()) {
+        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
+        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
+          PostfixStack.push_back(std::make_pair(StackOp, 0));
+      }
+      
+      if (PostfixStack.empty())
+        return 0;
+      
+      SmallVector<ICToken, 16> OperandStack;
+      for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
+        ICToken Op = PostfixStack[i];
+        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
+          OperandStack.push_back(Op);
+        } else {
+          assert (OperandStack.size() > 1 && "Too few operands.");
+          int64_t Val;
+          ICToken Op2 = OperandStack.pop_back_val();
+          ICToken Op1 = OperandStack.pop_back_val();
+          switch (Op.first) {
+          default:
+            report_fatal_error("Unexpected operator!");
+            break;
+          case IC_PLUS:
+            Val = Op1.second + Op2.second;
+            OperandStack.push_back(std::make_pair(IC_IMM, Val));
+            break;
+          case IC_MINUS:
+            Val = Op1.second - Op2.second;
+            OperandStack.push_back(std::make_pair(IC_IMM, Val));
+            break;
+          case IC_MULTIPLY:
+            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
+                    "Multiply operation with an immediate and a register!");
+            Val = Op1.second * Op2.second;
+            OperandStack.push_back(std::make_pair(IC_IMM, Val));
+            break;
+          case IC_DIVIDE:
+            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
+                    "Divide operation with an immediate and a register!");
+            assert (Op2.second != 0 && "Division by zero!");
+            Val = Op1.second / Op2.second;
+            OperandStack.push_back(std::make_pair(IC_IMM, Val));
+            break;
+          }
+        }
+      }
+      assert (OperandStack.size() == 1 && "Expected a single result.");
+      return OperandStack.pop_back_val().second;
+    }
+  };
+
+  enum IntelExprState {
+    IES_PLUS,
+    IES_MINUS,
+    IES_MULTIPLY,
+    IES_DIVIDE,
+    IES_LBRAC,
+    IES_RBRAC,
+    IES_LPAREN,
+    IES_RPAREN,
+    IES_REGISTER,
+    IES_INTEGER,
+    IES_IDENTIFIER,
+    IES_ERROR
+  };
+
+  class IntelExprStateMachine {
+    IntelExprState State, PrevState;
+    unsigned BaseReg, IndexReg, TmpReg, Scale;
+    int64_t Imm;
+    const MCExpr *Sym;
+    StringRef SymName;
+    bool StopOnLBrac, AddImmPrefix;
+    InfixCalculator IC;
+  public:
+    IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
+      State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
+      Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
+      AddImmPrefix(addimmprefix) {}
+    
+    unsigned getBaseReg() { return BaseReg; }
+    unsigned getIndexReg() { return IndexReg; }
+    unsigned getScale() { return Scale; }
+    const MCExpr *getSym() { return Sym; }
+    StringRef getSymName() { return SymName; }
+    int64_t getImm() { return Imm + IC.execute(); }
+    bool isValidEndState() { return State == IES_RBRAC; }
+    bool getStopOnLBrac() { return StopOnLBrac; }
+    bool getAddImmPrefix() { return AddImmPrefix; }
+    bool hadError() { return State == IES_ERROR; }
+
+    void onPlus() {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_INTEGER:
+      case IES_RPAREN:
+      case IES_REGISTER:
+        State = IES_PLUS;
+        IC.pushOperator(IC_PLUS);
+        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
+          // If we already have a BaseReg, then assume this is the IndexReg with
+          // a scale of 1.
+          if (!BaseReg) {
+            BaseReg = TmpReg;
+          } else {
+            assert (!IndexReg && "BaseReg/IndexReg already set!");
+            IndexReg = TmpReg;
+            Scale = 1;
+          }
+        }
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onMinus() {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_PLUS:
+      case IES_MULTIPLY:
+      case IES_DIVIDE:
+      case IES_LPAREN:
+      case IES_RPAREN:
+      case IES_LBRAC:
+      case IES_RBRAC:
+      case IES_INTEGER:
+      case IES_REGISTER:
+        State = IES_MINUS;
+        // Only push the minus operator if it is not a unary operator.
+        if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
+              CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
+              CurrState == IES_LPAREN || CurrState == IES_LBRAC))
+          IC.pushOperator(IC_MINUS);
+        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
+          // If we already have a BaseReg, then assume this is the IndexReg with
+          // a scale of 1.
+          if (!BaseReg) {
+            BaseReg = TmpReg;
+          } else {
+            assert (!IndexReg && "BaseReg/IndexReg already set!");
+            IndexReg = TmpReg;
+            Scale = 1;
+          }
+        }
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onRegister(unsigned Reg) {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_PLUS:
+      case IES_LPAREN:
+        State = IES_REGISTER;
+        TmpReg = Reg;
+        IC.pushOperand(IC_REGISTER);
+        break;
+      case IES_MULTIPLY:
+        // Index Register - Scale * Register
+        if (PrevState == IES_INTEGER) {
+          assert (!IndexReg && "IndexReg already set!");
+          State = IES_REGISTER;
+          IndexReg = Reg;
+          // Get the scale and replace the 'Scale * Register' with '0'.
+          Scale = IC.popOperand();
+          IC.pushOperand(IC_IMM);
+          IC.popOperator();
+        } else {
+          State = IES_ERROR;
+        }
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
+      PrevState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_PLUS:
+      case IES_MINUS:
+        State = IES_INTEGER;
+        Sym = SymRef;
+        SymName = SymRefName;
+        IC.pushOperand(IC_IMM);
+        break;
+      }
+    }
+    void onInteger(int64_t TmpInt) {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_PLUS:
+      case IES_MINUS:
+      case IES_DIVIDE:
+      case IES_MULTIPLY:
+      case IES_LPAREN:
+        State = IES_INTEGER;
+        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
+          // Index Register - Register * Scale
+          assert (!IndexReg && "IndexReg already set!");
+          IndexReg = TmpReg;
+          Scale = TmpInt;
+          // Get the scale and replace the 'Register * Scale' with '0'.
+          IC.popOperator();
+        } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
+                    PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
+                    PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
+                   CurrState == IES_MINUS) {
+          // Unary minus.  No need to pop the minus operand because it was never
+          // pushed.
+          IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
+        } else {
+          IC.pushOperand(IC_IMM, TmpInt);
+        }
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onStar() {
+      PrevState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_INTEGER:
+      case IES_REGISTER:
+      case IES_RPAREN:
+        State = IES_MULTIPLY;
+        IC.pushOperator(IC_MULTIPLY);
+        break;
+      }
+    }
+    void onDivide() {
+      PrevState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_INTEGER:
+      case IES_RPAREN:
+        State = IES_DIVIDE;
+        IC.pushOperator(IC_DIVIDE);
+        break;
+      }
+    }
+    void onLBrac() {
+      PrevState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_RBRAC:
+        State = IES_PLUS;
+        IC.pushOperator(IC_PLUS);
+        break;
+      }
+    }
+    void onRBrac() {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_INTEGER:
+      case IES_REGISTER:
+      case IES_RPAREN:
+        State = IES_RBRAC;
+        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
+          // If we already have a BaseReg, then assume this is the IndexReg with
+          // a scale of 1.
+          if (!BaseReg) {
+            BaseReg = TmpReg;
+          } else {
+            assert (!IndexReg && "BaseReg/IndexReg already set!");
+            IndexReg = TmpReg;
+            Scale = 1;
+          }
+        }
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onLParen() {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_PLUS:
+      case IES_MINUS:
+      case IES_MULTIPLY:
+      case IES_DIVIDE:
+      case IES_LPAREN:
+        // FIXME: We don't handle this type of unary minus, yet.
+        if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
+            PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
+            PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
+            CurrState == IES_MINUS) {
+          State = IES_ERROR;
+          break;
+        }
+        State = IES_LPAREN;
+        IC.pushOperator(IC_LPAREN);
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onRParen() {
+      PrevState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_INTEGER:
+      case IES_REGISTER:
+      case IES_RPAREN:
+        State = IES_RPAREN;
+        IC.pushOperator(IC_RPAREN);
+        break;
+      }
+    }
+  };
+
   MCAsmParser &getParser() const { return Parser; }
 
   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
@@ -56,14 +486,22 @@ class X86AsmParser : public MCTargetAsmParser {
   X86Operand *ParseOperand();
   X86Operand *ParseATTOperand();
   X86Operand *ParseIntelOperand();
-  X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
-  X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind);
-  X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc);
-  X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
+  X86Operand *ParseIntelOffsetOfOperator();
+  X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
+  X86Operand *ParseIntelOperator(unsigned OpKind);
+  X86Operand *ParseIntelMemOperand(unsigned SegReg, int64_t ImmDisp,
+                                   SMLoc StartLoc);
+  X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
+  X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
+                                       int64_t ImmDisp, unsigned Size);
+  X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
+                                   SMLoc &End);
   X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
 
-  bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
-                             SmallString<64> &Err);
+  X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
+                                    unsigned BaseReg, unsigned IndexReg,
+                                    unsigned Scale, SMLoc Start, SMLoc End,
+                                    unsigned Size, StringRef SymName);
 
   bool ParseDirectiveWord(unsigned Size, SMLoc L);
   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
@@ -93,6 +531,10 @@ class X86AsmParser : public MCTargetAsmParser {
     setAvailableFeatures(FB);
   }
 
+  bool isParsingIntelSyntax() {
+    return getParser().getAssemblerDialect();
+  }
+
   /// @name Auto-generated Matcher Functions
   /// {
 
@@ -115,10 +557,6 @@ class X86AsmParser : public MCTargetAsmParser {
                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
 
   virtual bool ParseDirective(AsmToken DirectiveID);
-
-  bool isParsingIntelSyntax() {
-    return getParser().getAssemblerDialect();
-  }
 };
 } // end anonymous namespace
 
@@ -168,6 +606,7 @@ struct X86Operand : public MCParsedAsmOperand {
 
   SMLoc StartLoc, EndLoc;
   SMLoc OffsetOfLoc;
+  StringRef SymName;
   bool AddressOf;
 
   struct TokOp {
@@ -181,7 +620,6 @@ struct X86Operand : public MCParsedAsmOperand {
 
   struct ImmOp {
     const MCExpr *Val;
-    bool NeedAsmRewrite;
   };
 
   struct MemOp {
@@ -191,7 +629,6 @@ struct X86Operand : public MCParsedAsmOperand {
     unsigned IndexReg;
     unsigned Scale;
     unsigned Size;
-    bool NeedSizeDir;
   };
 
   union {
@@ -204,6 +641,8 @@ struct X86Operand : public MCParsedAsmOperand {
   X86Operand(KindTy K, SMLoc Start, SMLoc End)
     : Kind(K), StartLoc(Start), EndLoc(End) {}
 
+  StringRef getSymName() { return SymName; }
+
   /// getStartLoc - Get the location of the first token of this operand.
   SMLoc getStartLoc() const { return StartLoc; }
   /// getEndLoc - Get the location of the last token of this operand.
@@ -236,11 +675,6 @@ struct X86Operand : public MCParsedAsmOperand {
     return Imm.Val;
   }
 
-  bool needAsmRewrite() const {
-    assert(Kind == Immediate && "Invalid access!");
-    return Imm.NeedAsmRewrite;
-  }
-
   const MCExpr *getMemDisp() const {
     assert(Kind == Memory && "Invalid access!");
     return Mem.Disp;
@@ -337,11 +771,6 @@ struct X86Operand : public MCParsedAsmOperand {
     return isImmSExti64i32Value(CE->getValue());
   }
 
-  unsigned getMemSize() const {
-    assert(Kind == Memory && "Invalid access!");
-    return Mem.Size;
-  }
-
   bool isOffsetOf() const {
     return OffsetOfLoc.getPointer();
   }
@@ -350,11 +779,6 @@ struct X86Operand : public MCParsedAsmOperand {
     return AddressOf;
   }
 
-  bool needSizeDirective() const {
-    assert(Kind == Memory && "Invalid access!");
-    return Mem.NeedSizeDir;
-  }
-
   bool isMem() const { return Kind == Memory; }
   bool isMem8() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 8);
@@ -482,25 +906,26 @@ struct X86Operand : public MCParsedAsmOperand {
 
   static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
                                bool AddressOf = false,
-                               SMLoc OffsetOfLoc = SMLoc()) {
+                               SMLoc OffsetOfLoc = SMLoc(),
+                               StringRef SymName = StringRef()) {
     X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
     Res->Reg.RegNo = RegNo;
     Res->AddressOf = AddressOf;
     Res->OffsetOfLoc = OffsetOfLoc;
+    Res->SymName = SymName;
     return Res;
   }
 
-  static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc,
-                               bool NeedRewrite = true){
+  static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
     X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
     Res->Imm.Val = Val;
-    Res->Imm.NeedAsmRewrite = NeedRewrite;
     return Res;
   }
 
   /// Create an absolute memory operand.
   static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
-                               unsigned Size = 0, bool NeedSizeDir = false) {
+                               unsigned Size = 0,
+                               StringRef SymName = StringRef()) {
     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
     Res->Mem.SegReg   = 0;
     Res->Mem.Disp     = Disp;
@@ -508,7 +933,7 @@ struct X86Operand : public MCParsedAsmOperand {
     Res->Mem.IndexReg = 0;
     Res->Mem.Scale    = 1;
     Res->Mem.Size     = Size;
-    Res->Mem.NeedSizeDir = NeedSizeDir;
+    Res->SymName = SymName;
     Res->AddressOf = false;
     return Res;
   }
@@ -517,7 +942,8 @@ struct X86Operand : public MCParsedAsmOperand {
   static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
                                unsigned BaseReg, unsigned IndexReg,
                                unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
-                               unsigned Size = 0, bool NeedSizeDir = false) {
+                               unsigned Size = 0,
+                               StringRef SymName = StringRef()) {
     // We should never just have a displacement, that should be parsed as an
     // absolute memory operand.
     assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -532,7 +958,7 @@ struct X86Operand : public MCParsedAsmOperand {
     Res->Mem.IndexReg = IndexReg;
     Res->Mem.Scale    = Scale;
     Res->Mem.Size     = Size;
-    Res->Mem.NeedSizeDir = NeedSizeDir;
+    Res->SymName = SymName;
     Res->AddressOf = false;
     return Res;
   }
@@ -689,251 +1115,114 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
   return Size;
 }
 
-enum IntelBracExprState {
-  IBES_START,
-  IBES_LBRAC,
-  IBES_RBRAC,
-  IBES_REGISTER,
-  IBES_REGISTER_STAR,
-  IBES_REGISTER_STAR_INTEGER,
-  IBES_INTEGER,
-  IBES_INTEGER_STAR,
-  IBES_INDEX_REGISTER,
-  IBES_IDENTIFIER,
-  IBES_DISP_EXPR,
-  IBES_MINUS,
-  IBES_ERROR
-};
-
-class IntelBracExprStateMachine {
-  IntelBracExprState State;
-  unsigned BaseReg, IndexReg, Scale;
-  int64_t Disp;
+X86Operand *
+X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
+                                    unsigned BaseReg, unsigned IndexReg,
+                                    unsigned Scale, SMLoc Start, SMLoc End,
+                                    unsigned Size, StringRef SymName) {
+  bool NeedSizeDir = false;
+  if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
+    const MCSymbol &Sym = SymRef->getSymbol();
+    // FIXME: The SemaLookup will fail if the name is anything other then an
+    // identifier.
+    // FIXME: Pass a valid SMLoc.
+    bool IsVarDecl = false;
+    unsigned tLength, tSize, tType;
+    SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, tSize,
+                                            tType, IsVarDecl);
+    if (!Size) {
+      Size = tType * 8; // Size is in terms of bits in this context.
+      NeedSizeDir = Size > 0;
+    }
+    // If this is not a VarDecl then assume it is a FuncDecl or some other label
+    // reference.  We need an 'r' constraint here, so we need to create register
+    // operand to ensure proper matching.  Just pick a GPR based on the size of
+    // a pointer.
+    if (!IsVarDecl) {
+      unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+      return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
+                                   SMLoc(), SymName);
+    }
+  }
 
-  unsigned TmpReg;
-  int64_t TmpInteger;
+  if (NeedSizeDir)
+    InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
+                                                /*Len=*/0, Size));  
 
-  bool isPlus;
+  // When parsing inline assembly we set the base register to a non-zero value
+  // if we don't know the actual value at this time.  This is necessary to
+  // get the matching correct in some cases.
+  BaseReg = BaseReg ? BaseReg : 1;
+  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
+                               End, Size, SymName);
+}
 
-public:
-  IntelBracExprStateMachine(MCAsmParser &parser) :
-    State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(0),
-    TmpReg(0), TmpInteger(0), isPlus(true) {}
-
-  unsigned getBaseReg() { return BaseReg; }
-  unsigned getIndexReg() { return IndexReg; }
-  unsigned getScale() { return Scale; }
-  int64_t getDisp() { return Disp; }
-  bool isValidEndState() { return State == IBES_RBRAC; }
-
-  void onPlus() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_INTEGER:
-      State = IBES_START;
-      if (isPlus)
-        Disp += TmpInteger;
-      else
-        Disp -= TmpInteger;
-      break;
-    case IBES_REGISTER:
-      State = IBES_START;
-      // If we already have a BaseReg, then assume this is the IndexReg with a
-      // scale of 1.
-      if (!BaseReg) {
-        BaseReg = TmpReg;
-      } else {
-        assert (!IndexReg && "BaseReg/IndexReg already set!");
-        IndexReg = TmpReg;
-        Scale = 1;
-      }
-      break;
-    case IBES_INDEX_REGISTER:
-      State = IBES_START;
-      break;
-    }
-    isPlus = true;
-  }
-  void onMinus() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_START:
-      State = IBES_MINUS;
-      break;
-    case IBES_INTEGER:
-      State = IBES_START;
-      if (isPlus)
-        Disp += TmpInteger;
-      else
-        Disp -= TmpInteger;
-      break;
-    case IBES_REGISTER:
-      State = IBES_START;
-      // If we already have a BaseReg, then assume this is the IndexReg with a
-      // scale of 1.
-      if (!BaseReg) {
-        BaseReg = TmpReg;
-      } else {
-        assert (!IndexReg && "BaseReg/IndexReg already set!");
-        IndexReg = TmpReg;
-        Scale = 1;
+static void
+RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
+                           StringRef SymName, int64_t ImmDisp,
+                           int64_t FinalImmDisp, SMLoc &BracLoc,
+                           SMLoc &StartInBrac, SMLoc &End) {
+  // Remove the '[' and ']' from the IR string.
+  AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
+  AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
+
+  // If ImmDisp is non-zero, then we parsed a displacement before the
+  // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
+  // If ImmDisp doesn't match the displacement computed by the state machine
+  // then we have an additional displacement in the bracketed expression.
+  if (ImmDisp != FinalImmDisp) {
+    if (ImmDisp) {
+      // We have an immediate displacement before the bracketed expression.
+      // Adjust this to match the final immediate displacement.
+      bool Found = false;
+      for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
+             E = AsmRewrites->end(); I != E; ++I) {
+        if ((*I).Loc.getPointer() > BracLoc.getPointer())
+          continue;
+        if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
+          assert (!Found && "ImmDisp already rewritten.");
+          (*I).Kind = AOK_Imm;
+          (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
+          (*I).Val = FinalImmDisp;
+          Found = true;
+          break;
+        }
       }
-      break;
-    case IBES_INDEX_REGISTER:
-      State = IBES_START;
-      break;
-    }
-    isPlus = false;
-  }
-  void onRegister(unsigned Reg) {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_START:
-      State = IBES_REGISTER;
-      TmpReg = Reg;
-      break;
-    case IBES_INTEGER_STAR:
-      assert (!IndexReg && "IndexReg already set!");
-      State = IBES_INDEX_REGISTER;
-      IndexReg = Reg;
-      Scale = TmpInteger;
-      break;
-    }
-  }
-  void onDispExpr() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_START:
-      State = IBES_DISP_EXPR;
-      break;
-    }
-  }
-  void onInteger(int64_t TmpInt) {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_START:
-      State = IBES_INTEGER;
-      TmpInteger = TmpInt;
-      break;
-    case IBES_MINUS:
-      State = IBES_INTEGER;
-      TmpInteger = TmpInt;
-      break;
-    case IBES_REGISTER_STAR:
-      assert (!IndexReg && "IndexReg already set!");
-      State = IBES_INDEX_REGISTER;
-      IndexReg = TmpReg;
-      Scale = TmpInt;
-      break;
-    }
-  }
-  void onStar() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_INTEGER:
-      State = IBES_INTEGER_STAR;
-      break;
-    case IBES_REGISTER:
-      State = IBES_REGISTER_STAR;
-      break;
-    }
-  }
-  void onLBrac() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_RBRAC:
-      State = IBES_START;
-      isPlus = true;
-      break;
+      assert (Found && "Unable to rewrite ImmDisp.");
+    } else {
+      // We have a symbolic and an immediate displacement, but no displacement
+      // before the bracketed expression.  Put the immediate displacement
+      // before the bracketed expression.
+      AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
     }
   }
-  void onRBrac() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_DISP_EXPR:
-      State = IBES_RBRAC;
-      break;
-    case IBES_INTEGER:
-      State = IBES_RBRAC;
-      if (isPlus)
-        Disp += TmpInteger;
-      else
-        Disp -= TmpInteger;
-      break;
-    case IBES_REGISTER:
-      State = IBES_RBRAC;
-      // If we already have a BaseReg, then assume this is the IndexReg with a
-      // scale of 1.
-      if (!BaseReg) {
-        BaseReg = TmpReg;
-      } else {
-        assert (!IndexReg && "BaseReg/IndexReg already set!");
-        IndexReg = TmpReg;
-        Scale = 1;
-      }
-      break;
-    case IBES_INDEX_REGISTER:
-      State = IBES_RBRAC;
-      break;
-    }
+  // Remove all the ImmPrefix rewrites within the brackets.
+  for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
+         E = AsmRewrites->end(); I != E; ++I) {
+    if ((*I).Loc.getPointer() < StartInBrac.getPointer())
+      continue;
+    if ((*I).Kind == AOK_ImmPrefix)
+      (*I).Kind = AOK_Delete;
+  }
+  const char *SymLocPtr = SymName.data();
+  // Skip everything before the symbol.        
+  if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
+    assert(Len > 0 && "Expected a non-negative length.");
+    AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
+  }
+  // Skip everything after the symbol.
+  if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
+    SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
+    assert(Len > 0 && "Expected a non-negative length.");
+    AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
   }
-};
+}
 
-X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, 
-                                                   unsigned Size) {
+X86Operand *
+X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
   const AsmToken &Tok = Parser.getTok();
-  SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
-
-  // Eat '['
-  if (getLexer().isNot(AsmToken::LBrac))
-    return ErrorOperand(Start, "Expected '[' token!");
-  Parser.Lex();
-
-  unsigned TmpReg = 0;
-
-  // Try to handle '[' 'symbol' ']'
-  if (getLexer().is(AsmToken::Identifier)) {
-    if (ParseRegister(TmpReg, Start, End)) {
-      const MCExpr *Disp;
-      if (getParser().parseExpression(Disp, End))
-        return 0;
-
-      if (getLexer().isNot(AsmToken::RBrac))
-        return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!");
-      // Adjust the EndLoc due to the ']'.
-      End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1);
-      Parser.Lex();
-      return X86Operand::CreateMem(Disp, Start, End, Size);
-    }
-  }
 
-  // Parse [ BaseReg + Scale*IndexReg + Disp ].
   bool Done = false;
-  IntelBracExprStateMachine SM(Parser);
-
-  // If we parsed a register, then the end loc has already been set and
-  // the identifier has already been lexed.  We also need to update the
-  // state.
-  if (TmpReg)
-    SM.onRegister(TmpReg);
-
-  const MCExpr *Disp = 0;
   while (!Done) {
     bool UpdateLocLex = true;
 
@@ -941,6 +1230,10 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
     // identifier.  Don't try an parse it as a register.
     if (Tok.getString().startswith("."))
       break;
+    
+    // If we're parsing an immediate expression, we don't expect a '['.
+    if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
+      break;
 
     switch (getLexer().getKind()) {
     default: {
@@ -950,82 +1243,189 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
       }
       return ErrorOperand(Tok.getLoc(), "Unexpected token!");
     }
+    case AsmToken::EndOfStatement: {
+      Done = true;
+      break;
+    }
     case AsmToken::Identifier: {
-      // This could be a register or a displacement expression.
-      if(!ParseRegister(TmpReg, Start, End)) {
+      // This could be a register or a symbolic displacement.
+      unsigned TmpReg;
+      const MCExpr *Val;
+      SMLoc IdentLoc = Tok.getLoc();
+      StringRef Identifier = Tok.getString();
+      if(!ParseRegister(TmpReg, IdentLoc, End)) {
         SM.onRegister(TmpReg);
         UpdateLocLex = false;
         break;
-      } else if (!getParser().parseExpression(Disp, End)) {
-        SM.onDispExpr();
+      } else {
+        if (!isParsingInlineAsm()) {
+          if (getParser().parsePrimaryExpr(Val, End))
+            return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
+        } else {
+          if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, End))
+            return Err;
+        }
+        SM.onIdentifierExpr(Val, Identifier);
         UpdateLocLex = false;
         break;
       }
       return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
     }
-    case AsmToken::Integer: {
-      int64_t Val = Tok.getIntVal();
-      SM.onInteger(Val);
+    case AsmToken::Integer:
+      if (isParsingInlineAsm() && SM.getAddImmPrefix())
+        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
+                                                    Tok.getLoc()));
+      SM.onInteger(Tok.getIntVal());
       break;
-    }
     case AsmToken::Plus:    SM.onPlus(); break;
     case AsmToken::Minus:   SM.onMinus(); break;
     case AsmToken::Star:    SM.onStar(); break;
+    case AsmToken::Slash:   SM.onDivide(); break;
     case AsmToken::LBrac:   SM.onLBrac(); break;
     case AsmToken::RBrac:   SM.onRBrac(); break;
+    case AsmToken::LParen:  SM.onLParen(); break;
+    case AsmToken::RParen:  SM.onRParen(); break;
     }
+    if (SM.hadError())
+      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+
     if (!Done && UpdateLocLex) {
       End = Tok.getLoc();
       Parser.Lex(); // Consume the token.
     }
   }
+  return 0;
+}
 
-  if (!Disp)
-    Disp = MCConstantExpr::Create(SM.getDisp(), getContext());
+X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
+                                                   int64_t ImmDisp,
+                                                   unsigned Size) {
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
+  if (getLexer().isNot(AsmToken::LBrac))
+    return ErrorOperand(BracLoc, "Expected '[' token!");
+  Parser.Lex(); // Eat '['
+
+  SMLoc StartInBrac = Tok.getLoc();
+  // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ].  We
+  // may have already parsed an immediate displacement before the bracketed
+  // expression.
+  IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
+  if (X86Operand *Err = ParseIntelExpression(SM, End))
+    return Err;
+
+  const MCExpr *Disp;
+  if (const MCExpr *Sym = SM.getSym()) {
+    // A symbolic displacement.
+    Disp = Sym;
+    if (isParsingInlineAsm())
+      RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
+                                 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
+                                 End);
+  } else {
+    // An immediate displacement only.   
+    Disp = MCConstantExpr::Create(SM.getImm(), getContext());
+  }
 
   // Parse the dot operator (e.g., [ebx].foo.bar).
   if (Tok.getString().startswith(".")) {
-    SmallString<64> Err;
     const MCExpr *NewDisp;
-    if (ParseIntelDotOperator(Disp, &NewDisp, Err))
-      return ErrorOperand(Tok.getLoc(), Err);
+    if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp))
+      return Err;
     
-    End = Parser.getTok().getEndLoc();
+    End = Tok.getEndLoc();
     Parser.Lex();  // Eat the field.
     Disp = NewDisp;
   }
 
   int BaseReg = SM.getBaseReg();
   int IndexReg = SM.getIndexReg();
+  int Scale = SM.getScale();
+  if (!isParsingInlineAsm()) {
+    // handle [-42]
+    if (!BaseReg && !IndexReg) {
+      if (!SegReg)
+        return X86Operand::CreateMem(Disp, Start, End, Size);
+      else
+        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
+    }
+    return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
+                                 End, Size);
+  }
 
-  // handle [-42]
-  if (!BaseReg && !IndexReg) {
-    if (!SegReg)
-      return X86Operand::CreateMem(Disp, Start, End);
-    else
-      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
+  return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
+                               End, Size, SM.getSymName());
+}
+
+// Inline assembly may use variable names with namespace alias qualifiers.
+X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
+                                               StringRef &Identifier,
+                                               SMLoc &End) {
+  assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
+  Val = 0;
+
+  bool Done = false;
+  const AsmToken &Tok = Parser.getTok();
+  AsmToken IdentEnd = Tok;
+  while (!Done) {
+    End = Tok.getLoc();
+    switch (getLexer().getKind()) {
+    default:
+      Done = true; 
+      break;
+    case AsmToken::Colon:
+      IdentEnd = Tok;
+      getLexer().Lex(); // Consume ':'.
+      if (getLexer().isNot(AsmToken::Colon))
+        return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
+      getLexer().Lex(); // Consume second ':'.
+      if (getLexer().isNot(AsmToken::Identifier))
+        return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
+      break;
+    case AsmToken::Identifier:
+      IdentEnd = Tok;
+      getLexer().Lex(); // Consume the identifier.
+      break;
+    }
   }
 
-  int Scale = SM.getScale();
-  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
-                               Start, End, Size);
+  unsigned Len = IdentEnd.getLoc().getPointer() - Identifier.data();
+  Identifier = StringRef(Identifier.data(), Len + IdentEnd.getString().size());
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
+  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+  Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
+  return 0;
 }
 
 /// ParseIntelMemOperand - Parse intel style memory operand.
-X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
+X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
+                                               int64_t ImmDisp,
+                                               SMLoc Start) {
   const AsmToken &Tok = Parser.getTok();
   SMLoc End;
 
   unsigned Size = getIntelMemOperandSize(Tok.getString());
   if (Size) {
-    Parser.Lex();
-    assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
-            "Unexpected token!");
-    Parser.Lex();
+    Parser.Lex(); // Eat operand size (e.g., byte, word).
+    if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
+      return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
+    Parser.Lex(); // Eat ptr.
+  }
+
+  // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+  if (getLexer().is(AsmToken::Integer)) {
+    if (isParsingInlineAsm())
+      InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
+                                                  Tok.getLoc()));
+    int64_t ImmDisp = Tok.getIntVal();
+    Parser.Lex(); // Eat the integer.
+    if (getLexer().isNot(AsmToken::LBrac))
+      return ErrorOperand(Start, "Expected '[' token!");
+    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
   }
 
   if (getLexer().is(AsmToken::LBrac))
-    return ParseIntelBracExpression(SegReg, Size);
+    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
 
   if (!ParseRegister(SegReg, Start, End)) {
     // Handel SegReg : [ ... ]
@@ -1034,63 +1434,35 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
     Parser.Lex(); // Eat :
     if (getLexer().isNot(AsmToken::LBrac))
       return ErrorOperand(Start, "Expected '[' token!");
-    return ParseIntelBracExpression(SegReg, Size);
+    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
   }
 
-  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
-  if (getParser().parseExpression(Disp, End))
-    return 0;
+  const MCExpr *Val;
+  if (!isParsingInlineAsm()) {
+    if (getParser().parsePrimaryExpr(Val, End))
+      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
 
-  bool NeedSizeDir = false;
-  bool IsVarDecl = false;
-  if (isParsingInlineAsm()) {
-    if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
-      const MCSymbol &Sym = SymRef->getSymbol();
-      // FIXME: The SemaLookup will fail if the name is anything other then an
-      // identifier.
-      // FIXME: Pass a valid SMLoc.
-      unsigned tLength, tSize, tType;
-      SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
-                                              tSize, tType, IsVarDecl);
-      if (!Size)
-        Size = tType * 8; // Size is in terms of bits in this context.
-      NeedSizeDir = Size > 0;
-    }
+    return X86Operand::CreateMem(Val, Start, End, Size);
   }
-  if (!isParsingInlineAsm())
-    return X86Operand::CreateMem(Disp, Start, End, Size);
-  else {
-    // If this is not a VarDecl then assume it is a FuncDecl or some other label
-    // reference.  We need an 'r' constraint here, so we need to create register
-    // operand to ensure proper matching.  Just pick a GPR based on the size of
-    // a pointer.
-    if (!IsVarDecl) {
-      unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
-      return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
-    }
 
-    // When parsing inline assembly we set the base register to a non-zero value
-    // as we don't know the actual value at this time.  This is necessary to
-    // get the matching correct in some cases.
-    return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
-                                 /*Scale*/1, Start, End, Size, NeedSizeDir);
-  }
+  StringRef Identifier = Tok.getString();
+  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, End))
+    return Err;
+  return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
+                               /*Scale=*/1, Start, End, Size, Identifier);
 }
 
 /// Parse the '.' operator.
-bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
-                                         const MCExpr **NewDisp,
-                                         SmallString<64> &Err) {
-  AsmToken Tok = *&Parser.getTok();
-  uint64_t OrigDispVal, DotDispVal;
+X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
+                                                const MCExpr *&NewDisp) {
+  const AsmToken &Tok = Parser.getTok();
+  int64_t OrigDispVal, DotDispVal;
 
   // FIXME: Handle non-constant expressions.
-  if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
+  if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
     OrigDispVal = OrigDisp->getValue();
-  } else {
-    Err = "Non-constant offsets are not supported!";
-    return true;
-  }
+  else
+    return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!");
 
   // Drop the '.'.
   StringRef DotDispStr = Tok.getString().drop_front(1);
@@ -1100,23 +1472,15 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
     APInt DotDisp;
     DotDispStr.getAsInteger(10, DotDisp);
     DotDispVal = DotDisp.getZExtValue();
-  } else if (Tok.is(AsmToken::Identifier)) {
-    // We should only see an identifier when parsing the original inline asm.
-    // The front-end should rewrite this in terms of immediates.
-    assert (isParsingInlineAsm() && "Unexpected field name!");
-
+  } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
     unsigned DotDisp;
     std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
     if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
-                                           DotDisp)) {
-      Err = "Unable to lookup field reference!";
-      return true;
-    }
+                                           DotDisp))
+      return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!");
     DotDispVal = DotDisp;
-  } else {
-    Err = "Unexpected token type!";
-    return true;
-  }
+  } else
+    return ErrorOperand(Tok.getLoc(), "Unexpected token type!");
 
   if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
     SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
@@ -1126,22 +1490,22 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
                                                 Val));
   }
 
-  *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
-  return false;
+  NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
+  return 0;
 }
 
 /// Parse the 'offset' operator.  This operator is used to specify the
 /// location rather then the content of a variable.
-X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
-  SMLoc OffsetOfLoc = Start;
+X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc OffsetOfLoc = Tok.getLoc();
   Parser.Lex(); // Eat offset.
-  Start = Parser.getTok().getLoc();
-  assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
 
-  SMLoc End;
   const MCExpr *Val;
-  if (getParser().parseExpression(Val, End))
-    return ErrorOperand(Start, "Unable to parse expression!");
+  SMLoc Start = Tok.getLoc(), End;
+  StringRef Identifier = Tok.getString();
+  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, End))
+    return Err;
 
   // Don't emit the offset operator.
   InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
@@ -1151,7 +1515,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
   // the size of a pointer.
   unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
   return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
-                               OffsetOfLoc);
+                               OffsetOfLoc, Identifier);
 }
 
 enum IntelOperatorKind {
@@ -1166,16 +1530,17 @@ enum IntelOperatorKind {
 /// variable.  A variable's size is the product of its LENGTH and TYPE.  The
 /// TYPE operator returns the size of a C or C++ type or variable. If the
 /// variable is an array, TYPE returns the size of a single element.
-X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
-  SMLoc TypeLoc = Start;
-  Parser.Lex(); // Eat offset.
-  Start = Parser.getTok().getLoc();
-  assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
+X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc TypeLoc = Tok.getLoc();
+  Parser.Lex(); // Eat operator.
 
-  SMLoc End;
-  const MCExpr *Val;
-  if (getParser().parseExpression(Val, End))
-    return 0;
+  const MCExpr *Val = 0;
+  AsmToken StartTok = Tok;
+  SMLoc Start = Tok.getLoc(), End;
+  StringRef Identifier = Tok.getString();
+  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, End))
+    return Err;
 
   unsigned Length = 0, Size = 0, Type = 0;
   if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
@@ -1186,7 +1551,10 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
     bool IsVarDecl;
     if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
                                                  Size, Type, IsVarDecl))
-      return ErrorOperand(Start, "Unable to lookup expr!");
+      // FIXME: We don't warn on variables with namespace alias qualifiers
+      // because support still needs to be added in the frontend.
+      if (Identifier.equals(StartTok.getString()))
+        return ErrorOperand(Start, "Unable to lookup expr!");
   }
   unsigned CVal;
   switch(OpKind) {
@@ -1202,32 +1570,58 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
   InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
 
   const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
-  return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false);
+  return X86Operand::CreateImm(Imm, Start, End);
 }
 
 X86Operand *X86AsmParser::ParseIntelOperand() {
-  SMLoc Start = Parser.getTok().getLoc(), End;
-  StringRef AsmTokStr = Parser.getTok().getString();
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc Start = Tok.getLoc(), End;
 
   // Offset, length, type and size operators.
   if (isParsingInlineAsm()) {
+    StringRef AsmTokStr = Tok.getString();
     if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
-      return ParseIntelOffsetOfOperator(Start);
+      return ParseIntelOffsetOfOperator();
     if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
-      return ParseIntelOperator(Start, IOK_LENGTH);
+      return ParseIntelOperator(IOK_LENGTH);
     if (AsmTokStr == "size" || AsmTokStr == "SIZE")
-      return ParseIntelOperator(Start, IOK_SIZE);
+      return ParseIntelOperator(IOK_SIZE);
     if (AsmTokStr == "type" || AsmTokStr == "TYPE")
-      return ParseIntelOperator(Start, IOK_TYPE);
+      return ParseIntelOperator(IOK_TYPE);
   }
 
   // Immediate.
-  if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
-      getLexer().is(AsmToken::Minus)) {
-    const MCExpr *Val;
-    if (!getParser().parseExpression(Val, End)) {
-      return X86Operand::CreateImm(Val, Start, End);
+  if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
+      getLexer().is(AsmToken::LParen)) {    
+    AsmToken StartTok = Tok;
+    IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
+                             /*AddImmPrefix=*/false);
+    if (X86Operand *Err = ParseIntelExpression(SM, End))
+      return Err;
+
+    int64_t Imm = SM.getImm();
+    if (isParsingInlineAsm()) {
+      unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
+      if (StartTok.getString().size() == Len)
+        // Just add a prefix if this wasn't a complex immediate expression.
+        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
+      else
+        // Otherwise, rewrite the complex expression as a single immediate.
+        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
+    }
+
+    if (getLexer().isNot(AsmToken::LBrac)) {
+      const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
+      return X86Operand::CreateImm(ImmExpr, Start, End);
     }
+
+    // Only positive immediates are valid.
+    if (Imm < 0)
+      return ErrorOperand(Start, "expected a positive immediate displacement "
+                          "before bracketed expr.");
+
+    // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+    return ParseIntelMemOperand(/*SegReg=*/0, Imm, Start);
   }
 
   // Register.
@@ -1239,11 +1633,11 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
       return X86Operand::CreateReg(RegNo, Start, End);
 
     getParser().Lex(); // Eat the colon.
-    return ParseIntelMemOperand(RegNo, Start);
+    return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
   }
 
   // Memory operand.
-  return ParseIntelMemOperand(0, Start);
+  return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
 }
 
 X86Operand *X86AsmParser::ParseATTOperand() {
@@ -1267,7 +1661,6 @@ X86Operand *X86AsmParser::ParseATTOperand() {
     if (getLexer().isNot(AsmToken::Colon))
       return X86Operand::CreateReg(RegNo, Start, End);
 
-
     getParser().Lex(); // Eat the colon.
     return ParseMemOperand(RegNo, Start);
   }
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 85d8a991dd6e..e40edba6d689 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -61,7 +61,7 @@ static int modRMRequired(OpcodeType type,
                          InstructionContext insnContext,
                          uint8_t opcode) {
   const struct ContextDecision* decision = 0;
-  
+
   switch (type) {
   case ONEBYTE:
     decision = &ONEBYTE_SYM;
@@ -102,7 +102,7 @@ static InstrUID decode(OpcodeType type,
                        uint8_t opcode,
                        uint8_t modRM) {
   const struct ModRMDecision* dec = 0;
-  
+
   switch (type) {
   case ONEBYTE:
     dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
@@ -123,7 +123,7 @@ static InstrUID decode(OpcodeType type,
     dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
     break;
   }
-  
+
   switch (dec->modrm_type) {
   default:
     debug("Corrupt table!  Unknown modrm_type");
@@ -171,10 +171,10 @@ static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
  */
 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
   int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
-  
+
   if (!ret)
     ++(insn->readerCursor);
-  
+
   return ret;
 }
 
@@ -238,19 +238,19 @@ CONSUME_FUNC(consumeUInt64, uint64_t)
  */
 static void dbgprintf(struct InternalInstruction* insn,
                       const char* format,
-                      ...) {  
+                      ...) {
   char buffer[256];
   va_list ap;
-  
+
   if (!insn->dlog)
     return;
-    
+
   va_start(ap, format);
   (void)vsnprintf(buffer, sizeof(buffer), format, ap);
   va_end(ap);
-  
+
   insn->dlog(insn->dlogArg, buffer);
-  
+
   return;
 }
 
@@ -305,27 +305,40 @@ static int readPrefixes(struct InternalInstruction* insn) {
   BOOL prefixGroups[4] = { FALSE };
   uint64_t prefixLocation;
   uint8_t byte = 0;
-  
+
   BOOL hasAdSize = FALSE;
   BOOL hasOpSize = FALSE;
-  
+
   dbgprintf(insn, "readPrefixes()");
-    
+
   while (isPrefix) {
     prefixLocation = insn->readerCursor;
-    
+
     if (consumeByte(insn, &byte))
       return -1;
 
     /*
-     * If the first byte is a LOCK prefix break and let it be disassembled
-     * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>.
-     * FIXME there is currently no way to get the disassembler to print the
-     * lock prefix if it is not the first byte.
+     * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
+     * break and let it be disassembled as a normal "instruction".
      */
-    if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
-      break;
-    
+    if (insn->readerCursor - 1 == insn->startLocation
+        && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) {
+      uint8_t nextByte;
+      if (byte == 0xf0)
+        break;
+      if (lookAtByte(insn, &nextByte))
+        return -1;
+      if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
+        if (consumeByte(insn, &nextByte))
+          return -1;
+        if (lookAtByte(insn, &nextByte))
+          return -1;
+        unconsumeByte(insn);
+      }
+      if (nextByte != 0x0f && nextByte != 0x90)
+        break;
+    }
+
     switch (byte) {
     case 0xf0:  /* LOCK */
     case 0xf2:  /* REPNE/REPNZ */
@@ -387,21 +400,21 @@ static int readPrefixes(struct InternalInstruction* insn) {
       isPrefix = FALSE;
       break;
     }
-    
+
     if (isPrefix)
       dbgprintf(insn, "Found prefix 0x%hhx", byte);
   }
-    
+
   insn->vexSize = 0;
-  
+
   if (byte == 0xc4) {
     uint8_t byte1;
-      
+
     if (lookAtByte(insn, &byte1)) {
       dbgprintf(insn, "Couldn't read second byte of VEX");
       return -1;
     }
-    
+
     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
       insn->vexSize = 3;
       insn->necessaryPrefixLocation = insn->readerCursor - 1;
@@ -410,67 +423,67 @@ static int readPrefixes(struct InternalInstruction* insn) {
       unconsumeByte(insn);
       insn->necessaryPrefixLocation = insn->readerCursor - 1;
     }
-    
+
     if (insn->vexSize == 3) {
       insn->vexPrefix[0] = byte;
       consumeByte(insn, &insn->vexPrefix[1]);
       consumeByte(insn, &insn->vexPrefix[2]);
 
       /* We simulate the REX prefix for simplicity's sake */
-   
+
       if (insn->mode == MODE_64BIT) {
-        insn->rexPrefix = 0x40 
+        insn->rexPrefix = 0x40
                         | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
                         | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
                         | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
                         | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
       }
-    
+
       switch (ppFromVEX3of3(insn->vexPrefix[2]))
       {
       default:
         break;
       case VEX_PREFIX_66:
-        hasOpSize = TRUE;      
+        hasOpSize = TRUE;
         break;
       }
-    
+
       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
     }
   }
   else if (byte == 0xc5) {
     uint8_t byte1;
-    
+
     if (lookAtByte(insn, &byte1)) {
       dbgprintf(insn, "Couldn't read second byte of VEX");
       return -1;
     }
-      
+
     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
       insn->vexSize = 2;
     }
     else {
       unconsumeByte(insn);
     }
-    
+
     if (insn->vexSize == 2) {
       insn->vexPrefix[0] = byte;
       consumeByte(insn, &insn->vexPrefix[1]);
-        
+
       if (insn->mode == MODE_64BIT) {
-        insn->rexPrefix = 0x40 
+        insn->rexPrefix = 0x40
                         | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
       }
-        
+
       switch (ppFromVEX2of2(insn->vexPrefix[1]))
       {
       default:
         break;
       case VEX_PREFIX_66:
-        hasOpSize = TRUE;      
+        hasOpSize = TRUE;
         break;
       }
-         
+
       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
     }
   }
@@ -478,17 +491,17 @@ static int readPrefixes(struct InternalInstruction* insn) {
     if (insn->mode == MODE_64BIT) {
       if ((byte & 0xf0) == 0x40) {
         uint8_t opcodeByte;
-          
+
         if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
           dbgprintf(insn, "Redundant REX prefix");
           return -1;
         }
-          
+
         insn->rexPrefix = byte;
         insn->necessaryPrefixLocation = insn->readerCursor - 2;
-          
+
         dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
-      } else {                
+      } else {
         unconsumeByte(insn);
         insn->necessaryPrefixLocation = insn->readerCursor - 1;
       }
@@ -526,7 +539,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
       insn->immediateSize      = (hasOpSize ? 2 : 4);
     }
   }
-  
+
   return 0;
 }
 
@@ -537,22 +550,22 @@ static int readPrefixes(struct InternalInstruction* insn) {
  * @param insn  - The instruction whose opcode is to be read.
  * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
  */
-static int readOpcode(struct InternalInstruction* insn) {  
+static int readOpcode(struct InternalInstruction* insn) {
   /* Determine the length of the primary opcode */
-  
+
   uint8_t current;
-  
+
   dbgprintf(insn, "readOpcode()");
-  
+
   insn->opcodeType = ONEBYTE;
-    
+
   if (insn->vexSize == 3)
   {
     switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
     {
     default:
       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
-      return -1;      
+      return -1;
     case 0:
       break;
     case VEX_LOB_0F:
@@ -564,7 +577,7 @@ static int readOpcode(struct InternalInstruction* insn) {
       insn->threeByteEscape = 0x38;
       insn->opcodeType = THREEBYTE_38;
       return consumeByte(insn, &insn->opcode);
-    case VEX_LOB_0F3A:    
+    case VEX_LOB_0F3A:
       insn->twoByteEscape = 0x0f;
       insn->threeByteEscape = 0x3a;
       insn->opcodeType = THREEBYTE_3A;
@@ -577,68 +590,68 @@ static int readOpcode(struct InternalInstruction* insn) {
     insn->opcodeType = TWOBYTE;
     return consumeByte(insn, &insn->opcode);
   }
-    
+
   if (consumeByte(insn, &current))
     return -1;
-  
+
   if (current == 0x0f) {
     dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
-    
+
     insn->twoByteEscape = current;
-    
+
     if (consumeByte(insn, &current))
       return -1;
-    
+
     if (current == 0x38) {
       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-      
+
       insn->threeByteEscape = current;
-      
+
       if (consumeByte(insn, &current))
         return -1;
-      
+
       insn->opcodeType = THREEBYTE_38;
     } else if (current == 0x3a) {
       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-      
+
       insn->threeByteEscape = current;
-      
+
       if (consumeByte(insn, &current))
         return -1;
-      
+
       insn->opcodeType = THREEBYTE_3A;
     } else if (current == 0xa6) {
       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-      
+
       insn->threeByteEscape = current;
-      
+
       if (consumeByte(insn, &current))
         return -1;
-      
+
       insn->opcodeType = THREEBYTE_A6;
     } else if (current == 0xa7) {
       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-      
+
       insn->threeByteEscape = current;
-      
+
       if (consumeByte(insn, &current))
         return -1;
-      
+
       insn->opcodeType = THREEBYTE_A7;
     } else {
       dbgprintf(insn, "Didn't find a three-byte escape prefix");
-      
+
       insn->opcodeType = TWOBYTE;
     }
   }
-  
+
   /*
    * At this point we have consumed the full opcode.
    * Anything we consume from here on must be unconsumed.
    */
-  
+
   insn->opcode = current;
-  
+
   return 0;
 }
 
@@ -660,19 +673,19 @@ static int getIDWithAttrMask(uint16_t* instructionID,
                              struct InternalInstruction* insn,
                              uint8_t attrMask) {
   BOOL hasModRMExtension;
-  
+
   uint8_t instructionClass;
 
   instructionClass = contextForAttrs(attrMask);
-  
+
   hasModRMExtension = modRMRequired(insn->opcodeType,
                                     instructionClass,
                                     insn->opcode);
-  
+
   if (hasModRMExtension) {
     if (readModRM(insn))
       return -1;
-    
+
     *instructionID = decode(insn->opcodeType,
                             instructionClass,
                             insn->opcode,
@@ -683,7 +696,7 @@ static int getIDWithAttrMask(uint16_t* instructionID,
                             insn->opcode,
                             0);
   }
-      
+
   return 0;
 }
 
@@ -696,7 +709,7 @@ static int getIDWithAttrMask(uint16_t* instructionID,
  */
 static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
   off_t i;
-  
+
   for (i = 0;; i++) {
     if (orig[i] == '\0' && equiv[i] == '\0')
       return TRUE;
@@ -715,8 +728,8 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
 }
 
 /*
- * getID - Determines the ID of an instruction, consuming the ModR/M byte as 
- *   appropriate for extended and escape opcodes.  Determines the attributes and 
+ * getID - Determines the ID of an instruction, consuming the ModR/M byte as
+ *   appropriate for extended and escape opcodes.  Determines the attributes and
  *   context for the instruction before doing so.
  *
  * @param insn  - The instruction whose ID is to be determined.
@@ -726,21 +739,21 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
 static int getID(struct InternalInstruction* insn, const void *miiArg) {
   uint8_t attrMask;
   uint16_t instructionID;
-  
+
   dbgprintf(insn, "getID()");
-    
+
   attrMask = ATTR_NONE;
 
   if (insn->mode == MODE_64BIT)
     attrMask |= ATTR_64BIT;
-    
+
   if (insn->vexSize) {
     attrMask |= ATTR_VEX;
 
     if (insn->vexSize == 3) {
       switch (ppFromVEX3of3(insn->vexPrefix[2])) {
       case VEX_PREFIX_66:
-        attrMask |= ATTR_OPSIZE;    
+        attrMask |= ATTR_OPSIZE;
         break;
       case VEX_PREFIX_F3:
         attrMask |= ATTR_XS;
@@ -749,14 +762,14 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
         attrMask |= ATTR_XD;
         break;
       }
-    
+
       if (lFromVEX3of3(insn->vexPrefix[2]))
         attrMask |= ATTR_VEXL;
     }
     else if (insn->vexSize == 2) {
       switch (ppFromVEX2of2(insn->vexPrefix[1])) {
       case VEX_PREFIX_66:
-        attrMask |= ATTR_OPSIZE;    
+        attrMask |= ATTR_OPSIZE;
         break;
       case VEX_PREFIX_F3:
         attrMask |= ATTR_XS;
@@ -765,7 +778,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
         attrMask |= ATTR_XD;
         break;
       }
-    
+
       if (lFromVEX2of2(insn->vexPrefix[1]))
         attrMask |= ATTR_VEXL;
     }
@@ -836,26 +849,26 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
      * conservative, but in the specific case where OpSize is present but not
      * in the right place we check if there's a 16-bit operation.
      */
-    
+
     const struct InstructionSpecifier *spec;
     uint16_t instructionIDWithOpsize;
     const char *specName, *specWithOpSizeName;
-    
+
     spec = specifierForUID(instructionID);
-    
+
     if (getIDWithAttrMask(&instructionIDWithOpsize,
                           insn,
                           attrMask | ATTR_OPSIZE)) {
-      /* 
+      /*
        * ModRM required with OpSize but not present; give up and return version
        * without OpSize set
        */
-      
+
       insn->instructionID = instructionID;
       insn->spec = spec;
       return 0;
     }
-    
+
     specName = x86DisassemblerGetInstrName(instructionID, miiArg);
     specWithOpSizeName =
       x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
@@ -882,10 +895,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
     const struct InstructionSpecifier *specWithNewOpcode;
 
     spec = specifierForUID(instructionID);
-    
+
     /* Borrow opcode from one of the other XCHGar opcodes */
     insn->opcode = 0x91;
-   
+
     if (getIDWithAttrMask(&instructionIDWithNewOpcode,
                           insn,
                           attrMask)) {
@@ -906,10 +919,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
 
     return 0;
   }
-  
+
   insn->instructionID = instructionID;
   insn->spec = specifierForUID(insn->instructionID);
-  
+
   return 0;
 }
 
@@ -924,14 +937,14 @@ static int readSIB(struct InternalInstruction* insn) {
   SIBIndex sibIndexBase = 0;
   SIBBase sibBaseBase = 0;
   uint8_t index, base;
-  
+
   dbgprintf(insn, "readSIB()");
-  
+
   if (insn->consumedSIB)
     return 0;
-  
+
   insn->consumedSIB = TRUE;
-  
+
   switch (insn->addressSize) {
   case 2:
     dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
@@ -949,9 +962,9 @@ static int readSIB(struct InternalInstruction* insn) {
 
   if (consumeByte(insn, &insn->sib))
     return -1;
-  
+
   index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
-  
+
   switch (index) {
   case 0x4:
     insn->sibIndex = SIB_INDEX_NONE;
@@ -963,7 +976,7 @@ static int readSIB(struct InternalInstruction* insn) {
       insn->sibIndex = SIB_INDEX_NONE;
     break;
   }
-  
+
   switch (scaleFromSIB(insn->sib)) {
   case 0:
     insn->sibScale = 1;
@@ -978,9 +991,9 @@ static int readSIB(struct InternalInstruction* insn) {
     insn->sibScale = 8;
     break;
   }
-  
+
   base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
-  
+
   switch (base) {
   case 0x5:
     switch (modFromModRM(insn->modRM)) {
@@ -990,12 +1003,12 @@ static int readSIB(struct InternalInstruction* insn) {
       break;
     case 0x1:
       insn->eaDisplacement = EA_DISP_8;
-      insn->sibBase = (insn->addressSize == 4 ? 
+      insn->sibBase = (insn->addressSize == 4 ?
                        SIB_BASE_EBP : SIB_BASE_RBP);
       break;
     case 0x2:
       insn->eaDisplacement = EA_DISP_32;
-      insn->sibBase = (insn->addressSize == 4 ? 
+      insn->sibBase = (insn->addressSize == 4 ?
                        SIB_BASE_EBP : SIB_BASE_RBP);
       break;
     case 0x3:
@@ -1007,7 +1020,7 @@ static int readSIB(struct InternalInstruction* insn) {
     insn->sibBase = (SIBBase)(sibBaseBase + base);
     break;
   }
-  
+
   return 0;
 }
 
@@ -1015,22 +1028,22 @@ static int readSIB(struct InternalInstruction* insn) {
  * readDisplacement - Consumes the displacement of an instruction.
  *
  * @param insn  - The instruction whose displacement is to be read.
- * @return      - 0 if the displacement byte was successfully read; nonzero 
+ * @return      - 0 if the displacement byte was successfully read; nonzero
  *                otherwise.
  */
-static int readDisplacement(struct InternalInstruction* insn) {  
+static int readDisplacement(struct InternalInstruction* insn) {
   int8_t d8;
   int16_t d16;
   int32_t d32;
-  
+
   dbgprintf(insn, "readDisplacement()");
-  
+
   if (insn->consumedDisplacement)
     return 0;
-  
+
   insn->consumedDisplacement = TRUE;
   insn->displacementOffset = insn->readerCursor - insn->startLocation;
-  
+
   switch (insn->eaDisplacement) {
   case EA_DISP_NONE:
     insn->consumedDisplacement = FALSE;
@@ -1051,7 +1064,7 @@ static int readDisplacement(struct InternalInstruction* insn) {
     insn->displacement = d32;
     break;
   }
-  
+
   insn->consumedDisplacement = TRUE;
   return 0;
 }
@@ -1063,22 +1076,22 @@ static int readDisplacement(struct InternalInstruction* insn) {
  * @param insn  - The instruction whose addressing information is to be read.
  * @return      - 0 if the information was successfully read; nonzero otherwise.
  */
-static int readModRM(struct InternalInstruction* insn) {  
+static int readModRM(struct InternalInstruction* insn) {
   uint8_t mod, rm, reg;
-  
+
   dbgprintf(insn, "readModRM()");
-  
+
   if (insn->consumedModRM)
     return 0;
-  
+
   if (consumeByte(insn, &insn->modRM))
     return -1;
   insn->consumedModRM = TRUE;
-  
+
   mod     = modFromModRM(insn->modRM);
   rm      = rmFromModRM(insn->modRM);
   reg     = regFromModRM(insn->modRM);
-  
+
   /*
    * This goes by insn->registerSize to pick the correct register, which messes
    * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
@@ -1098,16 +1111,16 @@ static int readModRM(struct InternalInstruction* insn) {
     insn->eaRegBase = EA_REG_RAX;
     break;
   }
-  
+
   reg |= rFromREX(insn->rexPrefix) << 3;
   rm  |= bFromREX(insn->rexPrefix) << 3;
-  
+
   insn->reg = (Reg)(insn->regBase + reg);
-  
+
   switch (insn->addressSize) {
   case 2:
     insn->eaBaseBase = EA_BASE_BX_SI;
-     
+
     switch (mod) {
     case 0x0:
       if (rm == 0x6) {
@@ -1142,14 +1155,14 @@ static int readModRM(struct InternalInstruction* insn) {
   case 4:
   case 8:
     insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
-    
+
     switch (mod) {
     case 0x0:
       insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
       switch (rm) {
       case 0x4:
       case 0xc:   /* in case REXW.b is set */
-        insn->eaBase = (insn->addressSize == 4 ? 
+        insn->eaBase = (insn->addressSize == 4 ?
                         EA_BASE_sib : EA_BASE_sib64);
         readSIB(insn);
         if (readDisplacement(insn))
@@ -1191,7 +1204,7 @@ static int readModRM(struct InternalInstruction* insn) {
     }
     break;
   } /* switch (insn->addressSize) */
-  
+
   return 0;
 }
 
@@ -1274,12 +1287,12 @@ GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
  * @return      - 0 if fixup was successful; -1 if the register returned was
  *                invalid for its class.
  */
-static int fixupReg(struct InternalInstruction *insn, 
+static int fixupReg(struct InternalInstruction *insn,
                     const struct OperandSpecifier *op) {
   uint8_t valid;
-  
+
   dbgprintf(insn, "fixupReg()");
-  
+
   switch ((OperandEncoding)op->encoding) {
   default:
     debug("Expected a REG or R/M encoding in fixupReg");
@@ -1311,12 +1324,12 @@ static int fixupReg(struct InternalInstruction *insn,
     }
     break;
   }
-  
+
   return 0;
 }
 
 /*
- * readOpcodeModifier - Reads an operand from the opcode field of an 
+ * readOpcodeModifier - Reads an operand from the opcode field of an
  *   instruction.  Handles AddRegFrm instructions.
  *
  * @param insn    - The instruction whose opcode field is to be read.
@@ -1326,12 +1339,12 @@ static int fixupReg(struct InternalInstruction *insn,
  */
 static int readOpcodeModifier(struct InternalInstruction* insn) {
   dbgprintf(insn, "readOpcodeModifier()");
-  
+
   if (insn->consumedOpcodeModifier)
     return 0;
-  
+
   insn->consumedOpcodeModifier = TRUE;
-  
+
   switch (insn->spec->modifierType) {
   default:
     debug("Unknown modifier type.");
@@ -1345,11 +1358,11 @@ static int readOpcodeModifier(struct InternalInstruction* insn) {
   case MODIFIER_MODRM:
     insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
     return 0;
-  }  
+  }
 }
 
 /*
- * readOpcodeRegister - Reads an operand from the opcode field of an 
+ * readOpcodeRegister - Reads an operand from the opcode field of an
  *   instruction and interprets it appropriately given the operand width.
  *   Handles AddRegFrm instructions.
  *
@@ -1364,39 +1377,39 @@ static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
 
   if (readOpcodeModifier(insn))
     return -1;
-  
+
   if (size == 0)
     size = insn->registerSize;
-  
+
   switch (size) {
   case 1:
-    insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 
+    insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
                                                   | insn->opcodeModifier));
-    if (insn->rexPrefix && 
+    if (insn->rexPrefix &&
         insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
         insn->opcodeRegister < MODRM_REG_AL + 0x8) {
       insn->opcodeRegister = (Reg)(MODRM_REG_SPL
                                    + (insn->opcodeRegister - MODRM_REG_AL - 4));
     }
-      
+
     break;
   case 2:
     insn->opcodeRegister = (Reg)(MODRM_REG_AX
-                                 + ((bFromREX(insn->rexPrefix) << 3) 
+                                 + ((bFromREX(insn->rexPrefix) << 3)
                                     | insn->opcodeModifier));
     break;
   case 4:
     insn->opcodeRegister = (Reg)(MODRM_REG_EAX
-                                 + ((bFromREX(insn->rexPrefix) << 3) 
+                                 + ((bFromREX(insn->rexPrefix) << 3)
                                     | insn->opcodeModifier));
     break;
   case 8:
-    insn->opcodeRegister = (Reg)(MODRM_REG_RAX 
-                                 + ((bFromREX(insn->rexPrefix) << 3) 
+    insn->opcodeRegister = (Reg)(MODRM_REG_RAX
+                                 + ((bFromREX(insn->rexPrefix) << 3)
                                     | insn->opcodeModifier));
     break;
   }
-  
+
   return 0;
 }
 
@@ -1414,20 +1427,20 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
   uint16_t imm16;
   uint32_t imm32;
   uint64_t imm64;
-  
+
   dbgprintf(insn, "readImmediate()");
-  
+
   if (insn->numImmediatesConsumed == 2) {
     debug("Already consumed two immediates");
     return -1;
   }
-  
+
   if (size == 0)
     size = insn->immediateSize;
   else
     insn->immediateSize = size;
   insn->immediateOffset = insn->readerCursor - insn->startLocation;
-  
+
   switch (size) {
   case 1:
     if (consumeByte(insn, &imm8))
@@ -1450,9 +1463,9 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
     insn->immediates[insn->numImmediatesConsumed] = imm64;
     break;
   }
-  
+
   insn->numImmediatesConsumed++;
-  
+
   return 0;
 }
 
@@ -1465,7 +1478,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
  */
 static int readVVVV(struct InternalInstruction* insn) {
   dbgprintf(insn, "readVVVV()");
-        
+
   if (insn->vexSize == 3)
     insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
   else if (insn->vexSize == 2)
@@ -1490,14 +1503,14 @@ static int readOperands(struct InternalInstruction* insn) {
   int index;
   int hasVVVV, needVVVV;
   int sawRegImm = 0;
-  
+
   dbgprintf(insn, "readOperands()");
 
   /* If non-zero vvvv specified, need to make sure one of the operands
      uses it. */
   hasVVVV = !readVVVV(insn);
   needVVVV = hasVVVV && (insn->vvvv != 0);
-  
+
   for (index = 0; index < X86_MAX_OPERANDS; ++index) {
     switch (x86OperandSets[insn->spec->operands][index].encoding) {
     case ENCODING_NONE:
@@ -1599,7 +1612,7 @@ static int readOperands(struct InternalInstruction* insn) {
 
   /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
   if (needVVVV) return -1;
-  
+
   return 0;
 }
 
@@ -1607,7 +1620,7 @@ static int readOperands(struct InternalInstruction* insn) {
  * decodeInstruction - Reads and interprets a full instruction provided by the
  *   user.
  *
- * @param insn      - A pointer to the instruction to be populated.  Must be 
+ * @param insn      - A pointer to the instruction to be populated.  Must be
  *                    pre-allocated.
  * @param reader    - The function to be used to read the instruction's bytes.
  * @param readerArg - A generic argument to be passed to the reader to store
@@ -1632,7 +1645,7 @@ int decodeInstruction(struct InternalInstruction* insn,
                       uint64_t startLoc,
                       DisassemblerMode mode) {
   memset(insn, 0, sizeof(struct InternalInstruction));
-    
+
   insn->reader = reader;
   insn->readerArg = readerArg;
   insn->dlog = logger;
@@ -1641,7 +1654,7 @@ int decodeInstruction(struct InternalInstruction* insn,
   insn->readerCursor = startLoc;
   insn->mode = mode;
   insn->numImmediatesConsumed = 0;
-  
+
   if (readPrefixes(insn)       ||
       readOpcode(insn)         ||
       getID(insn, miiArg)      ||
@@ -1650,14 +1663,14 @@ int decodeInstruction(struct InternalInstruction* insn,
     return -1;
 
   insn->operands = &x86OperandSets[insn->spec->operands][0];
-  
+
   insn->length = insn->readerCursor - insn->startLocation;
-  
+
   dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
             startLoc, insn->readerCursor, insn->length);
-    
+
   if (insn->length > 15)
     dbgprintf(insn, "Instruction exceeds 15-byte limit");
-  
+
   return 0;
 }
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 9e68388cf2a1..d8f727887f23 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -20,6 +20,7 @@
 #include "X86MCTargetDesc.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCInstrInfo.h"
 
 namespace llvm {
 
@@ -41,7 +42,6 @@ namespace X86 {
     AddrNumOperands = 5
   };
 } // end namespace X86;
- 
 
 /// X86II - This namespace holds all of the target specific flags that
 /// instruction info tracks.
@@ -274,11 +274,12 @@ namespace X86II {
 
     //// MRM_XX - A mod/rm byte of exactly 0xXX.
     MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36,
-    MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40,
-    MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46,
-    MRM_D4 = 47, MRM_D5 = 48, MRM_D8 = 49, MRM_D9 = 50,
-    MRM_DA = 51, MRM_DB = 52, MRM_DC = 53, MRM_DD = 54,
-    MRM_DE = 55, MRM_DF = 56,
+    MRM_C8 = 37, MRM_C9 = 38, MRM_CA = 39, MRM_CB = 40,
+    MRM_E8 = 41, MRM_F0 = 42, MRM_F8 = 45, MRM_F9 = 46,
+    MRM_D0 = 47, MRM_D1 = 48, MRM_D4 = 49, MRM_D5 = 50,
+    MRM_D6 = 51, MRM_D8 = 52, MRM_D9 = 53, MRM_DA = 54,
+    MRM_DB = 55, MRM_DC = 56, MRM_DD = 57, MRM_DE = 58,
+    MRM_DF = 59,
 
     /// RawFrmImm8 - This is used for the ENTER instruction, which has two
     /// immediates, the first of which is a 16-bit immediate (specified by
@@ -521,6 +522,26 @@ namespace X86II {
     }
   }
 
+  /// getOperandBias - compute any additional adjustment needed to
+  ///                  the offset to the start of the memory operand
+  ///                  in this instruction.
+  /// If this is a two-address instruction,skip one of the register operands.
+  /// FIXME: This should be handled during MCInst lowering.
+  inline int getOperandBias(const MCInstrDesc& Desc)
+  {
+    unsigned NumOps = Desc.getNumOperands();
+    unsigned CurOp = 0;
+    if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
+      ++CurOp;
+    else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
+      assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
+      // Special case for GATHER with 2 TIED_TO operands
+      // Skip the first 2 operands: dst, mask_wb
+      CurOp += 2;
+    }
+    return CurOp;
+  }
+
   /// getMemoryOperandNo - The function returns the MCInst operand # for the
   /// first field of the memory operand.  If the instruction doesn't have a
   /// memory operand, this returns -1.
@@ -574,17 +595,15 @@ namespace X86II {
         ++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV).
       return FirstMemOp;
     }
-    case X86II::MRM_C1: case X86II::MRM_C2:
-    case X86II::MRM_C3: case X86II::MRM_C4:
-    case X86II::MRM_C8: case X86II::MRM_C9:
-    case X86II::MRM_E8: case X86II::MRM_F0:
-    case X86II::MRM_F8: case X86II::MRM_F9:
-    case X86II::MRM_D0: case X86II::MRM_D1:
-    case X86II::MRM_D4: case X86II::MRM_D5:
-    case X86II::MRM_D8: case X86II::MRM_D9:
-    case X86II::MRM_DA: case X86II::MRM_DB:
-    case X86II::MRM_DC: case X86II::MRM_DD:
-    case X86II::MRM_DE: case X86II::MRM_DF:
+    case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
+    case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
+    case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_E8:
+    case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9:
+    case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
+    case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
+    case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
+    case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
+    case X86II::MRM_DF:
       return -1;
     }
   }
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 5fbefaec5edb..182bec1e8415 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -979,18 +979,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
     return;
 
-  // If this is a two-address instruction, skip one of the register operands.
-  // FIXME: This should be handled during MCInst lowering.
   unsigned NumOps = Desc.getNumOperands();
-  unsigned CurOp = 0;
-  if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
-    ++CurOp;
-  else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
-    assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
-    // Special case for GATHER with 2 TIED_TO operands
-    // Skip the first 2 operands: dst, mask_wb
-    CurOp += 2;
-  }
+  unsigned CurOp = X86II::getOperandBias(Desc);
 
   // Keep track of the current byte being emitted.
   unsigned CurByte = 0;
@@ -1136,17 +1126,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                      TSFlags, CurByte, OS, Fixups);
     CurOp += X86::AddrNumOperands;
     break;
-  case X86II::MRM_C1: case X86II::MRM_C2:
-  case X86II::MRM_C3: case X86II::MRM_C4:
-  case X86II::MRM_C8: case X86II::MRM_C9:
-  case X86II::MRM_D0: case X86II::MRM_D1:
-  case X86II::MRM_D4: case X86II::MRM_D5:
-  case X86II::MRM_D8: case X86II::MRM_D9:
-  case X86II::MRM_DA: case X86II::MRM_DB:
-  case X86II::MRM_DC: case X86II::MRM_DD:
-  case X86II::MRM_DE: case X86II::MRM_DF:
-  case X86II::MRM_E8: case X86II::MRM_F0:
-  case X86II::MRM_F8: case X86II::MRM_F9:
+  case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
+  case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
+  case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_D0:
+  case X86II::MRM_D1: case X86II::MRM_D4: case X86II::MRM_D5:
+  case X86II::MRM_D6: case X86II::MRM_D8: case X86II::MRM_D9:
+  case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC:
+  case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF:
+  case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8:
+  case X86II::MRM_F9:
     EmitByte(BaseOpcode, CurByte, OS);
 
     unsigned char MRM;
@@ -1158,10 +1146,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     case X86II::MRM_C4: MRM = 0xC4; break;
     case X86II::MRM_C8: MRM = 0xC8; break;
     case X86II::MRM_C9: MRM = 0xC9; break;
+    case X86II::MRM_CA: MRM = 0xCA; break;
+    case X86II::MRM_CB: MRM = 0xCB; break;
     case X86II::MRM_D0: MRM = 0xD0; break;
     case X86II::MRM_D1: MRM = 0xD1; break;
     case X86II::MRM_D4: MRM = 0xD4; break;
     case X86II::MRM_D5: MRM = 0xD5; break;
+    case X86II::MRM_D6: MRM = 0xD6; break;
     case X86II::MRM_D8: MRM = 0xD8; break;
     case X86II::MRM_D9: MRM = 0xD9; break;
     case X86II::MRM_DA: MRM = 0xDA; break;
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index bc272efcc9ce..ed64a32eeff2 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -9,6 +9,8 @@
 
 #include "MCTargetDesc/X86FixupKinds.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
 #include "llvm/MC/MCWinCOFFObjectWriter.h"
 #include "llvm/Support/COFF.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -27,7 +29,9 @@ namespace {
     X86WinCOFFObjectWriter(bool Is64Bit_);
     ~X86WinCOFFObjectWriter();
 
-    virtual unsigned getRelocType(unsigned FixupKind) const;
+    virtual unsigned getRelocType(const MCValue &Target,
+                                  const MCFixup &Fixup,
+                                  bool IsCrossSection) const LLVM_OVERRIDE;
   };
 }
 
@@ -38,7 +42,14 @@ X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_)
 
 X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {}
 
-unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const {
+unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target,
+                                              const MCFixup &Fixup,
+                                              bool IsCrossSection) const {
+  unsigned FixupKind = IsCrossSection ? FK_PCRel_4 : Fixup.getKind();
+
+  MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+    MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
   switch (FixupKind) {
   case FK_PCRel_4:
   case X86::reloc_riprel_4byte:
@@ -46,6 +57,9 @@ unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const {
     return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32;
   case FK_Data_4:
   case X86::reloc_signed_4byte:
+    if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32)
+      return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32NB :
+                       COFF::IMAGE_REL_I386_DIR32NB;
     return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32;
   case FK_Data_8:
     if (Is64Bit)
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 0216252c19b3..306e3ac1afb5 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -120,8 +120,14 @@ def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
                                       "Support BMI2 instructions">;
 def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
                                       "Support RTM instructions">;
+def FeatureHLE     : SubtargetFeature<"hle", "HasHLE", "true",
+                                      "Support HLE">;
 def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
                                       "Support ADX instructions">;
+def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
+                                      "Support PRFCHW instructions">;
+def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
+                                      "Support RDSEED instruction">;
 def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                      "Use LEA for adjusting the stack pointer">;
 def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
@@ -130,6 +136,9 @@ def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                      "PadShortFunctions", "true",
                                      "Pad short functions">;
+def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
+                                     "CallRegIndirect", "true",
+                                     "Call register indirect">;
 
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
@@ -143,9 +152,6 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
 class Proc<string Name, list<SubtargetFeature> Features>
  : ProcessorModel<Name, GenericModel, Features>;
 
-class AtomProc<string Name, list<SubtargetFeature> Features>
- : ProcessorModel<Name, AtomModel, Features>;
-
 def : Proc<"generic",         []>;
 def : Proc<"i386",            []>;
 def : Proc<"i486",            []>;
@@ -162,46 +168,61 @@ def : Proc<"pentium4",        [FeatureSSE2]>;
 def : Proc<"pentium4m",       [FeatureSSE2, FeatureSlowBTMem]>;
 def : Proc<"x86-64",          [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
                                FeatureFastUAMem]>;
-def : Proc<"yonah",           [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"prescott",        [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona",          [FeatureSSE3, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem]>;
-def : Proc<"core2",           [FeatureSSSE3, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem]>;
-def : Proc<"penryn",          [FeatureSSE41, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem]>;
-def : AtomProc<"atom",        [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
-                               FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                               FeatureSlowDivide, FeaturePadShortFunctions]>;
+// Intel Core Duo.
+def : ProcessorModel<"yonah", SandyBridgeModel,
+                     [FeatureSSE3, FeatureSlowBTMem]>;
+
+// NetBurst.
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona",   [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Intel Core 2 Solo/Duo.
+def : ProcessorModel<"core2", SandyBridgeModel,
+                     [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+def : ProcessorModel<"penryn", SandyBridgeModel,
+                     [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Atom.
+def : ProcessorModel<"atom", AtomModel,
+                     [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
+                      FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
+                      FeatureSlowDivide,
+                      FeatureCallRegIndirect,
+                      FeaturePadShortFunctions]>;
+
 // "Arrandale" along with corei3 and corei5
-def : Proc<"corei7",          [FeatureSSE42, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem, FeatureFastUAMem,
-                               FeaturePOPCNT, FeatureAES]>;
-def : Proc<"nehalem",         [FeatureSSE42,  FeatureCMPXCHG16B,
-                               FeatureSlowBTMem, FeatureFastUAMem,
-                               FeaturePOPCNT]>;
+def : ProcessorModel<"corei7", SandyBridgeModel,
+                     [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+                      FeatureFastUAMem, FeaturePOPCNT, FeatureAES]>;
+
+def : ProcessorModel<"nehalem", SandyBridgeModel,
+                     [FeatureSSE42,  FeatureCMPXCHG16B, FeatureSlowBTMem,
+                      FeatureFastUAMem, FeaturePOPCNT]>;
 // Westmere is a similar machine to nehalem with some additional features.
 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : Proc<"westmere",        [FeatureSSE42, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem, FeatureFastUAMem,
-                               FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
+def : ProcessorModel<"westmere", SandyBridgeModel,
+                     [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+                      FeatureFastUAMem, FeaturePOPCNT, FeatureAES,
+                      FeaturePCLMUL]>;
 // Sandy Bridge
 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
 // rather than a superset.
-def : Proc<"corei7-avx",      [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
-                               FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
+def : ProcessorModel<"corei7-avx", SandyBridgeModel,
+                     [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+                      FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
 // Ivy Bridge
-def : Proc<"core-avx-i",      [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
-                               FeaturePOPCNT, FeatureAES, FeaturePCLMUL,
-                               FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>;
+def : ProcessorModel<"core-avx-i", SandyBridgeModel,
+                     [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+                      FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+                      FeatureF16C, FeatureFSGSBase]>;
 
 // Haswell
-def : Proc<"core-avx2",       [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
-                               FeaturePOPCNT, FeatureAES, FeaturePCLMUL,
-                               FeatureRDRAND, FeatureF16C, FeatureFSGSBase,
-                               FeatureMOVBE, FeatureLZCNT, FeatureBMI,
-                               FeatureBMI2, FeatureFMA,
-                               FeatureRTM]>;
+def : ProcessorModel<"core-avx2", HaswellModel,
+                     [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
+                      FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+                      FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
+                      FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
+                      FeatureHLE]>;
 
 def : Proc<"k6",              [FeatureMMX]>;
 def : Proc<"k6-2",            [Feature3DNow]>;
@@ -279,6 +300,9 @@ def ATTAsmParser : AsmParser {
 def ATTAsmParserVariant : AsmParserVariant {
   int Variant = 0;
 
+  // Variant name.
+  string Name = "att";
+
   // Discard comments in assembly strings.
   string CommentDelimiter = "#";
 
@@ -289,6 +313,9 @@ def ATTAsmParserVariant : AsmParserVariant {
 def IntelAsmParserVariant : AsmParserVariant {
   int Variant = 1;
 
+  // Variant name.
+  string Name = "intel";
+
   // Discard comments in assembly strings.
   string CommentDelimiter = ";";
 
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index ac5daec2b25e..6b228b0b0329 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -201,7 +201,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
   case X86II::MO_TLVP_PIC_BASE:
     O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
     break;
-  case X86II::MO_SECREL:      O << "@SECREL";      break;
+  case X86II::MO_SECREL:    O << "@SECREL32";  break;
   }
 }
 
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index b516be069699..9eafbd55a5ae 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -387,8 +387,8 @@ def CC_X86_32_ThisCall : CallingConv<[
   // Promote i8/i16 arguments to i32.
   CCIfType<[i8, i16], CCPromoteToType<i32>>,
 
-  // Pass sret arguments indirectly through EAX
-  CCIfSRet<CCAssignToReg<[EAX]>>,
+  // Pass sret arguments indirectly through stack.
+  CCIfSRet<CCAssignToStack<4, 4>>,
 
   // The first integer argument is passed in ECX
   CCIfType<[i32], CCAssignToReg<[ECX]>>,
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 2518e02e2a40..8fea6edc8d78 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -1451,6 +1451,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
     MCE.emitByte(BaseOpcode);
     MCE.emitByte(0xC9);
     break;
+  case X86II::MRM_CA:
+    MCE.emitByte(BaseOpcode);
+    MCE.emitByte(0xCA);
+    break;
+  case X86II::MRM_CB:
+    MCE.emitByte(BaseOpcode);
+    MCE.emitByte(0xCB);
+    break;
   case X86II::MRM_E8:
     MCE.emitByte(BaseOpcode);
     MCE.emitByte(0xE8);
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 85155f55e0ff..cf44bd033bf3 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -68,12 +68,12 @@ class X86FastISel : public FastISel {
 
   virtual bool TargetSelectInstruction(const Instruction *I);
 
-  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// \brief The specified machine instr operand is a vreg, and that
   /// vreg is being provided by the specified load instruction.  If possible,
   /// try to fold the load as an operand to the instruction, returning true if
   /// possible.
-  virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
-                             const LoadInst *LI);
+  virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+                                   const LoadInst *LI);
 
   virtual bool FastLowerArguments();
 
@@ -107,6 +107,8 @@ class X86FastISel : public FastISel {
 
   bool X86SelectShift(const Instruction *I);
 
+  bool X86SelectDivRem(const Instruction *I);
+
   bool X86SelectSelect(const Instruction *I);
 
   bool X86SelectTrunc(const Instruction *I);
@@ -691,11 +693,6 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
   if (S->isAtomic())
     return false;
 
-  unsigned SABIAlignment =
-    TD.getABITypeAlignment(S->getValueOperand()->getType());
-  if (S->getAlignment() != 0 && S->getAlignment() < SABIAlignment)
-    return false;
-
   MVT VT;
   if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
     return false;
@@ -816,14 +813,16 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
   // The x86-64 ABI for returning structs by value requires that we copy
   // the sret argument into %rax for the return. We saved the argument into
   // a virtual register in the entry block, so now we copy the value out
-  // and into %rax.
-  if (Subtarget->is64Bit() && F.hasStructRetAttr()) {
+  // and into %rax. We also do the same with %eax for Win32.
+  if (F.hasStructRetAttr() &&
+      (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
     unsigned Reg = X86MFInfo->getSRetReturnReg();
     assert(Reg &&
            "SRetReturnReg should have been set in LowerFormalArguments()!");
+    unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            X86::RAX).addReg(Reg);
-    RetRegs.push_back(X86::RAX);
+            RetReg).addReg(Reg);
+    RetRegs.push_back(RetReg);
   }
 
   // Now emit the RET.
@@ -1233,6 +1232,124 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
   return true;
 }
 
+bool X86FastISel::X86SelectDivRem(const Instruction *I) {
+  const static unsigned NumTypes = 4; // i8, i16, i32, i64
+  const static unsigned NumOps   = 4; // SDiv, SRem, UDiv, URem
+  const static bool S = true;  // IsSigned
+  const static bool U = false; // !IsSigned
+  const static unsigned Copy = TargetOpcode::COPY;
+  // For the X86 DIV/IDIV instruction, in most cases the dividend
+  // (numerator) must be in a specific register pair highreg:lowreg,
+  // producing the quotient in lowreg and the remainder in highreg.
+  // For most data types, to set up the instruction, the dividend is
+  // copied into lowreg, and lowreg is sign-extended or zero-extended
+  // into highreg.  The exception is i8, where the dividend is defined
+  // as a single register rather than a register pair, and we
+  // therefore directly sign-extend or zero-extend the dividend into
+  // lowreg, instead of copying, and ignore the highreg.
+  const static struct DivRemEntry {
+    // The following portion depends only on the data type.
+    const TargetRegisterClass *RC;
+    unsigned LowInReg;  // low part of the register pair
+    unsigned HighInReg; // high part of the register pair
+    // The following portion depends on both the data type and the operation.
+    struct DivRemResult {
+    unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
+    unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
+                              // highreg, or copying a zero into highreg.
+    unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
+                              // zero/sign-extending into lowreg for i8.
+    unsigned DivRemResultReg; // Register containing the desired result.
+    bool IsOpSigned;          // Whether to use signed or unsigned form.
+    } ResultTable[NumOps];
+  } OpTable[NumTypes] = {
+    { &X86::GR8RegClass,  X86::AX,  0, {
+        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AL,  S }, // SDiv
+        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AH,  S }, // SRem
+        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AL,  U }, // UDiv
+        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AH,  U }, // URem
+      }
+    }, // i8
+    { &X86::GR16RegClass, X86::AX,  X86::DX, {
+        { X86::IDIV16r, X86::CWD,     Copy,            X86::AX,  S }, // SDiv
+        { X86::IDIV16r, X86::CWD,     Copy,            X86::DX,  S }, // SRem
+        { X86::DIV16r,  X86::MOV16r0, Copy,            X86::AX,  U }, // UDiv
+        { X86::DIV16r,  X86::MOV16r0, Copy,            X86::DX,  U }, // URem
+      }
+    }, // i16
+    { &X86::GR32RegClass, X86::EAX, X86::EDX, {
+        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EAX, S }, // SDiv
+        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EDX, S }, // SRem
+        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EAX, U }, // UDiv
+        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EDX, U }, // URem
+      }
+    }, // i32
+    { &X86::GR64RegClass, X86::RAX, X86::RDX, {
+        { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
+        { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
+        { X86::DIV64r,  X86::MOV64r0, Copy,            X86::RAX, U }, // UDiv
+        { X86::DIV64r,  X86::MOV64r0, Copy,            X86::RDX, U }, // URem
+      }
+    }, // i64
+  };
+
+  MVT VT;
+  if (!isTypeLegal(I->getType(), VT))
+    return false;
+
+  unsigned TypeIndex, OpIndex;
+  switch (VT.SimpleTy) {
+  default: return false;
+  case MVT::i8:  TypeIndex = 0; break;
+  case MVT::i16: TypeIndex = 1; break;
+  case MVT::i32: TypeIndex = 2; break;
+  case MVT::i64: TypeIndex = 3;
+    if (!Subtarget->is64Bit())
+      return false;
+    break;
+  }
+
+  switch (I->getOpcode()) {
+  default: llvm_unreachable("Unexpected div/rem opcode");
+  case Instruction::SDiv: OpIndex = 0; break;
+  case Instruction::SRem: OpIndex = 1; break;
+  case Instruction::UDiv: OpIndex = 2; break;
+  case Instruction::URem: OpIndex = 3; break;
+  }
+
+  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
+  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
+  unsigned Op0Reg = getRegForValue(I->getOperand(0));
+  if (Op0Reg == 0)
+    return false;
+  unsigned Op1Reg = getRegForValue(I->getOperand(1));
+  if (Op1Reg == 0)
+    return false;
+
+  // Move op0 into low-order input register.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+          TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
+  // Zero-extend or sign-extend into high-order input register.
+  if (OpEntry.OpSignExtend) {
+    if (OpEntry.IsOpSigned)
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+              TII.get(OpEntry.OpSignExtend));
+    else
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+              TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg);
+  }
+  // Generate the DIV/IDIV instruction.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+          TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
+  // Copy output register into result register.
+  unsigned ResultReg = createResultReg(TypeEntry.RC);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+          TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg);
+  UpdateValueMap(I, ResultReg);
+
+  return true;
+}
+
 bool X86FastISel::X86SelectSelect(const Instruction *I) {
   MVT VT;
   if (!isTypeLegal(I->getType(), VT))
@@ -1526,7 +1643,7 @@ bool X86FastISel::FastLowerArguments() {
   if (!FuncInfo.CanLowerReturn)
     return false;
 
-  if (Subtarget->isTargetWindows())
+  if (Subtarget->isTargetWin64())
     return false;
 
   const Function *F = FuncInfo.Fn;
@@ -2082,6 +2199,11 @@ X86FastISel::TargetSelectInstruction(const Instruction *I)  {
   case Instruction::AShr:
   case Instruction::Shl:
     return X86SelectShift(I);
+  case Instruction::SDiv:
+  case Instruction::UDiv:
+  case Instruction::SRem:
+  case Instruction::URem:
+    return X86SelectDivRem(I);
   case Instruction::Select:
     return X86SelectSelect(I);
   case Instruction::Trunc:
@@ -2273,12 +2395,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
 }
 
 
-/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
-/// vreg is being provided by the specified load instruction.  If possible,
-/// try to fold the load as an operand to the instruction, returning true if
-/// possible.
-bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
-                                const LoadInst *LI) {
+bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+                                      const LoadInst *LI) {
   X86AddressMode AM;
   if (!X86SelectAddress(LI->getOperand(0), AM))
     return false;
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 54cbd40274a7..5914ce3dd1d5 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -528,11 +528,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
     if (!MI.getFlag(MachineInstr::FrameSetup)) break;
 
     // We don't exect any more prolog instructions.
-    if (ExpectEnd) return 0;
+    if (ExpectEnd) return CU::UNWIND_MODE_DWARF;
 
     if (Opc == PushInstr) {
       // If there are too many saved registers, we cannot use compact encoding.
-      if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0;
+      if (SavedRegIdx >= CU_NUM_SAVED_REGS) return CU::UNWIND_MODE_DWARF;
 
       SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
       StackAdjust += OffsetSize;
@@ -542,7 +542,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
       unsigned DstReg = MI.getOperand(0).getReg();
 
       if (DstReg != FramePtr || SrcReg != StackPtr)
-        return 0;
+        return CU::UNWIND_MODE_DWARF;
 
       StackAdjust = 0;
       memset(SavedRegs, 0, sizeof(SavedRegs));
@@ -552,7 +552,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
                Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
       if (StackSize)
         // We already have a stack size.
-        return 0;
+        return CU::UNWIND_MODE_DWARF;
 
       if (!MI.getOperand(0).isReg() ||
           MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
@@ -560,7 +560,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
         // We need this to be a stack adjustment pointer. Something like:
         //
         //   %RSP<def> = SUB64ri8 %RSP, 48
-        return 0;
+        return CU::UNWIND_MODE_DWARF;
 
       StackSize = MI.getOperand(2).getImm() / StackDivide;
       SubtractInstrIdx += InstrOffset;
@@ -574,31 +574,31 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
   if (HasFP) {
     if ((StackAdjust & 0xFF) != StackAdjust)
       // Offset was too big for compact encoding.
-      return 0;
+      return CU::UNWIND_MODE_DWARF;
 
     // Get the encoding of the saved registers when we have a frame pointer.
     uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
-    if (RegEnc == ~0U) return 0;
+    if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
 
-    CompactUnwindEncoding |= 0x01000000;
+    CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
     CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
-    CompactUnwindEncoding |= RegEnc & 0x7FFF;
+    CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
   } else {
     ++StackAdjust;
     uint32_t TotalStackSize = StackAdjust + StackSize;
     if ((TotalStackSize & 0xFF) == TotalStackSize) {
       // Frameless stack with a small stack size.
-      CompactUnwindEncoding |= 0x02000000;
+      CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
 
       // Encode the stack size.
       CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
     } else {
       if ((StackAdjust & 0x7) != StackAdjust)
         // The extra stack adjustments are too big for us to handle.
-        return 0;
+        return CU::UNWIND_MODE_DWARF;
 
       // Frameless stack with an offset too large for us to encode compactly.
-      CompactUnwindEncoding |= 0x03000000;
+      CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
 
       // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
       // instruction.
@@ -616,10 +616,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
     uint32_t RegEnc =
       encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
                                                Is64Bit);
-    if (RegEnc == ~0U) return 0;
+    if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
 
     // Encode the register encoding.
-    CompactUnwindEncoding |= RegEnc & 0x3FF;
+    CompactUnwindEncoding |=
+      RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
   }
 
   return CompactUnwindEncoding;
@@ -1472,6 +1473,13 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
   // prologue.
   StackSize = MFI->getStackSize();
 
+  // If the front-end requested a fixed stack segment size, use that.
+  const Function *Fn = MF.getFunction();
+  if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                       Attribute::FixedStackSegment)) {
+    StackSize = MF.getTarget().Options.FixedStackSegmentSize;
+  }
+
   // When the frame size is less than 256 we just compare the stack
   // boundary directly to the value of the stack pointer, per gcc.
   bool CompareStackPointer = StackSize < kSplitStackAvailable;
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 3f08b9a2e8d2..6e309d88327b 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -19,8 +19,35 @@
 #include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
-  class MCSymbol;
-  class X86TargetMachine;
+
+namespace CU {
+
+  /// Compact unwind encoding values.
+  enum CompactUnwindEncodings {
+    /// [RE]BP based frame where [RE]BP is pused on the stack immediately after
+    /// the return address, then [RE]SP is moved to [RE]BP.
+    UNWIND_MODE_BP_FRAME                   = 0x01000000,
+
+    /// A frameless function with a small constant stack size.
+    UNWIND_MODE_STACK_IMMD                 = 0x02000000,
+
+    /// A frameless function with a large constant stack size.
+    UNWIND_MODE_STACK_IND                  = 0x03000000,
+
+    /// No compact unwind encoding is available.
+    UNWIND_MODE_DWARF                      = 0x04000000,
+
+    /// Mask for encoding the frame registers.
+    UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,
+
+    /// Mask for encoding the frameless registers.
+    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
+  };
+
+} // end CU namespace
+
+class MCSymbol;
+class X86TargetMachine;
 
 class X86FrameLowering : public TargetFrameLowering {
   const X86TargetMachine &TM;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 00fbe6924cc2..968b3583c36d 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -444,7 +444,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
     SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
 
     if (OptLevel != CodeGenOpt::None &&
-        (N->getOpcode() == X86ISD::CALL ||
+        // Only does this when target favors doesn't favor register indirect
+        // call.
+        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
          (N->getOpcode() == X86ISD::TC_RETURN &&
           // Only does this if load can be folded into TC_RETURN.
           (Subtarget->is64Bit() ||
@@ -1501,8 +1503,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
   const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
   SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
-                                           MVT::i32, MVT::i32, MVT::Other, Ops,
-                                           array_lengthof(Ops));
+                                           MVT::i32, MVT::i32, MVT::Other, Ops);
   cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
   return ResNode;
 }
@@ -1718,7 +1719,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
       Op = ADD;
       break;
   }
-  
+
   Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
   bool isUnOp = !Val.getNode();
   bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
@@ -1770,12 +1771,10 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
   if (isUnOp) {
     SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
-    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
-                                         array_lengthof(Ops)), 0);
+    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
   } else {
     SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
-    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
-                                         array_lengthof(Ops)), 0);
+    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
   }
   cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
   SDValue RetVals[] = { Undef, Ret };
@@ -1969,8 +1968,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
   SDValue Segment = CurDAG->getRegister(0, MVT::i32);
   const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
                           Disp, Segment, VMask, Chain};
-  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
-                                           VTs, Ops, array_lengthof(Ops));
+  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), VTs, Ops);
   // Node has 2 outputs: VDst and MVT::Other.
   // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
   // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
@@ -2184,7 +2182,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
 
     SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
     SDValue Ops[] = {N1, InFlag};
-    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
+    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
 
     ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
     ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
@@ -2265,16 +2263,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
                         InFlag };
       if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
         SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
-                                               array_lengthof(Ops));
+        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
         ResHi = SDValue(CNode, 0);
         ResLo = SDValue(CNode, 1);
         Chain = SDValue(CNode, 2);
         InFlag = SDValue(CNode, 3);
       } else {
         SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
-                                               array_lengthof(Ops));
+        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
         Chain = SDValue(CNode, 0);
         InFlag = SDValue(CNode, 1);
       }
@@ -2285,15 +2281,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       SDValue Ops[] = { N1, InFlag };
       if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
         SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
-                                               array_lengthof(Ops));
+        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
         ResHi = SDValue(CNode, 0);
         ResLo = SDValue(CNode, 1);
         InFlag = SDValue(CNode, 2);
       } else {
         SDVTList VTs = CurDAG->getVTList(MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
-                                               array_lengthof(Ops));
+        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
         InFlag = SDValue(CNode, 0);
       }
     }
@@ -2341,6 +2335,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
     }
 
+    // Propagate ordering to the last node, for now.
+    CurDAG->AssignOrdering(InFlag.getNode(), CurDAG->GetOrdering(Node));
+
     return NULL;
   }
 
@@ -2407,8 +2404,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
         SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
         Move =
           SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
-                                         MVT::Other, Ops,
-                                         array_lengthof(Ops)), 0);
+                                         MVT::Other, Ops), 0);
         Chain = Move.getValue(1);
         ReplaceUses(N0.getValue(1), Chain);
       } else {
@@ -2439,8 +2435,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                         InFlag };
       SDNode *CNode =
-        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
-                               array_lengthof(Ops));
+        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
       InFlag = SDValue(CNode, 1);
       // Update the chain.
       ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
@@ -2672,8 +2667,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
     unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
     MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
                                                    Node->getDebugLoc(),
-                                                   MVT::i32, MVT::Other, Ops,
-                                                   array_lengthof(Ops));
+                                                   MVT::i32, MVT::Other, Ops);
     Result->setMemRefs(MemOp, MemOp + 2);
 
     ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 10cf13f2aa9f..30c24f49985e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -163,10 +163,28 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   Subtarget = &TM.getSubtarget<X86Subtarget>();
   X86ScalarSSEf64 = Subtarget->hasSSE2();
   X86ScalarSSEf32 = Subtarget->hasSSE1();
-
   RegInfo = TM.getRegisterInfo();
   TD = getDataLayout();
 
+  resetOperationActions();
+}
+
+void X86TargetLowering::resetOperationActions() {
+  const TargetMachine &TM = getTargetMachine();
+  static bool FirstTimeThrough = true;
+
+  // If none of the target options have changed, then we don't need to reset the
+  // operation actions.
+  if (!FirstTimeThrough && TO == TM.Options) return;
+
+  if (!FirstTimeThrough) {
+    // Reinitialize the actions.
+    initActions();
+    FirstTimeThrough = false;
+  }
+
+  TO = TM.Options;
+
   // Set up the TargetLowering object.
   static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
 
@@ -470,7 +488,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
   }
   setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
-  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intened to support
+  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
   // support continuation, user-level threading, and etc.. As a result, no
   // other SjLj exception interfaces are implemented and please don't build
@@ -1053,23 +1071,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SRA,               MVT::v8i16, Custom);
     setOperationAction(ISD::SRA,               MVT::v16i8, Custom);
 
-    if (Subtarget->hasInt256()) {
-      setOperationAction(ISD::SRL,             MVT::v2i64, Legal);
-      setOperationAction(ISD::SRL,             MVT::v4i32, Legal);
-
-      setOperationAction(ISD::SHL,             MVT::v2i64, Legal);
-      setOperationAction(ISD::SHL,             MVT::v4i32, Legal);
+    // In the customized shift lowering, the legal cases in AVX2 will be
+    // recognized.
+    setOperationAction(ISD::SRL,               MVT::v2i64, Custom);
+    setOperationAction(ISD::SRL,               MVT::v4i32, Custom);
 
-      setOperationAction(ISD::SRA,             MVT::v4i32, Legal);
-    } else {
-      setOperationAction(ISD::SRL,             MVT::v2i64, Custom);
-      setOperationAction(ISD::SRL,             MVT::v4i32, Custom);
+    setOperationAction(ISD::SHL,               MVT::v2i64, Custom);
+    setOperationAction(ISD::SHL,               MVT::v4i32, Custom);
 
-      setOperationAction(ISD::SHL,             MVT::v2i64, Custom);
-      setOperationAction(ISD::SHL,             MVT::v4i32, Custom);
+    setOperationAction(ISD::SRA,               MVT::v4i32, Custom);
 
-      setOperationAction(ISD::SRA,             MVT::v4i32, Custom);
-    }
     setOperationAction(ISD::SDIV,              MVT::v8i16, Custom);
     setOperationAction(ISD::SDIV,              MVT::v4i32, Custom);
   }
@@ -1118,6 +1129,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Custom);
 
     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i32, Legal);
+    setOperationAction(ISD::SINT_TO_FP,         MVT::v8i16, Promote);
     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
     setOperationAction(ISD::FP_ROUND,           MVT::v4f32, Legal);
 
@@ -1186,14 +1198,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
 
       setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
 
-      setOperationAction(ISD::SRL,             MVT::v4i64, Legal);
-      setOperationAction(ISD::SRL,             MVT::v8i32, Legal);
-
-      setOperationAction(ISD::SHL,             MVT::v4i64, Legal);
-      setOperationAction(ISD::SHL,             MVT::v8i32, Legal);
-
-      setOperationAction(ISD::SRA,             MVT::v8i32, Legal);
-
       setOperationAction(ISD::SDIV,            MVT::v8i32, Custom);
     } else {
       setOperationAction(ISD::ADD,             MVT::v4i64, Custom);
@@ -1210,15 +1214,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
       setOperationAction(ISD::MUL,             MVT::v8i32, Custom);
       setOperationAction(ISD::MUL,             MVT::v16i16, Custom);
       // Don't lower v32i8 because there is no 128-bit byte mul
+    }
 
-      setOperationAction(ISD::SRL,             MVT::v4i64, Custom);
-      setOperationAction(ISD::SRL,             MVT::v8i32, Custom);
+    // In the customized shift lowering, the legal cases in AVX2 will be
+    // recognized.
+    setOperationAction(ISD::SRL,               MVT::v4i64, Custom);
+    setOperationAction(ISD::SRL,               MVT::v8i32, Custom);
 
-      setOperationAction(ISD::SHL,             MVT::v4i64, Custom);
-      setOperationAction(ISD::SHL,             MVT::v8i32, Custom);
+    setOperationAction(ISD::SHL,               MVT::v4i64, Custom);
+    setOperationAction(ISD::SHL,               MVT::v8i32, Custom);
 
-      setOperationAction(ISD::SRA,             MVT::v8i32, Custom);
-    }
+    setOperationAction(ISD::SRA,               MVT::v8i32, Custom);
 
     // Custom lower several nodes for 256-bit types.
     for (int i = MVT::FIRST_VECTOR_VALUETYPE;
@@ -1356,7 +1362,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
   setPrefLoopAlignment(4); // 2^4 bytes.
-  BenefitFromCodePlacementOpt = true;
 
   // Predictable cmov don't hurt on atom because it's in-order.
   PredictableSelectIsExpensive = !Subtarget->isAtom();
@@ -1679,10 +1684,11 @@ X86TargetLowering::LowerReturn(SDValue Chain,
 
   // The x86-64 ABIs require that for returning structs by value we copy
   // the sret argument into %rax/%eax (depending on ABI) for the return.
+  // Win32 requires us to put the sret argument to %eax as well.
   // We saved the argument into a virtual register in the entry block,
   // so now we copy the value out and into %rax/%eax.
-  if (Subtarget->is64Bit() &&
-      DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+  if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
+      (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
     MachineFunction &MF = DAG.getMachineFunction();
     X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
     unsigned Reg = FuncInfo->getSRetReturnReg();
@@ -1690,12 +1696,14 @@ X86TargetLowering::LowerReturn(SDValue Chain,
            "SRetReturnReg should have been set in LowerFormalArguments().");
     SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
 
-    unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX;
+    unsigned RetValReg
+        = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
+          X86::RAX : X86::EAX;
     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
     Flag = Chain.getValue(1);
 
     // RAX/EAX now acts like a return value.
-    RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64));
+    RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
   }
 
   RetOps[0] = Chain;  // Update chain.
@@ -1795,7 +1803,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
       if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
       SDValue Ops[] = { Chain, InFlag };
       Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
-                                         MVT::Other, MVT::Glue, Ops, 2), 1);
+                                         MVT::Other, MVT::Glue, Ops), 1);
       Val = Chain.getValue(0);
 
       // Round the f80 to the right size, which also moves it to the appropriate
@@ -2049,9 +2057,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
 
   // The x86-64 ABIs require that for returning structs by value we copy
   // the sret argument into %rax/%eax (depending on ABI) for the return.
+  // Win32 requires us to put the sret argument to %eax as well.
   // Save the argument into a virtual register so that we can access it
   // from the return points.
-  if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
+  if (MF.getFunction()->hasStructRetAttr() &&
+      (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
     X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
     unsigned Reg = FuncInfo->getSRetReturnReg();
     if (!Reg) {
@@ -4412,13 +4422,15 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
     if (Subtarget->hasInt256()) { // AVX2
       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
+                        array_lengthof(Ops));
     } else {
       // 256-bit logic and arithmetic instructions in AVX are all
       // floating-point, no support for integer ops. Emit fp zeroed vectors.
       SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops,
+                        array_lengthof(Ops));
     }
   } else
     llvm_unreachable("Unexpected vector type");
@@ -4439,7 +4451,8 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
   if (VT.is256BitVector()) {
     if (HasInt256) { // AVX2
       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
+                        array_lengthof(Ops));
     } else { // AVX
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
       Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
@@ -5109,7 +5122,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
     SDValue ResNode =
-        DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64,
+        DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops,
+                                array_lengthof(Ops), MVT::i64,
                                 LDBase->getPointerInfo(),
                                 LDBase->getAlignment(),
                                 false/*isVolatile*/, true/*ReadMem*/,
@@ -7632,10 +7646,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
 
   if (InFlag) {
     SDValue Ops[] = { Chain,  TGA, *InFlag };
-    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3);
+    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
   } else {
     SDValue Ops[]  = { Chain, TGA };
-    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2);
+    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
   }
 
   // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
@@ -7945,7 +7959,7 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
   }
 
   SDValue Ops[2] = { Lo, Hi };
-  return DAG.getMergeValues(Ops, 2, dl);
+  return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
 }
 
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
@@ -8228,8 +8242,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
 
   SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
   SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
-  SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
-                                         MVT::i64, MMO);
+  SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
+                                         array_lengthof(Ops), MVT::i64, MMO);
 
   APInt FF(32, 0x5F800000ULL);
 
@@ -8321,8 +8335,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
     MachineMemOperand *MMO =
       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
                               MachineMemOperand::MOLoad, MemSize, MemSize);
-    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
-                                    DstTy, MMO);
+    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops,
+                                    array_lengthof(Ops), DstTy, MMO);
     Chain = Value.getValue(1);
     SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
     StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
@@ -8336,7 +8350,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
     // Build the FP_TO_INT*_IN_MEM
     SDValue Ops[] = { Chain, Value, StackSlot };
     SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
-                                           Ops, 3, DstTy, MMO);
+                                           Ops, array_lengthof(Ops), DstTy,
+                                           MMO);
     return std::make_pair(FIST, StackSlot);
   } else {
     SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
@@ -8348,8 +8363,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
       MVT::i32, eax.getValue(2));
     SDValue Ops[] = { eax, edx };
     SDValue pair = IsReplace
-      ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, 2)
-      : DAG.getMergeValues(Ops, 2, DL);
+      ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, array_lengthof(Ops))
+      : DAG.getMergeValues(Ops, array_lengthof(Ops), DL);
     return std::make_pair(pair, SDValue());
   }
 }
@@ -9340,11 +9355,49 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
   if (Swap)
     std::swap(Op0, Op1);
 
+  // Since SSE has no unsigned integer comparisons, we need to flip  the sign
+  // bits of the inputs before performing those operations.
+  if (FlipSigns) {
+    EVT EltVT = VT.getVectorElementType();
+    SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
+                                      EltVT);
+    std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
+    SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
+                                    SignBits.size());
+    Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
+    Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
+  }
+
   // Check that the operation in question is available (most are plain SSE2,
   // but PCMPGTQ and PCMPEQQ have different requirements).
   if (VT == MVT::v2i64) {
-    if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
-      return SDValue();
+    if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) {
+      assert(Subtarget->hasSSE2() && "Don't know how to lower!");
+
+      // First cast everything to the right type,
+      Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
+      Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+
+      // Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
+      SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
+      SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
+
+      // Create masks for only the low parts/high parts of the 64 bit integers.
+      const int MaskHi[] = { 1, 1, 3, 3 };
+      const int MaskLo[] = { 0, 0, 2, 2 };
+      SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
+      SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
+      SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
+
+      SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
+      Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);
+
+      if (Invert)
+        Result = DAG.getNOT(dl, Result, MVT::v4i32);
+
+      return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+    }
+
     if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
       // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
       // pcmpeqd + pshufd + pand.
@@ -9369,19 +9422,6 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
     }
   }
 
-  // Since SSE has no unsigned integer comparisons, we need to flip  the sign
-  // bits of the inputs before performing those operations.
-  if (FlipSigns) {
-    EVT EltVT = VT.getVectorElementType();
-    SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
-                                      EltVT);
-    std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
-    SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
-                                    SignBits.size());
-    Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
-    Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
-  }
-
   SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
 
   // If the logical-not of the result is required, perform that now.
@@ -10922,28 +10962,47 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
   switch (IntNo) {
   default: return SDValue();    // Don't custom lower most intrinsics.
 
-  // RDRAND intrinsics.
+  // RDRAND/RDSEED intrinsics.
   case Intrinsic::x86_rdrand_16:
   case Intrinsic::x86_rdrand_32:
-  case Intrinsic::x86_rdrand_64: {
+  case Intrinsic::x86_rdrand_64:
+  case Intrinsic::x86_rdseed_16:
+  case Intrinsic::x86_rdseed_32:
+  case Intrinsic::x86_rdseed_64: {
+    unsigned Opcode = (IntNo == Intrinsic::x86_rdseed_16 ||
+                       IntNo == Intrinsic::x86_rdseed_32 ||
+                       IntNo == Intrinsic::x86_rdseed_64) ? X86ISD::RDSEED :
+                                                            X86ISD::RDRAND;
     // Emit the node with the right value type.
     SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
-    SDValue Result = DAG.getNode(X86ISD::RDRAND, dl, VTs, Op.getOperand(0));
+    SDValue Result = DAG.getNode(Opcode, dl, VTs, Op.getOperand(0));
 
-    // If the value returned by RDRAND was valid (CF=1), return 1. Otherwise
-    // return the value from Rand, which is always 0, casted to i32.
+    // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
+    // Otherwise return the value from Rand, which is always 0, casted to i32.
     SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
                       DAG.getConstant(1, Op->getValueType(1)),
                       DAG.getConstant(X86::COND_B, MVT::i32),
                       SDValue(Result.getNode(), 1) };
     SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
                                   DAG.getVTList(Op->getValueType(1), MVT::Glue),
-                                  Ops, 4);
+                                  Ops, array_lengthof(Ops));
 
     // Return { result, isValid, chain }.
     return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
                        SDValue(Result.getNode(), 2));
   }
+
+  // XTEST intrinsics.
+  case Intrinsic::x86_xtest: {
+    SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
+    SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0));
+    SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                                DAG.getConstant(X86::COND_NE, MVT::i8),
+                                InTrans);
+    SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
+    return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
+                       Ret, SDValue(InTrans.getNode(), 1));
+  }
   }
 }
 
@@ -11224,7 +11283,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
   SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
   SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
                                           DAG.getVTList(MVT::Other),
-                                          Ops, 2, MVT::i16, MMO);
+                                          Ops, array_lengthof(Ops), MVT::i16,
+                                          MMO);
 
   // Load FP Control Word from stack slot
   SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
@@ -11491,16 +11551,13 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
-SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
-
+static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
+                                         const X86Subtarget *Subtarget) {
   EVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();
   SDValue R = Op.getOperand(0);
   SDValue Amt = Op.getOperand(1);
 
-  if (!Subtarget->hasSSE2())
-    return SDValue();
-
   // Optimize shl/srl/sra with constant shift amount.
   if (isSplatVector(Amt.getNode())) {
     SDValue SclrAmt = Amt->getOperand(0);
@@ -11611,6 +11668,224 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
+  // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+  if (!Subtarget->is64Bit() &&
+      (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+      Amt.getOpcode() == ISD::BITCAST &&
+      Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+    Amt = Amt.getOperand(0);
+    unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+                     VT.getVectorNumElements();
+    unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
+    uint64_t ShiftAmt = 0;
+    for (unsigned i = 0; i != Ratio; ++i) {
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+      if (C == 0)
+        return SDValue();
+      // 6 == Log2(64)
+      ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
+    }
+    // Check remaining shift amounts.
+    for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+      uint64_t ShAmt = 0;
+      for (unsigned j = 0; j != Ratio; ++j) {
+        ConstantSDNode *C =
+          dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+        if (C == 0)
+          return SDValue();
+        // 6 == Log2(64)
+        ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+      }
+      if (ShAmt != ShiftAmt)
+        return SDValue();
+    }
+    switch (Op.getOpcode()) {
+    default:
+      llvm_unreachable("Unknown shift opcode!");
+    case ISD::SHL:
+      return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
+                         DAG.getConstant(ShiftAmt, MVT::i32));
+    case ISD::SRL:
+      return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
+                         DAG.getConstant(ShiftAmt, MVT::i32));
+    case ISD::SRA:
+      return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
+                         DAG.getConstant(ShiftAmt, MVT::i32));
+    }
+  }
+
+  return SDValue();
+}
+
+static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
+                                        const X86Subtarget* Subtarget) {
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue R = Op.getOperand(0);
+  SDValue Amt = Op.getOperand(1);
+
+  if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
+      VT == MVT::v4i32 || VT == MVT::v8i16 ||
+      (Subtarget->hasInt256() &&
+       ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
+        VT == MVT::v8i32 || VT == MVT::v16i16))) {
+    SDValue BaseShAmt;
+    EVT EltVT = VT.getVectorElementType();
+
+    if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
+      unsigned NumElts = VT.getVectorNumElements();
+      unsigned i, j;
+      for (i = 0; i != NumElts; ++i) {
+        if (Amt.getOperand(i).getOpcode() == ISD::UNDEF)
+          continue;
+        break;
+      }
+      for (j = i; j != NumElts; ++j) {
+        SDValue Arg = Amt.getOperand(j);
+        if (Arg.getOpcode() == ISD::UNDEF) continue;
+        if (Arg != Amt.getOperand(i))
+          break;
+      }
+      if (i != NumElts && j == NumElts)
+        BaseShAmt = Amt.getOperand(i);
+    } else {
+      if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+        Amt = Amt.getOperand(0);
+      if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE &&
+               cast<ShuffleVectorSDNode>(Amt)->isSplat()) {
+        SDValue InVec = Amt.getOperand(0);
+        if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+          unsigned NumElts = InVec.getValueType().getVectorNumElements();
+          unsigned i = 0;
+          for (; i != NumElts; ++i) {
+            SDValue Arg = InVec.getOperand(i);
+            if (Arg.getOpcode() == ISD::UNDEF) continue;
+            BaseShAmt = Arg;
+            break;
+          }
+        } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+           if (ConstantSDNode *C =
+               dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+             unsigned SplatIdx =
+               cast<ShuffleVectorSDNode>(Amt)->getSplatIndex();
+             if (C->getZExtValue() == SplatIdx)
+               BaseShAmt = InVec.getOperand(1);
+           }
+        }
+        if (BaseShAmt.getNode() == 0)
+          BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt,
+                                  DAG.getIntPtrConstant(0));
+      }
+    }
+
+    if (BaseShAmt.getNode()) {
+      if (EltVT.bitsGT(MVT::i32))
+        BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt);
+      else if (EltVT.bitsLT(MVT::i32))
+        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+      switch (Op.getOpcode()) {
+      default:
+        llvm_unreachable("Unknown shift opcode!");
+      case ISD::SHL:
+        switch (VT.getSimpleVT().SimpleTy) {
+        default: return SDValue();
+        case MVT::v2i64:
+        case MVT::v4i32:
+        case MVT::v8i16:
+        case MVT::v4i64:
+        case MVT::v8i32:
+        case MVT::v16i16:
+          return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
+        }
+      case ISD::SRA:
+        switch (VT.getSimpleVT().SimpleTy) {
+        default: return SDValue();
+        case MVT::v4i32:
+        case MVT::v8i16:
+        case MVT::v8i32:
+        case MVT::v16i16:
+          return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
+        }
+      case ISD::SRL:
+        switch (VT.getSimpleVT().SimpleTy) {
+        default: return SDValue();
+        case MVT::v2i64:
+        case MVT::v4i32:
+        case MVT::v8i16:
+        case MVT::v4i64:
+        case MVT::v8i32:
+        case MVT::v16i16:
+          return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
+        }
+      }
+    }
+  }
+
+  // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+  if (!Subtarget->is64Bit() &&
+      (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+      Amt.getOpcode() == ISD::BITCAST &&
+      Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+    Amt = Amt.getOperand(0);
+    unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+                     VT.getVectorNumElements();
+    std::vector<SDValue> Vals(Ratio);
+    for (unsigned i = 0; i != Ratio; ++i)
+      Vals[i] = Amt.getOperand(i);
+    for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+      for (unsigned j = 0; j != Ratio; ++j)
+        if (Vals[j] != Amt.getOperand(i + j))
+          return SDValue();
+    }
+    switch (Op.getOpcode()) {
+    default:
+      llvm_unreachable("Unknown shift opcode!");
+    case ISD::SHL:
+      return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1));
+    case ISD::SRL:
+      return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1));
+    case ISD::SRA:
+      return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1));
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue R = Op.getOperand(0);
+  SDValue Amt = Op.getOperand(1);
+  SDValue V;
+
+  if (!Subtarget->hasSSE2())
+    return SDValue();
+
+  V = LowerScalarImmediateShift(Op, DAG, Subtarget);
+  if (V.getNode())
+    return V;
+
+  V = LowerScalarVariableShift(Op, DAG, Subtarget);
+  if (V.getNode())
+      return V;
+
+  // AVX2 has VPSLLV/VPSRAV/VPSRLV.
+  if (Subtarget->hasInt256()) {
+    if (Op.getOpcode() == ISD::SRL &&
+        (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+         VT == MVT::v4i64 || VT == MVT::v8i32))
+      return Op;
+    if (Op.getOpcode() == ISD::SHL &&
+        (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+         VT == MVT::v4i64 || VT == MVT::v8i32))
+      return Op;
+    if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32))
+      return Op;
+  }
+
   // Lower SHL with variable shift amount.
   if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
     Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
@@ -11827,8 +12102,23 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
       // fall through
     case MVT::v4i32:
     case MVT::v8i16: {
-      SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
-                                         Op.getOperand(0), ShAmt, DAG);
+      // (sext (vzext x)) -> (vsext x)
+      SDValue Op0 = Op.getOperand(0);
+      SDValue Op00 = Op0.getOperand(0);
+      SDValue Tmp1;
+      // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
+      if (Op0.getOpcode() == ISD::BITCAST &&
+          Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
+        Tmp1 = LowerVectorIntExtend(Op00, DAG);
+      if (Tmp1.getNode()) {
+        SDValue Tmp1Op0 = Tmp1.getOperand(0);
+        assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+               "This optimization is invalid without a VZEXT.");
+        return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+      }
+
+      // If the above didn't work, then just use Shift-Left + Shift-Right.
+      Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
       return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
     }
   }
@@ -11852,9 +12142,7 @@ static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget,
       Zero,
       Chain
     };
-    SDNode *Res =
-      DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
-                          array_lengthof(Ops));
+    SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
     return SDValue(Res, 0);
   }
 
@@ -11908,9 +12196,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
       Zero,
       Chain
     };
-    SDNode *Res =
-      DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
-                         array_lengthof(Ops));
+    SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
     return SDValue(Res, 0);
   }
 
@@ -11944,7 +12230,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
   MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
   SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
-                                           Ops, 5, T, MMO);
+                                           Ops, array_lengthof(Ops), T, MMO);
   SDValue cpOut =
     DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
   return cpOut;
@@ -11966,7 +12252,7 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
     DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
     rdx.getValue(1)
   };
-  return DAG.getMergeValues(Ops, 2, dl);
+  return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
 }
 
 SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
@@ -12060,7 +12346,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
   assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
 
   // For MacOSX, we want to call an alternative entry point: __sincos_stret,
-  // which returns the values in two XMM registers.
+  // which returns the values as { float, float } (in XMM0) or
+  // { double, double } (which is returned in XMM0, XMM1).
   DebugLoc dl = Op.getDebugLoc();
   SDValue Arg = Op.getOperand(0);
   EVT ArgVT = Arg.getValueType();
@@ -12075,14 +12362,16 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
   Entry.isZExt = false;
   Args.push_back(Entry);
 
+  bool isF64 = ArgVT == MVT::f64;
   // Only optimize x86_64 for now. i386 is a bit messy. For f32,
   // the small struct {f32, f32} is returned in (eax, edx). For f64,
   // the results are returned via SRet in memory.
-  const char *LibcallName = (ArgVT == MVT::f64)
-    ? "__sincos_stret" : "__sincosf_stret";
+  const char *LibcallName =  isF64 ? "__sincos_stret" : "__sincosf_stret";
   SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
 
-  StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+  Type *RetTy = isF64
+    ? (Type*)StructType::get(ArgTy, ArgTy, NULL)
+    : (Type*)VectorType::get(ArgTy, 4);
   TargetLowering::
     CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
                          false, false, false, false, 0,
@@ -12090,7 +12379,18 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
                          /*doesNotRet=*/false, /*isReturnValueUsed*/true,
                          Callee, Args, DAG, dl);
   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
-  return CallResult.first;
+
+  if (isF64)
+    // Returned in xmm0 and xmm1.
+    return CallResult.first;
+
+  // Returned in bits 0:31 and 32:64 xmm0.
+  SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
+                               CallResult.first, DAG.getIntPtrConstant(0));
+  SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
+                               CallResult.first, DAG.getIntPtrConstant(1));
+  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
 }
 
 /// LowerOperation - Provide custom lowering hooks for some operations.
@@ -12216,7 +12516,7 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
   SDValue Ops[] = { Chain, In1, In2L, In2H };
   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
   SDValue Result =
-    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
+    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, array_lengthof(Ops), MVT::i64,
                             cast<MemSDNode>(Node)->getMemOperand());
   SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
   Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
@@ -12296,7 +12596,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
                                      eax.getValue(2));
     // Use a buildpair to merge the two 32-bit values into a 64-bit one.
     SDValue Ops[] = { eax, edx };
-    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2));
+    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops,
+                                  array_lengthof(Ops)));
     Results.push_back(edx.getValue(1));
     return;
   }
@@ -12335,7 +12636,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG :
                                   X86ISD::LCMPXCHG8_DAG;
     SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys,
-                                             Ops, 3, T, MMO);
+                                             Ops, array_lengthof(Ops), T, MMO);
     SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
                                         Regs64bit ? X86::RAX : X86::EAX,
                                         HalfT, Result.getValue(1));
@@ -12547,6 +12848,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::WIN_FTOL:           return "X86ISD::WIN_FTOL";
   case X86ISD::SAHF:               return "X86ISD::SAHF";
   case X86ISD::RDRAND:             return "X86ISD::RDRAND";
+  case X86ISD::RDSEED:             return "X86ISD::RDSEED";
   case X86ISD::FMADD:              return "X86ISD::FMADD";
   case X86ISD::FMSUB:              return "X86ISD::FMSUB";
   case X86ISD::FNMADD:             return "X86ISD::FNMADD";
@@ -12555,6 +12857,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FMSUBADD:           return "X86ISD::FMSUBADD";
   case X86ISD::PCMPESTRI:          return "X86ISD::PCMPESTRI";
   case X86ISD::PCMPISTRI:          return "X86ISD::PCMPISTRI";
+  case X86ISD::XTEST:              return "X86ISD::XTEST";
   }
 }
 
@@ -14820,7 +15123,8 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
         SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
         SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
         SDValue ResNode =
-          DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
+          DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                  array_lengthof(Ops),
                                   Ld->getMemoryVT(),
                                   Ld->getPointerInfo(),
                                   Ld->getAlignment(),
@@ -15512,6 +15816,51 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
     if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
       return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
 
+  // Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
+  if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT &&
+      Cond.getOpcode() == ISD::SETCC) {
+
+    assert(Cond.getValueType().isVector() &&
+           "vector select expects a vector selector!");
+
+    EVT IntVT = Cond.getValueType();
+    bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
+    bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+    if (!TValIsAllOnes && !FValIsAllZeros) {
+      // Try invert the condition if true value is not all 1s and false value
+      // is not all 0s.
+      bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
+      bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
+
+      if (TValIsAllZeros || FValIsAllOnes) {
+        SDValue CC = Cond.getOperand(2);
+        ISD::CondCode NewCC =
+          ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+                               Cond.getOperand(0).getValueType().isInteger());
+        Cond = DAG.getSetCC(DL, IntVT, Cond.getOperand(0), Cond.getOperand(1), NewCC);
+        std::swap(LHS, RHS);
+        TValIsAllOnes = FValIsAllOnes;
+        FValIsAllZeros = TValIsAllZeros;
+      }
+    }
+
+    if (TValIsAllOnes || FValIsAllZeros) {
+      SDValue Ret;
+
+      if (TValIsAllOnes && FValIsAllZeros)
+        Ret = Cond;
+      else if (TValIsAllOnes)
+        Ret = DAG.getNode(ISD::OR, DL, IntVT, Cond,
+                          DAG.getNode(ISD::BITCAST, DL, IntVT, RHS));
+      else if (FValIsAllZeros)
+        Ret = DAG.getNode(ISD::AND, DL, IntVT, Cond,
+                          DAG.getNode(ISD::BITCAST, DL, IntVT, LHS));
+
+      return DAG.getNode(ISD::BITCAST, DL, VT, Ret);
+    }
+  }
+
   // If we know that this node is legal then we know that it is going to be
   // matched by one of the SSE/AVX BLEND instructions. These instructions only
   // depend on the highest bit in each word. Try to use SimplifyDemandedBits
@@ -15572,6 +15921,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
   SDValue SetCC;
   const ConstantSDNode* C = 0;
   bool needOppositeCond = (CC == X86::COND_E);
+  bool checkAgainstTrue = false; // Is it a comparison against 1?
 
   if ((C = dyn_cast<ConstantSDNode>(Op1)))
     SetCC = Op2;
@@ -15580,17 +15930,46 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
   else // Quit if all operands are not constants.
     return SDValue();
 
-  if (C->getZExtValue() == 1)
+  if (C->getZExtValue() == 1) {
     needOppositeCond = !needOppositeCond;
-  else if (C->getZExtValue() != 0)
+    checkAgainstTrue = true;
+  } else if (C->getZExtValue() != 0)
     // Quit if the constant is neither 0 or 1.
     return SDValue();
 
-  // Skip 'zext' node.
-  if (SetCC.getOpcode() == ISD::ZERO_EXTEND)
-    SetCC = SetCC.getOperand(0);
+  bool truncatedToBoolWithAnd = false;
+  // Skip (zext $x), (trunc $x), or (and $x, 1) node.
+  while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
+         SetCC.getOpcode() == ISD::TRUNCATE ||
+         SetCC.getOpcode() == ISD::AND) {
+    if (SetCC.getOpcode() == ISD::AND) {
+      int OpIdx = -1;
+      ConstantSDNode *CS;
+      if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) &&
+          CS->getZExtValue() == 1)
+        OpIdx = 1;
+      if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) &&
+          CS->getZExtValue() == 1)
+        OpIdx = 0;
+      if (OpIdx == -1)
+        break;
+      SetCC = SetCC.getOperand(OpIdx);
+      truncatedToBoolWithAnd = true;
+    } else
+      SetCC = SetCC.getOperand(0);
+  }
 
   switch (SetCC.getOpcode()) {
+  case X86ISD::SETCC_CARRY:
+    // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
+    // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
+    // i.e. it's a comparison against true but the result of SETCC_CARRY is not
+    // truncated to i1 using 'and'.
+    if (checkAgainstTrue && !truncatedToBoolWithAnd)
+      break;
+    assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
+           "Invalid use of SETCC_CARRY!");
+    // FALL THROUGH
   case X86ISD::SETCC:
     // Set the condition code or opposite one if necessary.
     CC = X86::CondCode(SetCC.getConstantOperandVal(0));
@@ -15606,9 +15985,15 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
       return SDValue();
     // Quit if false value is not a constant.
     if (!FVal) {
-      // A special case for rdrand, where 0 is set if false cond is found.
       SDValue Op = SetCC.getOperand(0);
-      if (Op.getOpcode() != X86ISD::RDRAND)
+      // Skip 'zext' or 'trunc' node.
+      if (Op.getOpcode() == ISD::ZERO_EXTEND ||
+          Op.getOpcode() == ISD::TRUNCATE)
+        Op = Op.getOperand(0);
+      // A special case for rdrand/rdseed, where 0 is set if false cond is
+      // found.
+      if ((Op.getOpcode() != X86ISD::RDRAND &&
+           Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
         return SDValue();
     }
     // Quit if false value is not the constant 0 or 1.
@@ -15920,124 +16305,12 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
 static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget *Subtarget) {
-  EVT VT = N->getValueType(0);
   if (N->getOpcode() == ISD::SHL) {
     SDValue V = PerformSHLCombine(N, DAG);
     if (V.getNode()) return V;
   }
 
-  // On X86 with SSE2 support, we can transform this to a vector shift if
-  // all elements are shifted by the same amount.  We can't do this in legalize
-  // because the a constant vector is typically transformed to a constant pool
-  // so we have no knowledge of the shift amount.
-  if (!Subtarget->hasSSE2())
-    return SDValue();
-
-  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
-      (!Subtarget->hasInt256() ||
-       (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
-    return SDValue();
-
-  SDValue ShAmtOp = N->getOperand(1);
-  EVT EltVT = VT.getVectorElementType();
-  DebugLoc DL = N->getDebugLoc();
-  SDValue BaseShAmt = SDValue();
-  if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
-    unsigned NumElts = VT.getVectorNumElements();
-    unsigned i = 0;
-    for (; i != NumElts; ++i) {
-      SDValue Arg = ShAmtOp.getOperand(i);
-      if (Arg.getOpcode() == ISD::UNDEF) continue;
-      BaseShAmt = Arg;
-      break;
-    }
-    // Handle the case where the build_vector is all undef
-    // FIXME: Should DAG allow this?
-    if (i == NumElts)
-      return SDValue();
-
-    for (; i != NumElts; ++i) {
-      SDValue Arg = ShAmtOp.getOperand(i);
-      if (Arg.getOpcode() == ISD::UNDEF) continue;
-      if (Arg != BaseShAmt) {
-        return SDValue();
-      }
-    }
-  } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
-             cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
-    SDValue InVec = ShAmtOp.getOperand(0);
-    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
-      unsigned NumElts = InVec.getValueType().getVectorNumElements();
-      unsigned i = 0;
-      for (; i != NumElts; ++i) {
-        SDValue Arg = InVec.getOperand(i);
-        if (Arg.getOpcode() == ISD::UNDEF) continue;
-        BaseShAmt = Arg;
-        break;
-      }
-    } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
-       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
-         unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
-         if (C->getZExtValue() == SplatIdx)
-           BaseShAmt = InVec.getOperand(1);
-       }
-    }
-    if (BaseShAmt.getNode() == 0) {
-      // Don't create instructions with illegal types after legalize
-      // types has run.
-      if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) &&
-          !DCI.isBeforeLegalize())
-        return SDValue();
-
-      BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
-                              DAG.getIntPtrConstant(0));
-    }
-  } else
-    return SDValue();
-
-  // The shift amount is an i32.
-  if (EltVT.bitsGT(MVT::i32))
-    BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
-  else if (EltVT.bitsLT(MVT::i32))
-    BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
-
-  // The shift amount is identical so we can do a vector shift.
-  SDValue  ValOp = N->getOperand(0);
-  switch (N->getOpcode()) {
-  default:
-    llvm_unreachable("Unknown shift opcode!");
-  case ISD::SHL:
-    switch (VT.getSimpleVT().SimpleTy) {
-    default: return SDValue();
-    case MVT::v2i64:
-    case MVT::v4i32:
-    case MVT::v8i16:
-    case MVT::v4i64:
-    case MVT::v8i32:
-    case MVT::v16i16:
-      return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG);
-    }
-  case ISD::SRA:
-    switch (VT.getSimpleVT().SimpleTy) {
-    default: return SDValue();
-    case MVT::v4i32:
-    case MVT::v8i16:
-    case MVT::v8i32:
-    case MVT::v16i16:
-      return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG);
-    }
-  case ISD::SRL:
-    switch (VT.getSimpleVT().SimpleTy) {
-    default: return SDValue();
-    case MVT::v2i64:
-    case MVT::v4i32:
-    case MVT::v8i16:
-    case MVT::v4i64:
-    case MVT::v8i32:
-    case MVT::v16i16:
-      return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG);
-    }
-  }
+  return SDValue();
 }
 
 // CMPEQCombine - Recognize the distinctive  (AND (setcc ...) (setcc ..))
@@ -16348,13 +16621,19 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
       // Validate that the Mask operand is a vector sra node.
       // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
       // there is no psrai.b
-      if (Mask.getOpcode() != X86ISD::VSRAI)
-        return SDValue();
-
-      // Check that the SRA is all signbits.
-      SDValue SraC = Mask.getOperand(1);
-      unsigned SraAmt  = cast<ConstantSDNode>(SraC)->getZExtValue();
       unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+      unsigned SraAmt = ~0;
+      if (Mask.getOpcode() == ISD::SRA) {
+        SDValue Amt = Mask.getOperand(1);
+        if (isSplatVector(Amt.getNode())) {
+          SDValue SclrAmt = Amt->getOperand(0);
+          if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt))
+            SraAmt = C->getZExtValue();
+        }
+      } else if (Mask.getOpcode() == X86ISD::VSRAI) {
+        SDValue SraC = Mask.getOperand(1);
+        SraAmt  = cast<ConstantSDNode>(SraC)->getZExtValue();
+      }
       if ((SraAmt + 1) != EltBits)
         return SDValue();
 
@@ -16528,11 +16807,10 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   unsigned RegSz = RegVT.getSizeInBits();
 
+  // On Sandybridge unaligned 256bit loads are inefficient.
   ISD::LoadExtType Ext = Ld->getExtensionType();
   unsigned Alignment = Ld->getAlignment();
-  bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8;
-
-  // On Sandybridge unaligned 256bit loads are inefficient.
+  bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
   if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
       !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
     unsigned NumElems = RegVT.getVectorNumElements();
@@ -16552,7 +16830,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
     SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
                                 Ld->getPointerInfo(), Ld->isVolatile(),
                                 Ld->isNonTemporal(), Ld->isInvariant(),
-                                std::max(Alignment/2U, 1U));
+                                std::min(16U, Alignment));
     SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                              Load1.getValue(1),
                              Load2.getValue(1));
@@ -16723,13 +17001,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
   DebugLoc dl = St->getDebugLoc();
   SDValue StoredVal = St->getOperand(1);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  unsigned Alignment = St->getAlignment();
-  bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8;
 
   // If we are saving a concatenation of two XMM registers, perform two stores.
   // On Sandy Bridge, 256-bit memory operations are executed by two
   // 128-bit ports. However, on Haswell it is better to issue a single 256-bit
   // memory  operation.
+  unsigned Alignment = St->getAlignment();
+  bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
   if (VT.is256BitVector() && !Subtarget->hasInt256() &&
       StVT == VT && !IsAligned) {
     unsigned NumElems = VT.getVectorNumElements();
@@ -16749,7 +17027,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
     SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
                                 St->getPointerInfo(), St->isVolatile(),
                                 St->isNonTemporal(),
-                                std::max(Alignment/2U, 1U));
+                                std::min(16U, Alignment));
     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
   }
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index da1dad0f4045..2727e220d360 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -356,10 +356,17 @@ namespace llvm {
       // RDRAND - Get a random integer and indicate whether it is valid in CF.
       RDRAND,
 
+      // RDSEED - Get a NIST SP800-90B & C compliant random integer and
+      // indicate whether it is valid in CF.
+      RDSEED,
+
       // PCMP*STRI
       PCMPISTRI,
       PCMPESTRI,
 
+      // XTEST - Test if in transactional execution.
+      XTEST,
+
       // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
       // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
       // Atomic 64-bit binary operations.
@@ -716,6 +723,9 @@ namespace llvm {
     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                       SelectionDAG &DAG) const;
 
+    /// \brief Reset the operation actions based on target options.
+    virtual void resetOperationActions();
+
   protected:
     std::pair<const TargetRegisterClass*, uint8_t>
     findRepresentativeClass(MVT VT) const;
@@ -727,6 +737,10 @@ namespace llvm {
     const X86RegisterInfo *RegInfo;
     const DataLayout *TD;
 
+    /// Used to store the TargetOptions so that we don't waste time resetting
+    /// the operation actions unless we have to.
+    TargetOptions TO;
+
     /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
     /// floating point ops.
     /// When SSE is available, use it for f32 operations.
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index bb362f5c7bb1..ba1aede3c1a0 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -84,13 +84,16 @@ defm PI2FD    : I3DNow_conv_rm_int<0x0D, "pi2fd">;
 defm PMULHRW  : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
 
 
-def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
+                   [(int_x86_mmx_femms)]>;
 
-def PREFETCH  : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
-                       "prefetch\t$addr", []>;
+def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
+                      "prefetch\t$addr",
+                      [(prefetch addr:$addr, (i32 0), imm, (i32 1))]>;
 
-def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
-                       "prefetchw\t$addr", []>;
+def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
+                  [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))]>, TB,
+                Requires<[HasPrefetchW]>;
 
 // "3DNowA" instructions
 defm PF2IW    : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index d86a4065a7a3..225e9720da0c 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -14,7 +14,7 @@
 
 //===----------------------------------------------------------------------===//
 // LEA - Load Effective Address
-
+let SchedRW = [WriteLEA] in {
 let neverHasSideEffects = 1 in
 def LEA16r   : I<0x8D, MRMSrcMem,
                  (outs GR16:$dst), (ins i32mem:$src),
@@ -36,41 +36,52 @@ let isReMaterializable = 1 in
 def LEA64r   : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
                   "lea{q}\t{$src|$dst}, {$dst|$src}",
                   [(set GR64:$dst, lea64addr:$src)], IIC_LEA>;
-
-
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 //  Fixed-Register Multiplication and Division Instructions.
 //
 
+// SchedModel info for instruction that loads one value and gets the second
+// (and possibly third) value from a register.
+// This is used for instructions that put the memory operands before other
+// uses.
+class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+  // Memory operand.
+  ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+  // Register reads (implicit or explicit).
+  ReadAfterLd, ReadAfterLd]>;
+
 // Extra precision multiplication
 
 // AL is really implied by AX, but the registers in Defs must match the
 // SDNode results (i8, i32).
+// AL,AH = AL*GR8
 let Defs = [AL,EFLAGS,AX], Uses = [AL] in
 def MUL8r  : I<0xF6, MRM4r, (outs),  (ins GR8:$src), "mul{b}\t$src",
                // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
                // This probably ought to be moved to a def : Pat<> if the
                // syntax can be accepted.
                [(set AL, (mul AL, GR8:$src)),
-                (implicit EFLAGS)], IIC_MUL8>;     // AL,AH = AL*GR8
-
+                (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
 def MUL16r : I<0xF7, MRM4r, (outs),  (ins GR16:$src),
                "mul{w}\t$src",
-               [], IIC_MUL16_REG>, OpSize;    // AX,DX = AX*GR16
-
+               [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
 def MUL32r : I<0xF7, MRM4r, (outs),  (ins GR32:$src),
-               "mul{l}\t$src",   // EAX,EDX = EAX*GR32
+               "mul{l}\t$src",
                [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/],
-               IIC_MUL32_REG>;
+               IIC_MUL32_REG>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
 def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
-                "mul{q}\t$src",          // RAX,RDX = RAX*GR64
+                "mul{q}\t$src",
                 [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/],
-                IIC_MUL64>;
-
+                IIC_MUL64>, Sched<[WriteIMul]>;
+// AL,AH = AL*[mem8]
 let Defs = [AL,EFLAGS,AX], Uses = [AL] in
 def MUL8m  : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
                "mul{b}\t$src",
@@ -78,51 +89,60 @@ def MUL8m  : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
                // This probably ought to be moved to a def : Pat<> if the
                // syntax can be accepted.
                [(set AL, (mul AL, (loadi8 addr:$src))),
-                (implicit EFLAGS)], IIC_MUL8>;   // AL,AH = AL*[mem8]
-
+                (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
 let mayLoad = 1, neverHasSideEffects = 1 in {
 let Defs = [AX,DX,EFLAGS], Uses = [AX] in
 def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
                "mul{w}\t$src",
-               [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16]
-
+               [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
 def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
               "mul{l}\t$src",
-              [], IIC_MUL32_MEM>;          // EAX,EDX = EAX*[mem32]
+              [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
 def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
-                "mul{q}\t$src", [], IIC_MUL64>;   // RAX,RDX = RAX*[mem64]
+                "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>;
 }
 
 let neverHasSideEffects = 1 in {
+// AL,AH = AL*GR8
 let Defs = [AL,EFLAGS,AX], Uses = [AL] in
 def IMUL8r  : I<0xF6, MRM5r, (outs),  (ins GR8:$src), "imul{b}\t$src", [],
-              IIC_IMUL8>; // AL,AH = AL*GR8
+              IIC_IMUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
 let Defs = [AX,DX,EFLAGS], Uses = [AX] in
 def IMUL16r : I<0xF7, MRM5r, (outs),  (ins GR16:$src), "imul{w}\t$src", [],
-              IIC_IMUL16_RR>, OpSize;    // AX,DX = AX*GR16
+              IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
 def IMUL32r : I<0xF7, MRM5r, (outs),  (ins GR32:$src), "imul{l}\t$src", [],
-              IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32
+              IIC_IMUL32_RR>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
 def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [],
-              IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64
+              IIC_IMUL64_RR>, Sched<[WriteIMul]>;
 
 let mayLoad = 1 in {
+// AL,AH = AL*[mem8]
 let Defs = [AL,EFLAGS,AX], Uses = [AL] in
 def IMUL8m  : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
-                "imul{b}\t$src", [], IIC_IMUL8>;    // AL,AH = AL*[mem8]
+                "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
 let Defs = [AX,DX,EFLAGS], Uses = [AX] in
 def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
-                "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize;
-                // AX,DX = AX*[mem16]
+                "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize,
+              SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
 def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
-                "imul{l}\t$src", [], IIC_IMUL32_MEM>;  // EAX,EDX = EAX*[mem32]
+                "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
 def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
-                 "imul{q}\t$src", [], IIC_IMUL64>;  // RAX,RDX = RAX*[mem64]
+                 "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>;
 }
 } // neverHasSideEffects
 
@@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
 let Defs = [EFLAGS] in {
 let Constraints = "$src1 = $dst" in {
 
-let isCommutable = 1 in {  // X = IMUL Y, Z --> X = IMUL Z, Y
+let isCommutable = 1, SchedRW = [WriteIMul] in {
+// X = IMUL Y, Z --> X = IMUL Z, Y
 // Register-Register Signed Integer Multiply
 def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
                  "imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
                   [(set GR64:$dst, EFLAGS,
                         (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>,
                  TB;
-}
+} // isCommutable, SchedRW
 
 // Register-Memory Signed Integer Multiply
+let SchedRW = [WriteIMulLd, ReadAfterLd] in {
 def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
                                   (ins GR16:$src1, i16mem:$src2),
                  "imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -172,12 +194,14 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
                         (X86smul_flag GR64:$src1, (load addr:$src2)))],
                         IIC_IMUL64_RM>,
                TB;
+} // SchedRW
 } // Constraints = "$src1 = $dst"
 
 } // Defs = [EFLAGS]
 
 // Surprisingly enough, these are not two address instructions!
 let Defs = [EFLAGS] in {
+let SchedRW = [WriteIMul] in {
 // Register-Integer Signed Integer Multiply
 def IMUL16rri  : Ii16<0x69, MRMSrcReg,                      // GR16 = GR16*I16
                       (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
@@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg,                      // GR64 = GR64*I8
                       [(set GR64:$dst, EFLAGS,
                             (X86smul_flag GR64:$src1, i64immSExt8:$src2))],
                             IIC_IMUL64_RRI>;
-
+} // SchedRW
 
 // Memory-Integer Signed Integer Multiply
+let SchedRW = [WriteIMulLd] in {
 def IMUL16rmi  : Ii16<0x69, MRMSrcMem,                     // GR16 = [mem16]*I16
                       (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
                       "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -260,6 +285,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem,                      // GR64 = [mem64]*I8
                             (X86smul_flag (load addr:$src1),
                                           i64immSExt8:$src2))],
                                           IIC_IMUL64_RMI>;
+} // SchedRW
 } // Defs = [EFLAGS]
 
 
@@ -267,6 +293,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem,                      // GR64 = [mem64]*I8
 
 // unsigned division/remainder
 let hasSideEffects = 1 in { // so that we don't speculatively execute
+let SchedRW = [WriteIDiv] in {
 let Defs = [AL,EFLAGS,AX], Uses = [AX] in
 def DIV8r  : I<0xF6, MRM6r, (outs),  (ins GR8:$src),    // AX/r8 = AL,AH
                "div{b}\t$src", [], IIC_DIV8_REG>;
@@ -280,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs),  (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
 def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
                 "div{q}\t$src", [], IIC_DIV64>;
+} // SchedRW
 
 let mayLoad = 1 in {
 let Defs = [AL,EFLAGS,AX], Uses = [AX] in
 def DIV8m  : I<0xF6, MRM6m, (outs), (ins i8mem:$src),   // AX/[mem8] = AL,AH
-               "div{b}\t$src", [], IIC_DIV8_MEM>;
+               "div{b}\t$src", [], IIC_DIV8_MEM>,
+             SchedLoadReg<WriteIDivLd>;
 let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
 def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src),  // DX:AX/[mem16] = AX,DX
-               "div{w}\t$src", [], IIC_DIV16>, OpSize;
+               "div{w}\t$src", [], IIC_DIV16>, OpSize,
+             SchedLoadReg<WriteIDivLd>;
 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in    // EDX:EAX/[mem32] = EAX,EDX
 def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
-               "div{l}\t$src", [], IIC_DIV32>;
+               "div{l}\t$src", [], IIC_DIV32>,
+             SchedLoadReg<WriteIDivLd>;
 // RDX:RAX/[mem64] = RAX,RDX
 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
 def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
-                "div{q}\t$src", [], IIC_DIV64>;
+                "div{q}\t$src", [], IIC_DIV64>,
+             SchedLoadReg<WriteIDivLd>;
 }
 
 // Signed division/remainder.
+let SchedRW = [WriteIDiv] in {
 let Defs = [AL,EFLAGS,AX], Uses = [AX] in
 def IDIV8r : I<0xF6, MRM7r, (outs),  (ins GR8:$src),    // AX/r8 = AL,AH
                "idiv{b}\t$src", [], IIC_IDIV8>;
@@ -311,20 +344,25 @@ def IDIV32r: I<0xF7, MRM7r, (outs),  (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
 def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
                 "idiv{q}\t$src", [], IIC_IDIV64>;
+} // SchedRW
 
 let mayLoad = 1 in {
 let Defs = [AL,EFLAGS,AX], Uses = [AX] in
 def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src),   // AX/[mem8] = AL,AH
-               "idiv{b}\t$src", [], IIC_IDIV8>;
+               "idiv{b}\t$src", [], IIC_IDIV8>,
+             SchedLoadReg<WriteIDivLd>;
 let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
 def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src),  // DX:AX/[mem16] = AX,DX
-               "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
+               "idiv{w}\t$src", [], IIC_IDIV16>, OpSize,
+             SchedLoadReg<WriteIDivLd>;
 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in    // EDX:EAX/[mem32] = EAX,EDX
 def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
-               "idiv{l}\t$src", [], IIC_IDIV32>;
+               "idiv{l}\t$src", [], IIC_IDIV32>,
+             SchedLoadReg<WriteIDivLd>;
 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
 def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
-                "idiv{q}\t$src", [], IIC_IDIV64>;
+                "idiv{q}\t$src", [], IIC_IDIV64>,
+             SchedLoadReg<WriteIDivLd>;
 }
 } // hasSideEffects = 0
 
@@ -335,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
 // unary instructions
 let CodeSize = 2 in {
 let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
 def NEG8r  : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
                "neg{b}\t$dst",
                [(set GR8:$dst, (ineg GR8:$src1)),
@@ -351,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
 def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
                 [(set GR64:$dst, (ineg GR64:$src1)),
                  (implicit EFLAGS)], IIC_UNARY_REG>;
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
 
+// Read-modify-write negate.
+let SchedRW = [WriteALULd, WriteRMW] in {
 def NEG8m  : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
                "neg{b}\t$dst",
                [(store (ineg (loadi8 addr:$dst)), addr:$dst),
@@ -368,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
 def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
                 [(store (ineg (loadi64 addr:$dst)), addr:$dst),
                  (implicit EFLAGS)], IIC_UNARY_MEM>;
+} // SchedRW
 } // Defs = [EFLAGS]
 
 
 // Note: NOT does not set EFLAGS!
 
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
 // Match xor -1 to not. Favors these over a move imm + xor to save code size.
 let AddedComplexity = 15 in {
 def NOT8r  : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -388,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
 def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
                 [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>;
 }
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
 
+let SchedRW = [WriteALULd, WriteRMW] in {
 def NOT8m  : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
                "not{b}\t$dst",
                [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
@@ -402,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
                [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
 def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
                 [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
+} // SchedRW
 } // CodeSize
 
 // TODO: inc/dec is slow for P4, but fast for Pentium-M.
 let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
 let CodeSize = 2 in
 def INC8r  : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
                "inc{b}\t$dst",
@@ -454,9 +497,9 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
                 Requires<[In64BitMode]>;
 } // isConvertibleToThreeAddress = 1, CodeSize = 2
 
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
 
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
   def INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
                [(store (add (loadi8 addr:$dst), 1), addr:$dst),
                 (implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -491,9 +534,9 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
                   [(store (add (loadi32 addr:$dst), -1), addr:$dst),
                     (implicit EFLAGS)], IIC_UNARY_MEM>,
                 Requires<[In64BitMode]>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
 
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
 let CodeSize = 2 in
 def DEC8r  : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
                "dec{b}\t$dst",
@@ -514,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
                 [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
                 IIC_UNARY_REG>;
 } // CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
 
 
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
   def DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
                [(store (add (loadi8 addr:$dst), -1), addr:$dst),
                 (implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -532,7 +575,7 @@ let CodeSize = 2 in {
   def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
                   [(store (add (loadi64 addr:$dst), -1), addr:$dst),
                    (implicit EFLAGS)], IIC_UNARY_MEM>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
 } // Defs = [EFLAGS]
 
 
@@ -646,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
               Format f = MRMDestReg>
   : ITy<opcode, f, typeinfo, outlist,
         (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
-        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>;
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
+    Sched<[WriteALU]>;
 
 // BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
 // just a regclass (no eflags) as a result.
@@ -689,7 +733,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
   : ITy<opcode, MRMSrcReg, typeinfo,
         (outs typeinfo.RegClass:$dst),
         (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
-        mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
+        mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
+    Sched<[WriteALU]> {
   // The disassembler should know about this, but not the asmparser.
   let isCodeGenOnly = 1;
   let hasSideEffects = 0;
@@ -699,7 +744,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
 class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
   : ITy<opcode, MRMSrcReg, typeinfo, (outs),
         (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
-        mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
+        mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>,
+    Sched<[WriteALU]> {
   // The disassembler should know about this, but not the asmparser.
   let isCodeGenOnly = 1;
   let hasSideEffects = 0;
@@ -710,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
               dag outlist, list<dag> pattern>
   : ITy<opcode, MRMSrcMem, typeinfo, outlist,
         (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
-        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>;
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+    Sched<[WriteALULd, ReadAfterLd]>;
 
 // BinOpRM_R - Instructions like "add reg, reg, [mem]".
 class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -746,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
               Format f, dag outlist, list<dag> pattern>
   : ITy<opcode, f, typeinfo, outlist,
         (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
-        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+    Sched<[WriteALU]> {
   let ImmT = typeinfo.ImmEncoding;
 }
 
@@ -783,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                Format f, dag outlist, list<dag> pattern>
   : ITy<opcode, f, typeinfo, outlist,
         (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
-        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+    Sched<[WriteALU]> {
   let ImmT = Imm8; // Always 8-bit immediate.
 }
 
@@ -821,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
               list<dag> pattern>
   : ITy<opcode, MRMDestMem, typeinfo,
         (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
-        mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>;
+        mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+    Sched<[WriteALULd, WriteRMW]>;
 
 // BinOpMR_RMW - Instructions like "add [mem], reg".
 class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -849,7 +899,8 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
               Format f, list<dag> pattern, bits<8> opcode = 0x80>
   : ITy<opcode, f, typeinfo,
         (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
-        mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+        mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+    Sched<[WriteALULd, WriteRMW]> {
   let ImmT = typeinfo.ImmEncoding;
 }
 
@@ -881,7 +932,8 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
                Format f, list<dag> pattern>
   : ITy<0x82, f, typeinfo,
         (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
-        mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+        mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+    Sched<[WriteALULd, WriteRMW]> {
   let ImmT = Imm8; // Always 8-bit immediate.
 }
 
@@ -913,7 +965,7 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
               Register areg, string operands>
   : ITy<opcode, RawFrm, typeinfo,
         (outs), (ins typeinfo.ImmOperand:$src),
-        mnemonic, operands, []> {
+        mnemonic, operands, []>, Sched<[WriteALU]> {
   let ImmT = typeinfo.ImmEncoding;
   let Uses = [areg];
   let Defs = [areg];
@@ -1199,7 +1251,7 @@ let isCompare = 1, Defs = [EFLAGS] in {
   // register class is constrained to GR8_NOREX.
   let isPseudo = 1 in
   def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
-                        "", [], IIC_BIN_NONMEM>;
+                        "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1210,11 +1262,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
   def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))],
-            IIC_BIN_NONMEM>;
+            IIC_BIN_NONMEM>, Sched<[WriteALU]>;
   def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, EFLAGS,
-             (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>;
+             (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>,
+           Sched<[WriteALULd, ReadAfterLd]>;
 }
 
 let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -1241,12 +1294,12 @@ let neverHasSideEffects = 1 in {
   let isCommutable = 1 in
   def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
              !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
-             [], IIC_MUL8>, T8XD, VEX_4V;
+             [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMul]>;
 
   let mayLoad = 1 in
   def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
              !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
-             [], IIC_MUL8>, T8XD, VEX_4V;
+             [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMulLd]>;
 }
 }
 
@@ -1261,6 +1314,7 @@ let Predicates = [HasBMI2] in {
 // ADCX Instruction
 //
 let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+  let SchedRW = [WriteALU] in {
   def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
              "adcx{l}\t{$src, $dst|$dst, $src}",
              [], IIC_BIN_NONMEM>, T8, OpSize;
@@ -1268,8 +1322,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
   def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
              "adcx{q}\t{$src, $dst|$dst, $src}",
              [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+  } // SchedRW
 
-  let mayLoad = 1 in {
+  let mayLoad = 1, SchedRW = [WriteALULd] in {
   def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
              "adcx{l}\t{$src, $dst|$dst, $src}",
              [], IIC_BIN_MEM>, T8, OpSize;
@@ -1284,6 +1339,7 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
 // ADOX Instruction
 //
 let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+  let SchedRW = [WriteALU] in {
   def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
              "adox{l}\t{$src, $dst|$dst, $src}",
              [], IIC_BIN_NONMEM>, T8XS;
@@ -1291,8 +1347,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
   def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
              "adox{q}\t{$src, $dst|$dst, $src}",
              [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+  } // SchedRW
 
-  let mayLoad = 1 in {
+  let mayLoad = 1, SchedRW = [WriteALULd] in {
   def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
              "adox{l}\t{$src, $dst|$dst, $src}",
              [], IIC_BIN_MEM>, T8XS;
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index 8f2d0a1aae9f..a967a4da5cf7 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -16,7 +16,7 @@
 // SetCC instructions.
 multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
   let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
-      isCommutable = 1 in {
+      isCommutable = 1, SchedRW = [WriteALU] in {
     def NAME#16rr
       : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
           !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
@@ -37,7 +37,8 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
                 IIC_CMOV32_RR>, TB;
   }
 
-  let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
+  let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+      SchedRW = [WriteALULd, ReadAfterLd] in {
     def NAME#16rm
       : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
           !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
@@ -83,11 +84,11 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
     def r    : I<opc, MRM0r,  (outs GR8:$dst), (ins),
                      !strconcat(Mnemonic, "\t$dst"),
                      [(set GR8:$dst, (X86setcc OpNode, EFLAGS))],
-                     IIC_SET_R>, TB;
+                     IIC_SET_R>, TB, Sched<[WriteALU]>;
     def m    : I<opc, MRM0m,  (outs), (ins i8mem:$dst),
                      !strconcat(Mnemonic, "\t$dst"),
                      [(store (X86setcc OpNode, EFLAGS), addr:$dst)],
-                     IIC_SET_M>, TB;
+                     IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>;
   } // Uses = [EFLAGS]
 }
 
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 734e5982b2aa..d9ff0c63c55f 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -149,11 +149,12 @@ let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in {
 //===----------------------------------------------------------------------===//
 // EH Pseudo Instructions
 //
+let SchedRW = [WriteSystem] in {
 let isTerminator = 1, isReturn = 1, isBarrier = 1,
     hasCtrlDep = 1, isCodeGenOnly = 1 in {
 def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
                     "ret\t#eh_return, addr: $addr",
-                    [(X86ehret GR32:$addr)], IIC_RET>;
+                    [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
 
 }
 
@@ -161,7 +162,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
     hasCtrlDep = 1, isCodeGenOnly = 1 in {
 def EH_RETURN64   : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
                      "ret\t#eh_return, addr: $addr",
-                     [(X86ehret GR64:$addr)], IIC_RET>;
+                     [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
 
 }
 
@@ -186,6 +187,7 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
                           Requires<[In64BitMode]>;
   }
 }
+} // SchedRW
 
 let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
   def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
@@ -220,7 +222,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
 let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
     isCodeGenOnly = 1 in {
 def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
-                 [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
+                 [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
 
 // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
 // encoding and avoids a partial-register update sometimes, but doing so
@@ -229,11 +231,12 @@ def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
 // to an MCInst.
 def MOV16r0   : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
                  "",
-                 [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
+                 [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize,
+                 Sched<[WriteZero]>;
 
 // FIXME: Set encoding to pseudo.
 def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
-                 [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
+                 [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
 }
 
 // We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -245,7 +248,7 @@ def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
 let Defs = [EFLAGS], isCodeGenOnly=1,
     AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def MOV64r0   : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
-                 [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
+                 [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
 
 // Materialize i64 constant where top 32-bits are zero. This could theoretically
 // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -254,10 +257,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
     isCodeGenOnly = 1 in
 def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
                         "", [(set GR64:$dst, i64immZExt32:$src)],
-                        IIC_ALU_NONMEM>;
+                        IIC_ALU_NONMEM>, Sched<[WriteALU]>;
 
 // Use sbb to materialize carry bit.
-let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in {
+let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
 // FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
 // However, Pat<> can't replicate the destination reg into the inputs of the
 // result.
@@ -320,6 +323,7 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
 //===----------------------------------------------------------------------===//
 // String Pseudo Instructions
 //
+let SchedRW = [WriteMicrocoded] in {
 let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
 def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
                     [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
@@ -382,6 +386,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
                       [(X86rep_stos i64)], IIC_REP_STOS>, REP,
                      Requires<[In64BitMode]>;
 }
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // Thread Local Storage Instructions
@@ -594,12 +599,13 @@ defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
 let isCodeGenOnly = 1, Defs = [EFLAGS] in
 def OR32mrLocked  : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
                       "or{l}\t{$zero, $dst|$dst, $zero}",
-                      [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
+                      [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK,
+                    Sched<[WriteALULd, WriteRMW]>;
 
 let hasSideEffects = 1 in
 def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
                      "#MEMBARRIER",
-                     [(X86MemBarrier)]>;
+                     [(X86MemBarrier)]>, Sched<[WriteLoad]>;
 
 // RegOpc corresponds to the mr version of the instruction
 // ImmOpc corresponds to the mi version of the instruction
@@ -607,7 +613,8 @@ def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
 // ImmMod corresponds to the instruction format of the mi and mi8 versions
 multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
                            Format ImmMod, string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+    SchedRW = [WriteALULd, WriteRMW] in {
 
 def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                   RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
@@ -694,7 +701,8 @@ defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
 // Optimized codegen when the non-memory output is not used.
 multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
                           string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+    SchedRW = [WriteALULd, WriteRMW] in {
 
 def NAME#8m  : I<Opc8, Form, (outs), (ins i8mem :$dst),
                  !strconcat(mnemonic, "{b}\t$dst"),
@@ -728,7 +736,7 @@ let isCodeGenOnly = 1 in {
 multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
                           string mnemonic, SDPatternOperator frag,
                           InstrItinClass itin8, InstrItinClass itin> {
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
   let Defs = [AL, EFLAGS], Uses = [AL] in
   def NAME#8  : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
                   !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
@@ -748,14 +756,15 @@ let isCodeGenOnly = 1 in {
 }
 }
 
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
+    SchedRW = [WriteALULd, WriteRMW] in {
 defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
                                 X86cas8, i64mem,
                                 IIC_CMPX_LOCK_8B>;
 }
 
 let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
-    Predicates = [HasCmpxchg16b] in {
+    Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
 defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
                                  X86cas16, i128mem,
                                  IIC_CMPX_LOCK_16B>, REX_W;
@@ -768,7 +777,8 @@ defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
 multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
                              string frag,
                              InstrItinClass itin8, InstrItinClass itin> {
-  let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
+  let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
+      SchedRW = [WriteALULd, WriteRMW] in {
     def NAME#8  : I<opc8, MRMSrcMem, (outs GR8:$dst),
                     (ins GR8:$val, i8mem:$ptr),
                     !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
@@ -990,9 +1000,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
 // This corresponds to add $foo@tpoff, %rax
 def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
           (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
-// This corresponds to mov foo@tpoff(%rbx), %eax
-def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
-          (MOV64rm tglobaltlsaddr :$dst)>;
 
 
 // Direct PC relative function call for small code model. 32-bit displacement
@@ -1192,7 +1199,8 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
 
 
 // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
+// Try this before the selecting to OR.
+let AddedComplexity = 5, SchedRW = [WriteALU] in {
 
 let isConvertibleToThreeAddress = 1,
     Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
@@ -1239,7 +1247,7 @@ def ADD64ri32_DB : I<0, Pseudo,
                       [(set GR64:$dst, (or_is_add GR64:$src1,
                                                   i64immSExt32:$src2))]>;
 }
-} // AddedComplexity
+} // AddedComplexity, SchedRW
 
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index bfe954114c55..0e696513d47c 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -20,7 +20,7 @@
 // The X86retflag return instructions are variadic because we may add ST0 and
 // ST1 arguments when returning values on the x87 stack.
 let isTerminator = 1, isReturn = 1, isBarrier = 1,
-    hasCtrlDep = 1, FPForm = SpecialFP in {
+    hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
   def RET    : I   <0xC3, RawFrm, (outs), (ins variable_ops),
                     "ret",
                     [(X86retflag 0)], IIC_RET>;
@@ -46,7 +46,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
 }
 
 // Unconditional branches.
-let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
   def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
                         "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
   def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
@@ -58,7 +58,7 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
 }
 
 // Conditional Branches.
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
   multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
     def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [],
                        IIC_Jcc>;
@@ -85,7 +85,7 @@ defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
 defm JG  : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
 
 // jcx/jecx/jrcx instructions.
-let isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
   // These are the 32-bit versions of this instruction for the asmparser.  In
   // 32-bit mode, the address size prefix is jcxz and the unprefixed version is
   // jecxz.
@@ -110,36 +110,46 @@ let isBranch = 1, isTerminator = 1 in {
 // Indirect branches
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
   def JMP32r     : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
-                     [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>;
+                     [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>,
+                   Sched<[WriteJump]>;
   def JMP32m     : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
-                     [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>;
+                     [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>,
+                   Requires<[In32BitMode]>, Sched<[WriteJumpLd]>;
 
   def JMP64r     : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
-                     [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>;
+                     [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>,
+                   Sched<[WriteJump]>;
   def JMP64m     : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
-                     [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, Requires<[In64BitMode]>;
+                     [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>,
+                   Requires<[In64BitMode]>, Sched<[WriteJumpLd]>;
 
   def FARJMP16i  : Iseg16<0xEA, RawFrmImm16, (outs),
                           (ins i16imm:$off, i16imm:$seg),
-                          "ljmp{w}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>, OpSize;
+                          "ljmp{w}\t{$seg, $off|$off, $seg}", [],
+                          IIC_JMP_FAR_PTR>, OpSize, Sched<[WriteJump]>;
   def FARJMP32i  : Iseg32<0xEA, RawFrmImm16, (outs),
                           (ins i32imm:$off, i16imm:$seg),
-                          "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>;
+                          "ljmp{l}\t{$seg, $off|$off, $seg}", [],
+                          IIC_JMP_FAR_PTR>, Sched<[WriteJump]>;
   def FARJMP64   : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
-                      "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
+                      "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
+                   Sched<[WriteJump]>;
 
   def FARJMP16m  : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
-                     "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize;
+                     "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize,
+                   Sched<[WriteJumpLd]>;
   def FARJMP32m  : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
-                     "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
+                     "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
+                   Sched<[WriteJumpLd]>;
 }
 
 
 // Loop instructions
-
+let SchedRW = [WriteJump] in {
 def LOOP   : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>;
 def LOOPE  : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>;
 def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>;
+}
 
 //===----------------------------------------------------------------------===//
 //  Call Instructions...
@@ -152,27 +162,32 @@ let isCall = 1 in
   let Uses = [ESP] in {
     def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
                            (outs), (ins i32imm_pcrel:$dst),
-                           "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>;
+                           "call{l}\t$dst", [], IIC_CALL_RI>,
+                      Requires<[In32BitMode]>, Sched<[WriteJump]>;
     def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
                         "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>,
-                         Requires<[In32BitMode]>;
+                      Requires<[In32BitMode]>, Sched<[WriteJump]>;
     def CALL32m     : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
-                        "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>,
-                        Requires<[In32BitMode]>;
+                        "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))],
+                        IIC_CALL_MEM>,
+                      Requires<[In32BitMode,FavorMemIndirectCall]>,
+                      Sched<[WriteJumpLd]>;
 
     def FARCALL16i  : Iseg16<0x9A, RawFrmImm16, (outs),
                              (ins i16imm:$off, i16imm:$seg),
                              "lcall{w}\t{$seg, $off|$off, $seg}", [],
-                             IIC_CALL_FAR_PTR>, OpSize;
+                             IIC_CALL_FAR_PTR>, OpSize, Sched<[WriteJump]>;
     def FARCALL32i  : Iseg32<0x9A, RawFrmImm16, (outs),
                              (ins i32imm:$off, i16imm:$seg),
                              "lcall{l}\t{$seg, $off|$off, $seg}", [],
-                             IIC_CALL_FAR_PTR>;
+                             IIC_CALL_FAR_PTR>, Sched<[WriteJump]>;
 
     def FARCALL16m  : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
-                        "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize;
+                        "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize,
+                      Sched<[WriteJumpLd]>;
     def FARCALL32m  : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
-                        "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
+                        "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>,
+                      Sched<[WriteJumpLd]>;
 
     // callw for 16 bit code for the assembler.
     let isAsmParserOnly = 1 in
@@ -185,7 +200,7 @@ let isCall = 1 in
 // Tail call stuff.
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
-    isCodeGenOnly = 1 in
+    isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
   let Uses = [ESP] in {
   def TCRETURNdi : PseudoI<(outs),
                      (ins i32imm_pcrel:$dst, i32imm:$offset), []>;
@@ -216,7 +231,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
 // RSP is marked as a use to prevent stack-pointer assignments that appear
 // immediately before calls from potentially appearing dead. Uses for argument
 // registers are added manually.
-let isCall = 1, Uses = [RSP] in {
+let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
   // NOTE: this pattern doesn't match "X86call imm", because we do not know
   // that the offset between an arbitrary immediate and the call will fit in
   // the 32-bit pcrel field that we have.
@@ -231,7 +246,7 @@ let isCall = 1, Uses = [RSP] in {
   def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
                         "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))],
                         IIC_CALL_MEM>,
-                      Requires<[In64BitMode]>;
+                      Requires<[In64BitMode,FavorMemIndirectCall]>;
 
   def FARCALL64   : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
                        "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
@@ -245,13 +260,12 @@ let isCall = 1, isCodeGenOnly = 1 in
     def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
                       (outs), (ins i64i32imm_pcrel:$dst),
                       "call{q}\t$dst", [], IIC_CALL_RI>,
-                    Requires<[IsWin64]>;
+                    Requires<[IsWin64]>, Sched<[WriteJump]>;
   }
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
-    isCodeGenOnly = 1 in
-  let Uses = [RSP],
-      usesCustomInserter = 1 in {
+    isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1,
+    SchedRW = [WriteJump] in {
   def TCRETURNdi64 : PseudoI<(outs),
                       (ins i64i32imm_pcrel:$dst, i32imm:$offset),
                       []>;
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 2eb454ded21b..6dc7175357b3 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -42,48 +42,54 @@ let neverHasSideEffects = 1 in {
 let neverHasSideEffects = 1 in {
 def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
                    "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
-                   TB, OpSize;
+                   TB, OpSize, Sched<[WriteALU]>;
 let mayLoad = 1 in
 def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
                    "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
-                   TB, OpSize;
+                   TB, OpSize, Sched<[WriteALULd]>;
 } // neverHasSideEffects = 1
 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
                    "movs{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+                   [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+                   Sched<[WriteALU]>;
 def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
                    "movs{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB;
+                   [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB,
+                   Sched<[WriteALULd]>;
 def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
                    "movs{wl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+                   [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+                   Sched<[WriteALU]>;
 def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                    "movs{wl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
-                   TB;
+                   TB, Sched<[WriteALULd]>;
 
 let neverHasSideEffects = 1 in {
 def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
                    "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
-                   TB, OpSize;
+                   TB, OpSize, Sched<[WriteALU]>;
 let mayLoad = 1 in
 def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
                    "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
-                   TB, OpSize;
+                   TB, OpSize, Sched<[WriteALULd]>;
 } // neverHasSideEffects = 1
 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
                    "movz{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+                   [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+                   Sched<[WriteALU]>;
 def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
                    "movz{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB;
+                   [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB,
+                   Sched<[WriteALULd]>;
 def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
                    "movz{wl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+                   [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+                   Sched<[WriteALU]>;
 def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                    "movz{wl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>,
-                   TB;
+                   TB, Sched<[WriteALULd]>;
 
 // These are the same as the regular MOVZX32rr8 and MOVZX32rm8
 // except that they use GR32_NOREX for the output operand register class
@@ -92,12 +98,12 @@ let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
 def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
                          (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
                          "movz{bl|x}\t{$src, $dst|$dst, $src}",
-                         [], IIC_MOVZX>, TB;
+                         [], IIC_MOVZX>, TB, Sched<[WriteALU]>;
 let mayLoad = 1 in
 def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
                          (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
                          "movz{bl|x}\t{$src, $dst|$dst, $src}",
-                         [], IIC_MOVZX>, TB;
+                         [], IIC_MOVZX>, TB, Sched<[WriteALULd]>;
 }
 
 // MOVSX64rr8 always has a REX prefix and it has an 8-bit register
@@ -106,38 +112,42 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
 // were generalized, this would require a special register class.
 def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
                     "movs{bq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+                    [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+                    Sched<[WriteALU]>;
 def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
                     "movs{bq|x}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>,
-                    TB;
+                    TB, Sched<[WriteALULd]>;
 def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
                     "movs{wq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+                    [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+                    Sched<[WriteALU]>;
 def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
                     "movs{wq|x}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>,
-                    TB;
+                    TB, Sched<[WriteALULd]>;
 def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
                     "movs{lq|xd}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>;
+                    [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>,
+                    Sched<[WriteALU]>;
 def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
                     "movs{lq|xd}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>;
+                    [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>,
+                    Sched<[WriteALULd]>;
 
 // movzbq and movzwq encodings for the disassembler
 def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
                        "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
-                       TB;
+                       TB, Sched<[WriteALU]>;
 def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
                        "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
-                       TB;
+                       TB, Sched<[WriteALULd]>;
 def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
                        "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
-                       TB;
+                       TB, Sched<[WriteALU]>;
 def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
                        "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
-                       TB;
+                       TB, Sched<[WriteALULd]>;
 
 // FIXME: These should be Pat patterns.
 let isCodeGenOnly = 1 in {
@@ -145,17 +155,19 @@ let isCodeGenOnly = 1 in {
 // Use movzbl instead of movzbq when the destination is a register; it's
 // equivalent due to implicit zero-extending, and it has a smaller encoding.
 def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
-                   "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+                   "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+                   Sched<[WriteALU]>;
 def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
                    "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
-                   TB;
+                   TB, Sched<[WriteALULd]>;
 // Use movzwl instead of movzwq when the destination is a register; it's
 // equivalent due to implicit zero-extending, and it has a smaller encoding.
 def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
-                   "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+                   "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+                   Sched<[WriteALU]>;
 def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
                    "", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
-                   IIC_MOVZX>, TB;
+                   IIC_MOVZX>, TB, Sched<[WriteALULd]>;
 
 // There's no movzlq instruction, but movl can be used for this purpose, using
 // implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
@@ -165,9 +177,10 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
 // necessarily all zero. In such cases, we fall back to these explicit zext
 // instructions.
 def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
-                    "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>;
+                    "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>,
+                    Sched<[WriteALU]>;
 def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
                     "", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
-                    IIC_MOVZX>;
+                    IIC_MOVZX>, Sched<[WriteALULd]>;
 }
 
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 568726e08ece..2224a08d59f4 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -422,7 +422,7 @@ def IST_Fp32m80  : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
 def IST_Fp64m80  : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
 }
 
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
 def LD_F32m   : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src",
                     IIC_FLD>;
 def LD_F64m   : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src",
@@ -436,7 +436,7 @@ def ILD_F32m  : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src",
 def ILD_F64m  : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src",
                     IIC_FILD>;
 }
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
 def ST_F32m   : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst",
                     IIC_FST>;
 def ST_F64m   : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst",
@@ -481,7 +481,7 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
                     [(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
 } // Predicates = [HasSSE3]
 
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
 def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst",
   IIC_FST>;
 def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst",
@@ -491,6 +491,7 @@ def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
 }
 
 // FP Stack manipulation instructions.
+let SchedRW = [WriteMove] in {
 def LD_Frr   : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op",
                    IIC_FLD>, D9;
 def ST_Frr   : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op",
@@ -499,6 +500,7 @@ def ST_FPrr  : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op",
                    IIC_FST>, DD;
 def XCH_F    : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op",
                    IIC_FXCH>, D9;
+}
 
 // Floating point constant loads.
 let isReMaterializable = 1 in {
@@ -516,19 +518,23 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
                 [(set RFP80:$dst, fpimm1)]>;
 }
 
+let SchedRW = [WriteZero] in {
 def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz", IIC_FLDZ>, D9;
 def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1", IIC_FIST>, D9;
-
+}
 
 // Floating point compares.
+let SchedRW = [WriteFAdd] in {
 def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
                         [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
 def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
                         [(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
 def UCOM_Fpr80 : FpI_  <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
                         [(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
+} // SchedRW
 } // Defs = [FPSW]
 
+let SchedRW = [WriteFAdd] in {
 // CC = ST(0) cmp ST(i)
 let Defs = [EFLAGS, FPSW] in {
 def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
@@ -566,8 +572,10 @@ def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
 def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
                    "fcompi\t$reg", IIC_FCOMI>, DF;
 }
+} // SchedRW
 
 // Floating point flag ops.
+let SchedRW = [WriteALU] in {
 let Defs = [AX], Uses = [FPSW] in
 def FNSTSW16r : I<0xE0, RawFrm,                  // AX = fp flags
                   (outs), (ins), "fnstsw %ax",
@@ -576,23 +584,26 @@ def FNSTSW16r : I<0xE0, RawFrm,                  // AX = fp flags
 def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control world
                   (outs), (ins i16mem:$dst), "fnstcw\t$dst",
                   [(X86fp_cwd_get16 addr:$dst)], IIC_FNSTCW>;
-                  
+} // SchedRW
 let mayLoad = 1 in
 def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control world = [mem16]
-                  (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>;
+                  (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>,
+                Sched<[WriteLoad]>;
 
 // FPU control instructions
+let SchedRW = [WriteMicrocoded] in {
 let Defs = [FPSW] in
 def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", [], IIC_FNINIT>, DB;
 def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
                 "ffree\t$reg", IIC_FFREE>, DD;
-
 // Clear exceptions
 
 let Defs = [FPSW] in
 def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", [], IIC_FNCLEX>, DB;
+} // SchedRW
 
 // Operandless floating-point instructions for the disassembler.
+let SchedRW = [WriteMicrocoded] in {
 def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", [], IIC_WAIT>;
 
 def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", [], IIC_FNOP>, D9;
@@ -627,6 +638,7 @@ def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
 def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
                   "fxrstorq\t$src", [], IIC_FXRSTOR>, TB, REX_W,
                   Requires<[In64BitMode]>;
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 44e574d24656..a71e024f4e28 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -35,24 +35,27 @@ def MRM_C3 : Format<35>;
 def MRM_C4 : Format<36>;
 def MRM_C8 : Format<37>;
 def MRM_C9 : Format<38>;
-def MRM_E8 : Format<39>;
-def MRM_F0 : Format<40>;
-def MRM_F8 : Format<41>;
-def MRM_F9 : Format<42>;
+def MRM_CA : Format<39>;
+def MRM_CB : Format<40>;
+def MRM_E8 : Format<41>;
+def MRM_F0 : Format<42>;
 def RawFrmImm8 : Format<43>;
 def RawFrmImm16 : Format<44>;
-def MRM_D0 : Format<45>;
-def MRM_D1 : Format<46>;
-def MRM_D4 : Format<47>;
-def MRM_D5 : Format<48>;
-def MRM_D8 : Format<49>;
-def MRM_D9 : Format<50>;
-def MRM_DA : Format<51>;
-def MRM_DB : Format<52>;
-def MRM_DC : Format<53>;
-def MRM_DD : Format<54>;
-def MRM_DE : Format<55>;
-def MRM_DF : Format<56>;
+def MRM_F8 : Format<45>;
+def MRM_F9 : Format<46>;
+def MRM_D0 : Format<47>;
+def MRM_D1 : Format<48>;
+def MRM_D4 : Format<49>;
+def MRM_D5 : Format<50>;
+def MRM_D6 : Format<51>;
+def MRM_D8 : Format<52>;
+def MRM_D9 : Format<53>;
+def MRM_DA : Format<54>;
+def MRM_DB : Format<55>;
+def MRM_DC : Format<56>;
+def MRM_DD : Format<57>;
+def MRM_DE : Format<58>;
+def MRM_DF : Format<59>;
 
 // ImmType - This specifies the immediate type used by an instruction. This is
 // part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -208,47 +211,47 @@ class PseudoI<dag oops, dag iops, list<dag> pattern>
 }
 
 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
-        list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+        list<dag> pattern, InstrItinClass itin = NoItinerary,
         Domain d = GenericDomain>
   : X86Inst<o, f, NoImm, outs, ins, asm, itin, d> {
   let Pattern = pattern;
   let CodeSize = 3;
 }
 class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, 
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+           list<dag> pattern, InstrItinClass itin = NoItinerary,
            Domain d = GenericDomain>
   : X86Inst<o, f, Imm8, outs, ins, asm, itin, d> {
   let Pattern = pattern;
   let CodeSize = 3;
 }
 class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, 
-               list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+               list<dag> pattern, InstrItinClass itin = NoItinerary>
   : X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> {
   let Pattern = pattern;
   let CodeSize = 3;
 }
 class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, 
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
   : X86Inst<o, f, Imm16, outs, ins, asm, itin> {
   let Pattern = pattern;
   let CodeSize = 3;
 }
 class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, 
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
   : X86Inst<o, f, Imm32, outs, ins, asm, itin> {
   let Pattern = pattern;
   let CodeSize = 3;
 }
 
 class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, 
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
            : X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> {
   let Pattern = pattern;
   let CodeSize = 3;
 }
 
 class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, 
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
   : X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> {
   let Pattern = pattern;
   let CodeSize = 3;
@@ -257,12 +260,12 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
 // FPStack Instruction Templates:
 // FPI - Floating Point Instruction template.
 class FPI<bits<8> o, Format F, dag outs, dag ins, string asm,
-          InstrItinClass itin = IIC_DEFAULT>
+          InstrItinClass itin = NoItinerary>
   : I<o, F, outs, ins, asm, [], itin> {}
 
 // FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
 class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
-           InstrItinClass itin = IIC_DEFAULT>
+           InstrItinClass itin = NoItinerary>
   : X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> {
   let FPForm = fp;
   let Pattern = pattern;
@@ -275,14 +278,14 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
 //   Iseg32 - 16-bit segment selector, 32-bit offset
 
 class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm, 
-              list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+              list<dag> pattern, InstrItinClass itin = NoItinerary>
       : X86Inst<o, f, Imm16, outs, ins, asm, itin> {
   let Pattern = pattern;
   let CodeSize = 3;
 }
 
 class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, 
-              list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+              list<dag> pattern, InstrItinClass itin = NoItinerary>
       : X86Inst<o, f, Imm32, outs, ins, asm, itin> {
   let Pattern = pattern;
   let CodeSize = 3;
@@ -292,7 +295,7 @@ def __xs : XS;
 
 // SI - SSE 1 & 2 scalar instructions
 class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
-         list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+         list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin> {
   let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
             !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
@@ -303,7 +306,7 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
 
 // SIi8 - SSE 1 & 2 scalar instructions
 class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin> {
   let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
             !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
@@ -350,25 +353,25 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 //   VPSI  - SSE1 instructions with TB prefix in AVX form.
 
 class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
-          list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+          list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
 class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
 class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
-          list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+          list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
         Requires<[UseSSE1]>;
 class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
         Requires<[UseSSE1]>;
 class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
         Requires<[HasAVX]>;
 class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedSingle>, TB,
         Requires<[HasAVX]>;
 
@@ -388,42 +391,42 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
 //               MMX operands.
 
 class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
-          list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+          list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
 class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
 class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>;
 class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+             list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
 class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
-          list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+          list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
         Requires<[UseSSE2]>;
 class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
         Requires<[UseSSE2]>;
 class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
         Requires<[HasAVX]>;
 class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
         Requires<[HasAVX]>;
 class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
         OpSize, Requires<[HasAVX]>;
 class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-               list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+               list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
 class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-                list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+                list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
 
 // SSE3 Instruction Templates:
@@ -433,15 +436,15 @@ class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 //   S3DI  - SSE3 instructions with XD prefix.
 
 class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, 
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
         Requires<[UseSSE3]>;
 class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, 
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
         Requires<[UseSSE3]>;
 class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
-          list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+          list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
         Requires<[UseSSE3]>;
 
@@ -458,19 +461,19 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
 // classes. They need to be enabled even if AVX is enabled.
 
 class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
         Requires<[UseSSSE3]>;
 class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
         Requires<[UseSSSE3]>;
 class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
-               list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+               list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
         Requires<[HasSSSE3]>;
 class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
-               list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+               list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
         Requires<[HasSSSE3]>;
 
@@ -480,11 +483,11 @@ class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
 //   SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
 //
 class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
         Requires<[UseSSE41]>;
 class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
         Requires<[UseSSE41]>;
 
@@ -492,19 +495,19 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 // 
 //   SS428I - SSE 4.2 instructions with T8 prefix.
 class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
-             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+             list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
         Requires<[UseSSE42]>;
 
 //   SS42FI - SSE 4.2 instructions with T8XD prefix.
 // NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
 class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
-             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+             list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>;
 
 //   SS42AI = SSE 4.2 instructions with TA prefix
 class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
-             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+             list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
         Requires<[UseSSE42]>;
 
@@ -514,11 +517,11 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
 //   AVX8I - AVX instructions with T8 and OpSize prefix.
 //   AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8.
 class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
         Requires<[HasAVX]>;
 class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-              list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+              list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
         Requires<[HasAVX]>;
 
@@ -528,11 +531,11 @@ class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 //   AVX28I - AVX2 instructions with T8 and OpSize prefix.
 //   AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8.
 class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
         Requires<[HasAVX2]>;
 class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-              list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+              list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
         Requires<[HasAVX2]>;
 
@@ -541,53 +544,53 @@ class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 // AES8I
 // These use the same encoding as the SSE4.2 T8 and TA encodings.
 class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag>pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
         Requires<[HasAES]>;
 
 class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
         Requires<[HasAES]>;
 
 // PCLMUL Instruction Templates
 class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-               list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+               list<dag>pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
         OpSize, Requires<[HasPCLMUL]>;
 
 class AVXPCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-                  list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+                  list<dag>pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
         OpSize, VEX_4V, Requires<[HasAVX, HasPCLMUL]>;
 
 // FMA3 Instruction Templates
 class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag>pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, T8,
-        OpSize, VEX_4V, Requires<[HasFMA]>;
+        OpSize, VEX_4V, FMASC, Requires<[HasFMA]>;
 
 // FMA4 Instruction Templates
 class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag>pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin>, TA,
-        OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+        OpSize, VEX_4V, VEX_I8IMM, FMASC, Requires<[HasFMA4]>;
 
 // XOP 2, 3 and 4 Operand Instruction Template
 class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
          XOP, XOP9, Requires<[HasXOP]>;
 
 // XOP 2, 3 and 4 Operand Instruction Templates with imm byte
 class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
          XOP, XOP8, Requires<[HasXOP]>;
 
 //  XOP 5 operand instruction (VEX encoding!)
 class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag>pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
         OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>;
 
@@ -595,33 +598,33 @@ class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
 //
 
 class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
-         list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+         list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, REX_W;
 class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin>, REX_W;
 class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
-             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+             list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii32<o, F, outs, ins, asm, pattern, itin>, REX_W;
 
 class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
   : X86Inst<o, f, Imm64, outs, ins, asm, itin>, REX_W {
   let Pattern = pattern;
   let CodeSize = 3;
 }
 
 class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : SSI<o, F, outs, ins, asm, pattern, itin>, REX_W;
 class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : SDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
 class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : PDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
 class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : VPDI<o, F, outs, ins, asm, pattern, itin>, VEX_W;
 
 // MMX Instruction templates
@@ -635,23 +638,23 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
 // MMXID  - MMX instructions with XD prefix.
 // MMXIS  - MMX instructions with XS prefix.
 class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm, 
-           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+           list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
 class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm, 
-             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+             list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>;
 class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm, 
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, TB, REX_W, Requires<[HasMMX]>;
 class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm, 
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, TB, OpSize, Requires<[HasMMX]>;
 class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm, 
-             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+             list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
 class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, 
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>;
 class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, 
-            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+            list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 17714acd86cd..7c0423f81825 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3655,7 +3655,16 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                     const SmallVectorImpl<MachineOperand> &MOs,
                                     unsigned Size, unsigned Align) const {
   const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+  bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect();
   bool isTwoAddrFold = false;
+
+  // Atom favors register form of call. So, we do not fold loads into calls
+  // when X86Subtarget is Atom.
+  if (isCallRegIndirect &&
+    (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) {
+    return NULL;
+  }
+
   unsigned NumOps = MI->getDesc().getNumOperands();
   bool isTwoAddr = NumOps > 1 &&
     MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
@@ -4272,7 +4281,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
     bool isAligned = (*MMOs.first) &&
                      (*MMOs.first)->getAlignment() >= Alignment;
     Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
-                              VT, MVT::Other, &AddrOps[0], AddrOps.size());
+                              VT, MVT::Other, AddrOps);
     NewNodes.push_back(Load);
 
     // Preserve memory reference information.
@@ -4294,8 +4303,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
   if (Load)
     BeforeOps.push_back(SDValue(Load, 0));
   std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
-  SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
-                                      BeforeOps.size());
+  SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
   NewNodes.push_back(NewNode);
 
   // Emit the store instruction.
@@ -4317,8 +4325,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                      (*MMOs.first)->getAlignment() >= Alignment;
     SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
                                                          isAligned, TM),
-                                       dl, MVT::Other,
-                                       &AddrOps[0], AddrOps.size());
+                                       dl, MVT::Other, AddrOps);
     NewNodes.push_back(Store);
 
     // Preserve memory reference information.
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index d989ec7bb052..3380d8c64ea7 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -142,6 +142,9 @@ def X86sahf    : SDNode<"X86ISD::SAHF",     SDTX86sahf>;
 def X86rdrand  : SDNode<"X86ISD::RDRAND",   SDTX86rdrand,
                         [SDNPHasChain, SDNPSideEffect]>;
 
+def X86rdseed  : SDNode<"X86ISD::RDSEED",   SDTX86rdrand,
+                        [SDNPHasChain, SDNPSideEffect]>;
+
 def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
                         [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
@@ -603,7 +606,12 @@ def HasLZCNT     : Predicate<"Subtarget->hasLZCNT()">;
 def HasBMI       : Predicate<"Subtarget->hasBMI()">;
 def HasBMI2      : Predicate<"Subtarget->hasBMI2()">;
 def HasRTM       : Predicate<"Subtarget->hasRTM()">;
+def HasHLE       : Predicate<"Subtarget->hasHLE()">;
+def HasTSX       : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
 def HasADX       : Predicate<"Subtarget->hasADX()">;
+def HasPRFCHW    : Predicate<"Subtarget->hasPRFCHW()">;
+def HasRDSEED    : Predicate<"Subtarget->hasRDSEED()">;
+def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">;
 def FPStackf32   : Predicate<"!Subtarget->hasSSE1()">;
 def FPStackf64   : Predicate<"!Subtarget->hasSSE2()">;
 def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
@@ -626,6 +634,7 @@ def OptForSize   : Predicate<"OptForSize">;
 def OptForSpeed  : Predicate<"!OptForSize">;
 def FastBTMem    : Predicate<"!Subtarget->isBTMemSlow()">;
 def CallImmAddr  : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+def FavorMemIndirectCall  : Predicate<"!Subtarget->callRegIndirect()">;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.
@@ -758,7 +767,7 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
 //
 
 // Nop
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteZero] in {
   def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>;
   def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero),
                 "nop{w}\t$zero", [], IIC_NOP>, TB, OpSize;
@@ -769,8 +778,9 @@ let neverHasSideEffects = 1 in {
 
 // Constructing a stack frame.
 def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
-                 "enter\t$len, $lvl", [], IIC_ENTER>;
+                 "enter\t$len, $lvl", [], IIC_ENTER>, Sched<[WriteMicrocoded]>;
 
+let SchedRW = [WriteALU] in {
 let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
 def LEAVE    : I<0xC9, RawFrm,
                  (outs), (ins), "leave", [], IIC_LEAVE>,
@@ -780,13 +790,14 @@ let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
 def LEAVE64  : I<0xC9, RawFrm,
                  (outs), (ins), "leave", [], IIC_LEAVE>,
                  Requires<[In64BitMode]>;
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 //  Miscellaneous Instructions.
 //
 
 let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
 def POP16r  : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
                 IIC_POP_REG16>, OpSize;
 def POP32r  : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
@@ -803,9 +814,9 @@ def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [],
 def POPF16   : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize;
 def POPF32   : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
                Requires<[In32BitMode]>;
-}
+} // mayLoad, SchedRW
 
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
 def PUSH16r  : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
                  IIC_PUSH_REG>, OpSize;
 def PUSH32r  : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
@@ -832,29 +843,30 @@ def PUSHF16  : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>,
 def PUSHF32  : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>,
                Requires<[In32BitMode]>;
 
-}
+} // mayStore, SchedRW
 }
 
 let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
 def POP64r   : I<0x58, AddRegFrm,
                  (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>;
 def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
                 IIC_POP_REG>;
 def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [],
                 IIC_POP_MEM>;
-}
-let mayStore = 1 in {
+} // mayLoad, SchedRW
+let mayStore = 1, SchedRW = [WriteStore] in {
 def PUSH64r  : I<0x50, AddRegFrm,
                  (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>;
 def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
                  IIC_PUSH_REG>;
 def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
                  IIC_PUSH_MEM>;
-}
+} // mayStore, SchedRW
 }
 
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1,
+    SchedRW = [WriteStore] in {
 def PUSH64i8   : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
                      "push{q}\t$imm", [], IIC_PUSH_IMM>;
 def PUSH64i16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
@@ -865,23 +877,24 @@ def PUSH64i32  : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
 
 let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
 def POPF64   : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>,
-               Requires<[In64BitMode]>;
+               Requires<[In64BitMode]>, Sched<[WriteLoad]>;
 let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
 def PUSHF64    : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
-                 Requires<[In64BitMode]>;
+                 Requires<[In64BitMode]>, Sched<[WriteStore]>;
 
 let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
-    mayLoad=1, neverHasSideEffects=1 in {
+    mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
 def POPA32   : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>,
                Requires<[In32BitMode]>;
 }
 let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
-    mayStore=1, neverHasSideEffects=1 in {
+    mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
 def PUSHA32  : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>,
                Requires<[In32BitMode]>;
 }
 
-let Constraints = "$src = $dst" in {    // GR32 = bswap GR32
+let Constraints = "$src = $dst", SchedRW = [WriteALU] in {
+// GR32 = bswap GR32
 def BSWAP32r : I<0xC8, AddRegFrm,
                  (outs GR32:$dst), (ins GR32:$src),
                  "bswap{l}\t$dst",
@@ -890,60 +903,63 @@ def BSWAP32r : I<0xC8, AddRegFrm,
 def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
                   "bswap{q}\t$dst",
                   [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
 
 // Bit scan instructions.
 let Defs = [EFLAGS] in {
 def BSF16rr  : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                  "bsf{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))],
-                  IIC_BSF>, TB, OpSize;
+                  IIC_BSF>, TB, OpSize, Sched<[WriteShift]>;
 def BSF16rm  : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                  "bsf{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))],
-                  IIC_BSF>, TB, OpSize;
+                  IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>;
 def BSF32rr  : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                  "bsf{l}\t{$src, $dst|$dst, $src}",
-                 [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB;
+                 [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB,
+               Sched<[WriteShift]>;
 def BSF32rm  : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                  "bsf{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))],
-                 IIC_BSF>, TB;
+                 IIC_BSF>, TB, Sched<[WriteShiftLd]>;
 def BSF64rr  : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                   "bsf{q}\t{$src, $dst|$dst, $src}",
                   [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))],
-                  IIC_BSF>, TB;
+                  IIC_BSF>, TB, Sched<[WriteShift]>;
 def BSF64rm  : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                   "bsf{q}\t{$src, $dst|$dst, $src}",
                   [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))],
-                  IIC_BSF>, TB;
+                  IIC_BSF>, TB, Sched<[WriteShiftLd]>;
 
 def BSR16rr  : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                  "bsr{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>,
-                 TB, OpSize;
+                 TB, OpSize, Sched<[WriteShift]>;
 def BSR16rm  : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                  "bsr{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))],
                  IIC_BSR>, TB,
-                 OpSize;
+                 OpSize, Sched<[WriteShiftLd]>;
 def BSR32rr  : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                  "bsr{l}\t{$src, $dst|$dst, $src}",
-                 [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB;
+                 [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB,
+               Sched<[WriteShift]>;
 def BSR32rm  : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                  "bsr{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))],
-                 IIC_BSR>, TB;
+                 IIC_BSR>, TB, Sched<[WriteShiftLd]>;
 def BSR64rr  : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                   "bsr{q}\t{$src, $dst|$dst, $src}",
-                  [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB;
+                  [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB,
+               Sched<[WriteShift]>;
 def BSR64rm  : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                   "bsr{q}\t{$src, $dst|$dst, $src}",
                   [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))],
-                  IIC_BSR>, TB;
+                  IIC_BSR>, TB, Sched<[WriteShiftLd]>;
 } // Defs = [EFLAGS]
 
-
+let SchedRW = [WriteMicrocoded] in {
 // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
 let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
 def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", [], IIC_MOVS>;
@@ -971,12 +987,12 @@ def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", [], IIC_CMPS>;
 def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", [], IIC_CMPS>, OpSize;
 def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", [], IIC_CMPS>;
 def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>;
-
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 //  Move Instructions.
 //
-
+let SchedRW = [WriteMove] in {
 let neverHasSideEffects = 1 in {
 def MOV8rr  : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
                 "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
@@ -987,6 +1003,7 @@ def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
 def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
                  "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
 }
+
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def MOV8ri  : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
                    "mov{b}\t{$src, $dst|$dst, $src}",
@@ -1004,7 +1021,9 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
                       "mov{q}\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>;
 }
+} // SchedRW
 
+let SchedRW = [WriteStore] in {
 def MOV8mi  : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
                    "mov{b}\t{$src, $dst|$dst, $src}",
                    [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>;
@@ -1017,9 +1036,11 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
 def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
                       "mov{q}\t{$src, $dst|$dst, $src}",
                       [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
 
 /// moffs8, moffs16 and moffs32 versions of moves.  The immediate is a
 /// 32-bit offset from the PC.  These are only valid in x86-32 mode.
+let SchedRW = [WriteALU] in {
 def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
                    "mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>,
                    Requires<[In32BitMode]>;
@@ -1038,6 +1059,7 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
 def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
                       "mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>,
                      Requires<[In32BitMode]>;
+}
 
 // FIXME: These definitions are utterly broken
 // Just leave them commented out for now because they're useless outside
@@ -1055,7 +1077,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
 */
 
 
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
 def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
                    "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
 def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1066,7 +1088,7 @@ def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
 }
 
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
 def MOV8rm  : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
                 "mov{b}\t{$src, $dst|$dst, $src}",
                 [(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>;
@@ -1081,6 +1103,7 @@ def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                  [(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>;
 }
 
+let SchedRW = [WriteStore] in {
 def MOV8mr  : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
                 "mov{b}\t{$src, $dst|$dst, $src}",
                 [(store GR8:$src, addr:$dst)], IIC_MOV_MEM>;
@@ -1093,6 +1116,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
 def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                  "mov{q}\t{$src, $dst|$dst, $src}",
                  [(store GR64:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
 
 // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
 // that they can be used for copying and storing h registers, which can't be
@@ -1101,34 +1125,37 @@ let isCodeGenOnly = 1 in {
 let neverHasSideEffects = 1 in
 def MOV8rr_NOREX : I<0x88, MRMDestReg,
                      (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
-                     "mov{b}\t{$src, $dst|$dst, $src}  # NOREX", [], IIC_MOV>;
+                     "mov{b}\t{$src, $dst|$dst, $src}  # NOREX", [], IIC_MOV>,
+                   Sched<[WriteMove]>;
 let mayStore = 1 in
 def MOV8mr_NOREX : I<0x88, MRMDestMem,
                      (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
                      "mov{b}\t{$src, $dst|$dst, $src}  # NOREX", [],
-                     IIC_MOV_MEM>;
+                     IIC_MOV_MEM>, Sched<[WriteStore]>;
 let mayLoad = 1, neverHasSideEffects = 1,
     canFoldAsLoad = 1, isReMaterializable = 1 in
 def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
                      (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
                      "mov{b}\t{$src, $dst|$dst, $src}  # NOREX", [],
-                     IIC_MOV_MEM>;
+                     IIC_MOV_MEM>, Sched<[WriteLoad]>;
 }
 
 
 // Condition code ops, incl. set if equal/not equal/...
+let SchedRW = [WriteALU] in {
 let Defs = [EFLAGS], Uses = [AH] in
 def SAHF     : I<0x9E, RawFrm, (outs),  (ins), "sahf",
                  [(set EFLAGS, (X86sahf AH))], IIC_AHF>;
 let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
 def LAHF     : I<0x9F, RawFrm, (outs),  (ins), "lahf", [],
                 IIC_AHF>;  // AH = flags
-
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // Bit tests instructions: BT, BTS, BTR, BTC.
 
 let Defs = [EFLAGS] in {
+let SchedRW = [WriteALU] in {
 def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                "bt{w}\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>,
@@ -1139,13 +1166,14 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
 def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                "bt{q}\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB;
+} // SchedRW
 
 // Unlike with the register+register form, the memory+register form of the
 // bt instruction does not ignore the high bits of the index. From ISel's
 // perspective, this is pretty bizarre. Make these instructions disassembly
 // only for now.
 
-let mayLoad = 1, hasSideEffects = 0 in {
+let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
   def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                  "bt{w}\t{$src2, $src1|$src1, $src2}",
   //               [(X86bt (loadi16 addr:$src1), GR16:$src2),
@@ -1166,6 +1194,7 @@ let mayLoad = 1, hasSideEffects = 0 in {
                   >, TB;
 }
 
+let SchedRW = [WriteALU] in {
 def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                 "bt{w}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))],
@@ -1178,10 +1207,12 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
                 "bt{q}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))],
                 IIC_BT_RI>, TB;
+} // SchedRW
 
 // Note that these instructions don't need FastBTMem because that
 // only applies when the other operand is in a register. When it's
 // an immediate, bt is still fast.
+let SchedRW = [WriteALU] in {
 def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                 "bt{w}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
@@ -1194,8 +1225,10 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
                 "bt{q}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt (loadi64 addr:$src1),
                                      i64immSExt8:$src2))], IIC_BT_MI>, TB;
+} // SchedRW
 
 let hasSideEffects = 0 in {
+let SchedRW = [WriteALU] in {
 def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
                 OpSize, TB;
@@ -1203,8 +1236,9 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
 def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                  "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
 
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
 def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
                 OpSize, TB;
@@ -1214,6 +1248,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
                  "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
 }
 
+let SchedRW = [WriteALU] in {
 def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                     "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
                     OpSize, TB;
@@ -1221,8 +1256,9 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                     "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
 def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
                     "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
 
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
 def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                     "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
                     OpSize, TB;
@@ -1232,6 +1268,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
                     "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
 }
 
+let SchedRW = [WriteALU] in {
 def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
                 OpSize, TB;
@@ -1239,8 +1276,9 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
 def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                  "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
 
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
 def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
                 OpSize, TB;
@@ -1250,6 +1288,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
                  "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
 }
 
+let SchedRW = [WriteALU] in {
 def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                     "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
                     OpSize, TB;
@@ -1257,8 +1296,9 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                     "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
 def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
                     "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
 
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
 def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                     "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
                     OpSize, TB;
@@ -1268,6 +1308,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
                     "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
 }
 
+let SchedRW = [WriteALU] in {
 def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
                 OpSize, TB;
@@ -1275,8 +1316,9 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
 def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                  "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
 
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
 def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
                 OpSize, TB;
@@ -1286,6 +1328,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
                  "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
 }
 
+let SchedRW = [WriteALU] in {
 def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                     "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
                     OpSize, TB;
@@ -1293,8 +1336,9 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                     "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
 def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
                     "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
 
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
 def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                     "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
                     OpSize, TB;
@@ -1315,7 +1359,7 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
 // operand is referenced, the atomicity is ensured.
 multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag,
                        InstrItinClass itin> {
-  let Constraints = "$val = $dst" in {
+  let Constraints = "$val = $dst", SchedRW = [WriteALULd, WriteRMW] in {
     def NAME#8rm  : I<opc8, MRMSrcMem, (outs GR8:$dst),
                       (ins GR8:$val, i8mem:$ptr),
                       !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
@@ -1350,6 +1394,7 @@ multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag,
 defm XCHG    : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>;
 
 // Swap between registers.
+let SchedRW = [WriteALU] in {
 let Constraints = "$val = $dst" in {
 def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
                 "xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
@@ -1374,9 +1419,9 @@ def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src),
                    Requires<[In64BitMode]>;
 def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
                   "xchg{q}\t{$src, %rax|RAX, $src}", [], IIC_XCHG_REG>;
+} // SchedRW
 
-
-
+let SchedRW = [WriteALU] in {
 def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
                 "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
 def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1386,8 +1431,9 @@ def XADD32rr  : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                  "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
 def XADD64rr  : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
                    "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
+} // SchedRW
 
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
 def XADD8rm   : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
                  "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
 def XADD16rm  : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
@@ -1400,6 +1446,7 @@ def XADD64rm  : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
 
 }
 
+let SchedRW = [WriteALU] in {
 def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
                    "cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
                    IIC_CMPXCHG_REG8>, TB;
@@ -1412,7 +1459,9 @@ def CMPXCHG32rr  : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
 def CMPXCHG64rr  : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
                       "cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
                       IIC_CMPXCHG_REG>, TB;
+} // SchedRW
 
+let SchedRW = [WriteALULd, WriteRMW] in {
 let mayLoad = 1, mayStore = 1 in {
 def CMPXCHG8rm   : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
                      "cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
@@ -1436,7 +1485,7 @@ let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
 def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
                     "cmpxchg16b\t$dst", [], IIC_CMPXCHG_16B>,
                     TB, Requires<[HasCmpxchg16b]>;
-
+} // SchedRW
 
 
 // Lock instruction prefix
@@ -1459,17 +1508,21 @@ def REPNE_PREFIX : I<0xF2, RawFrm, (outs),  (ins), "repne", []>;
 
 
 // String manipulation instructions
+let SchedRW = [WriteMicrocoded] in {
 def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", [], IIC_LODS>;
 def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", [], IIC_LODS>, OpSize;
 def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", [], IIC_LODS>;
 def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", [], IIC_LODS>;
+}
 
+let SchedRW = [WriteSystem] in {
 def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", [], IIC_OUTS>;
 def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", [], IIC_OUTS>, OpSize;
 def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", [], IIC_OUTS>;
-
+}
 
 // Flag instructions
+let SchedRW = [WriteALU] in {
 def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>;
 def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>;
 def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>;
@@ -1479,10 +1532,13 @@ def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>;
 def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>;
 
 def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
+}
 
 // Table lookup instructions
-def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>;
+def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>,
+           Sched<[WriteLoad]>;
 
+let SchedRW = [WriteMicrocoded] in {
 // ASCII Adjust After Addition
 // sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
 def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>,
@@ -1512,7 +1568,9 @@ def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>,
 // sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
 def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>,
             Requires<[In32BitMode]>;
+} // SchedRW
 
+let SchedRW = [WriteSystem] in {
 // Check Array Index Against Bounds
 def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                    "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize,
@@ -1528,11 +1586,13 @@ def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
 def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
                  "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>,
                  Requires<[In32BitMode]>;
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // MOVBE Instructions
 //
 let Predicates = [HasMOVBE] in {
+  let SchedRW = [WriteALULd] in {
   def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "movbe{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>,
@@ -1545,6 +1605,8 @@ let Predicates = [HasMOVBE] in {
                      "movbe{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>,
                      T8;
+  }
+  let SchedRW = [WriteStore] in {
   def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
                     "movbe{w}\t{$src, $dst|$dst, $src}",
                     [(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>,
@@ -1557,6 +1619,7 @@ let Predicates = [HasMOVBE] in {
                      "movbe{q}\t{$src, $dst|$dst, $src}",
                      [(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>,
                      T8;
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -1574,6 +1637,21 @@ let Predicates = [HasRDRAND], Defs = [EFLAGS] in {
                      [(set GR64:$dst, EFLAGS, (X86rdrand))]>, TB;
 }
 
+//===----------------------------------------------------------------------===//
+// RDSEED Instruction
+//
+let Predicates = [HasRDSEED], Defs = [EFLAGS] in {
+  def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins),
+                    "rdseed{w}\t$dst",
+                    [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize, TB;
+  def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
+                    "rdseed{l}\t$dst",
+                    [(set GR32:$dst, EFLAGS, (X86rdseed))]>, TB;
+  def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins),
+                     "rdseed{q}\t$dst",
+                     [(set GR64:$dst, EFLAGS, (X86rdseed))]>, TB;
+}
+
 //===----------------------------------------------------------------------===//
 // LZCNT Instruction
 //
@@ -1755,90 +1833,90 @@ include "X86InstrCompiler.td"
 // Assembler Mnemonic Aliases
 //===----------------------------------------------------------------------===//
 
-def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"call", "calll", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"call", "callq", "att">, Requires<[In64BitMode]>;
 
-def : MnemonicAlias<"cbw",  "cbtw">;
-def : MnemonicAlias<"cwde", "cwtl">;
-def : MnemonicAlias<"cwd",  "cwtd">;
-def : MnemonicAlias<"cdq", "cltd">;
-def : MnemonicAlias<"cdqe", "cltq">;
-def : MnemonicAlias<"cqo", "cqto">;
+def : MnemonicAlias<"cbw",  "cbtw", "att">;
+def : MnemonicAlias<"cwde", "cwtl", "att">;
+def : MnemonicAlias<"cwd",  "cwtd", "att">;
+def : MnemonicAlias<"cdq",  "cltd", "att">;
+def : MnemonicAlias<"cdqe", "cltq", "att">;
+def : MnemonicAlias<"cqo",  "cqto", "att">;
 
 // lret maps to lretl, it is not ambiguous with lretq.
-def : MnemonicAlias<"lret", "lretl">;
+def : MnemonicAlias<"lret", "lretl", "att">;
 
-def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"leavel", "leave", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>;
 
-def : MnemonicAlias<"loopz", "loope">;
-def : MnemonicAlias<"loopnz", "loopne">;
+def : MnemonicAlias<"loopz",  "loope",  "att">;
+def : MnemonicAlias<"loopnz", "loopne", "att">;
 
-def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"popfd",  "popfl">;
+def : MnemonicAlias<"pop",   "popl",  "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pop",   "popq",  "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf",  "popfl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popf",  "popfq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popfd", "popfl", "att">;
 
 // FIXME: This is wrong for "push reg".  "push %bx" should turn into pushw in
 // all modes.  However: "push (addr)" and "push $42" should default to
 // pushl/pushq depending on the current mode.  Similar for "pop %bx"
-def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"pushfd", "pushfl">;
+def : MnemonicAlias<"push",   "pushl",  "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"push",   "pushq",  "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf",  "pushfl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushf",  "pushfq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushfd", "pushfl", "att">;
 
-def : MnemonicAlias<"repe", "rep">;
-def : MnemonicAlias<"repz", "rep">;
-def : MnemonicAlias<"repnz", "repne">;
+def : MnemonicAlias<"repe",  "rep",   "att">;
+def : MnemonicAlias<"repz",  "rep",   "att">;
+def : MnemonicAlias<"repnz", "repne", "att">;
 
-def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"retl", "ret", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retq", "ret", "att">, Requires<[In64BitMode]>;
 
-def : MnemonicAlias<"salb", "shlb">;
-def : MnemonicAlias<"salw", "shlw">;
-def : MnemonicAlias<"sall", "shll">;
-def : MnemonicAlias<"salq", "shlq">;
+def : MnemonicAlias<"salb", "shlb", "att">;
+def : MnemonicAlias<"salw", "shlw", "att">;
+def : MnemonicAlias<"sall", "shll", "att">;
+def : MnemonicAlias<"salq", "shlq", "att">;
 
-def : MnemonicAlias<"smovb", "movsb">;
-def : MnemonicAlias<"smovw", "movsw">;
-def : MnemonicAlias<"smovl", "movsl">;
-def : MnemonicAlias<"smovq", "movsq">;
+def : MnemonicAlias<"smovb", "movsb", "att">;
+def : MnemonicAlias<"smovw", "movsw", "att">;
+def : MnemonicAlias<"smovl", "movsl", "att">;
+def : MnemonicAlias<"smovq", "movsq", "att">;
 
-def : MnemonicAlias<"ud2a", "ud2">;
-def : MnemonicAlias<"verrw", "verr">;
+def : MnemonicAlias<"ud2a",  "ud2",  "att">;
+def : MnemonicAlias<"verrw", "verr", "att">;
 
 // System instruction aliases.
-def : MnemonicAlias<"iret", "iretl">;
-def : MnemonicAlias<"sysret", "sysretl">;
-def : MnemonicAlias<"sysexit", "sysexitl">;
+def : MnemonicAlias<"iret",    "iretl",    "att">;
+def : MnemonicAlias<"sysret",  "sysretl",  "att">;
+def : MnemonicAlias<"sysexit", "sysexitl", "att">;
 
-def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lgdtl", "lgdt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lgdtq", "lgdt", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lidtl", "lidt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidtq", "lidt", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sgdtl", "sgdt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdtq", "sgdt", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sidtl", "sidt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidtq", "sidt", "att">, Requires<[In64BitMode]>;
 
 
 // Floating point stack aliases.
-def : MnemonicAlias<"fcmovz",   "fcmove">;
-def : MnemonicAlias<"fcmova",   "fcmovnbe">;
-def : MnemonicAlias<"fcmovnae", "fcmovb">;
-def : MnemonicAlias<"fcmovna",  "fcmovbe">;
-def : MnemonicAlias<"fcmovae",  "fcmovnb">;
-def : MnemonicAlias<"fcomip",   "fcompi">;
-def : MnemonicAlias<"fildq",    "fildll">;
-def : MnemonicAlias<"fistpq",   "fistpll">;
-def : MnemonicAlias<"fisttpq",  "fisttpll">;
-def : MnemonicAlias<"fldcww",   "fldcw">;
-def : MnemonicAlias<"fnstcww", "fnstcw">;
-def : MnemonicAlias<"fnstsww", "fnstsw">;
-def : MnemonicAlias<"fucomip",  "fucompi">;
-def : MnemonicAlias<"fwait",    "wait">;
+def : MnemonicAlias<"fcmovz",   "fcmove",   "att">;
+def : MnemonicAlias<"fcmova",   "fcmovnbe", "att">;
+def : MnemonicAlias<"fcmovnae", "fcmovb",   "att">;
+def : MnemonicAlias<"fcmovna",  "fcmovbe",  "att">;
+def : MnemonicAlias<"fcmovae",  "fcmovnb",  "att">;
+def : MnemonicAlias<"fcomip",   "fcompi",   "att">;
+def : MnemonicAlias<"fildq",    "fildll",   "att">;
+def : MnemonicAlias<"fistpq",   "fistpll",  "att">;
+def : MnemonicAlias<"fisttpq",  "fisttpll", "att">;
+def : MnemonicAlias<"fldcww",   "fldcw",    "att">;
+def : MnemonicAlias<"fnstcww",  "fnstcw",   "att">;
+def : MnemonicAlias<"fnstsww",  "fnstsw",   "att">;
+def : MnemonicAlias<"fucomip",  "fucompi",  "att">;
+def : MnemonicAlias<"fwait",    "wait",     "att">;
 
 
 class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond>
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 127af6f7f93a..49721df7c118 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -20,6 +20,7 @@
 // MMX Multiclasses
 //===----------------------------------------------------------------------===//
 
+let Sched = WriteVecALU in {
 def MMX_INTALU_ITINS : OpndItins<
   IIC_MMX_ALU_RR, IIC_MMX_ALU_RM
 >;
@@ -35,11 +36,14 @@ def MMX_PHADDSUBW : OpndItins<
 def MMX_PHADDSUBD : OpndItins<
   IIC_MMX_PHADDSUBD_RR, IIC_MMX_PHADDSUBD_RM
 >;
+}
 
+let Sched = WriteVecIMul in
 def MMX_PMUL_ITINS : OpndItins<
   IIC_MMX_PMUL, IIC_MMX_PMUL
 >;
 
+let Sched = WriteVecALU in {
 def MMX_PSADBW_ITINS : OpndItins<
   IIC_MMX_PSADBW, IIC_MMX_PSADBW
 >;
@@ -47,11 +51,13 @@ def MMX_PSADBW_ITINS : OpndItins<
 def MMX_MISC_FUNC_ITINS : OpndItins<
   IIC_MMX_MISC_FUNC_MEM, IIC_MMX_MISC_FUNC_REG
 >;
+}
 
 def MMX_SHIFT_ITINS : ShiftOpndItins<
   IIC_MMX_SHIFT_RR, IIC_MMX_SHIFT_RM, IIC_MMX_SHIFT_RI
 >;
 
+let Sched = WriteShuffle in {
 def MMX_UNPCK_H_ITINS : OpndItins<
   IIC_MMX_UNPCK_H_RR, IIC_MMX_UNPCK_H_RM
 >;
@@ -67,7 +73,9 @@ def MMX_PCK_ITINS : OpndItins<
 def MMX_PSHUF_ITINS : OpndItins<
   IIC_MMX_PSHUF, IIC_MMX_PSHUF
 >;
+} // Sched
 
+let Sched = WriteCvtF2I in {
 def MMX_CVT_PD_ITINS : OpndItins<
   IIC_MMX_CVT_PD_RR, IIC_MMX_CVT_PD_RM
 >;
@@ -75,6 +83,7 @@ def MMX_CVT_PD_ITINS : OpndItins<
 def MMX_CVT_PS_ITINS : OpndItins<
   IIC_MMX_CVT_PS_RR, IIC_MMX_CVT_PS_RM
 >;
+}
 
 let Constraints = "$src1 = $dst" in {
   // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
@@ -84,7 +93,8 @@ let Constraints = "$src1 = $dst" in {
     def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
                  (ins VR64:$src1, VR64:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                 [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr> {
+                 [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>,
+              Sched<[itins.Sched]> {
       let isCommutable = Commutable;
     }
     def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
@@ -92,7 +102,7 @@ let Constraints = "$src1 = $dst" in {
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  [(set VR64:$dst, (IntId VR64:$src1,
                                    (bitconvert (load_mmx addr:$src2))))],
-                 itins.rm>;
+                 itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
 
   multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
@@ -101,17 +111,19 @@ let Constraints = "$src1 = $dst" in {
     def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
                                   (ins VR64:$src1, VR64:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                  [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>;
+                  [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>,
+             Sched<[WriteVecShift]>;
     def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
                                   (ins VR64:$src1, i64mem:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   [(set VR64:$dst, (IntId VR64:$src1,
                                     (bitconvert (load_mmx addr:$src2))))],
-                  itins.rm>;
+                  itins.rm>, Sched<[WriteVecShiftLd, ReadAfterLd]>;
     def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
                                    (ins VR64:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-           [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>;
+           [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>,
+           Sched<[WriteVecShift]>;
   }
 }
 
@@ -120,13 +132,14 @@ multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
                                Intrinsic IntId64, OpndItins itins> {
   def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>;
+                   [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>,
+             Sched<[itins.Sched]>;
 
   def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR64:$dst,
                      (IntId64 (bitconvert (memopmmx addr:$src))))],
-                   itins.rm>;
+                   itins.rm>, Sched<[itins.Sched.Folded]>;
 }
 
 /// Binary MMX instructions requiring SSSE3.
@@ -137,13 +150,15 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
   def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
        (ins VR64:$src1, VR64:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-       [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>;
+       [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>,
+      Sched<[itins.Sched]>;
   def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
        (ins VR64:$src1, i64mem:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR64:$dst,
          (IntId64 VR64:$src1,
-          (bitconvert (memopmmx addr:$src2))))], itins.rm>;
+          (bitconvert (memopmmx addr:$src2))))], itins.rm>,
+      Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 }
 
@@ -164,9 +179,11 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
                          string asm, OpndItins itins, Domain d> {
   def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
-                  [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>;
+                  [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>,
+            Sched<[itins.Sched]>;
   def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
-                  [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>;
+                  [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>,
+            Sched<[itins.Sched.Folded]>;
 }
 
 multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -174,11 +191,11 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
                     PatFrag ld_frag, string asm, Domain d> {
   def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
               asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], 
-              IIC_DEFAULT, d>;
+              NoItinerary, d>;
   def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
                    (ins DstRC:$src1, x86memop:$src2), asm,
               [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], 
-              IIC_DEFAULT, d>;
+              NoItinerary, d>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -197,16 +214,17 @@ def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR64:$dst, 
                          (x86mmx (scalar_to_vector GR32:$src)))],
-                        IIC_MMX_MOV_MM_RM>;
+                        IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
 let canFoldAsLoad = 1 in
 def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR64:$dst,
                         (x86mmx (scalar_to_vector (loadi32 addr:$src))))],
-                        IIC_MMX_MOV_MM_RM>;
+                        IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
 let mayStore = 1 in
 def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
-                        "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>;
+                        "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>,
+                   Sched<[WriteStore]>;
 
 // Low word of MMX to GPR.
 def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
@@ -214,16 +232,18 @@ def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
 def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
                          "movd\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
-                          (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>;
+                          (MMX_X86movd2w (x86mmx VR64:$src)))],
+                          IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>;
 
 let neverHasSideEffects = 1 in
 def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
                              "movd\t{$src, $dst|$dst, $src}",
-                             [], IIC_MMX_MOV_MM_RM>;
+                             [], IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
 
 // These are 64 bit moves, but since the OS X assembler doesn't
 // recognize a register-register movq, we write them as
 // movd.
+let SchedRW = [WriteMove] in {
 def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
                                (outs GR64:$dst), (ins VR64:$src),
                                "movd\t{$src, $dst|$dst, $src}", 
@@ -237,6 +257,9 @@ let neverHasSideEffects = 1 in
 def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                         "movq\t{$src, $dst|$dst, $src}", [],
                         IIC_MMX_MOVQ_RR>;
+} // SchedRW
+
+let SchedRW = [WriteLoad] in {
 let canFoldAsLoad = 1 in
 def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
                         "movq\t{$src, $dst|$dst, $src}",
@@ -246,7 +269,9 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(store (x86mmx VR64:$src), addr:$dst)],
                         IIC_MMX_MOVQ_RM>;
+} // SchedRW
 
+let SchedRW = [WriteMove] in {
 def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
                              (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
                              [(set VR64:$dst,
@@ -271,11 +296,12 @@ def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
 def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
                               (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
                               [], IIC_MMX_MOVQ_RR>;
+} // SchedRW
 
 def MMX_MOVNTQmr  : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
                          "movntq\t{$src, $dst|$dst, $src}",
                          [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)],
-                         IIC_MMX_MOVQ_RM>;
+                         IIC_MMX_MOVQ_RM>, Sched<[WriteStore]>;
 
 let AddedComplexity = 15 in
 // movd to MMX register zero-extends
@@ -283,7 +309,7 @@ def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
                              "movd\t{$src, $dst|$dst, $src}",
               [(set VR64:$dst,
                     (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))],
-                            IIC_MMX_MOV_MM_RM>;
+                            IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
 let AddedComplexity = 20 in
 def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
                            (ins i32mem:$src),
@@ -291,7 +317,7 @@ def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
           [(set VR64:$dst,
                 (x86mmx (X86vzmovl (x86mmx
                                    (scalar_to_vector (loadi32 addr:$src))))))],
-                            IIC_MMX_MOV_MM_RM>;
+                            IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
 
 // Arithmetic Instructions
 defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
@@ -491,14 +517,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
                           "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR64:$dst,
                              (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))],
-                          IIC_MMX_PSHUF>;
+                          IIC_MMX_PSHUF>, Sched<[WriteShuffle]>;
 def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
                           (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
                           "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR64:$dst,
                              (int_x86_sse_pshuf_w (load_mmx addr:$src1),
                                                    imm:$src2))],
-                          IIC_MMX_PSHUF>;
+                          IIC_MMX_PSHUF>, Sched<[WriteShuffleLd]>;
 
 
 
@@ -532,7 +558,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
                            "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
                                              (iPTR imm:$src2)))],
-                           IIC_MMX_PEXTR>;
+                           IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
 let Constraints = "$src1 = $dst" in {
   def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
                       (outs VR64:$dst), 
@@ -540,7 +566,7 @@ let Constraints = "$src1 = $dst" in {
                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
                                         GR32:$src2, (iPTR imm:$src3)))],
-                      IIC_MMX_PINSRW>;
+                      IIC_MMX_PINSRW>, Sched<[WriteShuffle]>;
 
   def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
                      (outs VR64:$dst),
@@ -549,7 +575,7 @@ let Constraints = "$src1 = $dst" in {
                      [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
                                          (i32 (anyext (loadi16 addr:$src2))),
                                        (iPTR imm:$src3)))],
-                     IIC_MMX_PINSRW>;
+                     IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>;
 }
 
 // Mask creation
@@ -570,6 +596,7 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
           (x86mmx (MMX_MOVQ64rm addr:$src))>;
 
 // Misc.
+let SchedRW = [WriteShuffle] in {
 let Uses = [EDI] in
 def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
                         "maskmovq\t{$mask, $src|$src, $mask}",
@@ -580,6 +607,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
                            "maskmovq\t{$mask, $src|$src, $mask}",
                            [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)],
                            IIC_MMX_MASKMOV>;
+}
 
 // 64-bit bit convert.
 let Predicates = [HasSSE2] in {
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index f22a6b29cd2c..384238741b18 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -16,6 +16,8 @@
 class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
   InstrItinClass rr = arg_rr;
   InstrItinClass rm = arg_rm;
+  // InstrSchedModel info.
+  X86FoldableSchedWrite Sched = WriteFAdd;
 }
 
 class SizeItins<OpndItins arg_s, OpndItins arg_d> {
@@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
 
 
 // scalar
+let Sched = WriteFAdd in {
 def SSE_ALU_F32S : OpndItins<
   IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
 >;
@@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins<
 def SSE_ALU_F64S : OpndItins<
   IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
 >;
+}
 
 def SSE_ALU_ITINS_S : SizeItins<
   SSE_ALU_F32S, SSE_ALU_F64S
 >;
 
+let Sched = WriteFMul in {
 def SSE_MUL_F32S : OpndItins<
   IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM
 >;
@@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins<
 def SSE_MUL_F64S : OpndItins<
   IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
 >;
+}
 
 def SSE_MUL_ITINS_S : SizeItins<
   SSE_MUL_F32S, SSE_MUL_F64S
 >;
 
+let Sched = WriteFDiv in {
 def SSE_DIV_F32S : OpndItins<
   IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM
 >;
@@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins<
 def SSE_DIV_F64S : OpndItins<
   IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
 >;
+}
 
 def SSE_DIV_ITINS_S : SizeItins<
   SSE_DIV_F32S, SSE_DIV_F64S
 >;
 
 // parallel
+let Sched = WriteFAdd in {
 def SSE_ALU_F32P : OpndItins<
   IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
 >;
@@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins<
 def SSE_ALU_F64P : OpndItins<
   IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
 >;
+}
 
 def SSE_ALU_ITINS_P : SizeItins<
   SSE_ALU_F32P, SSE_ALU_F64P
 >;
 
+let Sched = WriteFMul in {
 def SSE_MUL_F32P : OpndItins<
   IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM
 >;
@@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins<
 def SSE_MUL_F64P : OpndItins<
   IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
 >;
+}
 
 def SSE_MUL_ITINS_P : SizeItins<
   SSE_MUL_F32P, SSE_MUL_F64P
 >;
 
+let Sched = WriteFDiv in {
 def SSE_DIV_F32P : OpndItins<
   IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM
 >;
@@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins<
 def SSE_DIV_F64P : OpndItins<
   IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
 >;
+}
 
 def SSE_DIV_ITINS_P : SizeItins<
   SSE_DIV_F32P, SSE_DIV_F64P
@@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins<
   IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
 >;
 
+let Sched = WriteVecALU in {
 def SSE_INTALU_ITINS_P : OpndItins<
   IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
 >;
@@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins<
 def SSE_INTALUQ_ITINS_P : OpndItins<
   IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
 >;
+}
 
+let Sched = WriteVecIMul in
 def SSE_INTMUL_ITINS_P : OpndItins<
   IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
 >;
@@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
+       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
+       Sched<[itins.Sched]>;
   }
   def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
+       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
+       Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
@@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (!cast<Intrinsic>(
                  !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
-             RC:$src1, RC:$src2))], itins.rr>;
+             RC:$src1, RC:$src2))], itins.rr>,
+       Sched<[itins.Sched]>;
   def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
                                           SSEVer, "_", OpcodeStr, FPSizeStr))
-             RC:$src1, mem_cpat:$src2))], itins.rm>;
+             RC:$src1, mem_cpat:$src2))], itins.rm>,
+       Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 /// sse12_fp_packed - SSE 1 & 2 packed instructions class
@@ -189,14 +210,16 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
+       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
+       Sched<[itins.Sched]>;
   let mayLoad = 1 in
     def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
-          itins.rm, d>;
+          itins.rm, d>,
+       Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
@@ -209,12 +232,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       pat_rr, IIC_DEFAULT, d>;
+       pat_rr, NoItinerary, d>,
+       Sched<[WriteVecLogic]>;
   def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       pat_rm, IIC_DEFAULT, d>;
+       pat_rm, NoItinerary, d>,
+       Sched<[WriteVecLogicLd, ReadAfterLd]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -345,7 +370,7 @@ let Predicates = [HasAVX] in {
 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
 // This is expanded by ExpandPostRAPseudos.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isPseudo = 1 in {
+    isPseudo = 1, SchedRW = [WriteZero] in {
   def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                    [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
   def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
@@ -362,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 // We set canFoldAsLoad because this can be converted to a constant-pool
 // load of an all-zeros value if folding it would be beneficial.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isPseudo = 1 in {
+    isPseudo = 1, SchedRW = [WriteZero] in {
 def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                [(set VR128:$dst, (v4f32 immAllZerosV))]>;
 }
@@ -379,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
 // at the rename stage without using any execution unit, so SET0PSY
 // and SET0PDY can be used for vector int instructions without penalty
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isPseudo = 1, Predicates = [HasAVX] in {
+    isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
 def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                  [(set VR256:$dst, (v8f32 immAllZerosV))]>;
 }
@@ -417,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
 // We set canFoldAsLoad because this can be converted to a constant-pool
 // load of an all-ones value if folding it would be beneficial.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isPseudo = 1 in {
+    isPseudo = 1, SchedRW = [WriteZero] in {
   def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                        [(set VR128:$dst, (v4i32 immAllOnesV))]>;
   let Predicates = [HasAVX2] in
@@ -444,14 +469,14 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
               !strconcat(base_opc, asm_opr),
               [(set VR128:$dst, (vt (OpNode VR128:$src1,
                                  (scalar_to_vector RC:$src2))))],
-              IIC_SSE_MOV_S_RR>;
+              IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
 
   // For the disassembler
   let isCodeGenOnly = 1, hasSideEffects = 0 in
   def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                   (ins VR128:$src1, RC:$src2),
                   !strconcat(base_opc, asm_opr),
-                  [], IIC_SSE_MOV_S_RR>;
+                  [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
 }
 
 multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
@@ -464,7 +489,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
   def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
-                     VEX, VEX_LIG;
+                     VEX, VEX_LIG, Sched<[WriteStore]>;
   // SSE1 & 2
   let Constraints = "$src1 = $dst" in {
     defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
@@ -473,7 +498,8 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
 
   def NAME#mr   : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                     [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
+                     [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+                  Sched<[WriteStore]>;
 }
 
 // Loading from memory automatically zeroing upper bits.
@@ -482,11 +508,11 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
   def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set RC:$dst, (mem_pat addr:$src))],
-                     IIC_SSE_MOV_S_RM>, VEX, VEX_LIG;
+                     IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
   def NAME#rm   : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set RC:$dst, (mem_pat addr:$src))],
-                     IIC_SSE_MOV_S_RM>;
+                     IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
 }
 
 defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
@@ -745,11 +771,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                             bit IsReMaterializable = 1> {
 let neverHasSideEffects = 1 in
   def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>;
+              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
+           Sched<[WriteMove]>;
 let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
   def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-                   [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>;
+                   [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
+           Sched<[WriteLoad]>;
 }
 
 defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
@@ -790,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
                               "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
                               TB, OpSize;
 
+let SchedRW = [WriteStore] in {
 def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movaps\t{$src, $dst|$dst, $src}",
                    [(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -822,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                    "movupd\t{$src, $dst|$dst, $src}",
                    [(store (v4f64 VR256:$src), addr:$dst)],
                    IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+} // SchedRW
 
 // For disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
   def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movaps\t{$src, $dst|$dst, $src}", [],
@@ -880,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
 def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
           (VMOVUPDYmr addr:$dst, VR256:$src)>;
 
+let SchedRW = [WriteStore] in {
 def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movaps\t{$src, $dst|$dst, $src}",
                    [(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -896,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movupd\t{$src, $dst|$dst, $src}",
                    [(store (v2f64 VR128:$src), addr:$dst)],
                    IIC_SSE_MOVU_P_MR>;
+} // SchedRW
 
 // For disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
   def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movaps\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVA_P_RR>;
@@ -1044,7 +1076,7 @@ let Predicates = [UseSSE1] in {
 
 // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
 // bits are disregarded. FIXME: Set encoding to pseudo!
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
 def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                        "movaps\t{$src, $dst|$dst, $src}", [],
                        IIC_SSE_MOVA_P_RR>, VEX;
@@ -1061,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
 
 // Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
 // bits are disregarded. FIXME: Set encoding to pseudo!
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
 let isCodeGenOnly = 1 in {
   def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                          "movaps\t{$src, $dst|$dst, $src}",
@@ -1095,14 +1127,16 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
      [(set VR128:$dst,
        (psnode VR128:$src1,
               (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
-              itin, SSEPackedSingle>, TB;
+              itin, SSEPackedSingle>, TB,
+     Sched<[WriteShuffleLd, ReadAfterLd]>;
 
   def PDrm : PI<opc, MRMSrcMem,
          (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
          !strconcat(base_opc, "d", asm_opr),
      [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
                               (scalar_to_vector (loadf64 addr:$src2)))))],
-              itin, SSEPackedDouble>, TB, OpSize;
+              itin, SSEPackedDouble>, TB, OpSize,
+     Sched<[WriteShuffleLd, ReadAfterLd]>;
 
 }
 
@@ -1123,6 +1157,7 @@ let AddedComplexity = 20 in {
                                     IIC_SSE_MOV_LH>;
 }
 
+let SchedRW = [WriteStore] in {
 def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -1143,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    [(store (f64 (vector_extract (v2f64 VR128:$src),
                                  (iPTR 0))), addr:$dst)],
                                  IIC_SSE_MOV_LH>;
+} // SchedRW
 
 let Predicates = [HasAVX] in {
   // Shuffle with VMOVLPS
@@ -1222,6 +1258,7 @@ let AddedComplexity = 20 in {
                                     IIC_SSE_MOV_LH>;
 }
 
+let SchedRW = [WriteStore] in {
 // v2f64 extract element 1 is always custom lowered to unpack high to low
 // and extract element 0 so the non-store version isn't too horrible.
 def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -1246,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    [(store (f64 (vector_extract
                                  (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                  (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
+} // SchedRW
 
 let Predicates = [HasAVX] in {
   // VMOVHPS patterns
@@ -1296,14 +1334,14 @@ let AddedComplexity = 20 in {
                       [(set VR128:$dst,
                         (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
                         IIC_SSE_MOV_LH>,
-                      VEX_4V;
+                      VEX_4V, Sched<[WriteShuffle]>;
   def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                        (ins VR128:$src1, VR128:$src2),
                       "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
                         IIC_SSE_MOV_LH>,
-                      VEX_4V;
+                      VEX_4V, Sched<[WriteShuffle]>;
 }
 let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
   def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -1311,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
                       "movlhps\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
-                        IIC_SSE_MOV_LH>;
+                        IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
   def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                        (ins VR128:$src1, VR128:$src2),
                       "movhlps\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
-                        IIC_SSE_MOV_LH>;
+                        IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
 }
 
 let Predicates = [HasAVX] in {
@@ -1352,22 +1390,27 @@ def SSE_CVT_PD : OpndItins<
   IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
 >;
 
+let Sched = WriteCvtI2F in
 def SSE_CVT_PS : OpndItins<
   IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
 >;
 
+let Sched = WriteCvtI2F in
 def SSE_CVT_Scalar : OpndItins<
   IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
 >;
 
+let Sched = WriteCvtF2I in
 def SSE_CVT_SS2SI_32 : OpndItins<
   IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
 >;
 
+let Sched = WriteCvtF2I in
 def SSE_CVT_SS2SI_64 : OpndItins<
   IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
 >;
 
+let Sched = WriteCvtF2I in
 def SSE_CVT_SD2SI : OpndItins<
   IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
 >;
@@ -1377,10 +1420,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                      string asm, OpndItins itins> {
   def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
                         [(set DstRC:$dst, (OpNode SrcRC:$src))],
-                        itins.rr>;
+                        itins.rr>, Sched<[itins.Sched]>;
   def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
                         [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
-                        itins.rm>;
+                        itins.rm>, Sched<[itins.Sched.Folded]>;
 }
 
 multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1388,10 +1431,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        OpndItins itins> {
 let neverHasSideEffects = 1 in {
   def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
-             [], itins.rr, d>;
+             [], itins.rr, d>, Sched<[itins.Sched]>;
   let mayLoad = 1 in
   def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
-             [], itins.rm, d>;
+             [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
 }
 }
 
@@ -1399,11 +1442,13 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                           X86MemOperand x86memop, string asm> {
 let neverHasSideEffects = 1 in {
   def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
-              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+           Sched<[WriteCvtI2F]>;
   let mayLoad = 1 in
   def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
               (ins DstRC:$src1, x86memop:$src),
-              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+           Sched<[WriteCvtI2FLd, ReadAfterLd]>;
 } // neverHasSideEffects = 1
 }
 
@@ -1534,10 +1579,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          string asm, OpndItins itins> {
   def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-              [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
+              [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
+           Sched<[itins.Sched]>;
   def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-              [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>;
+              [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
+           Sched<[itins.Sched.Folded]>;
 }
 
 multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -1549,14 +1596,14 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                   !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
-              itins.rr>;
+              itins.rr>, Sched<[itins.Sched]>;
   def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
               (ins DstRC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
-              itins.rm>;
+              itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
@@ -1701,13 +1748,15 @@ let neverHasSideEffects = 1 in {
 def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                        (ins FR64:$src1, FR64:$src2),
                       "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
-                      IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG;
+                      IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
+                      Sched<[WriteCvtF2F]>;
 let mayLoad = 1 in
 def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                        (ins FR64:$src1, f64mem:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [], IIC_SSE_CVT_Scalar_RM>,
-                      XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+                      XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
+                      Sched<[WriteCvtF2FLd, ReadAfterLd]>;
 }
 
 def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
@@ -1716,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
 def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                       "cvtsd2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (fround FR64:$src))],
-                      IIC_SSE_CVT_Scalar_RR>;
+                      IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
 def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                       "cvtsd2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (fround (loadf64 addr:$src)))],
                       IIC_SSE_CVT_Scalar_RM>,
                       XD,
-                  Requires<[UseSSE2, OptForSize]>;
+                  Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
 
 def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                        "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
-                       IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>;
+                       IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
+                       Sched<[WriteCvtF2F]>;
 def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                        "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                           VR128:$src1, sse_load_f64:$src2))],
-                       IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>;
+                       IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
+                       Sched<[WriteCvtF2FLd, ReadAfterLd]>;
 
 let Constraints = "$src1 = $dst" in {
 def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
@@ -1743,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
                        "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
-                       IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>;
+                       IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
+                       Sched<[WriteCvtF2F]>;
 def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                        "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                           VR128:$src1, sse_load_f64:$src2))],
-                       IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>;
+                       IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
+                       Sched<[WriteCvtF2FLd, ReadAfterLd]>;
 }
 
 // Convert scalar single to scalar double
@@ -1759,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                     (ins FR32:$src1, FR32:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [], IIC_SSE_CVT_Scalar_RR>,
-                    XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
+                    XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
+                    Sched<[WriteCvtF2F]>;
 let mayLoad = 1 in
 def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                     (ins FR32:$src1, f32mem:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [], IIC_SSE_CVT_Scalar_RM>,
-                    XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+                    XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
+                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
 }
 
 def : Pat<(f64 (fextend FR32:$src)),
@@ -1784,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (fextend FR32:$src))],
                    IIC_SSE_CVT_Scalar_RR>, XS,
-                 Requires<[UseSSE2]>;
+                 Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
 def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (extloadf32 addr:$src))],
                    IIC_SSE_CVT_Scalar_RM>, XS,
-                 Requires<[UseSSE2, OptForSize]>;
+                 Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
 
 // extload f32 -> f64.  This matches load+fextend because we have a hack in
 // the isel (PreprocessForFPConvert) that can introduce loads after dag
@@ -1806,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
-                    IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>;
+                    IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
+                    Sched<[WriteCvtF2F]>;
 def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
-                    IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>;
+                    IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
+                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
 let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
 def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
-                    IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>;
+                    IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>,
+                    Sched<[WriteCvtF2F]>;
 def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                     "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
-                    IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>;
+                    IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,
+                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
 }
 
 // Convert packed single/double fp to doubleword
 def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
-                       IIC_SSE_CVT_PS_RR>, VEX;
+                       IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
 def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
-                       IIC_SSE_CVT_PS_RM>, VEX;
+                       IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
 def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR256:$dst,
                           (int_x86_avx_cvt_ps2dq_256 VR256:$src))],
-                        IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+                        IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
 def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR256:$dst,
                           (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
-                        IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+                        IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtps2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
-                     IIC_SSE_CVT_PS_RR>;
+                     IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvtps2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
-                     IIC_SSE_CVT_PS_RM>;
+                     IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
 
 
 // Convert Packed Double FP to Packed DW Integers
@@ -1867,7 +1926,7 @@ let Predicates = [HasAVX] in {
 def VCVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
-                       VEX;
+                       VEX, Sched<[WriteCvtF2I]>;
 
 // XMM only
 def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
@@ -1875,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
 def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX;
+                         (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX,
+                       Sched<[WriteCvtF2ILd]>;
 
 // YMM only
 def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                        "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L;
+                         (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L,
+                       Sched<[WriteCvtF2I]>;
 def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
-                       VEX, VEX_L;
+                       VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
 def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
                 (VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
 }
@@ -1895,11 +1956,11 @@ def CVTPD2DQrm  : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
-                      IIC_SSE_CVT_PD_RM>;
+                      IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
 def CVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
-                      IIC_SSE_CVT_PD_RR>;
+                      IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
 
 // Convert with truncation packed single/double fp to doubleword
 // SSE2 packed instructions with XS prefix
@@ -1907,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                            (int_x86_sse2_cvttps2dq VR128:$src))],
-                         IIC_SSE_CVT_PS_RR>, VEX;
+                         IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
 def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst, (int_x86_sse2_cvttps2dq
                                             (memopv4f32 addr:$src)))],
-                         IIC_SSE_CVT_PS_RM>, VEX;
+                         IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
 def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                           "cvttps2dq\t{$src, $dst|$dst, $src}",
                           [(set VR256:$dst,
                             (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
-                          IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+                          IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
 def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                           "cvttps2dq\t{$src, $dst|$dst, $src}",
                           [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
                                              (memopv8f32 addr:$src)))],
-                          IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+                          IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
+                          Sched<[WriteCvtF2ILd]>;
 
 def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
-                       IIC_SSE_CVT_PS_RR>;
+                       IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
 def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
-                       IIC_SSE_CVT_PS_RM>;
+                       IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
 
 let Predicates = [HasAVX] in {
   def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
@@ -1982,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttpd2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                               (int_x86_sse2_cvttpd2dq VR128:$src))],
-                              IIC_SSE_CVT_PD_RR>, VEX;
+                              IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
 
 // The assembler can recognize rr 256-bit instructions by seeing a ymm
 // register, but the same isn't true when using memory operands instead.
@@ -1995,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                          "cvttpd2dqx\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                             (memopv2f64 addr:$src)))],
-                         IIC_SSE_CVT_PD_RM>, VEX;
+                         IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
 
 // YMM only
 def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                          "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                            (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
-                         IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+                         IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
 def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                          "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                           (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
-                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
 def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
                 (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
 
@@ -2021,12 +2083,13 @@ let Predicates = [HasAVX] in {
 def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
-                      IIC_SSE_CVT_PD_RR>;
+                      IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
 def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                       "cvttpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                         (memopv2f64 addr:$src)))],
-                                        IIC_SSE_CVT_PD_RM>;
+                                        IIC_SSE_CVT_PD_RM>,
+                      Sched<[WriteCvtF2ILd]>;
 
 // Convert packed single to packed double
 let Predicates = [HasAVX] in {
@@ -2034,32 +2097,32 @@ let Predicates = [HasAVX] in {
 def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "vcvtps2pd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
-                     IIC_SSE_CVT_PD_RR>, TB, VEX;
+                     IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>;
 def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
-                    IIC_SSE_CVT_PD_RM>, TB, VEX;
+                    IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>;
 def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                      "vcvtps2pd\t{$src, $dst|$dst, $src}",
                      [(set VR256:$dst,
                        (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
-                     IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L;
+                     IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>;
 def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                      "vcvtps2pd\t{$src, $dst|$dst, $src}",
                      [(set VR256:$dst,
                        (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
-                     IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L;
+                     IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
 }
 
 let Predicates = [UseSSE2] in {
 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
-                       IIC_SSE_CVT_PD_RR>, TB;
+                       IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>;
 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    "cvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
-                   IIC_SSE_CVT_PD_RM>, TB;
+                   IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>;
 }
 
 // Convert Packed DW Integers to Packed Double FP
@@ -2067,30 +2130,33 @@ let Predicates = [HasAVX] in {
 let neverHasSideEffects = 1, mayLoad = 1 in
 def VCVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "vcvtdq2pd\t{$src, $dst|$dst, $src}",
-                     []>, VEX;
+                     []>, VEX, Sched<[WriteCvtI2FLd]>;
 def VCVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
-                       (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX;
+                       (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
+                   Sched<[WriteCvtI2F]>;
 def VCVTDQ2PDYrm  : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
                      "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR256:$dst,
                        (int_x86_avx_cvtdq2_pd_256
-                        (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L;
+                        (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L,
+                    Sched<[WriteCvtI2FLd]>;
 def VCVTDQ2PDYrr  : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                      "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR256:$dst,
-                       (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L;
+                       (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
+                    Sched<[WriteCvtI2F]>;
 }
 
 let neverHasSideEffects = 1, mayLoad = 1 in
 def CVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
-                       IIC_SSE_CVT_PD_RR>;
+                       IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
 def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
-                       IIC_SSE_CVT_PD_RM>;
+                       IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
 
 // AVX 256-bit register conversion intrinsics
 let Predicates = [HasAVX] in {
@@ -2107,7 +2173,7 @@ let Predicates = [HasAVX] in {
 def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
-                       IIC_SSE_CVT_PD_RR>, VEX;
+                       IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;
 
 // XMM only
 def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
@@ -2116,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvtpd2psx\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
-                        IIC_SSE_CVT_PD_RM>, VEX;
+                        IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
 
 // YMM only
 def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
-                        IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+                        IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
 def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
-                        IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+                        IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
 def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
                 (VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
 
 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
-                     IIC_SSE_CVT_PD_RR>;
+                     IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>;
 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
-                     IIC_SSE_CVT_PD_RM>;
+                     IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;
 
 
 // AVX 256-bit register conversion intrinsics
@@ -2193,22 +2259,24 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
   def rr : SIi8<0xC2, MRMSrcReg,
                 (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                 [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
-                itins.rr>;
+                itins.rr>, Sched<[itins.Sched]>;
   def rm : SIi8<0xC2, MRMSrcMem,
                 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                 [(set RC:$dst, (OpNode (VT RC:$src1),
                                          (ld_frag addr:$src2), imm:$cc))],
-                                         itins.rm>;
+                                         itins.rm>,
+           Sched<[itins.Sched.Folded, ReadAfterLd]>;
 
   // Accept explicit immediate argument form instead of comparison code.
   let neverHasSideEffects = 1 in {
     def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
                       (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
-                      IIC_SSE_ALU_F32S_RR>;
+                      IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>;
     let mayLoad = 1 in
     def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
                       (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
-                      IIC_SSE_ALU_F32S_RM>;
+                      IIC_SSE_ALU_F32S_RM>,
+                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
 }
 
@@ -2241,12 +2309,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
                       (ins VR128:$src1, VR128:$src, CC:$cc), asm,
                         [(set VR128:$dst, (Int VR128:$src1,
                                                VR128:$src, imm:$cc))],
-                                               itins.rr>;
+                                               itins.rr>,
+           Sched<[itins.Sched]>;
   def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
                       (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
                         [(set VR128:$dst, (Int VR128:$src1,
                                                (load addr:$src), imm:$cc))],
-                                               itins.rm>;
+                                               itins.rm>,
+           Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 // Aliases to match intrinsics which expect XMM operand(s).
@@ -2276,12 +2346,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
   def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                      [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
-                     IIC_SSE_COMIS_RR, d>;
+                     IIC_SSE_COMIS_RR, d>,
+          Sched<[WriteFAdd]>;
   def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                      [(set EFLAGS, (OpNode (vt RC:$src1),
                                            (ld_frag addr:$src2)))],
-                                           IIC_SSE_COMIS_RM, d>;
+                                           IIC_SSE_COMIS_RM, d>,
+          Sched<[WriteFAddLd, ReadAfterLd]>;
 }
 
 let Defs = [EFLAGS] in {
@@ -2338,20 +2410,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
   def rri : PIi8<0xC2, MRMSrcReg,
              (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
              [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
-             IIC_SSE_CMPP_RR, d>;
+             IIC_SSE_CMPP_RR, d>,
+            Sched<[WriteFAdd]>;
   def rmi : PIi8<0xC2, MRMSrcMem,
              (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
              [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
-             IIC_SSE_CMPP_RM, d>;
+             IIC_SSE_CMPP_RM, d>,
+            Sched<[WriteFAddLd, ReadAfterLd]>;
 
   // Accept explicit immediate argument form instead of comparison code.
   let neverHasSideEffects = 1 in {
     def rri_alt : PIi8<0xC2, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
-               asm_alt, [], IIC_SSE_CMPP_RR, d>;
+               asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>;
     def rmi_alt : PIi8<0xC2, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
-               asm_alt, [], IIC_SSE_CMPP_RM, d>;
+               asm_alt, [], IIC_SSE_CMPP_RM, d>,
+               Sched<[WriteFAddLd, ReadAfterLd]>;
   }
 }
 
@@ -2427,12 +2502,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
   def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
                    [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
-                                       (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+                                       (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+            Sched<[WriteShuffleLd, ReadAfterLd]>;
   let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
     def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
                    [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
-                                       (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+                                       (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+              Sched<[WriteShuffle]>;
 }
 
 defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2516,13 +2593,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
                 (outs RC:$dst), (ins RC:$src1, RC:$src2),
                 asm, [(set RC:$dst,
                            (vt (OpNode RC:$src1, RC:$src2)))],
-                           IIC_SSE_UNPCK, d>;
+                           IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>;
     def rm : PI<opc, MRMSrcMem,
                 (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                 asm, [(set RC:$dst,
                            (vt (OpNode RC:$src1,
                                        (mem_frag addr:$src2))))],
-                                       IIC_SSE_UNPCK, d>;
+                                       IIC_SSE_UNPCK, d>,
+             Sched<[WriteShuffleLd, ReadAfterLd]>;
 }
 
 defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
@@ -2613,10 +2691,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
                                 Domain d> {
   def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-                     [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>;
+                     [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
+             Sched<[WriteVecLogic]>;
   def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
-                IIC_SSE_MOVMSK, d>, REX_W;
+                IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>;
 }
 
 let Predicates = [HasAVX] in {
@@ -2644,18 +2723,18 @@ let Predicates = [HasAVX] in {
   // Assembler Only
   def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
              "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
-             SSEPackedSingle>, TB, VEX;
+             SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>;
   def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
              "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
              SSEPackedDouble>, TB,
-             OpSize, VEX;
+             OpSize, VEX, Sched<[WriteVecLogic]>;
   def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
              "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
-             SSEPackedSingle>, TB, VEX, VEX_L;
+             SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>;
   def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
              "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
              SSEPackedDouble>, TB,
-             OpSize, VEX, VEX_L;
+             OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>;
 }
 
 defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
@@ -2693,7 +2772,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>;
+       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+       Sched<[itins.Sched]>;
   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
@@ -2701,7 +2781,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (OpVT (OpNode RC:$src1,
                                      (bitconvert (memop_frag addr:$src2)))))],
-                                     itins.rm>;
+                                     itins.rm>,
+       Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 } // ExeDomain = SSEPackedInt
 
@@ -2967,6 +3048,7 @@ let isCodeGenOnly = 1 in {
 ///
 /// And, we have a special variant form for a full-vector intrinsic form.
 
+let Sched = WriteFSqrt in {
 def SSE_SQRTP : OpndItins<
   IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
 >;
@@ -2974,7 +3056,9 @@ def SSE_SQRTP : OpndItins<
 def SSE_SQRTS : OpndItins<
   IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
 >;
+}
 
+let Sched = WriteFRcp in {
 def SSE_RCPP : OpndItins<
   IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
 >;
@@ -2982,6 +3066,7 @@ def SSE_RCPP : OpndItins<
 def SSE_RCPS : OpndItins<
   IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
 >;
+}
 
 /// sse1_fp_unop_s - SSE1 unops in scalar form.
 multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
@@ -2991,24 +3076,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
                       (ins FR32:$src1, FR32:$src2),
                       !strconcat("v", OpcodeStr,
                                  "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                      []>, VEX_4V, VEX_LIG;
+                      []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
   let mayLoad = 1 in {
   def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
                       (ins FR32:$src1,f32mem:$src2),
                       !strconcat("v", OpcodeStr,
                                  "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                      []>, VEX_4V, VEX_LIG;
+                      []>, VEX_4V, VEX_LIG,
+                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
   def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                       (ins VR128:$src1, ssmem:$src2),
                       !strconcat("v", OpcodeStr,
                                  "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                      []>, VEX_4V, VEX_LIG;
+                      []>, VEX_4V, VEX_LIG,
+                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
 }
 
   def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                 !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
-                [(set FR32:$dst, (OpNode FR32:$src))]>;
+                [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
   // For scalar unary operations, fold a load into the operation
   // only in OptForSize mode. It eliminates an instruction, but it also
   // eliminates a whole-register clobber (the load), so it introduces a
@@ -3016,13 +3103,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
   def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                 !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                 [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
-            Requires<[UseSSE1, OptForSize]>;
+            Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
   def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
+                    [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>,
+                Sched<[itins.Sched]>;
   def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
                     !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
+                    [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>,
+                Sched<[itins.Sched.Folded]>;
 }
 
 /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
@@ -3033,24 +3122,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
                        (ins FR32:$src1, FR32:$src2),
                        !strconcat("v", OpcodeStr,
                            "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                []>, VEX_4V, VEX_LIG;
+                []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
   let mayLoad = 1 in {
   def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
                       (ins FR32:$src1,f32mem:$src2),
                       !strconcat("v", OpcodeStr,
                                  "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                      []>, VEX_4V, VEX_LIG;
+                      []>, VEX_4V, VEX_LIG,
+                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
   def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                       (ins VR128:$src1, ssmem:$src2),
                       !strconcat("v", OpcodeStr,
                                  "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                      []>, VEX_4V, VEX_LIG;
+                      []>, VEX_4V, VEX_LIG,
+                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
 }
 
   def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                 !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
-                [(set FR32:$dst, (OpNode FR32:$src))]>;
+                [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
   // For scalar unary operations, fold a load into the operation
   // only in OptForSize mode. It eliminates an instruction, but it also
   // eliminates a whole-register clobber (the load), so it introduces a
@@ -3058,17 +3149,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
   def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                 !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                 [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
-            Requires<[UseSSE1, OptForSize]>;
+            Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
   let Constraints = "$src1 = $dst" in {
     def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src2),
                       !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
-                      [], itins.rr>;
+                      [], itins.rr>, Sched<[itins.Sched]>;
     let mayLoad = 1, hasSideEffects = 0 in
     def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                       (ins VR128:$src1, ssmem:$src2),
                       !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
-                      [], itins.rm>;
+                      [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
 }
 
@@ -3080,30 +3171,32 @@ let Predicates = [HasAVX] in {
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
-                       itins.rr>, VEX;
+                       itins.rr>, VEX, Sched<[itins.Sched]>;
   def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
-                       itins.rm>, VEX;
+                       itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
   def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                         !strconcat("v", OpcodeStr,
                                    "ps\t{$src, $dst|$dst, $src}"),
                         [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
-                        itins.rr>, VEX, VEX_L;
+                        itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
   def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                         !strconcat("v", OpcodeStr,
                                    "ps\t{$src, $dst|$dst, $src}"),
                         [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
-                        itins.rm>, VEX, VEX_L;
+                        itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
 }
 
   def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
+                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>,
+            Sched<[itins.Sched]>;
   def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
+                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>,
+            Sched<[itins.Sched.Folded]>;
 }
 
 /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
@@ -3115,33 +3208,33 @@ let Predicates = [HasAVX] in {
                            !strconcat("v", OpcodeStr,
                                       "ps\t{$src, $dst|$dst, $src}"),
                            [(set VR128:$dst, (V4F32Int VR128:$src))],
-                           itins.rr>, VEX;
+                           itins.rr>, VEX, Sched<[itins.Sched]>;
   def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                           !strconcat("v", OpcodeStr,
                           "ps\t{$src, $dst|$dst, $src}"),
                           [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
-                          itins.rm>, VEX;
+                          itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
   def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                             !strconcat("v", OpcodeStr,
                                        "ps\t{$src, $dst|$dst, $src}"),
                             [(set VR256:$dst, (V8F32Int VR256:$src))],
-                            itins.rr>, VEX, VEX_L;
+                            itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
   def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
                           (ins f256mem:$src),
                           !strconcat("v", OpcodeStr,
                                     "ps\t{$src, $dst|$dst, $src}"),
                           [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
-                          itins.rm>, VEX, VEX_L;
+                          itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
 }
 
   def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (V4F32Int VR128:$src))],
-                    itins.rr>;
+                    itins.rr>, Sched<[itins.Sched]>;
   def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
-                    itins.rm>;
+                    itins.rm>, Sched<[itins.Sched.Folded]>;
 }
 
 /// sse2_fp_unop_s - SSE2 unops in scalar form.
@@ -3152,35 +3245,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
                       (ins FR64:$src1, FR64:$src2),
                       !strconcat("v", OpcodeStr,
                                  "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                      []>, VEX_4V, VEX_LIG;
+                      []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
   let mayLoad = 1 in {
   def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
                       (ins FR64:$src1,f64mem:$src2),
                       !strconcat("v", OpcodeStr,
                                  "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                      []>, VEX_4V, VEX_LIG;
+                      []>, VEX_4V, VEX_LIG,
+                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
   def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
                       (ins VR128:$src1, sdmem:$src2),
                       !strconcat("v", OpcodeStr,
                                  "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                      []>, VEX_4V, VEX_LIG;
+                      []>, VEX_4V, VEX_LIG,
+                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
 }
 
   def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
+                [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
+            Sched<[itins.Sched]>;
   // See the comments in sse1_fp_unop_s for why this is OptForSize.
   def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
                 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                 [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
-            Requires<[UseSSE2, OptForSize]>;
+            Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
   def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
+                    [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
+                Sched<[itins.Sched]>;
   def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
                     !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
+                    [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
+                Sched<[itins.Sched.Folded]>;
 }
 
 /// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -3191,30 +3289,32 @@ let Predicates = [HasAVX] in {
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
-                       itins.rr>, VEX;
+                       itins.rr>, VEX, Sched<[itins.Sched]>;
   def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
-                       itins.rm>, VEX;
+                       itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
   def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                         !strconcat("v", OpcodeStr,
                                    "pd\t{$src, $dst|$dst, $src}"),
                         [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
-                        itins.rr>, VEX, VEX_L;
+                        itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
   def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                         !strconcat("v", OpcodeStr,
                                    "pd\t{$src, $dst|$dst, $src}"),
                         [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
-                        itins.rm>, VEX, VEX_L;
+                        itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
 }
 
   def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
               !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-              [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
+              [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>,
+            Sched<[itins.Sched]>;
   def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
+                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>,
+            Sched<[itins.Sched.Folded]>;
 }
 
 // Square root.
@@ -3305,52 +3405,48 @@ let Predicates = [UseSSE1] in {
 //===----------------------------------------------------------------------===//
 
 let AddedComplexity = 400 in { // Prefer non-temporal versions
-  def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
-                       (ins f128mem:$dst, VR128:$src),
-                       "movntps\t{$src, $dst|$dst, $src}",
-                       [(alignednontemporalstore (v4f32 VR128:$src),
-                                                 addr:$dst)],
-                                                 IIC_SSE_MOVNT>, VEX;
-  def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
-                       (ins f128mem:$dst, VR128:$src),
-                       "movntpd\t{$src, $dst|$dst, $src}",
-                       [(alignednontemporalstore (v2f64 VR128:$src),
-                                                 addr:$dst)],
-                                                 IIC_SSE_MOVNT>, VEX;
-
-  let ExeDomain = SSEPackedInt in
-  def VMOVNTDQmr    : VPDI<0xE7, MRMDestMem, (outs),
-                           (ins f128mem:$dst, VR128:$src),
-                           "movntdq\t{$src, $dst|$dst, $src}",
-                           [(alignednontemporalstore (v2i64 VR128:$src),
-                                                     addr:$dst)],
-                                                     IIC_SSE_MOVNT>, VEX;
-
-  def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
-            (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
-
-  def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
-                       (ins f256mem:$dst, VR256:$src),
-                       "movntps\t{$src, $dst|$dst, $src}",
-                       [(alignednontemporalstore (v8f32 VR256:$src),
-                                                 addr:$dst)],
-                                                 IIC_SSE_MOVNT>, VEX, VEX_L;
-  def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
-                       (ins f256mem:$dst, VR256:$src),
-                       "movntpd\t{$src, $dst|$dst, $src}",
-                       [(alignednontemporalstore (v4f64 VR256:$src),
-                                                 addr:$dst)],
-                                                 IIC_SSE_MOVNT>, VEX, VEX_L;
-  let ExeDomain = SSEPackedInt in
-  def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
-                      (ins f256mem:$dst, VR256:$src),
-                      "movntdq\t{$src, $dst|$dst, $src}",
-                      [(alignednontemporalstore (v4i64 VR256:$src),
-                                                addr:$dst)],
-                                                IIC_SSE_MOVNT>, VEX, VEX_L;
-}
+let SchedRW = [WriteStore] in {
+def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+                     (ins f128mem:$dst, VR128:$src),
+                     "movntps\t{$src, $dst|$dst, $src}",
+                     [(alignednontemporalstore (v4f32 VR128:$src),
+                                               addr:$dst)],
+                                               IIC_SSE_MOVNT>, VEX;
+def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+                     (ins f128mem:$dst, VR128:$src),
+                     "movntpd\t{$src, $dst|$dst, $src}",
+                     [(alignednontemporalstore (v2f64 VR128:$src),
+                                               addr:$dst)],
+                                               IIC_SSE_MOVNT>, VEX;
+
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQmr    : VPDI<0xE7, MRMDestMem, (outs),
+                         (ins f128mem:$dst, VR128:$src),
+                         "movntdq\t{$src, $dst|$dst, $src}",
+                         [(alignednontemporalstore (v2i64 VR128:$src),
+                                                   addr:$dst)],
+                                                   IIC_SSE_MOVNT>, VEX;
+
+def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+                     (ins f256mem:$dst, VR256:$src),
+                     "movntps\t{$src, $dst|$dst, $src}",
+                     [(alignednontemporalstore (v8f32 VR256:$src),
+                                               addr:$dst)],
+                                               IIC_SSE_MOVNT>, VEX, VEX_L;
+def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+                     (ins f256mem:$dst, VR256:$src),
+                     "movntpd\t{$src, $dst|$dst, $src}",
+                     [(alignednontemporalstore (v4f64 VR256:$src),
+                                               addr:$dst)],
+                                               IIC_SSE_MOVNT>, VEX, VEX_L;
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+                    (ins f256mem:$dst, VR256:$src),
+                    "movntdq\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v4i64 VR256:$src),
+                                              addr:$dst)],
+                                              IIC_SSE_MOVNT>, VEX, VEX_L;
 
-let AddedComplexity = 400 in { // Prefer non-temporal versions
 def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movntps\t{$src, $dst|$dst, $src}",
                     [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
@@ -3366,9 +3462,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
                     IIC_SSE_MOVNT>;
 
-def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
-          (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
-
 // There is no AVX form for instructions below this point
 def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                  "movnti{l}\t{$src, $dst|$dst, $src}",
@@ -3380,14 +3473,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                      [(nontemporalstore (i64 GR64:$src), addr:$dst)],
                      IIC_SSE_MOVNT>,
                   TB, Requires<[HasSSE2]>;
-}
+} // SchedRW = [WriteStore]
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+          (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+          (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
+} // AddedComplexity
 
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Prefetch and memory fence
 //===----------------------------------------------------------------------===//
 
 // Prefetch intrinsic.
-let Predicates = [HasSSE1] in {
+let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
 def PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
     "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
     IIC_SSE_PREFETCH>, TB;
@@ -3402,6 +3502,8 @@ def PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
     IIC_SSE_PREFETCH>, TB;
 }
 
+// FIXME: How should these memory instructions be modeled?
+let SchedRW = [WriteLoad] in {
 // Flush cache
 def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
                "clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
@@ -3421,6 +3523,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
 def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
                "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
                TB, Requires<[HasSSE2]>;
+} // SchedRW
 
 def : Pat<(X86SFence), (SFENCE)>;
 def : Pat<(X86LFence), (LFENCE)>;
@@ -3432,17 +3535,17 @@ def : Pat<(X86MFence), (MFENCE)>;
 
 def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                   "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
-                  IIC_SSE_LDMXCSR>, VEX;
+                  IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>;
 def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                   "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
-                  IIC_SSE_STMXCSR>, VEX;
+                  IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>;
 
 def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                   "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
-                  IIC_SSE_LDMXCSR>;
+                  IIC_SSE_LDMXCSR>, Sched<[WriteLoad]>;
 def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                   "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
-                  IIC_SSE_STMXCSR>;
+                  IIC_SSE_STMXCSR>, Sched<[WriteStore]>;
 
 //===---------------------------------------------------------------------===//
 // SSE2 - Move Aligned/Unaligned Packed Integer Instructions
@@ -3450,7 +3553,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
 
 let ExeDomain = SSEPackedInt in { // SSE integer instructions
 
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
 def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
                     VEX;
@@ -3466,7 +3569,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
 }
 
 // For Disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
 def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movdqa\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>,
@@ -3484,7 +3587,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
 }
 
 let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
-    neverHasSideEffects = 1 in {
+    neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
 def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
                    VEX;
@@ -3501,7 +3604,7 @@ let Predicates = [HasAVX] in {
 }
 }
 
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
 def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i128mem:$dst, VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
@@ -3520,6 +3623,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
 }
 }
 
+let SchedRW = [WriteMove] in {
 let neverHasSideEffects = 1 in
 def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
@@ -3538,9 +3642,10 @@ def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                        "movdqu\t{$src, $dst|$dst, $src}",
                        [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
 }
+} // SchedRW
 
 let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
-    neverHasSideEffects = 1 in {
+    neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
 def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "movdqa\t{$src, $dst|$dst, $src}",
                    [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
@@ -3552,7 +3657,7 @@ def MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                  XS, Requires<[UseSSE2]>;
 }
 
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
 def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                    "movdqa\t{$src, $dst|$dst, $src}",
                    [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
@@ -3580,6 +3685,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
 // SSE2 - Packed Integer Arithmetic Instructions
 //===---------------------------------------------------------------------===//
 
+let Sched = WriteVecIMul in
 def SSE_PMADD : OpndItins<
   IIC_SSE_PMADD, IIC_SSE_PMADD
 >;
@@ -3598,14 +3704,15 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>;
+       [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>,
+      Sched<[itins.Sched]>;
   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
-       itins.rm>;
+       itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
@@ -3639,20 +3746,22 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
-        itins.rr>;
+        itins.rr>, Sched<[WriteVecShift]>;
   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, i128mem:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (DstVT (OpNode RC:$src1,
-                       (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
+                       (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
+      Sched<[WriteVecShiftLd, ReadAfterLd]>;
   def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
        (ins RC:$src1, i32i8imm:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
+       [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>,
+       Sched<[WriteVecShift]>;
 }
 
 /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
@@ -3667,14 +3776,16 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
+       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+       Sched<[itins.Sched]>;
   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
-                                     (bitconvert (memop_frag addr:$src2)))))]>;
+                                     (bitconvert (memop_frag addr:$src2)))))]>,
+       Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 } // ExeDomain = SSEPackedInt
 
@@ -3779,7 +3890,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
                             VR128, v4i32, v4i32, bc_v4i32,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
 
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
   // 128-bit logical shifts.
   def VPSLLDQri : PDIi8<0x73, MRM7r,
                     (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3825,7 +3936,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
                              VR256, v8i32, v4i32, bc_v4i32,
                              SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
 
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
   // 256-bit logical shifts.
   def VPSLLDQYri : PDIi8<0x73, MRM7r,
                     (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
@@ -3871,7 +3982,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
                            VR128, v4i32, v4i32, bc_v4i32,
                            SSE_INTSHIFT_ITINS_P>;
 
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
   // 128-bit logical shifts.
   def PSLLDQri : PDIi8<0x73, MRM7r,
                        (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3966,14 +4077,15 @@ let Predicates = [HasAVX] in {
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set VR128:$dst,
                         (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
-                      IIC_SSE_PSHUF>, VEX;
+                      IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
   def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
                       (ins i128mem:$src1, i8imm:$src2),
                       !strconcat("v", OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128:$dst,
                        (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
-                        (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX;
+                        (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
+                  Sched<[WriteShuffleLd]>;
 }
 
 let Predicates = [HasAVX2] in {
@@ -3983,14 +4095,15 @@ let Predicates = [HasAVX2] in {
                                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                        [(set VR256:$dst,
                          (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
-                       IIC_SSE_PSHUF>, VEX, VEX_L;
+                       IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>;
   def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
                        (ins i256mem:$src1, i8imm:$src2),
                        !strconcat("v", OpcodeStr,
                                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set VR256:$dst,
                         (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
-                         (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L;
+                         (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L,
+                   Sched<[WriteShuffleLd]>;
 }
 
 let Predicates = [UseSSE2] in {
@@ -4000,14 +4113,15 @@ let Predicates = [UseSSE2] in {
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set VR128:$dst,
                   (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
-                IIC_SSE_PSHUF>;
+                IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
   def mi : Ii8<0x70, MRMSrcMem,
                (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set VR128:$dst,
                   (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
-                          (i8 imm:$src2))))], IIC_SSE_PSHUF>;
+                          (i8 imm:$src2))))], IIC_SSE_PSHUF>,
+           Sched<[WriteShuffleLd]>;
 }
 }
 } // ExeDomain = SSEPackedInt
@@ -4043,7 +4157,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
           !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
-      IIC_SSE_UNPCK>;
+      IIC_SSE_UNPCK>, Sched<[WriteShuffle]>;
   def rm : PDI<opc, MRMSrcMem,
       (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
@@ -4052,7 +4166,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
       [(set VR128:$dst, (OpNode VR128:$src1,
                                   (bc_frag (memopv2i64
                                                addr:$src2))))],
-                                               IIC_SSE_UNPCK>;
+                                               IIC_SSE_UNPCK>,
+      Sched<[WriteShuffleLd, ReadAfterLd]>;
 }
 
 multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
@@ -4060,12 +4175,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
   def Yrr : PDI<opc, MRMSrcReg,
       (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
       !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-      [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
+      [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>,
+      Sched<[WriteShuffle]>;
   def Yrm : PDI<opc, MRMSrcMem,
       (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
       !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst, (OpNode VR256:$src1,
-                                  (bc_frag (memopv4i64 addr:$src2))))]>;
+                                  (bc_frag (memopv4i64 addr:$src2))))]>,
+      Sched<[WriteShuffleLd, ReadAfterLd]>;
 }
 
 let Predicates = [HasAVX] in {
@@ -4142,7 +4259,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
            "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
            "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set VR128:$dst,
-         (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>;
+         (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>,
+       Sched<[WriteShuffle]>;
   def rmi : Ii8<0xC4, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1,
                         i16mem:$src2, i32i8imm:$src3),
@@ -4151,7 +4269,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
            "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set VR128:$dst,
          (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
-                    imm:$src3))], IIC_SSE_PINSRW>;
+                    imm:$src3))], IIC_SSE_PINSRW>,
+       Sched<[WriteShuffleLd, ReadAfterLd]>;
 }
 
 // Extract
@@ -4160,12 +4279,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
                     (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
                     "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
-                                                imm:$src2))]>, TB, OpSize, VEX;
+                                                imm:$src2))]>, TB, OpSize, VEX,
+                Sched<[WriteShuffle]>;
 def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
                     (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
                     "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
-                                                imm:$src2))], IIC_SSE_PEXTRW>;
+                                                imm:$src2))], IIC_SSE_PEXTRW>,
+               Sched<[WriteShuffleLd, ReadAfterLd]>;
 
 // Insert
 let Predicates = [HasAVX] in {
@@ -4173,7 +4294,7 @@ let Predicates = [HasAVX] in {
   def  VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
        (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
        "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-       []>, TB, OpSize, VEX_4V;
+       []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>;
 }
 
 let Constraints = "$src1 = $dst" in
@@ -4185,7 +4306,7 @@ let Constraints = "$src1 = $dst" in
 // SSE2 - Packed Mask Creation
 //===---------------------------------------------------------------------===//
 
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
 
 def VPMOVMSKBrr  : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
            "pmovmskb\t{$src, $dst|$dst, $src}",
@@ -4213,7 +4334,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
 // SSE2 - Conditional Store
 //===---------------------------------------------------------------------===//
 
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
 
 let Uses = [EDI] in
 def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
@@ -4252,41 +4373,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
-                        VEX;
+                        VEX, Sched<[WriteMove]>;
 def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
                         IIC_SSE_MOVDQ>,
-                      VEX;
+                      VEX, Sched<[WriteLoad]>;
 def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                         "mov{d|q}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v2i64 (scalar_to_vector GR64:$src)))],
-                          IIC_SSE_MOVDQ>, VEX;
+                          IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
 def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                        "mov{d|q}\t{$src, $dst|$dst, $src}",
                        [(set FR64:$dst, (bitconvert GR64:$src))],
-                       IIC_SSE_MOVDQ>, VEX;
+                       IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
 
 def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
-                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>;
+                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+                  Sched<[WriteMove]>;
 def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
-                        IIC_SSE_MOVDQ>;
+                        IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
 def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                         "mov{d|q}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v2i64 (scalar_to_vector GR64:$src)))],
-                          IIC_SSE_MOVDQ>;
+                          IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
 def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                        "mov{d|q}\t{$src, $dst|$dst, $src}",
                        [(set FR64:$dst, (bitconvert GR64:$src))],
-                       IIC_SSE_MOVDQ>;
+                       IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
 
 //===---------------------------------------------------------------------===//
 // Move Int Doubleword to Single Scalar
@@ -4294,22 +4416,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
 def VMOVDI2SSrr  : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))],
-                      IIC_SSE_MOVDQ>, VEX;
+                      IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
 
 def VMOVDI2SSrm  : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
                       IIC_SSE_MOVDQ>,
-                      VEX;
+                      VEX, Sched<[WriteLoad]>;
 def MOVDI2SSrr  : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))],
-                      IIC_SSE_MOVDQ>;
+                      IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
 
 def MOVDI2SSrm  : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
-                      IIC_SSE_MOVDQ>;
+                      IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
 
 //===---------------------------------------------------------------------===//
 // Move Packed Doubleword Int to Packed Double Int
@@ -4317,26 +4439,29 @@ def MOVDI2SSrm  : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
 def VMOVPDI2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
-                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX;
+                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
+                    Sched<[WriteMove]>;
 def VMOVPDI2DImr  : VPDI<0x7E, MRMDestMem, (outs),
                        (ins i32mem:$dst, VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (vector_extract (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
-                                     VEX;
+                                     VEX, Sched<[WriteLoad]>;
 def MOVPDI2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
-                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>;
+                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
+                   Sched<[WriteMove]>;
 def MOVPDI2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (vector_extract (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)],
-                                     IIC_SSE_MOVDQ>;
+                                     IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
 
 //===---------------------------------------------------------------------===//
 // Move Packed Doubleword Int first element to Doubleword Int
 //
+let SchedRW = [WriteMove] in {
 def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                           "vmov{d|q}\t{$src, $dst|$dst, $src}",
                           [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
@@ -4349,6 +4474,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                         [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
                                                          (iPTR 0)))],
                                                          IIC_SSE_MOVD_ToGP>;
+} //SchedRW
 
 //===---------------------------------------------------------------------===//
 // Bitcast FR64 <-> GR64
@@ -4357,28 +4483,28 @@ let Predicates = [HasAVX] in
 def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
-                        VEX;
+                        VEX, Sched<[WriteLoad]>;
 def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                          "mov{d|q}\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (bitconvert FR64:$src))],
-                         IIC_SSE_MOVDQ>, VEX;
+                         IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
 def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
-                         IIC_SSE_MOVDQ>, VEX;
+                         IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
 
 def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
-                       IIC_SSE_MOVDQ>;
+                       IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
 def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                        "mov{d|q}\t{$src, $dst|$dst, $src}",
                        [(set GR64:$dst, (bitconvert FR64:$src))],
-                       IIC_SSE_MOVD_ToGP>;
+                       IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
 def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
-                       IIC_SSE_MOVDQ>;
+                       IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
 
 //===---------------------------------------------------------------------===//
 // Move Scalar Single to Double Int
@@ -4386,23 +4512,24 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
 def VMOVSS2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))],
-                      IIC_SSE_MOVD_ToGP>, VEX;
+                      IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
 def VMOVSS2DImr  : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
-                      IIC_SSE_MOVDQ>, VEX;
+                      IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
 def MOVSS2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))],
-                      IIC_SSE_MOVD_ToGP>;
+                      IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
 def MOVSS2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
-                      IIC_SSE_MOVDQ>;
+                      IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
 
 //===---------------------------------------------------------------------===//
 // Patterns and instructions to describe movd/movq to XMM register zero-extends
 //
+let SchedRW = [WriteMove] in {
 let AddedComplexity = 15 in {
 def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
@@ -4428,8 +4555,9 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                                       (v2i64 (scalar_to_vector GR64:$src)))))],
                                       IIC_SSE_MOVDQ>;
 }
+} // SchedRW
 
-let AddedComplexity = 20 in {
+let AddedComplexity = 20, SchedRW = [WriteLoad] in {
 def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
@@ -4442,7 +4570,7 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                          (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
                                                    (loadi32 addr:$src))))))],
                                                    IIC_SSE_MOVDQ>;
-}
+} // AddedComplexity, SchedRW
 
 let Predicates = [HasAVX] in {
   // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
@@ -4491,6 +4619,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
 //===---------------------------------------------------------------------===//
 // Move Quadword Int to Packed Quadword Int
 //
+
+let SchedRW = [WriteLoad] in {
 def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                     "vmovq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
@@ -4502,10 +4632,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       (v2i64 (scalar_to_vector (loadi64 addr:$src))))],
                       IIC_SSE_MOVDQ>, XS,
                     Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
+} // SchedRW
 
 //===---------------------------------------------------------------------===//
 // Move Packed Quadword Int to Quadword Int
 //
+let SchedRW = [WriteStore] in {
 def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -4516,17 +4648,19 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                       [(store (i64 (vector_extract (v2i64 VR128:$src),
                                     (iPTR 0))), addr:$dst)],
                                     IIC_SSE_MOVDQ>;
+} // SchedRW
 
 //===---------------------------------------------------------------------===//
 // Store / copy lower 64-bits of a XMM register.
 //
 def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
-                     [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+                     [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX,
+                  Sched<[WriteStore]>;
 def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
-                     IIC_SSE_MOVDQ>;
+                     IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
 
 let AddedComplexity = 20 in
 def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4535,7 +4669,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
                                                  (loadi64 addr:$src))))))],
                                                  IIC_SSE_MOVDQ>,
-                     XS, VEX, Requires<[HasAVX]>;
+                     XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>;
 
 let AddedComplexity = 20 in
 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4544,7 +4678,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
                                                  (loadi64 addr:$src))))))],
                                                  IIC_SSE_MOVDQ>,
-                     XS, Requires<[UseSSE2]>;
+                     XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
 
 let Predicates = [HasAVX], AddedComplexity = 20 in {
   def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
@@ -4574,6 +4708,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)),
 // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
 // IA32 document. movq xmm1, xmm2 does clear the high bits.
 //
+let SchedRW = [WriteVecLogic] in {
 let AddedComplexity = 15 in
 def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
@@ -4586,7 +4721,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
                     IIC_SSE_MOVQ_RR>,
                       XS, Requires<[UseSSE2]>;
+} // SchedRW
 
+let SchedRW = [WriteVecLogicLd] in {
 let AddedComplexity = 20 in
 def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
@@ -4602,6 +4739,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                                              IIC_SSE_MOVDQ>,
                       XS, Requires<[UseSSE2]>;
 }
+} // SchedRW
 
 let AddedComplexity = 20 in {
   let Predicates = [HasAVX] in {
@@ -4619,6 +4757,7 @@ let AddedComplexity = 20 in {
 }
 
 // Instructions to match in the assembler
+let SchedRW = [WriteMove] in {
 def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_MOVDQ>, VEX, VEX_W;
@@ -4629,16 +4768,19 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
 def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                           "movd\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVDQ>, VEX, VEX_W;
+} // SchedRW
 
 // Instructions for the disassembler
 // xr = XMM register
 // xm = mem64
 
+let SchedRW = [WriteMove] in {
 let Predicates = [HasAVX] in
 def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
 def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
+} // SchedRW
 
 //===---------------------------------------------------------------------===//
 // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
@@ -4649,11 +4791,11 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
 def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                       [(set RC:$dst, (vt (OpNode RC:$src)))],
-                      IIC_SSE_MOV_LH>;
+                      IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
 def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                       [(set RC:$dst, (OpNode (mem_frag addr:$src)))],
-                      IIC_SSE_MOV_LH>;
+                      IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
 }
 
 let Predicates = [HasAVX] in {
@@ -4709,25 +4851,27 @@ multiclass sse3_replicate_dfp<string OpcodeStr> {
 let neverHasSideEffects = 1 in
 def rr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [], IIC_SSE_MOV_LH>;
+                    [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
 def rm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst,
                       (v2f64 (X86Movddup
                               (scalar_to_vector (loadf64 addr:$src)))))],
-                              IIC_SSE_MOV_LH>;
+                              IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
 }
 
 // FIXME: Merge with above classe when there're patterns for the ymm version
 multiclass sse3_replicate_dfp_y<string OpcodeStr> {
 def rr  : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>;
+                    [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
+                    Sched<[WriteShuffle]>;
 def rm  : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst,
                       (v4f64 (X86Movddup
-                              (scalar_to_vector (loadf64 addr:$src)))))]>;
+                              (scalar_to_vector (loadf64 addr:$src)))))]>,
+                    Sched<[WriteShuffleLd]>;
 }
 
 let Predicates = [HasAVX] in {
@@ -4775,6 +4919,7 @@ let Predicates = [UseSSE3] in {
 // SSE3 - Move Unaligned Integer
 //===---------------------------------------------------------------------===//
 
+let SchedRW = [WriteLoad] in {
 let Predicates = [HasAVX] in {
   def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "vlddqu\t{$src, $dst|$dst, $src}",
@@ -4788,6 +4933,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "lddqu\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
                    IIC_SSE_LDDQU>;
+}
 
 //===---------------------------------------------------------------------===//
 // SSE3 - Arithmetic
@@ -4801,13 +4947,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>;
+       [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>,
+       Sched<[itins.Sched]>;
   def rm : I<0xD0, MRMSrcMem,
        (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>;
+       [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>,
+       Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 let Predicates = [HasAVX] in {
@@ -4844,14 +4992,15 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
        !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+      Sched<[WriteFAdd]>;
 
   def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
-        IIC_SSE_HADDSUB_RM>;
+        IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
 }
 multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                   X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
@@ -4859,14 +5008,15 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
        !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+      Sched<[WriteFAdd]>;
 
   def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
-        IIC_SSE_HADDSUB_RM>;
+        IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
 }
 
 let Predicates = [HasAVX] in {
@@ -4915,7 +5065,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
                     (ins VR128:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>,
-                    OpSize;
+                    OpSize, Sched<[WriteVecALU]>;
 
   def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                     (ins i128mem:$src),
@@ -4923,7 +5073,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
                     [(set VR128:$dst,
                       (IntId128
                        (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>,
-                    OpSize;
+                    OpSize, Sched<[WriteVecALULd]>;
 }
 
 /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
@@ -4933,14 +5083,15 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
                     (ins VR256:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst, (IntId256 VR256:$src))]>,
-                    OpSize;
+                    OpSize, Sched<[WriteVecALU]>;
 
   def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
                     (ins i256mem:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst,
                       (IntId256
-                       (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
+                       (bitconvert (memopv4i64 addr:$src))))]>, OpSize,
+                    Sched<[WriteVecALULd]>;
 }
 
 let Predicates = [HasAVX] in {
@@ -4972,6 +5123,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
 // SSSE3 - Packed Binary Operator Instructions
 //===---------------------------------------------------------------------===//
 
+let Sched = WriteVecALU in {
 def SSE_PHADDSUBD : OpndItins<
   IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
 >;
@@ -4981,12 +5133,16 @@ def SSE_PHADDSUBSW : OpndItins<
 def SSE_PHADDSUBW : OpndItins<
   IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
 >;
+}
+let Sched = WriteShuffle in
 def SSE_PSHUFB : OpndItins<
   IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
 >;
+let Sched = WriteVecALU in
 def SSE_PSIGN : OpndItins<
   IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
 >;
+let Sched = WriteVecIMul in
 def SSE_PMULHRSW : OpndItins<
   IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
 >;
@@ -5003,7 +5159,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
-       OpSize;
+       OpSize, Sched<[itins.Sched]>;
   def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
@@ -5011,7 +5167,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst,
          (OpVT (OpNode RC:$src1,
-          (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize;
+          (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize,
+       Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
@@ -5025,7 +5182,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
-       OpSize;
+       OpSize, Sched<[itins.Sched]>;
   def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
        (ins VR128:$src1, i128mem:$src2),
        !if(Is2Addr,
@@ -5033,7 +5190,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set VR128:$dst,
          (IntId128 VR128:$src1,
-          (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+          (bitconvert (memopv2i64 addr:$src2))))]>, OpSize,
+       Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
@@ -5175,7 +5333,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
         !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
         !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
-      [], IIC_SSE_PALIGNR>, OpSize;
+      [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>;
   let mayLoad = 1 in
   def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
@@ -5183,7 +5341,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
         !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
         !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
-      [], IIC_SSE_PALIGNR>, OpSize;
+      [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
   }
 }
 
@@ -5193,13 +5351,13 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
       (ins VR256:$src1, VR256:$src2, i8imm:$src3),
       !strconcat(asm,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-      []>, OpSize;
+      []>, OpSize, Sched<[WriteShuffle]>;
   let mayLoad = 1 in
   def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
       (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
       !strconcat(asm,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-      []>, OpSize;
+      []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
   }
 }
 
@@ -5247,6 +5405,7 @@ def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
 // SSSE3 - Thread synchronization
 //===---------------------------------------------------------------------===//
 
+let SchedRW = [WriteSystem] in {
 let usesCustomInserter = 1 in {
 def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
                 [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
@@ -5260,6 +5419,7 @@ let Uses = [ECX, EAX] in
 def MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait",
                 [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
                 TB, Requires<[HasSSE3]>;
+} // SchedRW
 
 def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
 def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
@@ -6679,7 +6839,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
                   !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                   [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
-                  IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+                  NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
 
   def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, x86memop:$src2, RC:$src3),
@@ -6688,7 +6848,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
                   [(set RC:$dst,
                         (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
                                RC:$src3))],
-                  IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+                  NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
 }
 
 let Predicates = [HasAVX] in {
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index 1185941d344d..5b6298b541bc 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -15,7 +15,7 @@
 
 let Defs = [EFLAGS] in {
 
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
 let Uses = [CL] in {
 def SHL8rCL  : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "shl{b}\t{%cl, $dst|$dst, CL}",
@@ -62,9 +62,10 @@ def SHL64r1  : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
                  "shl{q}\t$dst", [], IIC_SR>;
 } // hasSideEffects = 0
 } // isConvertibleToThreeAddress = 1
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
 
 
+let SchedRW = [WriteShiftLd, WriteRMW] in {
 // FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
 // using CL?
 let Uses = [CL] in {
@@ -118,8 +119,9 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
                   "shl{q}\t$dst",
                  [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
                  IIC_SR>;
+} // SchedRW
 
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
 let Uses = [CL] in {
 def SHR8rCL  : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "shr{b}\t{%cl, $dst|$dst, CL}",
@@ -163,9 +165,10 @@ def SHR32r1  : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
 def SHR64r1  : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
                  "shr{q}\t$dst",
                  [(set GR64:$dst, (srl GR64:$src1, (i8 1)))], IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
 
 
+let SchedRW = [WriteShiftLd, WriteRMW] in {
 let Uses = [CL] in {
 def SHR8mCL  : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
                  "shr{b}\t{%cl, $dst|$dst, CL}",
@@ -216,8 +219,9 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
                   "shr{q}\t$dst",
                  [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
                  IIC_SR>;
+} // SchedRW
 
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
 let Uses = [CL] in {
 def SAR8rCL  : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "sar{b}\t{%cl, $dst|$dst, CL}",
@@ -273,9 +277,10 @@ def SAR64r1  : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
                  "sar{q}\t$dst",
                  [(set GR64:$dst, (sra GR64:$src1, (i8 1)))],
                  IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
 
 
+let SchedRW = [WriteShiftLd, WriteRMW] in {
 let Uses = [CL] in {
 def SAR8mCL  : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
                  "sar{b}\t{%cl, $dst|$dst, CL}",
@@ -330,13 +335,14 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
                   "sar{q}\t$dst",
                  [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)],
                  IIC_SR>;
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // Rotate instructions
 //===----------------------------------------------------------------------===//
 
 let hasSideEffects = 0 in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
 def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
                "rcl{b}\t$dst", [], IIC_SR>;
 def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
@@ -405,6 +411,7 @@ def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
 
 } // Constraints = "$src = $dst"
 
+let SchedRW = [WriteShiftLd, WriteRMW] in {
 def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
                "rcl{b}\t$dst", [], IIC_SR>;
 def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
@@ -458,9 +465,10 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
 def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
                   "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
 }
+} // SchedRW
 } // hasSideEffects = 0
 
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
 // FIXME: provide shorter instructions when imm8 == 1
 let Uses = [CL] in {
 def ROL8rCL  : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -512,8 +520,9 @@ def ROL64r1  : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
                   "rol{q}\t$dst",
                   [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))],
                   IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
 
+let SchedRW = [WriteShiftLd, WriteRMW] in {
 let Uses = [CL] in {
 def ROL8mCL  : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
                  "rol{b}\t{%cl, $dst|$dst, CL}",
@@ -568,8 +577,9 @@ def ROL64m1  : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
                  "rol{q}\t$dst",
                [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
                IIC_SR>;
+} // SchedRW
 
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
 let Uses = [CL] in {
 def ROR8rCL  : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "ror{b}\t{%cl, $dst|$dst, CL}",
@@ -620,8 +630,9 @@ def ROR64r1  : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
                   "ror{q}\t$dst",
                   [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))],
                   IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
 
+let SchedRW = [WriteShiftLd, WriteRMW] in {
 let Uses = [CL] in {
 def ROR8mCL  : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
                  "ror{b}\t{%cl, $dst|$dst, CL}",
@@ -676,13 +687,14 @@ def ROR64m1  : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
                  "ror{q}\t$dst",
                [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)],
                IIC_SR>;
+} // SchedRW
 
 
 //===----------------------------------------------------------------------===//
 // Double shift instructions (generalizations of rotate)
 //===----------------------------------------------------------------------===//
 
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
 
 let Uses = [CL] in {
 def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), 
@@ -765,8 +777,9 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
                                        (i8 imm:$src3)))], IIC_SHD64_REG_IM>,
                  TB;
 }
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
 
+let SchedRW = [WriteShiftLd, WriteRMW] in {
 let Uses = [CL] in {
 def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
                    "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
@@ -840,6 +853,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
                                        (i8 imm:$src3)), addr:$dst)],
                                        IIC_SHD64_MEM_IM>,
                  TB;
+} // SchedRW
 
 } // Defs = [EFLAGS]
 
@@ -857,12 +871,12 @@ multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
 let neverHasSideEffects = 1 in {
   def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
                !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-               []>, TAXD, VEX;
+               []>, TAXD, VEX, Sched<[WriteShift]>;
   let mayLoad = 1 in
   def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
                (ins x86memop:$src1, i8imm:$src2),
                !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-               []>, TAXD, VEX;
+               []>, TAXD, VEX, Sched<[WriteShiftLd]>;
 }
 }
 
@@ -870,11 +884,17 @@ multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
 let neverHasSideEffects = 1 in {
   def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
-             VEX_4VOp3;
+             VEX_4VOp3, Sched<[WriteShift]>;
   let mayLoad = 1 in
   def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
-             VEX_4VOp3;
+             VEX_4VOp3,
+             Sched<[WriteShiftLd,
+                    // x86memop:$src1
+                    ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+                    ReadDefault,
+                    // RC:$src1
+                    ReadAfterLd]>;
 }
 }
 
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 3caa1b538c3b..bab3cdd00095 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -13,6 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+let SchedRW = [WriteSystem] in {
 let Defs = [RAX, RDX] in
   def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>,
               TB;
@@ -35,6 +36,7 @@ let Uses = [EFLAGS] in
   def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
 def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
               [(int_x86_int (i8 3))], IIC_INT3>;
+} // SchedRW
 
 def : Pat<(debugtrap),
           (INT3)>;
@@ -43,6 +45,7 @@ def : Pat<(debugtrap),
 // FIXME: This doesn't work because InstAlias can't match immediate constants.
 //def : InstAlias<"int\t$3", (INT3)>;
 
+let SchedRW = [WriteSystem] in {
 
 def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
               [(int_x86_int imm:$trap)], IIC_INT>;
@@ -65,11 +68,13 @@ def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize;
 def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>;
 def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
              Requires<[In64BitMode]>;
+} // SchedRW
 
 
 //===----------------------------------------------------------------------===//
 //  Input/Output Instructions.
 //
+let SchedRW = [WriteSystem] in {
 let Defs = [AL], Uses = [DX] in
 def IN8rr  : I<0xEC, RawFrm, (outs), (ins),
                "in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>;
@@ -113,10 +118,12 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
 def IN8  : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>;
 def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>,  OpSize;
 def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>;
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // Moves to and from debug registers
 
+let SchedRW = [WriteSystem] in {
 def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB;
 def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
@@ -126,10 +133,12 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
 def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
                 "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // Moves to and from control registers
 
+let SchedRW = [WriteSystem] in {
 def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB;
 def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
@@ -139,6 +148,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
 def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
                 "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // Segment override instruction prefixes
@@ -155,6 +165,7 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
 // Moves to and from segment registers.
 //
 
+let SchedRW = [WriteMove] in {
 def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
                 "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize;
 def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
@@ -182,10 +193,12 @@ def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
 def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
                  "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // Segmentation support instructions.
 
+let SchedRW = [WriteSystem] in {
 def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
 
 def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), 
@@ -347,10 +360,12 @@ def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
               "verw\t$seg", [], IIC_VERW_MEM>, TB;
 def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
               "verw\t$seg", [], IIC_VERW_REG>, TB;
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // Descriptor-table support instructions
 
+let SchedRW = [WriteSystem] in {
 def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
               "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
 def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
@@ -385,9 +400,11 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
                 "lldt{w}\t$src", [], IIC_LLDT_REG>, TB;
 def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
                 "lldt{w}\t$src", [], IIC_LLDT_MEM>, TB;
-                
+} // SchedRW
+
 //===----------------------------------------------------------------------===//
 // Specialized register support
+let SchedRW = [WriteSystem] in {
 def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
 def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
 def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB;
@@ -410,14 +427,18 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
                 "lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
                 
 def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB;
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // Cache instructions
+let SchedRW = [WriteSystem] in {
 def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB;
 def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // XSAVE instructions
+let SchedRW = [WriteSystem] in {
 let Defs = [RDX, RAX], Uses = [RCX] in
   def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
 
@@ -428,16 +449,17 @@ let Uses = [RDX, RAX] in {
   def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
                "xsave\t$dst", []>, TB;
   def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
-                 "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+                 "xsave{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
   def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
                "xrstor\t$dst", []>, TB;
   def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
-                 "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+                 "xrstor{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
   def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
                   "xsaveopt\t$dst", []>, TB;
   def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
-                    "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+                    "xsaveopt{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
 }
+} // SchedRW
 
 //===----------------------------------------------------------------------===//
 // VIA PadLock crypto instructions
@@ -493,8 +515,15 @@ let Predicates = [HasFSGSBase, In64BitMode] in {
 //===----------------------------------------------------------------------===//
 // INVPCID Instruction
 def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
-                "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+                "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
                 Requires<[In32BitMode]>;
 def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
-                "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+                "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
                 Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// SMAP Instruction
+let Defs = [EFLAGS], Uses = [EFLAGS] in {
+  def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
+  def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
+}
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index a37a8cc74417..363a190aa854 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -15,6 +15,9 @@
 //===----------------------------------------------------------------------===//
 // TSX instructions
 
+def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
+                     [SDNPHasChain, SDNPSideEffect]>;
+
 let usesCustomInserter = 1 in
 def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
                "# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
@@ -27,6 +30,10 @@ def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst),
 def XEND : I<0x01, MRM_D5, (outs), (ins),
              "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
 
+let Defs = [EFLAGS] in
+def XTEST : I<0x01, MRM_D6, (outs), (ins),
+              "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasTSX]>;
+
 def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
                  "xabort\t$imm",
                  [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
new file mode 100644
index 000000000000..84c9203c3787
--- /dev/null
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -0,0 +1,126 @@
+//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Haswell to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def HaswellModel : SchedMachineModel {
+  // All x86 instructions are modeled as a single micro-op, and HW can decode 4
+  // instructions per cycle.
+  let IssueWidth = 4;
+  let MinLatency = 0; // 0 = Out-of-order execution.
+  let LoadLatency = 4;
+  let ILPWindow = 30;
+  let MispredictPenalty = 16;
+}
+
+let SchedModel = HaswellModel in {
+
+// Haswell can issue micro-ops to 8 different ports in one cycle.
+
+// Ports 0, 1, 5, 6 and 7 handle all computation.
+// Port 4 gets the data half of stores. Store data can be available later than
+// the store address, but since we don't model the latency of stores, we can
+// ignore that.
+// Ports 2 and 3 are identical. They handle loads and the address half of
+// stores. Port 7 can handle address calculations.
+def HWPort0 : ProcResource<1>;
+def HWPort1 : ProcResource<1>;
+def HWPort2 : ProcResource<1>;
+def HWPort3 : ProcResource<1>;
+def HWPort4 : ProcResource<1>;
+def HWPort5 : ProcResource<1>;
+def HWPort6 : ProcResource<1>;
+def HWPort7 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def HWPort23  : ProcResGroup<[HWPort2, HWPort3]>;
+def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
+def HWPort05  : ProcResGroup<[HWPort0, HWPort5]>;
+def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
+def HWPort15  : ProcResGroup<[HWPort1, HWPort5]>;
+def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
+def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
+
+// Integer division issued on port 0.
+def HWDivider : ProcResource<1>;
+
+// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
+                          ProcResourceKind ExePort,
+                          int Lat> {
+  // Register variant is using a single cycle on ExePort.
+  def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+  // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
+  // latency.
+  def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> {
+     let Latency = !add(Lat, 4);
+  }
+}
+
+// A folded store needs a cycle on port 4 for the store data, but it does not
+// need an extra port 2/3 cycle to recompute the address.
+def : WriteRes<WriteRMW, [HWPort4]>;
+
+def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
+def : WriteRes<WriteLoad,  [HWPort23]> { let Latency = 4; }
+def : WriteRes<WriteMove,  [HWPort0156]>;
+def : WriteRes<WriteZero,  []>;
+
+defm : HWWriteResPair<WriteALU,   HWPort0156, 1>;
+defm : HWWriteResPair<WriteIMul,  HWPort1,   3>;
+defm : HWWriteResPair<WriteShift, HWPort056,  1>;
+defm : HWWriteResPair<WriteJump,  HWPort5,   1>;
+
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [HWPort15]>;
+
+// This is quite rough, latency depends on the dividend.
+def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> {
+  let Latency = 25;
+  let ResourceCycles = [1, 10];
+}
+def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> {
+  let Latency = 29;
+  let ResourceCycles = [1, 1, 10];
+}
+
+// Scalar and vector floating point.
+defm : HWWriteResPair<WriteFAdd,   HWPort1, 3>;
+defm : HWWriteResPair<WriteFMul,   HWPort0, 5>;
+defm : HWWriteResPair<WriteFDiv,   HWPort0, 12>; // 10-14 cycles.
+defm : HWWriteResPair<WriteFRcp,   HWPort0, 5>;
+defm : HWWriteResPair<WriteFSqrt,  HWPort0, 15>;
+defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>;
+defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>;
+defm : HWWriteResPair<WriteCvtF2F, HWPort1, 3>;
+
+// Vector integer operations.
+defm : HWWriteResPair<WriteVecShift, HWPort05,  1>;
+defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>;
+defm : HWWriteResPair<WriteVecALU,   HWPort15,  1>;
+defm : HWWriteResPair<WriteVecIMul,  HWPort0,   5>;
+defm : HWWriteResPair<WriteShuffle,  HWPort15,  1>;
+
+def : WriteRes<WriteSystem,     [HWPort0156]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
+} // SchedModel
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
new file mode 100644
index 000000000000..b36b3ad947e0
--- /dev/null
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -0,0 +1,122 @@
+//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Sandy Bridge to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def SandyBridgeModel : SchedMachineModel {
+  // All x86 instructions are modeled as a single micro-op, and SB can decode 4
+  // instructions per cycle.
+  // FIXME: Identify instructions that aren't a single fused micro-op.
+  let IssueWidth = 4;
+  let MinLatency = 0; // 0 = Out-of-order execution.
+  let LoadLatency = 4;
+  let ILPWindow = 20;
+  let MispredictPenalty = 16;
+}
+
+let SchedModel = SandyBridgeModel in {
+
+// Sandy Bridge can issue micro-ops to 6 different ports in one cycle.
+
+// Ports 0, 1, and 5 handle all computation.
+def SBPort0 : ProcResource<1>;
+def SBPort1 : ProcResource<1>;
+def SBPort5 : ProcResource<1>;
+
+// Ports 2 and 3 are identical. They handle loads and the address half of
+// stores.
+def SBPort23 : ProcResource<2>;
+
+// Port 4 gets the data half of stores. Store data can be available later than
+// the store address, but since we don't model the latency of stores, we can
+// ignore that.
+def SBPort4 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def SBPort05  : ProcResGroup<[SBPort0, SBPort5]>;
+def SBPort15  : ProcResGroup<[SBPort1, SBPort5]>;
+def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;
+
+// Integer division issued on port 0.
+def SBDivider : ProcResource<1>;
+
+// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
+                          ProcResourceKind ExePort,
+                          int Lat> {
+  // Register variant is using a single cycle on ExePort.
+  def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+  // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
+  // latency.
+  def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> {
+     let Latency = !add(Lat, 4);
+  }
+}
+
+// A folded store needs a cycle on port 4 for the store data, but it does not
+// need an extra port 2/3 cycle to recompute the address.
+def : WriteRes<WriteRMW, [SBPort4]>;
+
+def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
+def : WriteRes<WriteLoad,  [SBPort23]> { let Latency = 4; }
+def : WriteRes<WriteMove,  [SBPort015]>;
+def : WriteRes<WriteZero,  []>;
+
+defm : SBWriteResPair<WriteALU,   SBPort015, 1>;
+defm : SBWriteResPair<WriteIMul,  SBPort1,   3>;
+defm : SBWriteResPair<WriteShift, SBPort05,  1>;
+defm : SBWriteResPair<WriteJump,  SBPort5,   1>;
+
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [SBPort15]>;
+
+// This is quite rough, latency depends on the dividend.
+def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> {
+  let Latency = 25;
+  let ResourceCycles = [1, 10];
+}
+def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> {
+  let Latency = 29;
+  let ResourceCycles = [1, 1, 10];
+}
+
+// Scalar and vector floating point.
+defm : SBWriteResPair<WriteFAdd,   SBPort1, 3>;
+defm : SBWriteResPair<WriteFMul,   SBPort0, 5>;
+defm : SBWriteResPair<WriteFDiv,   SBPort0, 12>; // 10-14 cycles.
+defm : SBWriteResPair<WriteFRcp,   SBPort0, 5>;
+defm : SBWriteResPair<WriteFSqrt,  SBPort0, 15>;
+defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
+defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
+defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;
+
+// Vector integer operations.
+defm : SBWriteResPair<WriteVecShift, SBPort05,  1>;
+defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
+defm : SBWriteResPair<WriteVecALU,   SBPort15,  1>;
+defm : SBWriteResPair<WriteVecIMul,  SBPort0,   5>;
+defm : SBWriteResPair<WriteShuffle,  SBPort15,  1>;
+
+def : WriteRes<WriteSystem,     [SBPort015]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
+} // SchedModel
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index da0ca7d28ee7..9fbde88b7100 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -53,6 +53,10 @@ def WriteLoad  : SchedWrite;
 def WriteStore : SchedWrite;
 def WriteMove  : SchedWrite;
 
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def WriteZero : SchedWrite;
+
 // Branches don't produce values, so they have no latency, but they still
 // consume resources. Indirect branches can fold loads.
 defm WriteJump : X86SchedWritePair;
@@ -63,6 +67,10 @@ defm WriteFMul  : X86SchedWritePair; // Floating point multiplication.
 defm WriteFDiv  : X86SchedWritePair; // Floating point division.
 defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
 defm WriteFRcp  : X86SchedWritePair; // Floating point reciprocal.
+defm WriteFMA   : X86SchedWritePair; // Fused Multiply Add.
+
+// FMA Scheduling helper class.
+class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
 
 // Vector integer operations.
 defm WriteVecALU   : X86SchedWritePair; // Vector integer ALU op, no logicals.
@@ -79,9 +87,14 @@ defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
 defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
 defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
 
+// Catch-all for expensive system instructions.
+def WriteSystem : SchedWrite;
+
+// Old microcoded instructions that nobody use.
+def WriteMicrocoded : SchedWrite;
+
 //===----------------------------------------------------------------------===//
 // Instruction Itinerary classes used for X86
-def IIC_DEFAULT     : InstrItinClass;
 def IIC_ALU_MEM     : InstrItinClass;
 def IIC_ALU_NONMEM  : InstrItinClass;
 def IIC_LEA         : InstrItinClass;
@@ -556,3 +569,5 @@ def GenericModel : SchedMachineModel {
 }
 
 include "X86ScheduleAtom.td"
+include "X86SchedSandyBridge.td"
+include "X86SchedHaswell.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 1e5f2d6c9a53..cce8f1b11436 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -33,7 +33,6 @@ def AtomItineraries : ProcessorItineraries<
   // InstrItinData<class, [InstrStage<N, [P0], 0>,  InstrStage<N, [P1]>] >,
   //
   // Default is 1 cycle, port0 or port1
-  InstrItinData<IIC_DEFAULT, [InstrStage<1, [Port0, Port1]>] >,
   InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >,
   InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
   InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >,
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 0f2c008ab965..14619b63927b 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -37,8 +37,7 @@ using namespace llvm;
 /// ClassifyBlockAddressReference - Classify a blockaddress reference for the
 /// current subtarget according to how we should reference it in a non-pcrel
 /// context.
-unsigned char X86Subtarget::
-ClassifyBlockAddressReference() const {
+unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
   if (isPICStyleGOT())    // 32-bit ELF targets.
     return X86II::MO_GOTOFF;
 
@@ -283,6 +282,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
         HasLZCNT = true;
         ToggleFeature(X86::FeatureLZCNT);
       }
+      if (IsIntel && ((ECX >> 8) & 0x1)) {
+        HasPRFCHW = true;
+        ToggleFeature(X86::FeaturePRFCHW);
+      }
       if (IsAMD) {
         if ((ECX >> 6) & 0x1) {
           HasSSE4A = true;
@@ -310,6 +313,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
         HasBMI = true;
         ToggleFeature(X86::FeatureBMI);
       }
+      if ((EBX >> 4) & 0x1) {
+        HasHLE = true;
+        ToggleFeature(X86::FeatureHLE);
+      }
       if (IsIntel && ((EBX >> 5) & 0x1)) {
         X86SSELevel = AVX2;
         ToggleFeature(X86::FeatureAVX2);
@@ -322,6 +329,14 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
         HasRTM = true;
         ToggleFeature(X86::FeatureRTM);
       }
+      if (IsIntel && ((EBX >> 19) & 0x1)) {
+        HasADX = true;
+        ToggleFeature(X86::FeatureADX);
+      }
+      if (IsIntel && ((EBX >> 18) & 0x1)) {
+        HasRDSEED = true;
+        ToggleFeature(X86::FeatureRDSEED);
+      }
     }
   }
 }
@@ -439,7 +454,10 @@ void X86Subtarget::initializeEnvironment() {
   HasBMI = false;
   HasBMI2 = false;
   HasRTM = false;
+  HasHLE = false;
   HasADX = false;
+  HasPRFCHW = false;
+  HasRDSEED = false;
   IsBTMemSlow = false;
   IsUAMemFast = false;
   HasVectorUAMem = false;
@@ -448,6 +466,7 @@ void X86Subtarget::initializeEnvironment() {
   HasSlowDivide = false;
   PostRAScheduler = false;
   PadShortFunctions = false;
+  CallRegIndirect = false;
   stackAlignment = 4;
   // FIXME: this is a known good value for Yonah. How about others?
   MaxInlineSizeThreshold = 128;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index e97da4b6f4f2..6fbdb1d5f00f 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -121,9 +121,18 @@ class X86Subtarget : public X86GenSubtargetInfo {
   /// HasRTM - Processor has RTM instructions.
   bool HasRTM;
 
+  /// HasHLE - Processor has HLE.
+  bool HasHLE;
+
   /// HasADX - Processor has ADX instructions.
   bool HasADX;
 
+  /// HasPRFCHW - Processor has PRFCHW instructions.
+  bool HasPRFCHW;
+
+  /// HasRDSEED - Processor has RDSEED instructions.
+  bool HasRDSEED;
+
   /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
   bool IsBTMemSlow;
 
@@ -153,6 +162,10 @@ class X86Subtarget : public X86GenSubtargetInfo {
   /// a stall when returning too early.
   bool PadShortFunctions;
 
+  /// CallRegIndirect - True if the Calls with memory reference should be converted
+  /// to a register-based indirect call.
+  bool CallRegIndirect;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -253,7 +266,10 @@ class X86Subtarget : public X86GenSubtargetInfo {
   bool hasBMI() const { return HasBMI; }
   bool hasBMI2() const { return HasBMI2; }
   bool hasRTM() const { return HasRTM; }
+  bool hasHLE() const { return HasHLE; }
   bool hasADX() const { return HasADX; }
+  bool hasPRFCHW() const { return HasPRFCHW; }
+  bool hasRDSEED() const { return HasRDSEED; }
   bool isBTMemSlow() const { return IsBTMemSlow; }
   bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
   bool hasVectorUAMem() const { return HasVectorUAMem; }
@@ -261,6 +277,7 @@ class X86Subtarget : public X86GenSubtargetInfo {
   bool useLeaForSP() const { return UseLeaForSP; }
   bool hasSlowDivide() const { return HasSlowDivide; }
   bool padShortFunctions() const { return PadShortFunctions; }
+  bool callRegIndirect() const { return CallRegIndirect; }
 
   bool isAtom() const { return X86ProcFamily == IntelAtom; }
 
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index be2a997b8e0f..eba9d782d5c3 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -86,7 +86,9 @@ class X86TTI : public ImmutablePass, public TargetTransformInfo {
   virtual unsigned getNumberOfRegisters(bool Vector) const;
   virtual unsigned getRegisterBitWidth(bool Vector) const;
   virtual unsigned getMaximumUnrollFactor() const;
-  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                          OperandValueKind,
+                                          OperandValueKind) const;
   virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                                   int Index, Type *SubTp) const;
   virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -162,13 +164,109 @@ unsigned X86TTI::getMaximumUnrollFactor() const {
   return 2;
 }
 
-unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                        OperandValueKind Op1Info,
+                                        OperandValueKind Op2Info) const {
   // Legalize the type.
   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
+  static const CostTblEntry<MVT> AVX2CostTable[] = {
+    // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
+    // customize them to detect the cases where shift amount is a scalar one.
+    { ISD::SHL,     MVT::v4i32,    1 },
+    { ISD::SRL,     MVT::v4i32,    1 },
+    { ISD::SRA,     MVT::v4i32,    1 },
+    { ISD::SHL,     MVT::v8i32,    1 },
+    { ISD::SRL,     MVT::v8i32,    1 },
+    { ISD::SRA,     MVT::v8i32,    1 },
+    { ISD::SHL,     MVT::v2i64,    1 },
+    { ISD::SRL,     MVT::v2i64,    1 },
+    { ISD::SHL,     MVT::v4i64,    1 },
+    { ISD::SRL,     MVT::v4i64,    1 },
+
+    { ISD::SHL,  MVT::v32i8,  42 }, // cmpeqb sequence.
+    { ISD::SHL,  MVT::v16i16,  16*10 }, // Scalarized.
+
+    { ISD::SRL,  MVT::v32i8,  32*10 }, // Scalarized.
+    { ISD::SRL,  MVT::v16i16,  8*10 }, // Scalarized.
+
+    { ISD::SRA,  MVT::v32i8,  32*10 }, // Scalarized.
+    { ISD::SRA,  MVT::v16i16,  16*10 }, // Scalarized.
+    { ISD::SRA,  MVT::v4i64,  4*10 }, // Scalarized.
+  };
+
+  // Look for AVX2 lowering tricks.
+  if (ST->hasAVX2()) {
+    int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable),
+                                   ISD, LT.second);
+    if (Idx != -1)
+      return LT.first * AVX2CostTable[Idx].Cost;
+  }
+
+  static const CostTblEntry<MVT> SSE2UniformConstCostTable[] = {
+    // We don't correctly identify costs of casts because they are marked as
+    // custom.
+    // Constant splats are cheaper for the following instructions.
+    { ISD::SHL,  MVT::v16i8,  1 }, // psllw.
+    { ISD::SHL,  MVT::v8i16,  1 }, // psllw.
+    { ISD::SHL,  MVT::v4i32,  1 }, // pslld
+    { ISD::SHL,  MVT::v2i64,  1 }, // psllq.
+
+    { ISD::SRL,  MVT::v16i8,  1 }, // psrlw.
+    { ISD::SRL,  MVT::v8i16,  1 }, // psrlw.
+    { ISD::SRL,  MVT::v4i32,  1 }, // psrld.
+    { ISD::SRL,  MVT::v2i64,  1 }, // psrlq.
+
+    { ISD::SRA,  MVT::v16i8,  4 }, // psrlw, pand, pxor, psubb.
+    { ISD::SRA,  MVT::v8i16,  1 }, // psraw.
+    { ISD::SRA,  MVT::v4i32,  1 }, // psrad.
+  };
+
+  if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+      ST->hasSSE2()) {
+    int Idx = CostTableLookup<MVT>(SSE2UniformConstCostTable,
+                                   array_lengthof(SSE2UniformConstCostTable),
+                                   ISD, LT.second);
+    if (Idx != -1)
+      return LT.first * SSE2UniformConstCostTable[Idx].Cost;
+  }
+
+
+  static const CostTblEntry<MVT> SSE2CostTable[] = {
+    // We don't correctly identify costs of casts because they are marked as
+    // custom.
+    // For some cases, where the shift amount is a scalar we would be able
+    // to generate better code. Unfortunately, when this is the case the value
+    // (the splat) will get hoisted out of the loop, thereby making it invisible
+    // to ISel. The cost model must return worst case assumptions because it is
+    // used for vectorization and we don't want to make vectorized code worse
+    // than scalar code.
+    { ISD::SHL,  MVT::v16i8,  30 }, // cmpeqb sequence.
+    { ISD::SHL,  MVT::v8i16,  8*10 }, // Scalarized.
+    { ISD::SHL,  MVT::v4i32,  2*5 }, // We optimized this using mul.
+    { ISD::SHL,  MVT::v2i64,  2*10 }, // Scalarized.
+
+    { ISD::SRL,  MVT::v16i8,  16*10 }, // Scalarized.
+    { ISD::SRL,  MVT::v8i16,  8*10 }, // Scalarized.
+    { ISD::SRL,  MVT::v4i32,  4*10 }, // Scalarized.
+    { ISD::SRL,  MVT::v2i64,  2*10 }, // Scalarized.
+
+    { ISD::SRA,  MVT::v16i8,  16*10 }, // Scalarized.
+    { ISD::SRA,  MVT::v8i16,  8*10 }, // Scalarized.
+    { ISD::SRA,  MVT::v4i32,  4*10 }, // Scalarized.
+    { ISD::SRA,  MVT::v2i64,  2*10 }, // Scalarized.
+  };
+
+  if (ST->hasSSE2()) {
+    int Idx = CostTableLookup<MVT>(SSE2CostTable, array_lengthof(SSE2CostTable),
+                                   ISD, LT.second);
+    if (Idx != -1)
+      return LT.first * SSE2CostTable[Idx].Cost;
+  }
+
   static const CostTblEntry<MVT> AVX1CostTable[] = {
     // We don't have to scalarize unsupported ops. We can issue two half-sized
     // operations and we only need to extract the upper YMM half.
@@ -213,7 +311,8 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
     return 6;
 
   // Fallback to the default implementation.
-  return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty);
+  return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+                                                     Op2Info);
 }
 
 unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
@@ -235,9 +334,44 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
+  std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
+  std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
+
+  static const TypeConversionCostTblEntry<MVT> SSE2ConvTbl[] = {
+    // These are somewhat magic numbers justified by looking at the output of
+    // Intel's IACA, running some kernels and making sure when we take
+    // legalization into account the throughput will be overestimated.
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+    // There are faster sequences for float conversions.
+    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+  };
+
+  if (ST->hasSSE2() && !ST->hasAVX()) {
+    int Idx = ConvertCostTableLookup<MVT>(SSE2ConvTbl,
+                                          array_lengthof(SSE2ConvTbl),
+                                          ISD, LTDest.second, LTSrc.second);
+    if (Idx != -1)
+      return LTSrc.first * SSE2ConvTbl[Idx].Cost;
+  }
+
   EVT SrcTy = TLI->getValueType(Src);
   EVT DstTy = TLI->getValueType(Dst);
 
+  // The function getSimpleVT only handles simple value types.
   if (!SrcTy.isSimple() || !DstTy.isSimple())
     return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
 
@@ -248,17 +382,40 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
     { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
     { ISD::TRUNCATE,    MVT::v4i32, MVT::v4i64, 1 },
     { ISD::TRUNCATE,    MVT::v8i16, MVT::v8i32, 1 },
-    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i8,  1 },
-    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8,  1 },
-    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i8,  1 },
-    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8,  1 },
+
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i1,  8 },
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i8,  8 },
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i16, 5 },
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i32, 1 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i1,  3 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8,  3 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i16, 3 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
+    { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i1,  3 },
+    { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i8,  3 },
+    { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i16, 3 },
+    { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i32, 1 },
+
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i1,  6 },
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i8,  5 },
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i16, 5 },
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i32, 9 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i1,  7 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8,  2 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i32, 6 },
+    { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i1,  7 },
+    { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i8,  2 },
+    { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i16, 2 },
+    { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i32, 6 },
+
     { ISD::FP_TO_SINT,  MVT::v8i8,  MVT::v8f32, 1 },
     { ISD::FP_TO_SINT,  MVT::v4i8,  MVT::v4f32, 1 },
     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1,  6 },
     { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1,  9 },
     { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1,  8 },
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8,  8 },
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 },
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8,  6 },
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
     { ISD::TRUNCATE,    MVT::v8i32, MVT::v8i64, 3 },
   };
 
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 7e7d3962a728..7b99967c4f32 100644
--- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -89,6 +89,11 @@ static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
                                               uint64_t Address,
                                               const void *Decoder);
 
+static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
+                                             unsigned RegNo,
+                                             uint64_t Address,
+                                             const void *Decoder);
+
 static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
                                       uint64_t Address, const void *Decoder);
 
@@ -214,6 +219,18 @@ static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
+                                             unsigned RegNo,
+                                             uint64_t Address,
+                                             const void *Decoder)
+{
+  if (RegNo > 15)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, XCore::RRegsRegClassID, RegNo);
+  Inst.addOperand(MCOperand::CreateReg(Reg));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
                                       uint64_t Address, const void *Decoder) {
   if (Val > 11)
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 019c4570d93d..beeb07f831c6 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -261,7 +261,7 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
       BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
       MBB.erase(MBBI);
     } else {
-      int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+      int Opcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
       BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
     }
   }
@@ -371,7 +371,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
           .addImm(Amount);
       } else {
         assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
-        int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+        int Opcode = isU6 ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
         New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
           .addImm(Amount);
       }
@@ -409,7 +409,7 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
   }
   if (RegInfo->requiresRegisterScavenging(MF)) {
     // Reserve a slot close to SP or frame pointer.
-    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+    RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                        RC->getAlignment(),
                                                        false));
   }
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index fbf86c523054..d16811c29071 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -185,36 +185,36 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         N->getOperand(2) };
     return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
-                                  Ops, 3);
+                                  Ops);
   }
   case XCoreISD::LSUB: {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         N->getOperand(2) };
     return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
-                                  Ops, 3);
+                                  Ops);
   }
   case XCoreISD::MACCU: {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                       N->getOperand(2), N->getOperand(3) };
     return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32,
-                                  Ops, 4);
+                                  Ops);
   }
   case XCoreISD::MACCS: {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                       N->getOperand(2), N->getOperand(3) };
     return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32,
-                                  Ops, 4);
+                                  Ops);
   }
   case XCoreISD::LMUL: {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                       N->getOperand(2), N->getOperand(3) };
     return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
-                                  Ops, 4);
+                                  Ops);
   }
   case XCoreISD::CRC8: {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
     return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
-                                  Ops, 3);
+                                  Ops);
   }
   case ISD::BRIND:
     if (SDNode *ResNode = SelectBRIND(N))
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index e140ef26b469..03653cb2b3de 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -281,9 +281,9 @@ multiclass FRU6_LRU6_backwards_branch<bits<6> opc, string OpcStr> {
 }
 
 multiclass FRU6_LRU6_cp<bits<6> opc, string OpcStr> {
-  def _ru6: _FRU6<opc, (outs GRRegs:$a), (ins i32imm:$b),
+  def _ru6: _FRU6<opc, (outs RRegs:$a), (ins i32imm:$b),
                   !strconcat(OpcStr, " $a, cp[$b]"), []>;
-  def _lru6: _FLRU6<opc, (outs GRRegs:$a), (ins i32imm:$b),
+  def _lru6: _FLRU6<opc, (outs RRegs:$a), (ins i32imm:$b),
                     !strconcat(OpcStr, " $a, cp[$b]"), []>;
 }
 
@@ -515,29 +515,29 @@ def LMUL_l6r : _FL6R<
 
 //let Uses = [DP] in ...
 let neverHasSideEffects = 1, isReMaterializable = 1 in
-def LDAWDP_ru6: _FRU6<0b011000, (outs GRRegs:$a), (ins MEMii:$b),
+def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
                       "ldaw $a, dp[$b]", []>;
 
 let isReMaterializable = 1 in                    
-def LDAWDP_lru6: _FLRU6<0b011000, (outs GRRegs:$a), (ins MEMii:$b),
+def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
                         "ldaw $a, dp[$b]",
-                        [(set GRRegs:$a, ADDRdpii:$b)]>;
+                        [(set RRegs:$a, ADDRdpii:$b)]>;
 
 let mayLoad=1 in
-def LDWDP_ru6: _FRU6<0b010110, (outs GRRegs:$a), (ins MEMii:$b),
+def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
                      "ldw $a, dp[$b]", []>;
 
-def LDWDP_lru6: _FLRU6<0b010110, (outs GRRegs:$a), (ins MEMii:$b),
+def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
                        "ldw $a, dp[$b]",
-                       [(set GRRegs:$a, (load ADDRdpii:$b))]>;
+                       [(set RRegs:$a, (load ADDRdpii:$b))]>;
 
 let mayStore=1 in
-def STWDP_ru6 : _FRU6<0b010100, (outs), (ins GRRegs:$a, MEMii:$b),
+def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
                       "stw $a, dp[$b]", []>;
 
-def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins GRRegs:$a, MEMii:$b),
+def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
                         "stw $a, dp[$b]",
-                        [(store GRRegs:$a, ADDRdpii:$b)]>;
+                        [(store RRegs:$a, ADDRdpii:$b)]>;
 
 //let Uses = [CP] in ..
 let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in
@@ -545,46 +545,38 @@ defm LDWCP : FRU6_LRU6_cp<0b011011, "ldw">;
 
 let Uses = [SP] in {
 let mayStore=1 in {
-def STWSP_ru6 : _FRU6<0b010101, (outs), (ins GRRegs:$a, i32imm:$b),
+def STWSP_ru6 : _FRU6<0b010101, (outs), (ins RRegs:$a, i32imm:$b),
                       "stw $a, sp[$b]",
-                      [(XCoreStwsp GRRegs:$a, immU6:$b)]>;
+                      [(XCoreStwsp RRegs:$a, immU6:$b)]>;
 
-def STWSP_lru6 : _FLRU6<0b010101, (outs), (ins GRRegs:$a, i32imm:$b),
+def STWSP_lru6 : _FLRU6<0b010101, (outs), (ins RRegs:$a, i32imm:$b),
                         "stw $a, sp[$b]",
-                        [(XCoreStwsp GRRegs:$a, immU16:$b)]>;
+                        [(XCoreStwsp RRegs:$a, immU16:$b)]>;
 }
 
 let mayLoad=1 in {
-def LDWSP_ru6 : _FRU6<0b010111, (outs GRRegs:$a), (ins i32imm:$b),
+def LDWSP_ru6 : _FRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
                       "ldw $a, sp[$b]", []>;
 
-def LDWSP_lru6 : _FLRU6<0b010111, (outs GRRegs:$a), (ins i32imm:$b),
+def LDWSP_lru6 : _FLRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
                         "ldw $a, sp[$b]", []>;
 }
 
 let neverHasSideEffects = 1 in {
-def LDAWSP_ru6 : _FRU6<0b011001, (outs GRRegs:$a), (ins i32imm:$b),
+def LDAWSP_ru6 : _FRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
                        "ldaw $a, sp[$b]", []>;
 
-def LDAWSP_lru6 : _FLRU6<0b011001, (outs GRRegs:$a), (ins i32imm:$b),
+def LDAWSP_lru6 : _FLRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
                          "ldaw $a, sp[$b]", []>;
-
-let isCodeGenOnly = 1 in
-def LDAWSP_ru6_RRegs : _FRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
-                             "ldaw $a, sp[$b]", []>;
-
-let isCodeGenOnly = 1 in
-def LDAWSP_lru6_RRegs : _FLRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
-                               "ldaw $a, sp[$b]", []>;
 }
 }
 
 let isReMaterializable = 1 in {
-def LDC_ru6 : _FRU6<0b011010, (outs GRRegs:$a), (ins i32imm:$b),
-                    "ldc $a, $b", [(set GRRegs:$a, immU6:$b)]>;
+def LDC_ru6 : _FRU6<0b011010, (outs RRegs:$a), (ins i32imm:$b),
+                    "ldc $a, $b", [(set RRegs:$a, immU6:$b)]>;
 
-def LDC_lru6 : _FLRU6<0b011010, (outs GRRegs:$a), (ins i32imm:$b),
-                      "ldc $a, $b", [(set GRRegs:$a, immU16:$b)]>;
+def LDC_lru6 : _FLRU6<0b011010, (outs RRegs:$a), (ins i32imm:$b),
+                      "ldc $a, $b", [(set RRegs:$a, immU16:$b)]>;
 }
 
 def SETC_ru6 : _FRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b),
@@ -932,6 +924,9 @@ def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
                               "bru $i\n$t",
                               [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>;
 
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BRU_1r : _F1R<0b001010, (outs), (ins GRRegs:$a), "bru $a", []>;
+
 let Defs=[SP], neverHasSideEffects=1 in
 def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a), "set sp, $a", []>;
 
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index 4c771e970069..6694b2882aca 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -51,6 +51,9 @@ def GRRegs : RegisterClass<"XCore", [i32], 32,
   R11)>;
 
 // Reserved
-def RRegs : RegisterClass<"XCore", [i32], 32, (add CP, DP, SP, LR)> {
+def RRegs : RegisterClass<"XCore", [i32], 32,
+  (add R0, R1, R2, R3,
+   R4, R5, R6, R7, R8, R9, R10,
+   R11, CP, DP, SP, LR)> {
   let isAllocatable = 0;
 }
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index a75212a386c5..bc5109b4d48d 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1,4 +1,4 @@
-//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===//
+//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,6 +14,8 @@
 // to the function does not create any copies of the pointer value that
 // outlive the call.  This more or less means that the pointer is only
 // dereferenced, and not returned from the function or stored in a global.
+// Finally, well-known library call declarations are marked with all
+// attributes that are consistent with the function's standard definition.
 // This pass is implemented as a bottom-up traversal of the call-graph.
 //
 //===----------------------------------------------------------------------===//
@@ -32,12 +34,14 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 
 STATISTIC(NumReadNone, "Number of functions marked readnone");
 STATISTIC(NumReadOnly, "Number of functions marked readonly");
 STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
 STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+STATISTIC(NumAnnotated, "Number of attributes added to library functions");
 
 namespace {
   struct FunctionAttrs : public CallGraphSCCPass {
@@ -62,14 +66,63 @@ namespace {
     // AddNoAliasAttrs - Deduce noalias attributes for the SCC.
     bool AddNoAliasAttrs(const CallGraphSCC &SCC);
 
+    // Utility methods used by inferPrototypeAttributes to add attributes
+    // and maintain annotation statistics.
+
+    void setDoesNotAccessMemory(Function &F) {
+      if (!F.doesNotAccessMemory()) {
+	F.setDoesNotAccessMemory();
+	++NumAnnotated;
+      }
+    }
+
+    void setOnlyReadsMemory(Function &F) {
+      if (!F.onlyReadsMemory()) {
+	F.setOnlyReadsMemory();
+	++NumAnnotated;
+      }
+    }
+
+    void setDoesNotThrow(Function &F) {
+      if (!F.doesNotThrow()) {
+	F.setDoesNotThrow();
+	++NumAnnotated;
+      }
+    }
+
+    void setDoesNotCapture(Function &F, unsigned n) {
+      if (!F.doesNotCapture(n)) {
+	F.setDoesNotCapture(n);
+	++NumAnnotated;
+      }
+    }
+
+    void setDoesNotAlias(Function &F, unsigned n) {
+      if (!F.doesNotAlias(n)) {
+	F.setDoesNotAlias(n);
+	++NumAnnotated;
+      }
+    }
+
+    // inferPrototypeAttributes - Analyze the name and prototype of the
+    // given function and set any applicable attributes.  Returns true
+    // if any attributes were set and false otherwise.
+    bool inferPrototypeAttributes(Function &F);
+
+    // annotateLibraryCalls - Adds attributes to well-known standard library
+    // call declarations.
+    bool annotateLibraryCalls(const CallGraphSCC &SCC);
+
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
       AU.addRequired<AliasAnalysis>();
+      AU.addRequired<TargetLibraryInfo>();
       CallGraphSCCPass::getAnalysisUsage(AU);
     }
 
   private:
     AliasAnalysis *AA;
+    TargetLibraryInfo *TLI;
   };
 }
 
@@ -77,6 +130,7 @@ char FunctionAttrs::ID = 0;
 INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
                 "Deduce function attributes", false, false)
 INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
 INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
                 "Deduce function attributes", false, false)
 
@@ -598,10 +652,693 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
   return MadeChange;
 }
 
+/// inferPrototypeAttributes - Analyze the name and prototype of the
+/// given function and set any applicable attributes.  Returns true
+/// if any attributes were set and false otherwise.
+bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
+  FunctionType *FTy = F.getFunctionType();
+  LibFunc::Func TheLibFunc;
+  if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc)))
+    return false;
+
+  switch (TheLibFunc) {
+  case LibFunc::strlen:
+    if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setOnlyReadsMemory(F);
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::strchr:
+  case LibFunc::strrchr:
+    if (FTy->getNumParams() != 2 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isIntegerTy())
+      return false;
+    setOnlyReadsMemory(F);
+    setDoesNotThrow(F);
+    break;
+  case LibFunc::strcpy:
+  case LibFunc::stpcpy:
+  case LibFunc::strcat:
+  case LibFunc::strtol:
+  case LibFunc::strtod:
+  case LibFunc::strtof:
+  case LibFunc::strtoul:
+  case LibFunc::strtoll:
+  case LibFunc::strtold:
+  case LibFunc::strncat:
+  case LibFunc::strncpy:
+  case LibFunc::stpncpy:
+  case LibFunc::strtoull:
+    if (FTy->getNumParams() < 2 ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::strxfrm:
+    if (FTy->getNumParams() != 3 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::strcmp:
+  case LibFunc::strspn:
+  case LibFunc::strncmp:
+  case LibFunc::strcspn:
+  case LibFunc::strcoll:
+  case LibFunc::strcasecmp:
+  case LibFunc::strncasecmp:
+    if (FTy->getNumParams() < 2 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setOnlyReadsMemory(F);
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::strstr:
+  case LibFunc::strpbrk:
+    if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setOnlyReadsMemory(F);
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::strtok:
+  case LibFunc::strtok_r:
+    if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::scanf:
+  case LibFunc::setbuf:
+  case LibFunc::setvbuf:
+    if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::strdup:
+  case LibFunc::strndup:
+    if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+        !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::stat:
+  case LibFunc::sscanf:
+  case LibFunc::sprintf:
+  case LibFunc::statvfs:
+    if (FTy->getNumParams() < 2 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::snprintf:
+    if (FTy->getNumParams() != 3 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(2)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 3);
+    break;
+  case LibFunc::setitimer:
+    if (FTy->getNumParams() != 3 ||
+        !FTy->getParamType(1)->isPointerTy() ||
+        !FTy->getParamType(2)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    setDoesNotCapture(F, 3);
+    break;
+  case LibFunc::system:
+    if (FTy->getNumParams() != 1 ||
+        !FTy->getParamType(0)->isPointerTy())
+      return false;
+    // May throw; "system" is a valid pthread cancellation point.
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::malloc:
+    if (FTy->getNumParams() != 1 ||
+        !FTy->getReturnType()->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    break;
+  case LibFunc::memcmp:
+    if (FTy->getNumParams() != 3 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setOnlyReadsMemory(F);
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::memchr:
+  case LibFunc::memrchr:
+    if (FTy->getNumParams() != 3)
+      return false;
+    setOnlyReadsMemory(F);
+    setDoesNotThrow(F);
+    break;
+  case LibFunc::modf:
+  case LibFunc::modff:
+  case LibFunc::modfl:
+  case LibFunc::memcpy:
+  case LibFunc::memccpy:
+  case LibFunc::memmove:
+    if (FTy->getNumParams() < 2 ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::memalign:
+    if (!FTy->getReturnType()->isPointerTy())
+      return false;
+    setDoesNotAlias(F, 0);
+    break;
+  case LibFunc::mkdir:
+  case LibFunc::mktime:
+    if (FTy->getNumParams() == 0 ||
+        !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::realloc:
+    if (FTy->getNumParams() != 2 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getReturnType()->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::read:
+    if (FTy->getNumParams() != 3 ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    // May throw; "read" is a valid pthread cancellation point.
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::rmdir:
+  case LibFunc::rewind:
+  case LibFunc::remove:
+  case LibFunc::realpath:
+    if (FTy->getNumParams() < 1 ||
+        !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::rename:
+  case LibFunc::readlink:
+    if (FTy->getNumParams() < 2 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::write:
+    if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+      return false;
+    // May throw; "write" is a valid pthread cancellation point.
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::bcopy:
+    if (FTy->getNumParams() != 3 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::bcmp:
+    if (FTy->getNumParams() != 3 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setOnlyReadsMemory(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::bzero:
+    if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::calloc:
+    if (FTy->getNumParams() != 2 ||
+        !FTy->getReturnType()->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    break;
+  case LibFunc::chmod:
+  case LibFunc::chown:
+  case LibFunc::ctermid:
+  case LibFunc::clearerr:
+  case LibFunc::closedir:
+    if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::atoi:
+  case LibFunc::atol:
+  case LibFunc::atof:
+  case LibFunc::atoll:
+    if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setOnlyReadsMemory(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::access:
+    if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::fopen:
+    if (FTy->getNumParams() != 2 ||
+        !FTy->getReturnType()->isPointerTy() ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::fdopen:
+    if (FTy->getNumParams() != 2 ||
+        !FTy->getReturnType()->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::feof:
+  case LibFunc::free:
+  case LibFunc::fseek:
+  case LibFunc::ftell:
+  case LibFunc::fgetc:
+  case LibFunc::fseeko:
+  case LibFunc::ftello:
+  case LibFunc::fileno:
+  case LibFunc::fflush:
+  case LibFunc::fclose:
+  case LibFunc::fsetpos:
+  case LibFunc::flockfile:
+  case LibFunc::funlockfile:
+  case LibFunc::ftrylockfile:
+    if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::ferror:
+    if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setOnlyReadsMemory(F);
+    break;
+  case LibFunc::fputc:
+  case LibFunc::fstat:
+  case LibFunc::frexp:
+  case LibFunc::frexpf:
+  case LibFunc::frexpl:
+  case LibFunc::fstatvfs:
+    if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::fgets:
+    if (FTy->getNumParams() != 3 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(2)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 3);
+  case LibFunc::fread:
+  case LibFunc::fwrite:
+    if (FTy->getNumParams() != 4 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(3)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 4);
+  case LibFunc::fputs:
+  case LibFunc::fscanf:
+  case LibFunc::fprintf:
+  case LibFunc::fgetpos:
+    if (FTy->getNumParams() < 2 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::getc:
+  case LibFunc::getlogin_r:
+  case LibFunc::getc_unlocked:
+    if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::getenv:
+    if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setOnlyReadsMemory(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::gets:
+  case LibFunc::getchar:
+    setDoesNotThrow(F);
+    break;
+  case LibFunc::getitimer:
+    if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::getpwnam:
+    if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::ungetc:
+    if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::uname:
+  case LibFunc::unlink:
+  case LibFunc::unsetenv:
+    if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::utime:
+  case LibFunc::utimes:
+    if (FTy->getNumParams() != 2 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::putc:
+    if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::puts:
+  case LibFunc::printf:
+  case LibFunc::perror:
+    if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::pread:
+  case LibFunc::pwrite:
+    if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+      return false;
+    // May throw; these are valid pthread cancellation points.
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::putchar:
+    setDoesNotThrow(F);
+    break;
+  case LibFunc::popen:
+    if (FTy->getNumParams() != 2 ||
+        !FTy->getReturnType()->isPointerTy() ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::pclose:
+    if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::vscanf:
+    if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::vsscanf:
+  case LibFunc::vfscanf:
+    if (FTy->getNumParams() != 3 ||
+        !FTy->getParamType(1)->isPointerTy() ||
+        !FTy->getParamType(2)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::valloc:
+    if (!FTy->getReturnType()->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    break;
+  case LibFunc::vprintf:
+    if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::vfprintf:
+  case LibFunc::vsprintf:
+    if (FTy->getNumParams() != 3 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::vsnprintf:
+    if (FTy->getNumParams() != 4 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(2)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 3);
+    break;
+  case LibFunc::open:
+    if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    // May throw; "open" is a valid pthread cancellation point.
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::opendir:
+    if (FTy->getNumParams() != 1 ||
+        !FTy->getReturnType()->isPointerTy() ||
+        !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::tmpfile:
+    if (!FTy->getReturnType()->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    break;
+  case LibFunc::times:
+    if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::htonl:
+  case LibFunc::htons:
+  case LibFunc::ntohl:
+  case LibFunc::ntohs:
+    setDoesNotThrow(F);
+    setDoesNotAccessMemory(F);
+    break;
+  case LibFunc::lstat:
+    if (FTy->getNumParams() != 2 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::lchown:
+    if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::qsort:
+    if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+      return false;
+    // May throw; places call through function pointer.
+    setDoesNotCapture(F, 4);
+    break;
+  case LibFunc::dunder_strdup:
+  case LibFunc::dunder_strndup:
+    if (FTy->getNumParams() < 1 ||
+        !FTy->getReturnType()->isPointerTy() ||
+        !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::dunder_strtok_r:
+    if (FTy->getNumParams() != 3 ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::under_IO_getc:
+    if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::under_IO_putc:
+    if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::dunder_isoc99_scanf:
+    if (FTy->getNumParams() < 1 ||
+        !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::stat64:
+  case LibFunc::lstat64:
+  case LibFunc::statvfs64:
+  case LibFunc::dunder_isoc99_sscanf:
+    if (FTy->getNumParams() < 1 ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::fopen64:
+    if (FTy->getNumParams() != 2 ||
+        !FTy->getReturnType()->isPointerTy() ||
+        !FTy->getParamType(0)->isPointerTy() ||
+        !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::fseeko64:
+  case LibFunc::ftello64:
+    if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 1);
+    break;
+  case LibFunc::tmpfile64:
+    if (!FTy->getReturnType()->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotAlias(F, 0);
+    break;
+  case LibFunc::fstat64:
+  case LibFunc::fstatvfs64:
+    if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+      return false;
+    setDoesNotThrow(F);
+    setDoesNotCapture(F, 2);
+    break;
+  case LibFunc::open64:
+    if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+      return false;
+    // May throw; "open" is a valid pthread cancellation point.
+    setDoesNotCapture(F, 1);
+    break;
+  default:
+    // Didn't mark any attributes.
+    return false;
+  }
+
+  return true;
+}
+
+/// annotateLibraryCalls - Adds attributes to well-known standard library
+/// call declarations.
+bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
+  bool MadeChange = false;
+
+  // Check each function in turn annotating well-known library function
+  // declarations with attributes.
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();
+
+    if (F != 0 && F->isDeclaration())
+      MadeChange |= inferPrototypeAttributes(*F);
+  }
+
+  return MadeChange;
+}
+
 bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
   AA = &getAnalysis<AliasAnalysis>();
+  TLI = &getAnalysis<TargetLibraryInfo>();
 
-  bool Changed = AddReadAttrs(SCC);
+  bool Changed = annotateLibraryCalls(SCC);
+  Changed |= AddReadAttrs(SCC);
   Changed |= AddNoCaptureAttrs(SCC);
   Changed |= AddNoAliasAttrs(SCC);
   return Changed;
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index dc99492990a3..201f320c43bd 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -42,6 +42,7 @@ namespace {
 
   private:
     SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+    SmallPtrSet<Constant *, 8> SeenConstants;
 
     /// GlobalIsNeeded - mark the specific global value as needed, and
     /// recursively mark anything that it uses as also needed.
@@ -151,6 +152,7 @@ bool GlobalDCE::runOnModule(Module &M) {
 
   // Make sure that all memory is released
   AliveGlobals.clear();
+  SeenConstants.clear();
 
   return Changed;
 }
@@ -190,12 +192,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
 void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
   if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
     return GlobalIsNeeded(GV);
-  
+
   // Loop over all of the operands of the constant, adding any globals they
   // use to the list of needed globals.
-  for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
-    if (Constant *OpC = dyn_cast<Constant>(*I))
-      MarkUsedGlobalsAsNeeded(OpC);
+  for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
+    // If we've already processed this constant there's no need to do it again.
+    Constant *Op = dyn_cast<Constant>(*I);
+    if (Op && SeenConstants.insert(Op))
+      MarkUsedGlobalsAsNeeded(Op);
+  }
 }
 
 // RemoveUnusedGlobalValue - Loop over all of the uses of the specified
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 2b9d6670cae9..b035a821b4cf 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -470,8 +470,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
 static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
                                        DataLayout *TD, TargetLibraryInfo *TLI) {
   bool Changed = false;
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
-    User *U = *UI++;
+  SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end());
+  while (!WorkList.empty()) {
+    User *U = WorkList.pop_back_val();
 
     if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
       if (Init) {
@@ -534,7 +535,6 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
       // us, and if they are all dead, nuke them without remorse.
       if (SafeToDestroyConstant(C)) {
         C->destroyConstant();
-        // This could have invalidated UI, start over from scratch.
         CleanupConstantGlobalUsers(V, Init, TD, TLI);
         return true;
       }
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 663ddb75f423..14201b12a08b 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -127,6 +127,12 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
 
   AdjustCallerSSPLevel(Caller, Callee);
 
+  // If the inlined function has a fixed stack segment, then make the caller
+  // have a fixed stack segment as well.
+  if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                           Attribute::FixedStackSegment))
+    Caller->addFnAttr(Attribute::FixedStackSegment);
+
   // Look at all of the allocas that we inlined through this call site.  If we
   // have already inlined other allocas through other calls into this function,
   // then we know that they have disjoint lifetimes and that we can merge them.
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 892100f0585a..4ce749cfec0a 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -72,6 +72,15 @@ STATISTIC(NumThunksWritten, "Number of thunks generated");
 STATISTIC(NumAliasesWritten, "Number of aliases generated");
 STATISTIC(NumDoubleWeak, "Number of new functions created");
 
+/// Returns the type id for a type to be hashed. We turn pointer types into
+/// integers here because the actual compare logic below considers pointers and
+/// integers of the same size as equal.
+static Type::TypeID getTypeIDForHash(Type *Ty) {
+  if (Ty->isPointerTy())
+    return Type::IntegerTyID;
+  return Ty->getTypeID();
+}
+
 /// Creates a hash-code for the function which is the same for any two
 /// functions that will compare equal, without looking at the instructions
 /// inside the function.
@@ -83,9 +92,9 @@ static unsigned profileFunction(const Function *F) {
   ID.AddInteger(F->getCallingConv());
   ID.AddBoolean(F->hasGC());
   ID.AddBoolean(FTy->isVarArg());
-  ID.AddInteger(FTy->getReturnType()->getTypeID());
+  ID.AddInteger(getTypeIDForHash(FTy->getReturnType()));
   for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
-    ID.AddInteger(FTy->getParamType(i)->getTypeID());
+    ID.AddInteger(getTypeIDForHash(FTy->getParamType(i)));
   return ID.ComputeHash();
 }
 
@@ -200,8 +209,7 @@ class FunctionComparator {
 
 // Any two pointers in the same address space are equivalent, intptr_t and
 // pointers are equivalent. Otherwise, standard type equivalence rules apply.
-bool FunctionComparator::isEquivalentType(Type *Ty1,
-                                          Type *Ty2) const {
+bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
   if (Ty1 == Ty2)
     return true;
   if (Ty1->getTypeID() != Ty2->getTypeID()) {
@@ -740,7 +748,13 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
   if (NewG->getReturnType()->isVoidTy()) {
     Builder.CreateRetVoid();
   } else {
-    Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
+    Type *RetTy = NewG->getReturnType();
+    if (CI->getType()->isIntegerTy() && RetTy->isPointerTy())
+      Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy));
+    else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy())
+      Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy));
+    else
+      Builder.CreateRet(Builder.CreateBitCast(CI, RetTy));
   }
 
   NewG->copyAttributesFrom(G);
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 027a9f2a6871..ffd07b6afcec 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -33,7 +33,12 @@ RunLoopVectorization("vectorize-loops",
                      cl::desc("Run the Loop vectorization passes"));
 
 static cl::opt<bool>
-RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes"));
+RunSLPVectorization("vectorize-slp",
+                    cl::desc("Run the SLP vectorization passes"));
+
+static cl::opt<bool>
+RunBBVectorization("vectorize-slp-aggressive",
+                    cl::desc("Run the BB vectorization passes"));
 
 static cl::opt<bool>
 UseGVNAfterVectorization("use-gvn-after-vectorization",
@@ -52,7 +57,8 @@ PassManagerBuilder::PassManagerBuilder() {
     DisableSimplifyLibCalls = false;
     DisableUnitAtATime = false;
     DisableUnrollLoops = false;
-    Vectorize = RunBBVectorization;
+    BBVectorize = RunBBVectorization;
+    SLPVectorize = RunSLPVectorization;
     LoopVectorize = RunLoopVectorization;
 }
 
@@ -207,7 +213,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
 
   addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
 
-  if (Vectorize) {
+  if (SLPVectorize)
+    MPM.add(createSLPVectorizerPass());     // Vectorize parallel scalar chains.
+
+  if (BBVectorize) {
     MPM.add(createBBVectorizePass());
     MPM.add(createInstructionCombiningPass());
     if (OptLevel > 1 && UseGVNAfterVectorization)
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index 72cfe2c985bc..a25696ec03a9 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -9,7 +9,7 @@ add_llvm_library(LLVMInstCombine
   InstCombineMulDivRem.cpp
   InstCombinePHI.cpp
   InstCombineSelect.cpp
-  InstCombineShifts.cpp 
+  InstCombineShifts.cpp
   InstCombineSimplifyDemanded.cpp
   InstCombineVectorOps.cpp
   )
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 1f6a3a5e335d..2a3607475009 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -233,6 +233,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
   bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
   Value *EmitGEPOffset(User *GEP);
+  Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
 
 public:
   // InsertNewInstBefore - insert an instruction New before instruction Old
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3c5781ca73e0..b96eb51081ee 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -24,9 +24,9 @@ namespace {
   /// Class representing coefficient of floating-point addend.
   /// This class needs to be highly efficient, which is especially true for
   /// the constructor. As of I write this comment, the cost of the default
-  /// constructor is merely 4-byte-store-zero (Assuming compiler is able to 
+  /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
   /// perform write-merging).
-  /// 
+  ///
   class FAddendCoef {
   public:
     // The constructor has to initialize a APFloat, which is uncessary for
@@ -37,31 +37,31 @@ namespace {
     //
     FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
     ~FAddendCoef();
-  
+
     void set(short C) {
       assert(!insaneIntVal(C) && "Insane coefficient");
       IsFp = false; IntVal = C;
     }
-  
+
     void set(const APFloat& C);
-  
+
     void negate();
-  
+
     bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
     Value *getValue(Type *) const;
-  
+
     // If possible, don't define operator+/operator- etc because these
     // operators inevitably call FAddendCoef's constructor which is not cheap.
     void operator=(const FAddendCoef &A);
     void operator+=(const FAddendCoef &A);
     void operator-=(const FAddendCoef &A);
     void operator*=(const FAddendCoef &S);
-  
+
     bool isOne() const { return isInt() && IntVal == 1; }
     bool isTwo() const { return isInt() && IntVal == 2; }
     bool isMinusOne() const { return isInt() && IntVal == -1; }
     bool isMinusTwo() const { return isInt() && IntVal == -2; }
-  
+
   private:
     bool insaneIntVal(int V) { return V > 4 || V < -4; }
     APFloat *getFpValPtr(void)
@@ -74,18 +74,28 @@ namespace {
       return *getFpValPtr();
     }
 
-    APFloat &getFpVal(void)
-      { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); }
-  
+    APFloat &getFpVal(void) {
+      assert(IsFp && BufHasFpVal && "Incorret state");
+      return *getFpValPtr();
+    }
+
     bool isInt() const { return !IsFp; }
 
+    // If the coefficient is represented by an integer, promote it to a
+    // floating point.
+    void convertToFpType(const fltSemantics &Sem);
+
+    // Construct an APFloat from a signed integer.
+    // TODO: We should get rid of this function when APFloat can be constructed
+    //       from an *SIGNED* integer.
+    APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
   private:
 
     bool IsFp;
-  
+
     // True iff FpValBuf contains an instance of APFloat.
     bool BufHasFpVal;
-  
+
     // The integer coefficient of an individual addend is either 1 or -1,
     // and we try to simplify at most 4 addends from neighboring at most
     // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
@@ -94,7 +104,7 @@ namespace {
 
     AlignedCharArrayUnion<APFloat> FpValBuf;
   };
-  
+
   /// FAddend is used to represent floating-point addend. An addend is
   /// represented as <C, V>, where the V is a symbolic value, and C is a
   /// constant coefficient. A constant addend is represented as <C, 0>.
@@ -102,10 +112,10 @@ namespace {
   class FAddend {
   public:
     FAddend() { Val = 0; }
-  
+
     Value *getSymVal (void) const { return Val; }
     const FAddendCoef &getCoef(void) const { return Coeff; }
-  
+
     bool isConstant() const { return Val == 0; }
     bool isZero() const { return Coeff.isZero(); }
 
@@ -114,17 +124,17 @@ namespace {
       { Coeff.set(Coefficient); Val = V; }
     void set(const ConstantFP* Coefficient, Value *V)
       { Coeff.set(Coefficient->getValueAPF()); Val = V; }
-  
+
     void negate() { Coeff.negate(); }
-  
+
     /// Drill down the U-D chain one step to find the definition of V, and
     /// try to break the definition into one or two addends.
     static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
-  
+
     /// Similar to FAddend::drillDownOneStep() except that the value being
     /// splitted is the addend itself.
     unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
-  
+
     void operator+=(const FAddend &T) {
       assert((Val == T.Val) && "Symbolic-values disagree");
       Coeff += T.Coeff;
@@ -132,12 +142,12 @@ namespace {
 
   private:
     void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
-  
+
     // This addend has the value of "Coeff * Val".
     Value *Val;
     FAddendCoef Coeff;
   };
-  
+
   /// FAddCombine is the class for optimizing an unsafe fadd/fsub along
   /// with its neighboring at most two instructions.
   ///
@@ -145,17 +155,17 @@ namespace {
   public:
     FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
     Value *simplify(Instruction *FAdd);
-  
+
   private:
     typedef SmallVector<const FAddend*, 4> AddendVect;
-  
+
     Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
 
     Value *performFactorization(Instruction *I);
 
     /// Convert given addend to a Value
     Value *createAddendVal(const FAddend &A, bool& NeedNeg);
-    
+
     /// Return the number of instructions needed to emit the N-ary addition.
     unsigned calcInstrNumber(const AddendVect& Vect);
     Value *createFSub(Value *Opnd0, Value *Opnd1);
@@ -165,10 +175,10 @@ namespace {
     Value *createFNeg(Value *V);
     Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
     void createInstPostProc(Instruction *NewInst);
-  
+
     InstCombiner::BuilderTy *Builder;
     Instruction *Instr;
-  
+
   private:
      // Debugging stuff are clustered here.
     #ifndef NDEBUG
@@ -180,7 +190,7 @@ namespace {
       void incCreateInstNum() {}
     #endif
   };
-} 
+}
 
 //===----------------------------------------------------------------------===//
 //
@@ -203,10 +213,34 @@ void FAddendCoef::set(const APFloat& C) {
   } else
     *P = C;
 
-  IsFp = BufHasFpVal = true; 
+  IsFp = BufHasFpVal = true;
 }
 
-void FAddendCoef::operator=(const FAddendCoef& That) {
+void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
+  if (!isInt())
+    return;
+
+  APFloat *P = getFpValPtr();
+  if (IntVal > 0)
+    new(P) APFloat(Sem, IntVal);
+  else {
+    new(P) APFloat(Sem, 0 - IntVal);
+    P->changeSign();
+  }
+  IsFp = BufHasFpVal = true;
+}
+
+APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
+  if (Val >= 0)
+    return APFloat(Sem, Val);
+
+  APFloat T(Sem, 0 - Val);
+  T.changeSign();
+
+  return T;
+}
+
+void FAddendCoef::operator=(const FAddendCoef &That) {
   if (That.isInt())
     set(That.IntVal);
   else
@@ -222,16 +256,16 @@ void FAddendCoef::operator+=(const FAddendCoef &That) {
       getFpVal().add(That.getFpVal(), RndMode);
     return;
   }
-  
+
   if (isInt()) {
     const APFloat &T = That.getFpVal();
-    set(T);
-    getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode);
+    convertToFpType(T.getSemantics());
+    getFpVal().add(T, RndMode);
     return;
   }
-  
+
   APFloat &T = getFpVal();
-  T.add(APFloat(T.getSemantics(), That.IntVal), RndMode);
+  T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
 }
 
 void FAddendCoef::operator-=(const FAddendCoef &That) {
@@ -243,16 +277,16 @@ void FAddendCoef::operator-=(const FAddendCoef &That) {
       getFpVal().subtract(That.getFpVal(), RndMode);
     return;
   }
-  
+
   if (isInt()) {
     const APFloat &T = That.getFpVal();
-    set(T);
-    getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode);
+    convertToFpType(T.getSemantics());
+    getFpVal().subtract(T, RndMode);
     return;
   }
 
   APFloat &T = getFpVal();
-  T.subtract(APFloat(T.getSemantics(), IntVal), RndMode);
+  T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode);
 }
 
 void FAddendCoef::operator*=(const FAddendCoef &That) {
@@ -271,15 +305,16 @@ void FAddendCoef::operator*=(const FAddendCoef &That) {
     return;
   }
 
-  const fltSemantics &Semantic = 
+  const fltSemantics &Semantic =
     isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
 
   if (isInt())
-    set(APFloat(Semantic, IntVal));
+    convertToFpType(Semantic);
   APFloat &F0 = getFpVal();
 
   if (That.isInt())
-    F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven);
+    F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
+                APFloat::rmNearestTiesToEven);
   else
     F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
 
@@ -305,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const {
 //  A - B                     <1, A>, <1,B>
 //  0 - B                     <-1, B>
 //  C * A,                    <C, A>
-//  A + C                     <1, A> <C, NULL> 
+//  A + C                     <1, A> <C, NULL>
 //  0 +/- 0                   <0, NULL> (corner case)
 //
 // Legend: A and B are not constant, C is constant
-// 
+//
 unsigned FAddend::drillValueDownOneStep
   (Value *Val, FAddend &Addend0, FAddend &Addend1) {
   Instruction *I = 0;
@@ -380,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep
     return 0;
 
   unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
-  if (!BreakNum || Coeff.isOne()) 
+  if (!BreakNum || Coeff.isOne())
     return BreakNum;
 
   Addend0.Scale(Coeff);
@@ -402,10 +437,10 @@ unsigned FAddend::drillAddendDownOneStep
 Value *FAddCombine::performFactorization(Instruction *I) {
   assert((I->getOpcode() == Instruction::FAdd ||
           I->getOpcode() == Instruction::FSub) && "Expect add/sub");
-  
+
   Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
   Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
-  
+
   if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
     return 0;
 
@@ -420,14 +455,14 @@ Value *FAddCombine::performFactorization(Instruction *I) {
   Value *Opnd1_0 = I1->getOperand(0);
   Value *Opnd1_1 = I1->getOperand(1);
 
-  //  Input Instr I       Factor   AddSub0  AddSub1 
+  //  Input Instr I       Factor   AddSub0  AddSub1
   //  ----------------------------------------------
   // (x*y) +/- (x*z)        x        y         z
   // (y/x) +/- (z/x)        x        y         z
   //
   Value *Factor = 0;
   Value *AddSub0 = 0, *AddSub1 = 0;
-  
+
   if (isMpy) {
     if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
       Factor = Opnd0_0;
@@ -459,7 +494,7 @@ Value *FAddCombine::performFactorization(Instruction *I) {
 
   if (isMpy)
     return createFMul(Factor, NewAddSub);
- 
+
   return createFDiv(NewAddSub, Factor);
 }
 
@@ -473,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) {
   assert((I->getOpcode() == Instruction::FAdd ||
           I->getOpcode() == Instruction::FSub) && "Expect add/sub");
 
-  // Save the instruction before calling other member-functions. 
+  // Save the instruction before calling other member-functions.
   Instr = I;
 
   FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
@@ -484,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) {
   unsigned Opnd0_ExpNum = 0;
   unsigned Opnd1_ExpNum = 0;
 
-  if (!Opnd0.isConstant()) 
+  if (!Opnd0.isConstant())
     Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
 
   // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
@@ -506,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) {
 
     Value *V0 = I->getOperand(0);
     Value *V1 = I->getOperand(1);
-    InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&  
+    InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
                  (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
 
     if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
@@ -546,7 +581,7 @@ Value *FAddCombine::simplify(Instruction *I) {
       return R;
   }
 
-  // step 6: Try factorization as the last resort, 
+  // step 6: Try factorization as the last resort,
   return performFactorization(I);
 }
 
@@ -555,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
   unsigned AddendNum = Addends.size();
   assert(AddendNum <= 4 && "Too many addends");
 
-  // For saving intermediate results; 
+  // For saving intermediate results;
   unsigned NextTmpIdx = 0;
   FAddend TmpResult[3];
 
@@ -571,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
   AddendVect SimpVect;
 
   // The outer loop works on one symbolic-value at a time. Suppose the input
-  // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... 
+  // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
   // The symbolic-values will be processed in this order: x, y, z.
   //
   for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
@@ -598,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
       if (T && T->getSymVal() == Val) {
         // Set null such that next iteration of the outer loop will not process
         // this addend again.
-        Addends[SameSymIdx] = 0; 
+        Addends[SameSymIdx] = 0;
         SimpVect.push_back(T);
       }
     }
@@ -611,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
         R += *SimpVect[Idx];
 
       // Pop all addends being folded and push the resulting folded addend.
-      SimpVect.resize(StartIdx); 
+      SimpVect.resize(StartIdx);
       if (Val != 0) {
         if (!R.isZero()) {
           SimpVect.push_back(&R);
@@ -624,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
     }
   }
 
-  assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && 
+  assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
          "out-of-bound access");
 
   if (ConstAdd)
@@ -646,7 +681,7 @@ Value *FAddCombine::createNaryFAdd
   assert(!Opnds.empty() && "Expect at least one addend");
 
   // Step 1: Check if the # of instructions needed exceeds the quota.
-  // 
+  //
   unsigned InstrNeeded = calcInstrNumber(Opnds);
   if (InstrNeeded > InstrQuota)
     return 0;
@@ -667,7 +702,7 @@ Value *FAddCombine::createNaryFAdd
   // Iterate the addends, creating fadd/fsub using adjacent two addends.
   for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
        I != E; I++) {
-    bool NeedNeg; 
+    bool NeedNeg;
     Value *V = createAddendVal(**I, NeedNeg);
     if (!LastVal) {
       LastVal = V;
@@ -693,7 +728,7 @@ Value *FAddCombine::createNaryFAdd
   }
 
   #ifndef NDEBUG
-    assert(CreateInstrNum == InstrNeeded && 
+    assert(CreateInstrNum == InstrNeeded &&
            "Inconsistent in instruction numbers");
   #endif
 
@@ -751,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
   unsigned OpndNum = Opnds.size();
   unsigned InstrNeeded = OpndNum - 1;
 
-  // The number of addends in the form of "(-1)*x". 
-  unsigned NegOpndNum = 0; 
+  // The number of addends in the form of "(-1)*x".
+  unsigned NegOpndNum = 0;
 
   // Adjust the number of instructions needed to emit the N-ary add.
   for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 990cbc3d594e..ec75dd2e0425 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -266,9 +266,8 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
   return 0;
 }
 
-
-/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more efficient
+/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
+/// (V < Lo || V >= Hi).  In practice, we emit the more efficient
 /// (V-Lo) \<u Hi-Lo.  This method expects that Lo <= Hi. isSigned indicates
 /// whether to treat the V, Lo and HI as signed or not. IB is the location to
 /// insert new instructions.
@@ -935,6 +934,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
 Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
       RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+    if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
+      return 0;
+
     // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
     if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
       if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
@@ -1545,14 +1547,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
     switch (RHSCC) {
     default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:
-      if (LHSCst == SubOne(RHSCst)) {
-        // (X == 13 | X == 14) -> X-13 <u 2
-        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
-        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
-        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
-        return Builder->CreateICmpULT(Add, AddCST);
-      }
-
       if (LHS->getOperand(0) == RHS->getOperand(0)) {
         // if LHSCst and RHSCst differ only by one bit:
         // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
@@ -1566,6 +1560,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
         }
       }
 
+      if (LHSCst == SubOne(RHSCst)) {
+        // (X == 13 | X == 14) -> X-13 <u 2
+        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+        return Builder->CreateICmpULT(Add, AddCST);
+      }
+
       break;                         // (X == 13 | X == 15) -> no change
     case ICmpInst::ICMP_UGT:         // (X == 13 | X u> 14) -> no change
     case ICmpInst::ICMP_SGT:         // (X == 13 | X s> 14) -> no change
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 64cd1bd27891..78b4a2c6c9eb 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1372,7 +1372,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
         NestF->getType() == PointerType::getUnqual(NewFTy) ?
         NestF : ConstantExpr::getBitCast(NestF,
                                          PointerType::getUnqual(NewFTy));
-      const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs);
+      const AttributeSet &NewPAL =
+          AttributeSet::get(FTy->getContext(), NewAttrs);
 
       Instruction *NewCaller;
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index d162223a6f55..2ee1278d23dc 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1610,6 +1610,9 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
 /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
 /// bitcast.  The various long double bitcasts can't get in here.
 static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+  // We need to know the target byte order to perform this optimization.
+  if (!IC.getDataLayout()) return 0;
+
   Value *Src = CI.getOperand(0);
   Type *DestTy = CI.getType();
 
@@ -1631,7 +1634,10 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
         VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
       }
 
-      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+      unsigned Elt = 0;
+      if (IC.getDataLayout()->isBigEndian())
+        Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
+      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
     }
   }
 
@@ -1653,6 +1659,8 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
       }
 
       unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+      if (IC.getDataLayout()->isBigEndian())
+        Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
       return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
     }
   }
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 32fdb9b708ba..4c252c03d086 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -139,6 +139,31 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
   }
 }
 
+/// Returns true if the exploded icmp can be expressed as a signed comparison
+/// to zero and updates the predicate accordingly.
+/// The signedness of the comparison is preserved.
+static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) {
+  if (!ICmpInst::isSigned(pred))
+    return false;
+
+  if (RHS->isZero())
+    return ICmpInst::isRelational(pred);
+
+  if (RHS->isOne()) {
+    if (pred == ICmpInst::ICMP_SLT) {
+      pred = ICmpInst::ICMP_SLE;
+      return true;
+    }
+  } else if (RHS->isAllOnesValue()) {
+    if (pred == ICmpInst::ICMP_SGT) {
+      pred = ICmpInst::ICMP_SGE;
+      return true;
+    }
+  }
+
+  return false;
+}
+
 // isHighOnes - Return true if the constant is of the form 1+0+.
 // This is the same as lowones(~X).
 static bool isHighOnes(const ConstantInt *CI) {
@@ -207,7 +232,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
   Constant *Init = GV->getInitializer();
   if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
     return 0;
-  
+
   uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
   if (ArrayElementCount > 1024) return 0;  // Don't blow up on huge arrays.
 
@@ -443,20 +468,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
   }
 
 
-  // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
+  // If a magic bitvector captures the entire comparison state
   // of this load, replace it with computation that does:
   //   ((magic_cst >> i) & 1) != 0
-  if (ArrayElementCount <= 32 ||
-      (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) {
-    Type *Ty;
-    if (ArrayElementCount <= 32)
+  {
+    Type *Ty = 0;
+
+    // Look for an appropriate type:
+    // - The type of Idx if the magic fits
+    // - The smallest fitting legal type if we have a DataLayout
+    // - Default to i32
+    if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
+      Ty = Idx->getType();
+    else if (TD)
+      Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+    else if (ArrayElementCount <= 32)
       Ty = Type::getInt32Ty(Init->getContext());
-    else
-      Ty = Type::getInt64Ty(Init->getContext());
-    Value *V = Builder->CreateIntCast(Idx, Ty, false);
-    V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
-    V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
-    return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+
+    if (Ty != 0) {
+      Value *V = Builder->CreateIntCast(Idx, Ty, false);
+      V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+      V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+      return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+    }
   }
 
   return 0;
@@ -1273,6 +1307,23 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     break;
   }
 
+  case Instruction::Mul: {       // (icmp pred (mul X, Val), CI)
+    ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+    if (!Val) break;
+
+    // If this is a signed comparison to 0 and the mul is sign preserving,
+    // use the mul LHS operand instead.
+    ICmpInst::Predicate pred = ICI.getPredicate();
+    if (isSignTest(pred, RHS) && !Val->isZero() &&
+        cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+      return new ICmpInst(Val->isNegative() ?
+                          ICmpInst::getSwappedPredicate(pred) : pred,
+                          LHSI->getOperand(0),
+                          Constant::getNullValue(RHS->getType()));
+
+    break;
+  }
+
   case Instruction::Shl: {       // (icmp pred (shl X, ShAmt), CI)
     ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
     if (!ShAmt) break;
@@ -1304,6 +1355,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
                             ConstantExpr::getLShr(RHS, ShAmt));
 
+      // If the shift is NSW and we compare to 0, then it is just shifting out
+      // sign bits, no need for an AND either.
+      if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0)
+        return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+                            ConstantExpr::getLShr(RHS, ShAmt));
+
       if (LHSI->hasOneUse()) {
         // Otherwise strength reduce the shift into an and.
         uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
@@ -1318,6 +1375,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       }
     }
 
+    // If this is a signed comparison to 0 and the shift is sign preserving,
+    // use the shift LHS operand instead.
+    ICmpInst::Predicate pred = ICI.getPredicate();
+    if (isSignTest(pred, RHS) &&
+        cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+      return new ICmpInst(pred,
+                          LHSI->getOperand(0),
+                          Constant::getNullValue(RHS->getType()));
+
     // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
     bool TrueIfSigned = false;
     if (LHSI->hasOneUse() &&
@@ -1532,6 +1598,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
             return new ICmpInst(pred, X, NegX);
           }
         }
+        break;
+      case Instruction::Mul:
+        if (RHSV == 0 && BO->hasNoSignedWrap()) {
+          if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+            // The trivial case (mul X, 0) is handled by InstSimplify
+            // General case : (mul X, C) != 0 iff X != 0
+            //                (mul X, C) == 0 iff X == 0
+            if (!BOC->isZero())
+              return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                                  Constant::getNullValue(RHS->getType()));
+          }
+        }
+        break;
       default: break;
       }
     } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
@@ -2408,6 +2487,55 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       return new ICmpInst(Pred, Y, Z);
     }
 
+    // icmp slt (X + -1), Y -> icmp sle X, Y
+    if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
+        match(B, m_AllOnes()))
+      return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
+
+    // icmp sge (X + -1), Y -> icmp sgt X, Y
+    if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
+        match(B, m_AllOnes()))
+      return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
+
+    // icmp sle (X + 1), Y -> icmp slt X, Y
+    if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE &&
+        match(B, m_One()))
+      return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
+
+    // icmp sgt (X + 1), Y -> icmp sge X, Y
+    if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT &&
+        match(B, m_One()))
+      return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
+
+    // if C1 has greater magnitude than C2:
+    //  icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
+    //  s.t. C3 = C1 - C2
+    //
+    // if C2 has greater magnitude than C1:
+    //  icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
+    //  s.t. C3 = C2 - C1
+    if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
+        (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
+      if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
+        if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
+          const APInt &AP1 = C1->getValue();
+          const APInt &AP2 = C2->getValue();
+          if (AP1.isNegative() == AP2.isNegative()) {
+            APInt AP1Abs = C1->getValue().abs();
+            APInt AP2Abs = C2->getValue().abs();
+            if (AP1Abs.uge(AP2Abs)) {
+              ConstantInt *C3 = Builder->getInt(AP1 - AP2);
+              Value *NewAdd = Builder->CreateNSWAdd(A, C3);
+              return new ICmpInst(Pred, NewAdd, C);
+            } else {
+              ConstantInt *C3 = Builder->getInt(AP2 - AP1);
+              Value *NewAdd = Builder->CreateNSWAdd(C, C3);
+              return new ICmpInst(Pred, A, NewAdd);
+            }
+          }
+        }
+
+
     // Analyze the case when either Op0 or Op1 is a sub instruction.
     // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
     A = 0; B = 0; C = 0; D = 0;
@@ -2541,6 +2669,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   }
 
   { Value *A, *B;
+    // Transform (A & ~B) == 0 --> (A & B) != 0
+    // and       (A & ~B) != 0 --> (A & B) == 0
+    // if A is a power of 2.
+    if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+        match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality())
+      return new ICmpInst(I.getInversePredicate(),
+                          Builder->CreateAnd(A, B),
+                          Op1);
+
     // ~x < ~y --> y < x
     // ~x < cst --> ~cst < x
     if (match(Op0, m_Not(m_Value(A)))) {
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 337cfe32a869..e2d7966cb3ed 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -69,8 +69,8 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
       // If the GEP has all zero indices, it doesn't offset the pointer.  If it
       // doesn't, it does.
-      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
-                                          IsOffset || !GEP->hasAllZeroIndices()))
+      if (!isOnlyCopiedFromConstantGlobal(
+              GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
         return false;
       continue;
     }
@@ -166,7 +166,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
   // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
   if (AI.isArrayAllocation()) {  // Check C != 1
     if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
-      Type *NewTy = 
+      Type *NewTy =
         ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
       AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
       New->setAlignment(AI.getAlignment());
@@ -294,7 +294,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
 
     Type *SrcPTy = SrcTy->getElementType();
 
-    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || 
+    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
          DestPTy->isVectorTy()) {
       // If the source is an array, the code below will not succeed.  Check to
       // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
@@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
           }
 
       if (IC.getDataLayout() &&
-          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || 
+          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
             SrcPTy->isVectorTy()) &&
           // Do not allow turning this into a load of an integer, which is then
           // casted to a pointer, this pessimizes pointer analysis a lot.
@@ -322,7 +322,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
         // Okay, we are casting from one integer or pointer type to another of
         // the same size.  Instead of casting the pointer before the load, cast
         // the result of the loaded value.
-        LoadInst *NewLoad = 
+        LoadInst *NewLoad =
           IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
         NewLoad->setAlignment(LI.getAlignment());
         NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
@@ -359,7 +359,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   // None of the following transforms are legal for volatile/atomic loads.
   // FIXME: Some of it is okay for atomic loads; needs refactoring.
   if (!LI.isSimple()) return 0;
-  
+
   // Do really simple store-to-load forwarding and load CSE, to catch cases
   // where there are several consecutive memory accesses to the same location,
   // separated by a few arithmetic operations.
@@ -380,7 +380,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
                     Constant::getNullValue(Op->getType()), &LI);
       return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
     }
-  } 
+  }
 
   // load null/undef -> unreachable
   // TODO: Consider a target hook for valid address spaces for this xform.
@@ -399,7 +399,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
     if (CE->isCast())
       if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
         return Res;
-  
+
   if (Op->hasOneUse()) {
     // Change select and PHI nodes to select values instead of addresses: this
     // helps alias analysis out a lot, allows many others simplifications, and
@@ -453,18 +453,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
   Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
   PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
   if (SrcTy == 0) return 0;
-  
+
   Type *SrcPTy = SrcTy->getElementType();
 
   if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
     return 0;
-  
+
   /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
   /// to its first element.  This allows us to handle things like:
   ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
   /// on 32-bit hosts.
   SmallVector<Value*, 4> NewGEPIndices;
-  
+
   // If the source is an array, the code below will not succeed.  Check to
   // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
   // constants.
@@ -472,7 +472,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
     // Index through pointer.
     Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
     NewGEPIndices.push_back(Zero);
-    
+
     while (1) {
       if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
         if (!STy->getNumElements()) /* Struct can be empty {} */
@@ -486,24 +486,24 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
         break;
       }
     }
-    
+
     SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
   }
 
   if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
     return 0;
-  
+
   // If the pointers point into different address spaces or if they point to
   // values with different sizes, we can't do the transformation.
   if (!IC.getDataLayout() ||
-      SrcTy->getAddressSpace() != 
+      SrcTy->getAddressSpace() !=
         cast<PointerType>(CI->getType())->getAddressSpace() ||
       IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
       IC.getDataLayout()->getTypeSizeInBits(DestPTy))
     return 0;
 
   // Okay, we are casting from one integer or pointer type to another of
-  // the same size.  Instead of casting the pointer before 
+  // the same size.  Instead of casting the pointer before
   // the store, cast the value to be stored.
   Value *NewCast;
   Value *SIOp0 = SI.getOperand(0);
@@ -517,12 +517,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
     if (SIOp0->getType()->isPointerTy())
       opcode = Instruction::PtrToInt;
   }
-  
+
   // SIOp0 is a pointer to aggregate and this is a store to the first field,
   // emit a GEP to index into its first field.
   if (!NewGEPIndices.empty())
     CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
-  
+
   NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                    SIOp0->getName()+".c");
   SI.setOperand(0, NewCast);
@@ -541,7 +541,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
 static bool equivalentAddressValues(Value *A, Value *B) {
   // Test if the values are trivially equivalent.
   if (A == B) return true;
-  
+
   // Test if the values come form identical arithmetic instructions.
   // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
   // its only used to compare two uses within the same basic block, which
@@ -554,7 +554,7 @@ static bool equivalentAddressValues(Value *A, Value *B) {
     if (Instruction *BI = dyn_cast<Instruction>(B))
       if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
         return true;
-  
+
   // Otherwise they may not be equivalent.
   return false;
 }
@@ -585,7 +585,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   // If the RHS is an alloca with a single use, zapify the store, making the
   // alloca dead.
   if (Ptr->hasOneUse()) {
-    if (isa<AllocaInst>(Ptr)) 
+    if (isa<AllocaInst>(Ptr))
       return EraseInstFromFunction(SI);
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
       if (isa<AllocaInst>(GEP->getOperand(0))) {
@@ -608,8 +608,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
         (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
       ScanInsts++;
       continue;
-    }    
-    
+    }
+
     if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
       // Prev store isn't volatile, and stores to the same location?
       if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
@@ -621,7 +621,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
       }
       break;
     }
-    
+
     // If this is a load, we have to stop.  However, if the loaded value is from
     // the pointer we're loading and is producing the pointer we're storing,
     // then *this* store is dead (X = load P; store X -> P).
@@ -629,12 +629,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
       if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
           LI->isSimple())
         return EraseInstFromFunction(SI);
-      
+
       // Otherwise, this is a load from some other location.  Stores before it
       // may not be dead.
       break;
     }
-    
+
     // Don't skip over loads or things that can modify memory.
     if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
       break;
@@ -664,11 +664,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
       if (Instruction *Res = InstCombineStoreToCast(*this, SI))
         return Res;
 
-  
+
   // If this store is the last instruction in the basic block (possibly
   // excepting debug info instructions), and if the block ends with an
   // unconditional branch, try to move it to the successor block.
-  BBI = &SI; 
+  BBI = &SI;
   do {
     ++BBI;
   } while (isa<DbgInfoIntrinsic>(BBI) ||
@@ -677,7 +677,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     if (BI->isUnconditional())
       if (SimplifyStoreAtEndOfBlock(SI))
         return 0;  // xform done!
-  
+
   return 0;
 }
 
@@ -691,12 +691,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
 ///
 bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   BasicBlock *StoreBB = SI.getParent();
-  
+
   // Check to see if the successor block has exactly two incoming edges.  If
   // so, see if the other predecessor contains a store to the same location.
   // if so, insert a PHI node (if needed) and move the stores down.
   BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-  
+
   // Determine whether Dest has exactly two predecessors and, if so, compute
   // the other predecessor.
   pred_iterator PI = pred_begin(DestBB);
@@ -708,7 +708,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
 
   if (++PI == pred_end(DestBB))
     return false;
-  
+
   P = *PI;
   if (P != StoreBB) {
     if (OtherBB)
@@ -728,7 +728,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
   if (!OtherBr || BBI == OtherBB->begin())
     return false;
-  
+
   // If the other block ends in an unconditional branch, check for the 'if then
   // else' case.  there is an instruction before the branch.
   StoreInst *OtherStore = 0;
@@ -750,10 +750,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   } else {
     // Otherwise, the other block ended with a conditional branch. If one of the
     // destinations is StoreBB, then we have the if/then case.
-    if (OtherBr->getSuccessor(0) != StoreBB && 
+    if (OtherBr->getSuccessor(0) != StoreBB &&
         OtherBr->getSuccessor(1) != StoreBB)
       return false;
-    
+
     // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
     // if/then triangle.  See if there is a store to the same ptr as SI that
     // lives in OtherBB.
@@ -771,7 +771,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
           BBI == OtherBB->begin())
         return false;
     }
-    
+
     // In order to eliminate the store in OtherBr, we have to
     // make sure nothing reads or overwrites the stored value in
     // StoreBB.
@@ -781,7 +781,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
         return false;
     }
   }
-  
+
   // Insert a PHI node now if we need it.
   Value *MergedVal = OtherStore->getOperand(0);
   if (MergedVal != SI.getOperand(0)) {
@@ -790,7 +790,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
     PN->addIncoming(OtherStore->getOperand(0), OtherBB);
     MergedVal = InsertNewInstBefore(PN, DestBB->front());
   }
-  
+
   // Advance to a place where it is safe to insert the new store and
   // insert it.
   BBI = DestBB->getFirstInsertionPt();
@@ -800,7 +800,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
                                    SI.getOrdering(),
                                    SI.getSynchScope());
   InsertNewInstBefore(NewSI, *BBI);
-  NewSI->setDebugLoc(OtherStore->getDebugLoc()); 
+  NewSI->setDebugLoc(OtherStore->getDebugLoc());
 
   // If the two stores had the same TBAA tag, preserve it.
   if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
@@ -808,7 +808,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
                                OtherStore->getMetadata(LLVMContext::MD_tbaa))))
       NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
 
-  
+
   // Nuke the old stores.
   EraseInstFromFunction(SI);
   EraseInstFromFunction(*OtherStore);
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 173f2bf63304..df7390652f10 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -28,7 +28,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
   // if this is safe.  For example, the use could be in dynamically unreached
   // code.
   if (!V->hasOneUse()) return 0;
-  
+
   bool MadeChange = false;
 
   // ((1 << A) >>u B) --> (1 << (A-B))
@@ -41,7 +41,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
     A = IC.Builder->CreateSub(A, B);
     return IC.Builder->CreateShl(PowerOf2, A);
   }
-  
+
   // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
   // inexact.  Similarly for <<.
   if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
@@ -52,12 +52,12 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
         I->setOperand(0, V2);
         MadeChange = true;
       }
-      
+
       if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
         I->setIsExact();
         MadeChange = true;
       }
-      
+
       if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
         I->setHasNoUnsignedWrap();
         MadeChange = true;
@@ -67,7 +67,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
   // TODO: Lots more we could do here:
   //    If V is a phi node, we can call this on each of its operands.
   //    "select cond, X, 0" can simplify to "X".
-  
+
   return MadeChange ? V : 0;
 }
 
@@ -84,12 +84,12 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
     LHSExt = LHSExt.zext(W * 2);
     RHSExt = RHSExt.zext(W * 2);
   }
-  
+
   APInt MulExt = LHSExt * RHSExt;
-  
+
   if (!sign)
     return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
-  
+
   APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
   APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
   return MulExt.slt(Min) || MulExt.sgt(Max);
@@ -107,16 +107,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
 
   if (match(Op1, m_AllOnes()))  // X * -1 == 0 - X
     return BinaryOperator::CreateNeg(Op0, I.getName());
-  
+
   if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-    
+
     // ((X << C1)*C2) == (X * (C2 << C1))
     if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
       if (SI->getOpcode() == Instruction::Shl)
         if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
           return BinaryOperator::CreateMul(SI->getOperand(0),
                                            ConstantExpr::getShl(CI, ShOp));
-    
+
     const APInt &Val = CI->getValue();
     if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C
       Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
@@ -125,7 +125,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
       return Shl;
     }
-    
+
     // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
     { Value *X; ConstantInt *C1;
       if (Op0->hasOneUse() &&
@@ -158,9 +158,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       }
     }
   }
-  
+
   // Simplify mul instructions with a constant RHS.
-  if (isa<Constant>(Op1)) {    
+  if (isa<Constant>(Op1)) {
     // Try to fold constant mul into select arguments.
     if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
       if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -181,7 +181,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
     Value *Op1C = Op1;
     BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
     if (!BO ||
-        (BO->getOpcode() != Instruction::UDiv && 
+        (BO->getOpcode() != Instruction::UDiv &&
          BO->getOpcode() != Instruction::SDiv)) {
       Op1C = Op0;
       BO = dyn_cast<BinaryOperator>(Op1);
@@ -227,14 +227,14 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
     if (match(Op1, m_Shl(m_One(), m_Value(Y))))
       return BinaryOperator::CreateShl(Op0, Y);
   }
-  
+
   // If one of the operands of the multiply is a cast from a boolean value, then
   // we know the bool is either zero or one, so this is a 'masking' multiply.
   //   X * Y (where Y is 0 or 1) -> X & (0-Y)
   if (!I.getType()->isVectorTy()) {
     // -2 is "-1 << 1" so it is all bits set except the low one.
     APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
-    
+
     Value *BoolCast = 0, *OtherOp = 0;
     if (MaskedValueIsZero(Op0, Negative2))
       BoolCast = Op0, OtherOp = Op1;
@@ -280,7 +280,7 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
      return;
    if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
      return;
-              
+
    ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
    if (CFP && CFP->isExactlyValue(0.5)) {
      Y = I->getOperand(1);
@@ -289,14 +289,14 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
    CFP = dyn_cast<ConstantFP>(I->getOperand(1));
    if (CFP && CFP->isExactlyValue(0.5))
      Y = I->getOperand(0);
-} 
+}
 
 /// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns
 /// true iff the given value is FMul or FDiv with one and only one operand
 /// being a normal constant (i.e. not Zero/NaN/Infinity).
 static bool isFMulOrFDivWithConstant(Value *V) {
   Instruction *I = dyn_cast<Instruction>(V);
-  if (!I || (I->getOpcode() != Instruction::FMul && 
+  if (!I || (I->getOpcode() != Instruction::FMul &&
              I->getOpcode() != Instruction::FDiv))
     return false;
 
@@ -318,10 +318,10 @@ static bool isNormalFp(const ConstantFP *C) {
 /// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
 /// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand
 /// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true).
-/// This function is to simplify "FMulOrDiv * C" and returns the 
+/// This function is to simplify "FMulOrDiv * C" and returns the
 /// resulting expression. Note that this function could return NULL in
 /// case the constants cannot be folded into a normal floating-point.
-/// 
+///
 Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
                                    Instruction *InsertBefore) {
   assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid");
@@ -351,7 +351,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
       if (isNormalFp(F)) {
         R = BinaryOperator::CreateFMul(Opnd0, F);
       } else {
-        // (X / C1) * C => X / (C1/C) 
+        // (X / C1) * C => X / (C1/C)
         Constant *F = ConstantExpr::getFDiv(C1, C);
         if (isNormalFp(cast<ConstantFP>(F)))
           R = BinaryOperator::CreateFDiv(Opnd0, F);
@@ -415,13 +415,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
         if (C0) {
           std::swap(C0, C1);
           std::swap(Opnd0, Opnd1);
-          Swap = true; 
+          Swap = true;
         }
 
         if (C1 && C1->getValueAPF().isNormal() &&
             isFMulOrFDivWithConstant(Opnd0)) {
           Value *M1 = ConstantExpr::getFMul(C1, C);
-          Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? 
+          Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
                       foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
                       0;
           if (M0 && M1) {
@@ -495,7 +495,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
     }
 
     // (X*Y) * X => (X*X) * Y where Y != X
-    //  The purpose is two-fold: 
+    //  The purpose is two-fold:
     //   1) to form a power expression (of X).
     //   2) potentially shorten the critical path: After transformation, the
     //  latency of the instruction Y is amortized by the expression of X*X,
@@ -537,7 +537,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
 /// instruction.
 bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
   SelectInst *SI = cast<SelectInst>(I.getOperand(1));
-  
+
   // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
   int NonNullOperand = -1;
   if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
@@ -547,36 +547,36 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
   if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
     if (ST->isNullValue())
       NonNullOperand = 1;
-  
+
   if (NonNullOperand == -1)
     return false;
-  
+
   Value *SelectCond = SI->getOperand(0);
-  
+
   // Change the div/rem to use 'Y' instead of the select.
   I.setOperand(1, SI->getOperand(NonNullOperand));
-  
+
   // Okay, we know we replace the operand of the div/rem with 'Y' with no
   // problem.  However, the select, or the condition of the select may have
   // multiple uses.  Based on our knowledge that the operand must be non-zero,
   // propagate the known value for the select into other uses of it, and
   // propagate a known value of the condition into its other users.
-  
+
   // If the select and condition only have a single use, don't bother with this,
   // early exit.
   if (SI->use_empty() && SelectCond->hasOneUse())
     return true;
-  
+
   // Scan the current block backward, looking for other uses of SI.
   BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
-  
+
   while (BBI != BBFront) {
     --BBI;
     // If we found a call to a function, we can't assume it will return, so
     // information from below it cannot be propagated above it.
     if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
       break;
-    
+
     // Replace uses of the select or its condition with the known values.
     for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
          I != E; ++I) {
@@ -589,17 +589,17 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
         Worklist.Add(BBI);
       }
     }
-    
+
     // If we past the instruction, quit looking for it.
     if (&*BBI == SI)
       SI = 0;
     if (&*BBI == SelectCond)
       SelectCond = 0;
-    
+
     // If we ran out of things to eliminate, break out of the loop.
     if (SelectCond == 0 && SI == 0)
       break;
-    
+
   }
   return true;
 }
@@ -617,7 +617,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     I.setOperand(1, V);
     return &I;
   }
-  
+
   // Handle cases involving: [su]div X, (select Cond, Y, Z)
   // This does not apply for fdiv.
   if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
@@ -683,16 +683,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
   // Handle the integer div common cases
   if (Instruction *Common = commonIDivTransforms(I))
     return Common;
-  
-  { 
+
+  {
     // X udiv 2^C -> X >> C
     // Check to see if this is an unsigned division with an exact power of 2,
     // if so, convert to a right shift.
     const APInt *C;
     if (match(Op1, m_Power2(C))) {
       BinaryOperator *LShr =
-      BinaryOperator::CreateLShr(Op0, 
-                                 ConstantInt::get(Op0->getType(), 
+      BinaryOperator::CreateLShr(Op0,
+                                 ConstantInt::get(Op0->getType(),
                                                   C->logBase2()));
       if (I.isExact()) LShr->setIsExact();
       return LShr;
@@ -732,7 +732,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
       return BinaryOperator::CreateLShr(Op0, N);
     }
   }
-  
+
   // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
   // where C1&C2 are powers of two.
   { Value *Cond; const APInt *C1, *C2;
@@ -740,11 +740,11 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
       // Construct the "on true" case of the select
       Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
                                        I.isExact());
-  
+
       // Construct the "on false" case of the select
       Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
                                        I.isExact());
-      
+
       // construct the select instruction and return it.
       return SelectInst::Create(Cond, TSI, FSI);
     }
@@ -799,7 +799,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
         // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
         return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
       }
-      
+
       if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
         // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
         // Safe because the only negative value (1 << Y) can take on is
@@ -809,13 +809,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
       }
     }
   }
-  
+
   return 0;
 }
 
 /// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special
 /// FP value and:
-///    1) 1/C is exact, or 
+///    1) 1/C is exact, or
 ///    2) reciprocal is allowed.
 /// If the convertion was successful, the simplified expression "X * 1/C" is
 /// returned; otherwise, NULL is returned.
@@ -826,7 +826,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
   const APFloat &FpVal = Divisor->getValueAPF();
   APFloat Reciprocal(FpVal.getSemantics());
   bool Cvt = FpVal.getExactInverse(&Reciprocal);
-    
+
   if (!Cvt && AllowReciprocal && FpVal.isNormal()) {
     Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
     (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
@@ -870,10 +870,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
         Constant *C = ConstantExpr::getFMul(C1, C2);
         const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
         if (F.isNormal() && !F.isDenormal()) {
-          Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), 
+          Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
                                          AllowReciprocal);
           if (!Res)
-            Res = BinaryOperator::CreateFDiv(X, C); 
+            Res = BinaryOperator::CreateFDiv(X, C);
         }
       }
 
@@ -911,7 +911,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
     if (Fold) {
       const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
       if (FoldC.isNormal() && !FoldC.isDenormal()) {
-        Instruction *R = CreateDiv ? 
+        Instruction *R = CreateDiv ?
                          BinaryOperator::CreateFDiv(Fold, X) :
                          BinaryOperator::CreateFMul(X, Fold);
         R->setFastMathFlags(I.getFastMathFlags());
@@ -997,7 +997,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
 
   if (Instruction *common = commonIRemTransforms(I))
     return common;
-  
+
   // X urem C^2 -> X and C-1
   { const APInt *C;
     if (match(Op1, m_Power2(C)))
@@ -1005,7 +1005,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
                                        ConstantInt::get(I.getType(), *C-1));
   }
 
-  // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)  
+  // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
   if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
     Constant *N1 = Constant::getAllOnesValue(I.getType());
     Value *Add = Builder->CreateAdd(Op1, N1);
@@ -1041,7 +1041,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
   // Handle the integer rem common cases
   if (Instruction *Common = commonIRemTransforms(I))
     return Common;
-  
+
   if (Value *RHSNeg = dyn_castNegVal(Op1))
     if (!isa<Constant>(RHSNeg) ||
         (isa<ConstantInt>(RHSNeg) &&
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index b0a998cca76e..bd14e81c3f8f 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -27,10 +27,10 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   unsigned Opc = FirstInst->getOpcode();
   Value *LHSVal = FirstInst->getOperand(0);
   Value *RHSVal = FirstInst->getOperand(1);
-    
+
   Type *LHSType = LHSVal->getType();
   Type *RHSType = RHSVal->getType();
-  
+
   bool isNUW = false, isNSW = false, isExact = false;
   if (OverflowingBinaryOperator *BO =
         dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
@@ -39,7 +39,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   } else if (PossiblyExactOperator *PEO =
                dyn_cast<PossiblyExactOperator>(FirstInst))
     isExact = PEO->isExact();
-  
+
   // Scan to see if all operands are the same opcode, and all have one use.
   for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
     Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
@@ -54,14 +54,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
     if (CmpInst *CI = dyn_cast<CmpInst>(I))
       if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
         return 0;
-    
+
     if (isNUW)
       isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
     if (isNSW)
       isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
     if (isExact)
       isExact = cast<PossiblyExactOperator>(I)->isExact();
-    
+
     // Keep track of which operand needs a phi node.
     if (I->getOperand(0) != LHSVal) LHSVal = 0;
     if (I->getOperand(1) != RHSVal) RHSVal = 0;
@@ -73,9 +73,9 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   // bad when the PHIs are in the header of a loop.
   if (!LHSVal && !RHSVal)
     return 0;
-  
+
   // Otherwise, this is safe to transform!
-  
+
   Value *InLHS = FirstInst->getOperand(0);
   Value *InRHS = FirstInst->getOperand(1);
   PHINode *NewLHS = 0, *NewRHS = 0;
@@ -86,7 +86,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
     InsertNewInstBefore(NewLHS, PN);
     LHSVal = NewLHS;
   }
-  
+
   if (RHSVal == 0) {
     NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
                              FirstInst->getOperand(1)->getName() + ".pn");
@@ -94,7 +94,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
     InsertNewInstBefore(NewRHS, PN);
     RHSVal = NewRHS;
   }
-  
+
   // Add all operands to the new PHIs.
   if (NewLHS || NewRHS) {
     for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
@@ -109,7 +109,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
       }
     }
   }
-    
+
   if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
     CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
                                      LHSVal, RHSVal);
@@ -129,8 +129,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
 
 Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
   GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
-  
-  SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), 
+
+  SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
                                         FirstInst->op_end());
   // This is true if all GEP bases are allocas and if all indices into them are
   // constants.
@@ -140,9 +140,9 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
   // more than one phi, which leads to higher register pressure. This is
   // especially bad when the PHIs are in the header of a loop.
   bool NeededPhi = false;
-  
+
   bool AllInBounds = true;
-  
+
   // Scan to see if all operands are the same opcode, and all have one use.
   for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
     GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
@@ -151,18 +151,18 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
       return 0;
 
     AllInBounds &= GEP->isInBounds();
-    
+
     // Keep track of whether or not all GEPs are of alloca pointers.
     if (AllBasePointersAreAllocas &&
         (!isa<AllocaInst>(GEP->getOperand(0)) ||
          !GEP->hasAllConstantIndices()))
       AllBasePointersAreAllocas = false;
-    
+
     // Compare the operand lists.
     for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
       if (FirstInst->getOperand(op) == GEP->getOperand(op))
         continue;
-      
+
       // Don't merge two GEPs when two operands differ (introducing phi nodes)
       // if one of the PHIs has a constant for the index.  The index may be
       // substantially cheaper to compute for the constants, so making it a
@@ -171,7 +171,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
       if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
           isa<ConstantInt>(GEP->getOperand(op)))
         return 0;
-      
+
       if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
         return 0;
 
@@ -186,7 +186,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
       NeededPhi = true;
     }
   }
-  
+
   // If all of the base pointers of the PHI'd GEPs are from allocas, don't
   // bother doing this transformation.  At best, this will just save a bit of
   // offset calculation, but all the predecessors will have to materialize the
@@ -195,11 +195,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
   // which can usually all be folded into the load.
   if (AllBasePointersAreAllocas)
     return 0;
-  
+
   // Otherwise, this is safe to transform.  Insert PHI nodes for each operand
   // that is variable.
   SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
-  
+
   bool HasAnyPHIs = false;
   for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
     if (FixedOperands[i]) continue;  // operand doesn't need a phi.
@@ -207,28 +207,28 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
     PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
                                      FirstOp->getName()+".pn");
     InsertNewInstBefore(NewPN, PN);
-    
+
     NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
     OperandPhis[i] = NewPN;
     FixedOperands[i] = NewPN;
     HasAnyPHIs = true;
   }
 
-  
+
   // Add all operands to the new PHIs.
   if (HasAnyPHIs) {
     for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
       GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
       BasicBlock *InBB = PN.getIncomingBlock(i);
-      
+
       for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
         if (PHINode *OpPhi = OperandPhis[op])
           OpPhi->addIncoming(InGEP->getOperand(op), InBB);
     }
   }
-  
+
   Value *Base = FixedOperands[0];
-  GetElementPtrInst *NewGEP = 
+  GetElementPtrInst *NewGEP =
     GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
   if (AllInBounds) NewGEP->setIsInBounds();
   NewGEP->setDebugLoc(FirstInst->getDebugLoc());
@@ -246,11 +246,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
 /// to a register.
 static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
   BasicBlock::iterator BBI = L, E = L->getParent()->end();
-  
+
   for (++BBI; BBI != E; ++BBI)
     if (BBI->mayWriteToMemory())
       return false;
-  
+
   // Check for non-address taken alloca.  If not address-taken already, it isn't
   // profitable to do this xform.
   if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
@@ -266,11 +266,11 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
       isAddressTaken = true;
       break;
     }
-    
+
     if (!isAddressTaken && AI->isStaticAlloca())
       return false;
   }
-  
+
   // If this load is a load from a GEP with a constant offset from an alloca,
   // then we don't want to sink it.  In its present form, it will be
   // load [constant stack offset].  Sinking it will cause us to have to
@@ -280,7 +280,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
     if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
       if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
         return false;
-  
+
   return true;
 }
 
@@ -300,41 +300,41 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
   bool isVolatile = FirstLI->isVolatile();
   unsigned LoadAlignment = FirstLI->getAlignment();
   unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
-  
+
   // We can't sink the load if the loaded value could be modified between the
   // load and the PHI.
   if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
       !isSafeAndProfitableToSinkLoad(FirstLI))
     return 0;
-  
+
   // If the PHI is of volatile loads and the load block has multiple
   // successors, sinking it would remove a load of the volatile value from
   // the path through the other successor.
-  if (isVolatile && 
+  if (isVolatile &&
       FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
     return 0;
-  
+
   // Check to see if all arguments are the same operation.
   for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
     LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
     if (!LI || !LI->hasOneUse())
       return 0;
-    
-    // We can't sink the load if the loaded value could be modified between 
+
+    // We can't sink the load if the loaded value could be modified between
     // the load and the PHI.
     if (LI->isVolatile() != isVolatile ||
         LI->getParent() != PN.getIncomingBlock(i) ||
         LI->getPointerAddressSpace() != LoadAddrSpace ||
         !isSafeAndProfitableToSinkLoad(LI))
       return 0;
-      
+
     // If some of the loads have an alignment specified but not all of them,
     // we can't do the transformation.
     if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
       return 0;
-    
+
     LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
-    
+
     // If the PHI is of volatile loads and the load block has multiple
     // successors, sinking it would remove a load of the volatile value from
     // the path through the other successor.
@@ -342,16 +342,16 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
         LI->getParent()->getTerminator()->getNumSuccessors() != 1)
       return 0;
   }
-  
+
   // Okay, they are all the same operation.  Create a new PHI node of the
   // correct type, and PHI together all of the LHS's of the instructions.
   PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
                                    PN.getNumIncomingValues(),
                                    PN.getName()+".in");
-  
+
   Value *InVal = FirstLI->getOperand(0);
   NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
-  
+
   // Add all operands to the new PHI.
   for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
     Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
@@ -359,7 +359,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
       InVal = 0;
     NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
   }
-  
+
   Value *PhiVal;
   if (InVal) {
     // The new PHI unions all of the same values together.  This is really
@@ -370,14 +370,14 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
     InsertNewInstBefore(NewPN, PN);
     PhiVal = NewPN;
   }
-  
+
   // If this was a volatile load that we are merging, make sure to loop through
   // and mark all the input loads as non-volatile.  If we don't do this, we will
   // insert a new volatile load and the old ones will not be deletable.
   if (isVolatile)
     for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
       cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
-  
+
   LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
   NewLI->setDebugLoc(FirstLI->getDebugLoc());
   return NewLI;
@@ -395,7 +395,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
     return FoldPHIArgGEPIntoPHI(PN);
   if (isa<LoadInst>(FirstInst))
     return FoldPHIArgLoadIntoPHI(PN);
-  
+
   // Scan the instruction, looking for input operations that can be folded away.
   // If all input operands to the phi are the same instruction (e.g. a cast from
   // the same type or "+42") we can pull the operation through the PHI, reducing
@@ -403,7 +403,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
   Constant *ConstantOp = 0;
   Type *CastSrcTy = 0;
   bool isNUW = false, isNSW = false, isExact = false;
-  
+
   if (isa<CastInst>(FirstInst)) {
     CastSrcTy = FirstInst->getOperand(0)->getType();
 
@@ -414,12 +414,12 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
         return 0;
     }
   } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
-    // Can fold binop, compare or shift here if the RHS is a constant, 
+    // Can fold binop, compare or shift here if the RHS is a constant,
     // otherwise call FoldPHIArgBinOpIntoPHI.
     ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
     if (ConstantOp == 0)
       return FoldPHIArgBinOpIntoPHI(PN);
-    
+
     if (OverflowingBinaryOperator *BO =
         dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
       isNUW = BO->hasNoUnsignedWrap();
@@ -442,7 +442,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
     } else if (I->getOperand(1) != ConstantOp) {
       return 0;
     }
-    
+
     if (isNUW)
       isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
     if (isNSW)
@@ -486,7 +486,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
     NewCI->setDebugLoc(FirstInst->getDebugLoc());
     return NewCI;
   }
-  
+
   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
     BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
     if (isNUW) BinOp->setHasNoUnsignedWrap();
@@ -495,7 +495,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
     BinOp->setDebugLoc(FirstInst->getDebugLoc());
     return BinOp;
   }
-  
+
   CmpInst *CIOp = cast<CmpInst>(FirstInst);
   CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
                                    PhiVal, ConstantOp);
@@ -513,7 +513,7 @@ static bool DeadPHICycle(PHINode *PN,
   // Remember this node, and if we find the cycle, return.
   if (!PotentiallyDeadPHIs.insert(PN))
     return true;
-  
+
   // Don't scan crazily complex things.
   if (PotentiallyDeadPHIs.size() == 16)
     return false;
@@ -527,16 +527,16 @@ static bool DeadPHICycle(PHINode *PN,
 /// PHIsEqualValue - Return true if this phi node is always equal to
 /// NonPhiInVal.  This happens with mutually cyclic phi nodes like:
 ///   z = some value; x = phi (y, z); y = phi (x, z)
-static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, 
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
                            SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
   // See if we already saw this PHI node.
   if (!ValueEqualPHIs.insert(PN))
     return true;
-  
+
   // Don't scan crazily complex things.
   if (ValueEqualPHIs.size() == 16)
     return false;
- 
+
   // Scan the operands to see if they are either phi nodes or are equal to
   // the value.
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -547,7 +547,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
     } else if (Op != NonPhiInVal)
       return false;
   }
-  
+
   return true;
 }
 
@@ -557,10 +557,10 @@ struct PHIUsageRecord {
   unsigned PHIId;     // The ID # of the PHI (something determinstic to sort on)
   unsigned Shift;     // The amount shifted.
   Instruction *Inst;  // The trunc instruction.
-  
+
   PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
     : PHIId(pn), Shift(Sh), Inst(User) {}
-  
+
   bool operator<(const PHIUsageRecord &RHS) const {
     if (PHIId < RHS.PHIId) return true;
     if (PHIId > RHS.PHIId) return false;
@@ -570,15 +570,15 @@ struct PHIUsageRecord {
            RHS.Inst->getType()->getPrimitiveSizeInBits();
   }
 };
-  
+
 struct LoweredPHIRecord {
   PHINode *PN;        // The PHI that was lowered.
   unsigned Shift;     // The amount shifted.
   unsigned Width;     // The width extracted.
-  
+
   LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
     : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
-  
+
   // Ctor form used by DenseMap.
   LoweredPHIRecord(PHINode *pn, unsigned Sh)
     : PN(pn), Shift(Sh), Width(0) {}
@@ -621,20 +621,20 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
   // PHIUsers - Keep track of all of the truncated values extracted from a set
   // of PHIs, along with their offset.  These are the things we want to rewrite.
   SmallVector<PHIUsageRecord, 16> PHIUsers;
-  
+
   // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
   // nodes which are extracted from. PHIsToSlice is a set we use to avoid
   // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to
   // check the uses of (to ensure they are all extracts).
   SmallVector<PHINode*, 8> PHIsToSlice;
   SmallPtrSet<PHINode*, 8> PHIsInspected;
-  
+
   PHIsToSlice.push_back(&FirstPhi);
   PHIsInspected.insert(&FirstPhi);
-  
+
   for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
     PHINode *PN = PHIsToSlice[PHIId];
-    
+
     // Scan the input list of the PHI.  If any input is an invoke, and if the
     // input is defined in the predecessor, then we won't be split the critical
     // edge which is required to insert a truncate.  Because of this, we have to
@@ -644,85 +644,85 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
       if (II == 0) continue;
       if (II->getParent() != PN->getIncomingBlock(i))
         continue;
-     
+
       // If we have a phi, and if it's directly in the predecessor, then we have
       // a critical edge where we need to put the truncate.  Since we can't
       // split the edge in instcombine, we have to bail out.
       return 0;
     }
-      
-    
+
+
     for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
          UI != E; ++UI) {
       Instruction *User = cast<Instruction>(*UI);
-      
+
       // If the user is a PHI, inspect its uses recursively.
       if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
         if (PHIsInspected.insert(UserPN))
           PHIsToSlice.push_back(UserPN);
         continue;
       }
-      
+
       // Truncates are always ok.
       if (isa<TruncInst>(User)) {
         PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
         continue;
       }
-      
+
       // Otherwise it must be a lshr which can only be used by one trunc.
       if (User->getOpcode() != Instruction::LShr ||
           !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
           !isa<ConstantInt>(User->getOperand(1)))
         return 0;
-      
+
       unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
       PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
     }
   }
-  
+
   // If we have no users, they must be all self uses, just nuke the PHI.
   if (PHIUsers.empty())
     return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
-  
+
   // If this phi node is transformable, create new PHIs for all the pieces
   // extracted out of it.  First, sort the users by their offset and size.
   array_pod_sort(PHIUsers.begin(), PHIUsers.end());
-  
+
   DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
             for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
               errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
         );
-  
+
   // PredValues - This is a temporary used when rewriting PHI nodes.  It is
   // hoisted out here to avoid construction/destruction thrashing.
   DenseMap<BasicBlock*, Value*> PredValues;
-  
+
   // ExtractedVals - Each new PHI we introduce is saved here so we don't
   // introduce redundant PHIs.
   DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
-  
+
   for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
     unsigned PHIId = PHIUsers[UserI].PHIId;
     PHINode *PN = PHIsToSlice[PHIId];
     unsigned Offset = PHIUsers[UserI].Shift;
     Type *Ty = PHIUsers[UserI].Inst->getType();
-    
+
     PHINode *EltPHI;
-    
+
     // If we've already lowered a user like this, reuse the previously lowered
     // value.
     if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
-      
+
       // Otherwise, Create the new PHI node for this user.
       EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
                                PN->getName()+".off"+Twine(Offset), PN);
       assert(EltPHI->getType() != PN->getType() &&
              "Truncate didn't shrink phi?");
-    
+
       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
         BasicBlock *Pred = PN->getIncomingBlock(i);
         Value *&PredVal = PredValues[Pred];
-        
+
         // If we already have a value for this predecessor, reuse it.
         if (PredVal) {
           EltPHI->addIncoming(PredVal, Pred);
@@ -736,7 +736,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
           EltPHI->addIncoming(PredVal, Pred);
           continue;
         }
-        
+
         if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
           // If the incoming value was a PHI, and if it was one of the PHIs we
           // already rewrote it, just use the lowered value.
@@ -746,7 +746,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
             continue;
           }
         }
-        
+
         // Otherwise, do an extract in the predecessor.
         Builder->SetInsertPoint(Pred, Pred->getTerminator());
         Value *Res = InVal;
@@ -756,7 +756,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
         Res = Builder->CreateTrunc(Res, Ty, "extract.t");
         PredVal = Res;
         EltPHI->addIncoming(Res, Pred);
-        
+
         // If the incoming value was a PHI, and if it was one of the PHIs we are
         // rewriting, we will ultimately delete the code we inserted.  This
         // means we need to revisit that PHI to make sure we extract out the
@@ -765,22 +765,22 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
           if (PHIsInspected.count(OldInVal)) {
             unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
                                           OldInVal)-PHIsToSlice.begin();
-            PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, 
+            PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
                                               cast<Instruction>(Res)));
             ++UserE;
           }
       }
       PredValues.clear();
-      
+
       DEBUG(errs() << "  Made element PHI for offset " << Offset << ": "
                    << *EltPHI << '\n');
       ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
     }
-    
+
     // Replace the use of this piece with the PHI node.
     ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
   }
-  
+
   // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
   // with undefs.
   Value *Undef = UndefValue::get(FirstPhi.getType());
@@ -818,7 +818,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
       if (DeadPHICycle(PU, PotentiallyDeadPHIs))
         return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
     }
-   
+
     // If this phi has a single use, and if that use just computes a value for
     // the next iteration of a loop, delete the phi.  This occurs with unused
     // induction variables, e.g. "for (int j = 0; ; ++j);".  Detecting this
@@ -847,7 +847,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
 
     if (InValNo != NumIncomingVals) {
       Value *NonPhiInVal = PN.getIncomingValue(InValNo);
-      
+
       // Scan the rest of the operands to see if there are any conflicts, if so
       // there is no need to recursively scan other phis.
       for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
@@ -855,7 +855,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
         if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
           break;
       }
-      
+
       // If we scanned over all operands, then we have one unique value plus
       // phi values.  Scan PHI nodes to see if they all merge in each other or
       // the value.
@@ -899,6 +899,6 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
       !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
     if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
       return Res;
-  
+
   return 0;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index a262d711d3b4..fa946e63ac68 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -127,13 +127,14 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
     // If this is a non-volatile load or a cast from the same type,
     // merge.
     if (TI->isCast()) {
-      if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
+      Type *FIOpndTy = FI->getOperand(0)->getType();
+      if (TI->getOperand(0)->getType() != FIOpndTy)
         return 0;
       // The select condition may be a vector. We may only change the operand
       // type if the vector width remains the same (and matches the condition).
       Type *CondTy = SI.getCondition()->getType();
-      if (CondTy->isVectorTy() && CondTy->getVectorNumElements() !=
-          FI->getOperand(0)->getType()->getVectorNumElements())
+      if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() ||
+          CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
         return 0;
     } else {
       return 0;  // unknown unary op.
@@ -675,7 +676,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       // Change: A = select B, false, C --> A = and !B, C
       Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
       return BinaryOperator::CreateAnd(NotCond, FalseVal);
-    } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+    }
+    if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
       if (C->getZExtValue() == false) {
         // Change: A = select B, C, false --> A = and B, C
         return BinaryOperator::CreateAnd(CondVal, TrueVal);
@@ -689,14 +691,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     // select a, a, b  -> a|b
     if (CondVal == TrueVal)
       return BinaryOperator::CreateOr(CondVal, FalseVal);
-    else if (CondVal == FalseVal)
+    if (CondVal == FalseVal)
       return BinaryOperator::CreateAnd(CondVal, TrueVal);
 
     // select a, ~a, b -> (~a)&b
     // select a, b, ~a -> (~a)|b
     if (match(TrueVal, m_Not(m_Specific(CondVal))))
       return BinaryOperator::CreateAnd(TrueVal, FalseVal);
-    else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+    if (match(FalseVal, m_Not(m_Specific(CondVal))))
       return BinaryOperator::CreateOr(TrueVal, FalseVal);
   }
 
@@ -837,7 +839,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
             Value *NewFalseOp = NegVal;
             if (AddOp != TI)
               std::swap(NewTrueOp, NewFalseOp);
-            Value *NewSel = 
+            Value *NewSel =
               Builder->CreateSelect(CondVal, NewTrueOp,
                                     NewFalseOp, SI.getName() + ".p");
 
@@ -861,7 +863,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     Value *LHS, *RHS, *LHS2, *RHS2;
     if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
       if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
-        if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, 
+        if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
                                           SI, SPF, RHS))
           return R;
       if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 4f71db1a4b09..79e16f13ad04 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -105,6 +105,75 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
   return 0;
 }
 
+// If we have a PHI node with a vector type that has only 2 uses: feed
+// itself and be an operand of extractelemnt at a constant location,
+// try to replace the PHI of the vector type with a PHI of a scalar type
+Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
+  // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
+  if (!PN->hasNUses(2))
+    return NULL;
+
+  // If so, it's known at this point that one operand is PHI and the other is
+  // an extractelement node. Find the PHI user that is not the extractelement
+  // node.
+  Value::use_iterator iu = PN->use_begin();
+  Instruction *PHIUser = dyn_cast<Instruction>(*iu);
+  if (PHIUser == cast<Instruction>(&EI))
+    PHIUser = cast<Instruction>(*(++iu));
+
+  // Verify that this PHI user has one use, which is the PHI itself,
+  // and that it is a binary operation which is cheap to scalarize.
+  // otherwise return NULL.
+  if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
+    !(isa<BinaryOperator>(PHIUser)) ||
+    !CheapToScalarize(PHIUser, true))
+    return NULL;
+
+  // Create a scalar PHI node that will replace the vector PHI node
+  // just before the current PHI node.
+  PHINode * scalarPHI = cast<PHINode>(
+    InsertNewInstWith(PHINode::Create(EI.getType(),
+    PN->getNumIncomingValues(), ""), *PN));
+  // Scalarize each PHI operand.
+  for (unsigned i=0; i < PN->getNumIncomingValues(); i++) {
+    Value *PHIInVal = PN->getIncomingValue(i);
+    BasicBlock *inBB = PN->getIncomingBlock(i);
+    Value *Elt = EI.getIndexOperand();
+    // If the operand is the PHI induction variable:
+    if (PHIInVal == PHIUser) {
+      // Scalarize the binary operation. Its first operand is the
+      // scalar PHI and the second operand is extracted from the other
+      // vector operand.
+      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
+      unsigned opId = (B0->getOperand(0) == PN) ? 1: 0;
+      Value *Op = Builder->CreateExtractElement(
+        B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt");
+      Value *newPHIUser = InsertNewInstWith(
+        BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op),
+        *B0);
+      scalarPHI->addIncoming(newPHIUser, inBB);
+    } else {
+      // Scalarize PHI input:
+      Instruction *newEI =
+        ExtractElementInst::Create(PHIInVal, Elt, "");
+      // Insert the new instruction into the predecessor basic block.
+      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
+      BasicBlock::iterator InsertPos;
+      if (pos && !isa<PHINode>(pos)) {
+        InsertPos = pos;
+        ++InsertPos;
+      } else {
+        InsertPos = inBB->getFirstInsertionPt();
+      }
+
+      InsertNewInstWith(newEI, *InsertPos);
+
+      scalarPHI->addIncoming(newEI, inBB);
+    }
+  }
+  return ReplaceInstUsesWith(EI, scalarPHI);
+}
+
 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
   // If vector val is constant with all elements the same, replace EI with
   // that element.  We handle a known element # below.
@@ -149,6 +218,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
           if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
             return new BitCastInst(Elt, EI.getType());
     }
+
+    // If there's a vector PHI feeding a scalar use through this extractelement
+    // instruction, try to scalarize the PHI.
+    if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
+    	Instruction *scalarPHI = scalarizePHI(EI, PN);
+    	if (scalarPHI)
+    		return (scalarPHI);
+    }
   }
 
   if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
@@ -201,10 +278,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
     } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
       // Canonicalize extractelement(cast) -> cast(extractelement)
       // bitcasts can change the number of vector elements and they cost nothing
-      if (CI->hasOneUse() && EI.hasOneUse() &&
-          (CI->getOpcode() != Instruction::BitCast)) {
-        Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
-                                                  EI.getIndexOperand());
+      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
+        Value *EE = InsertNewInstWith(
+       	  ExtractElementInst::Create(CI->getOperand(0), EI.getIndexOperand()),
+          *CI);
         return CastInst::Create(CI->getOpcode(), EE, EI.getType());
       }
     }
@@ -336,6 +413,10 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
 
         if (VecOp == RHS) {
           Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+          // Update Mask to reflect that `ScalarOp' has been inserted at
+          // position `InsertedIdx' within the vector returned by IEI.
+          Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
+
           // Everything but the extracted element is replaced with the RHS.
           for (unsigned i = 0; i != NumElts; ++i) {
             if (i != InsertedIdx)
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 92b42ee64b00..623c4705061e 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
 static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
 static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
 static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *kAsanInitName = "__asan_init_v2";
+static const char *kAsanInitName = "__asan_init_v3";
 static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
 static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
 static const char *kAsanMappingScaleName = "__asan_mapping_scale";
@@ -274,8 +274,6 @@ struct AddressSanitizer : public FunctionPass {
                                    Instruction *InsertBefore, bool IsWrite);
   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
   bool runOnFunction(Function &F);
-  void createInitializerPoisonCalls(Module &M,
-                                    Value *FirstAddr, Value *LastAddr);
   bool maybeInsertAsanInitAtFunctionEntry(Function &F);
   void emitShadowMapping(Module &M, IRBuilder<> &IRB) const;
   virtual bool doInitialization(Module &M);
@@ -333,8 +331,7 @@ class AddressSanitizerModule : public ModulePass {
   void initializeCallbacks(Module &M);
 
   bool ShouldInstrumentGlobal(GlobalVariable *G);
-  void createInitializerPoisonCalls(Module &M, Value *FirstAddr,
-                                    Value *LastAddr);
+  void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
   size_t RedzoneSize() const {
     return RedzoneSizeForScale(Mapping.Scale);
   }
@@ -753,7 +750,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
 }
 
 void AddressSanitizerModule::createInitializerPoisonCalls(
-    Module &M, Value *FirstAddr, Value *LastAddr) {
+    Module &M, GlobalValue *ModuleName) {
   // We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
   Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
   // If that function is not present, this TU contains no globals, or they have
@@ -765,7 +762,8 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
   IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
 
   // Add a call to poison all external globals before the given function starts.
-  IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr);
+  Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy);
+  IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr);
 
   // Add calls to unpoison all globals before each return instruction.
   for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
@@ -839,7 +837,7 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
   IRBuilder<> IRB(*C);
   // Declare our poisoning and unpoisoning functions.
   AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
-      kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+      kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, NULL));
   AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
   AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
       kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
@@ -901,12 +899,13 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
   assert(CtorFunc);
   IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
 
-  // The addresses of the first and last dynamically initialized globals in
-  // this TU.  Used in initialization order checking.
-  Value *FirstDynamic = 0, *LastDynamic = 0;
+  bool HasDynamicallyInitializedGlobals = false;
 
   GlobalVariable *ModuleName = createPrivateGlobalForString(
       M, M.getModuleIdentifier());
+  // We shouldn't merge same module names, as this string serves as unique
+  // module ID in runtime.
+  ModuleName->setUnnamedAddr(false);
 
   for (size_t i = 0; i < n; i++) {
     static const uint64_t kMaxGlobalRedzone = 1 << 18;
@@ -966,11 +965,8 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
         NULL);
 
     // Populate the first and last globals declared in this TU.
-    if (CheckInitOrder && GlobalHasDynamicInitializer) {
-      LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy);
-      if (FirstDynamic == 0)
-        FirstDynamic = LastDynamic;
-    }
+    if (CheckInitOrder && GlobalHasDynamicInitializer)
+      HasDynamicallyInitializedGlobals = true;
 
     DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
   }
@@ -981,8 +977,8 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
       ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
 
   // Create calls for poisoning before initializers run and unpoisoning after.
-  if (CheckInitOrder && FirstDynamic && LastDynamic)
-    createInitializerPoisonCalls(M, FirstDynamic, LastDynamic);
+  if (CheckInitOrder && HasDynamicallyInitializedGlobals)
+    createInitializerPoisonCalls(M, ModuleName);
   IRB.CreateCall2(AsanRegisterGlobals,
                   IRB.CreatePointerCast(AllGlobals, IntptrTy),
                   ConstantInt::get(IntptrTy, n));
@@ -1317,10 +1313,10 @@ void FunctionStackPoisoner::poisonStack() {
         ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
   }
 
-  // This string will be parsed by the run-time (DescribeStackAddress).
+  // This string will be parsed by the run-time (DescribeAddressIfStack).
   SmallString<2048> StackDescriptionStorage;
   raw_svector_ostream StackDescription(StackDescriptionStorage);
-  StackDescription << F.getName() << " " << AllocaVec.size() << " ";
+  StackDescription << AllocaVec.size() << " ";
 
   // Insert poison calls for lifetime intrinsics for alloca.
   bool HavePoisonedAllocas = false;
@@ -1353,19 +1349,26 @@ void FunctionStackPoisoner::poisonStack() {
   }
   assert(Pos == LocalStackSize);
 
-  // Write the Magic value and the frame description constant to the redzone.
+  // The left-most redzone has enough space for at least 4 pointers.
+  // Write the Magic value to redzone[0].
   Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy);
   IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
                   BasePlus0);
-  Value *BasePlus1 = IRB.CreateAdd(LocalStackBase,
-                                   ConstantInt::get(IntptrTy,
-                                                    ASan.LongSize/8));
-  BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
+  // Write the frame description constant to redzone[1].
+  Value *BasePlus1 = IRB.CreateIntToPtr(
+    IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)),
+    IntptrPtrTy);
   GlobalVariable *StackDescriptionGlobal =
       createPrivateGlobalForString(*F.getParent(), StackDescription.str());
   Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal,
                                              IntptrTy);
   IRB.CreateStore(Description, BasePlus1);
+  // Write the PC to redzone[2].
+  Value *BasePlus2 = IRB.CreateIntToPtr(
+    IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy,
+                                                   2 * ASan.LongSize/8)),
+    IntptrPtrTy);
+  IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
 
   // Poison the stack redzones at the entry.
   Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB);
diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp
index 927982d2af47..39de4b0401cb 100644
--- a/lib/Transforms/Instrumentation/BlackList.cpp
+++ b/lib/Transforms/Instrumentation/BlackList.cpp
@@ -110,7 +110,8 @@ static StringRef GetGVTypeString(const GlobalVariable &G) {
 bool BlackList::isInInit(const GlobalVariable &G) const {
   return (isIn(*G.getParent()) ||
           inSection("global-init", G.getName()) ||
-          inSection("global-init-type", GetGVTypeString(G)));
+          inSection("global-init-type", GetGVTypeString(G)) ||
+          inSection("global-init-src", G.getParent()->getModuleIdentifier()));
 }
 
 bool BlackList::inSection(const StringRef Section,
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 921d48c3646d..2edd151869e0 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/PathV2.h"
 #include "llvm/Support/raw_ostream.h"
@@ -101,6 +102,8 @@ namespace {
     Constant *getIncrementIndirectCounterFunc();
     Constant *getEmitFunctionFunc();
     Constant *getEmitArcsFunc();
+    Constant *getDeleteWriteoutFunctionListFunc();
+    Constant *getDeleteFlushFunctionListFunc();
     Constant *getEndFileFunc();
 
     // Create or retrieve an i32 state value that is used to represent the
@@ -116,9 +119,10 @@ namespace {
 
     // Add the function to write out all our counters to the global destructor
     // list.
-    void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
+    Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*,
+                                                       MDNode*> >);
+    Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
     void insertIndirectCounterIncrement();
-    void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
 
     std::string mangleName(DICompileUnit CU, const char *NewStem);
 
@@ -140,6 +144,12 @@ ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
   return new GCOVProfiler(Options);
 }
 
+static std::string getFunctionName(DISubprogram SP) {
+  if (!SP.getLinkageName().empty())
+    return SP.getLinkageName();
+  return SP.getName();
+}
+
 namespace {
   class GCOVRecord {
    protected:
@@ -288,7 +298,7 @@ namespace {
       ReturnBlock = new GCOVBlock(i++, os);
 
       writeBytes(FunctionTag, 4);
-      uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
+      uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
           1 + lengthOfGCOVString(SP.getFilename()) + 1;
       if (UseCfgChecksum)
         ++BlockLen;
@@ -297,7 +307,7 @@ namespace {
       write(0);  // lineno checksum
       if (UseCfgChecksum)
         write(0);  // cfg checksum
-      writeGCOVString(SP.getName());
+      writeGCOVString(getFunctionName(SP));
       writeGCOVString(SP.getFilename());
       write(SP.getLineNumber());
     }
@@ -372,7 +382,11 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
 
   SmallString<128> Filename = CU.getFilename();
   sys::path::replace_extension(Filename, NewStem);
-  return sys::path::filename(Filename.str());
+  StringRef FName = sys::path::filename(Filename);
+  SmallString<128> CurPath;
+  if (sys::fs::current_path(CurPath)) return FName;
+  sys::path::append(CurPath, FName.str());
+  return CurPath.str();
 }
 
 bool GCOVProfiler::runOnModule(Module &M) {
@@ -538,8 +552,38 @@ bool GCOVProfiler::emitProfileArcs() {
       }
     }
 
-    insertCounterWriteout(CountersBySP);
-    insertFlush(CountersBySP);
+    Function *WriteoutF = insertCounterWriteout(CountersBySP);
+    Function *FlushF = insertFlush(CountersBySP);
+
+    // Create a small bit of code that registers the "__llvm_gcov_writeout" to
+    // be executed at exit and the "__llvm_gcov_flush" function to be executed
+    // when "__gcov_flush" is called.
+    FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+    Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
+                                   "__llvm_gcov_init", M);
+    F->setUnnamedAddr(true);
+    F->setLinkage(GlobalValue::InternalLinkage);
+    F->addFnAttr(Attribute::NoInline);
+    if (Options.NoRedZone)
+      F->addFnAttr(Attribute::NoRedZone);
+
+    BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+    IRBuilder<> Builder(BB);
+
+    FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+    Type *Params[] = {
+      PointerType::get(FTy, 0),
+      PointerType::get(FTy, 0)
+    };
+    FTy = FunctionType::get(Builder.getVoidTy(), Params, false);
+
+    // Inialize the environment and register the local writeout and flush
+    // functions.
+    Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
+    Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
+    Builder.CreateRetVoid();
+
+    appendToGlobalCtors(*M, F, 0);
   }
 
   if (InsertIndCounterIncrCode)
@@ -635,6 +679,16 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
   return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
 }
 
+Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() {
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+  return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy);
+}
+
+Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() {
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+  return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy);
+}
+
 Constant *GCOVProfiler::getEndFileFunc() {
   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
   return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
@@ -653,7 +707,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
   return GV;
 }
 
-void GCOVProfiler::insertCounterWriteout(
+Function *GCOVProfiler::insertCounterWriteout(
     ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
   FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
   Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
@@ -683,12 +737,12 @@ void GCOVProfiler::insertCounterWriteout(
                           Builder.CreateGlobalStringPtr(ReversedVersion));
       for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
         DISubprogram SP(CountersBySP[j].second);
-        Builder.CreateCall3(EmitFunction,
-                            Builder.getInt32(j),
-                            Options.FunctionNamesInData ?
-                              Builder.CreateGlobalStringPtr(SP.getName()) :
-                              Constant::getNullValue(Builder.getInt8PtrTy()),
-                            Builder.getInt8(Options.UseCfgChecksum));
+        Builder.CreateCall3(
+            EmitFunction, Builder.getInt32(j),
+            Options.FunctionNamesInData ?
+              Builder.CreateGlobalStringPtr(getFunctionName(SP)) :
+              Constant::getNullValue(Builder.getInt8PtrTy()),
+            Builder.getInt8(Options.UseCfgChecksum));
 
         GlobalVariable *GV = CountersBySP[j].first;
         unsigned Arcs =
@@ -700,29 +754,9 @@ void GCOVProfiler::insertCounterWriteout(
       Builder.CreateCall(EndFile);
     }
   }
-  Builder.CreateRetVoid();
-
-  // Create a small bit of code that registers the "__llvm_gcov_writeout"
-  // function to be executed at exit.
-  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
-  Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
-                                 "__llvm_gcov_init", M);
-  F->setUnnamedAddr(true);
-  F->setLinkage(GlobalValue::InternalLinkage);
-  F->addFnAttr(Attribute::NoInline);
-  if (Options.NoRedZone)
-    F->addFnAttr(Attribute::NoRedZone);
-
-  BB = BasicBlock::Create(*Ctx, "entry", F);
-  Builder.SetInsertPoint(BB);
 
-  FTy = FunctionType::get(Builder.getInt32Ty(),
-                          PointerType::get(FTy, 0), false);
-  Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
-  Builder.CreateCall(AtExitFn, WriteoutF);
   Builder.CreateRetVoid();
-
-  appendToGlobalCtors(*M, F, 0);
+  return WriteoutF;
 }
 
 void GCOVProfiler::insertIndirectCounterIncrement() {
@@ -776,13 +810,13 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
   Builder.CreateRetVoid();
 }
 
-void GCOVProfiler::
+Function *GCOVProfiler::
 insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
-  Function *FlushF = M->getFunction("__gcov_flush");
+  Function *FlushF = M->getFunction("__llvm_gcov_flush");
   if (!FlushF)
     FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
-                              "__gcov_flush", M);
+                              "__llvm_gcov_flush", M);
   else
     FlushF->setLinkage(GlobalValue::InternalLinkage);
   FlushF->setUnnamedAddr(true);
@@ -812,8 +846,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
   if (RetTy == Type::getVoidTy(*Ctx))
     Builder.CreateRetVoid();
   else if (RetTy->isIntegerTy())
-    // Used if __gcov_flush was implicitly declared.
+    // Used if __llvm_gcov_flush was implicitly declared.
     Builder.CreateRet(ConstantInt::get(RetTy, 0));
   else
-    report_fatal_error("invalid return type for __gcov_flush");
+    report_fatal_error("invalid return type for __llvm_gcov_flush");
+
+  return FlushF;
 }
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index fce6513a9774..4e75904ded4f 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -122,6 +122,9 @@ static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
 static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
        cl::desc("poison uninitialized stack variables with the given patter"),
        cl::Hidden, cl::init(0xff));
+static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
+       cl::desc("poison undef temps"),
+       cl::Hidden, cl::init(true));
 
 static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
        cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
@@ -690,7 +693,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   ///
   /// Clean shadow (all zeroes) means all bits of the value are defined
   /// (initialized).
-  Value *getCleanShadow(Value *V) {
+  Constant *getCleanShadow(Value *V) {
     Type *ShadowTy = getShadowTy(V);
     if (!ShadowTy)
       return 0;
@@ -709,6 +712,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     return ConstantStruct::get(ST, Vals);
   }
 
+  /// \brief Create a dirty shadow for a given value.
+  Constant *getPoisonedShadow(Value *V) {
+    Type *ShadowTy = getShadowTy(V);
+    if (!ShadowTy)
+      return 0;
+    return getPoisonedShadow(ShadowTy);
+  }
+
   /// \brief Create a clean (zero) origin.
   Value *getCleanOrigin() {
     return Constant::getNullValue(MS.OriginTy);
@@ -730,7 +741,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       return Shadow;
     }
     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
-      Value *AllOnes = getPoisonedShadow(getShadowTy(V));
+      Value *AllOnes = ClPoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
       DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
       (void)U;
       return AllOnes;
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index f93c5ab4c806..299060a42fe8 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
@@ -56,6 +57,9 @@ static cl::opt<bool>  ClInstrumentFuncEntryExit(
 static cl::opt<bool>  ClInstrumentAtomics(
     "tsan-instrument-atomics", cl::init(true),
     cl::desc("Instrument atomics"), cl::Hidden);
+static cl::opt<bool>  ClInstrumentMemIntrinsics(
+    "tsan-instrument-memintrinsics", cl::init(true),
+    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
 
 STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
 STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
@@ -63,6 +67,7 @@ STATISTIC(NumOmittedReadsBeforeWrite,
           "Number of reads ignored due to following writes");
 STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size");
 STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes");
+STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads");
 STATISTIC(NumOmittedReadsFromConstantGlobals,
           "Number of reads from constant globals");
 STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads");
@@ -85,12 +90,14 @@ struct ThreadSanitizer : public FunctionPass {
   void initializeCallbacks(Module &M);
   bool instrumentLoadOrStore(Instruction *I);
   bool instrumentAtomic(Instruction *I);
+  bool instrumentMemIntrinsic(Instruction *I);
   void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
                                       SmallVectorImpl<Instruction*> &All);
   bool addrPointsToConstantData(Value *Addr);
   int getMemoryAccessFuncIndex(Value *Addr);
 
   DataLayout *TD;
+  Type *IntptrTy;
   SmallString<64> BlacklistFile;
   OwningPtr<BlackList> BL;
   IntegerType *OrdTy;
@@ -108,6 +115,8 @@ struct ThreadSanitizer : public FunctionPass {
   Function *TsanAtomicThreadFence;
   Function *TsanAtomicSignalFence;
   Function *TsanVptrUpdate;
+  Function *TsanVptrLoad;
+  Function *MemmoveFn, *MemcpyFn, *MemsetFn;
 };
 }  // namespace
 
@@ -196,10 +205,22 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
   TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
       "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
       IRB.getInt8PtrTy(), NULL));
+  TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction(
+      "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
   TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction(
       "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL));
   TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction(
       "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL));
+
+  MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction(
+    "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+    IRB.getInt8PtrTy(), IntptrTy, NULL));
+  MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction(
+    "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+    IntptrTy, NULL));
+  MemsetFn = checkInterfaceFunction(M.getOrInsertFunction(
+    "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
+    IntptrTy, NULL));
 }
 
 bool ThreadSanitizer::doInitialization(Module &M) {
@@ -210,6 +231,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
 
   // Always insert a call to __tsan_init into the module's CTORs.
   IRBuilder<> IRB(M.getContext());
+  IntptrTy = IRB.getIntPtrTy(TD);
   Value *TsanInit = M.getOrInsertFunction("__tsan_init",
                                           IRB.getVoidTy(), NULL);
   appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
@@ -309,6 +331,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
   SmallVector<Instruction*, 8> AllLoadsAndStores;
   SmallVector<Instruction*, 8> LocalLoadsAndStores;
   SmallVector<Instruction*, 8> AtomicAccesses;
+  SmallVector<Instruction*, 8> MemIntrinCalls;
   bool Res = false;
   bool HasCalls = false;
 
@@ -325,6 +348,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
       else if (isa<ReturnInst>(BI))
         RetVec.push_back(BI);
       else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+        if (isa<MemIntrinsic>(BI))
+          MemIntrinCalls.push_back(BI);
         HasCalls = true;
         chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
       }
@@ -348,6 +373,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
       Res |= instrumentAtomic(AtomicAccesses[i]);
     }
 
+  if (ClInstrumentMemIntrinsics)
+    for (size_t i = 0, n = MemIntrinCalls.size(); i < n; ++i) {
+      Res |= instrumentMemIntrinsic(MemIntrinCalls[i]);
+    }
+
   // Instrument function entry/exit points if there were instrumented accesses.
   if ((Res || HasCalls) && ClInstrumentFuncEntryExit) {
     IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
@@ -386,6 +416,12 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
     NumInstrumentedVtableWrites++;
     return true;
   }
+  if (!IsWrite && isVtableAccess(I)) {
+    IRB.CreateCall(TsanVptrLoad,
+                   IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+    NumInstrumentedVtableReads++;
+    return true;
+  }
   Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
   IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
   if (IsWrite) NumInstrumentedWrites++;
@@ -423,6 +459,32 @@ static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
   return IRB->getInt32(v);
 }
 
+// If a memset intrinsic gets inlined by the code gen, we will miss races on it.
+// So, we either need to ensure the intrinsic is not inlined, or instrument it.
+// We do not instrument memset/memmove/memcpy intrinsics (too complicated),
+// instead we simply replace them with regular function calls, which are then
+// intercepted by the run-time.
+// Since tsan is running after everyone else, the calls should not be
+// replaced back with intrinsics. If that becomes wrong at some point,
+// we will need to call e.g. __tsan_memset to avoid the intrinsics.
+bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
+  IRBuilder<> IRB(I);
+  if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
+    IRB.CreateCall3(MemsetFn,
+      IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+      IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
+      IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+    I->eraseFromParent();
+  } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) {
+    IRB.CreateCall3(isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
+      IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+      IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
+      IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+    I->eraseFromParent();
+  }
+  return false;
+}
+
 // Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x
 // standards.  For background see C++11 standard.  A slightly older, publically
 // available draft of the standard (not entirely up-to-date, but close enough
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 5aada9c3734d..8f917aeb3725 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -38,6 +38,7 @@ llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
   switch (Class) {
   case IC_Autorelease:
   case IC_AutoreleaseRV:
+  case IC_IntrinsicUser:
   case IC_User:
     // These operations never directly modify a reference count.
     return false;
diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
index 53a31b0de178..373168e89888 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -30,6 +30,7 @@ using namespace llvm::objcarc;
 bool llvm::objcarc::EnableARCOpts;
 static cl::opt<bool, true>
 EnableARCOptimizations("enable-objc-arc-opts",
+                       cl::desc("enable/disable all ARC Optimizations"),
                        cl::location(EnableARCOpts),
                        cl::init(true));
 
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index e062b665554a..39670f339e9f 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -64,7 +64,8 @@ static inline bool ModuleHasARC(const Module &M) {
     M.getNamedValue("objc_copyWeak") ||
     M.getNamedValue("objc_retainedObject") ||
     M.getNamedValue("objc_unretainedObject") ||
-    M.getNamedValue("objc_unretainedPointer");
+    M.getNamedValue("objc_unretainedPointer") ||
+    M.getNamedValue("clang.arc.use");
 }
 
 /// \enum InstructionClass
@@ -89,6 +90,7 @@ enum InstructionClass {
   IC_CopyWeak,            ///< objc_copyWeak (derived)
   IC_DestroyWeak,         ///< objc_destroyWeak (derived)
   IC_StoreStrong,         ///< objc_storeStrong (derived)
+  IC_IntrinsicUser,       ///< clang.arc.use
   IC_CallOrUser,          ///< could call objc_release and/or "use" pointers
   IC_Call,                ///< could call objc_release
   IC_User,                ///< could "use" a pointer
@@ -97,6 +99,13 @@ enum InstructionClass {
 
 raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class);
 
+/// \brief Test if the given class is a kind of user.
+inline static bool IsUser(InstructionClass Class) {
+  return Class == IC_User ||
+         Class == IC_CallOrUser ||
+         Class == IC_IntrinsicUser;
+}
+
 /// \brief Test if the given class is objc_retain or equivalent.
 static inline bool IsRetain(InstructionClass Class) {
   return Class == IC_Retain ||
@@ -112,13 +121,10 @@ static inline bool IsAutorelease(InstructionClass Class) {
 /// \brief Test if the given class represents instructions which return their
 /// argument verbatim.
 static inline bool IsForwarding(InstructionClass Class) {
-  // objc_retainBlock technically doesn't always return its argument
-  // verbatim, but it doesn't matter for our purposes here.
   return Class == IC_Retain ||
          Class == IC_RetainRV ||
          Class == IC_Autorelease ||
          Class == IC_AutoreleaseRV ||
-         Class == IC_RetainBlock ||
          Class == IC_NoopCast;
 }
 
@@ -256,11 +262,11 @@ static inline Value *GetObjCArg(Value *Inst) {
   return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
 }
 
-static inline bool isNullOrUndef(const Value *V) {
+static inline bool IsNullOrUndef(const Value *V) {
   return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
 }
 
-static inline bool isNoopInstruction(const Instruction *I) {
+static inline bool IsNoopInstruction(const Instruction *I) {
   return isa<BitCastInst>(I) ||
     (isa<GetElementPtrInst>(I) &&
      cast<GetElementPtrInst>(I)->hasAllZeroIndices());
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 1c13d1cbea42..b96c64fe81de 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -410,7 +410,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
           break;
         }
         --BBI;
-      } while (isNoopInstruction(BBI));
+      } while (IsNoopInstruction(BBI));
 
       if (&*BBI == GetObjCArg(Inst)) {
         DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for "
@@ -429,7 +429,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
     case IC_InitWeak: {
       // objc_initWeak(p, null) => *p = null
       CallInst *CI = cast<CallInst>(Inst);
-      if (isNullOrUndef(CI->getArgOperand(1))) {
+      if (IsNullOrUndef(CI->getArgOperand(1))) {
         Value *Null =
           ConstantPointerNull::get(cast<PointerType>(CI->getType()));
         Changed = true;
@@ -453,6 +453,10 @@ bool ObjCARCContract::runOnFunction(Function &F) {
       if (isa<AllocaInst>(Inst))
         TailOkForStoreStrongs = false;
       continue;
+    case IC_IntrinsicUser:
+      // Remove calls to @clang.arc.use(...).
+      Inst->eraseFromParent();
+      continue;
     default:
       continue;
     }
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 9c14949877f9..9c52a55b3510 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -33,6 +33,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
@@ -190,13 +191,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
   do {
     const Value *V = Worklist.pop_back_val();
 
-    DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n");
+    DEBUG(dbgs() << "Visiting: " << *V << "\n");
 
     for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
          UI != UE; ++UI) {
       const User *UUser = *UI;
 
-      DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n");
+      DEBUG(dbgs() << "User: " << *UUser << "\n");
 
       // Special - Use by a call (callee or argument) is not considered
       // to be an escape.
@@ -206,11 +207,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
       case IC_StoreStrong:
       case IC_Autorelease:
       case IC_AutoreleaseRV: {
-        DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer "
-              "arguments. Pointer Escapes!\n");
+        DEBUG(dbgs() << "User copies pointer arguments. Pointer Escapes!\n");
         // These special functions make copies of their pointer arguments.
         return true;
       }
+      case IC_IntrinsicUser:
+        // Use by the use intrinsic is not an escape.
+        continue;
       case IC_User:
       case IC_None:
         // Use by an instruction which copies the value is an escape if the
@@ -219,12 +222,11 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
             isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
 
           if (VisitedSet.insert(UUser)) {
-            DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. "
-                  "Ptr escapes if result escapes. Adding to list.\n");
+            DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes."
+                  " Adding to list.\n");
             Worklist.push_back(UUser);
           } else {
-            DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node."
-                  "\n");
+            DEBUG(dbgs() << "Already visited node.\n");
           }
           continue;
         }
@@ -241,13 +243,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
         continue;
       }
       // Otherwise, conservatively assume an escape.
-      DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n");
+      DEBUG(dbgs() << "Assuming ptr escapes.\n");
       return true;
     }
   } while (!Worklist.empty());
 
   // No escapes found.
-  DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n");
+  DEBUG(dbgs() << "Ptr does not escape.\n");
   return false;
 }
 
@@ -371,7 +373,7 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
 namespace {
   /// \brief Unidirectional information about either a
   /// retain-decrement-use-release sequence or release-use-decrement-retain
-  /// reverese sequence.
+  /// reverse sequence.
   struct RRInfo {
     /// After an objc_retain, the reference count of the referenced
     /// object is known to be positive. Similarly, before an objc_release, the
@@ -387,10 +389,6 @@ namespace {
     /// KnownSafe is true when either of these conditions is satisfied.
     bool KnownSafe;
 
-    /// True if the Calls are objc_retainBlock calls (as opposed to objc_retain
-    /// calls).
-    bool IsRetainBlock;
-
     /// True of the objc_release calls are all marked with the "tail" keyword.
     bool IsTailCallRelease;
 
@@ -407,17 +405,18 @@ namespace {
     SmallPtrSet<Instruction *, 2> ReverseInsertPts;
 
     RRInfo() :
-      KnownSafe(false), IsRetainBlock(false),
-      IsTailCallRelease(false),
-      ReleaseMetadata(0) {}
+      KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}
 
     void clear();
+
+    bool IsTrackingImpreciseReleases() {
+      return ReleaseMetadata != 0;
+    }
   };
 }
 
 void RRInfo::clear() {
   KnownSafe = false;
-  IsRetainBlock = false;
   IsTailCallRelease = false;
   ReleaseMetadata = 0;
   Calls.clear();
@@ -431,7 +430,7 @@ namespace {
     /// True if the reference count is known to be incremented.
     bool KnownPositiveRefCount;
 
-    /// True of we've seen an opportunity for partial RR elimination, such as
+    /// True if we've seen an opportunity for partial RR elimination, such as
     /// pushing calls into a CFG triangle or into one side of a CFG diamond.
     bool Partial;
 
@@ -451,15 +450,16 @@ namespace {
       KnownPositiveRefCount = true;
     }
 
-    void ClearRefCount() {
+    void ClearKnownPositiveRefCount() {
       KnownPositiveRefCount = false;
     }
 
-    bool IsKnownIncremented() const {
+    bool HasKnownPositiveRefCount() const {
       return KnownPositiveRefCount;
     }
 
     void SetSeq(Sequence NewSeq) {
+      DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n");
       Seq = NewSeq;
     }
 
@@ -472,7 +472,7 @@ namespace {
     }
 
     void ResetSequenceProgress(Sequence NewSeq) {
-      Seq = NewSeq;
+      SetSeq(NewSeq);
       Partial = false;
       RRI.clear();
     }
@@ -486,10 +486,6 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
   Seq = MergeSeqs(Seq, Other.Seq, TopDown);
   KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
 
-  // We can't merge a plain objc_retain with an objc_retainBlock.
-  if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
-    Seq = S_None;
-
   // If we're not in a sequence (anymore), drop all associated state.
   if (Seq == S_None) {
     Partial = false;
@@ -698,6 +694,287 @@ void BBState::MergeSucc(const BBState &Other) {
       MI->second.Merge(PtrState(), /*TopDown=*/false);
 }
 
+// Only enable ARC Annotations if we are building a debug version of
+// libObjCARCOpts.
+#ifndef NDEBUG
+#define ARC_ANNOTATIONS
+#endif
+
+// Define some macros along the lines of DEBUG and some helper functions to make
+// it cleaner to create annotations in the source code and to no-op when not
+// building in debug mode.
+#ifdef ARC_ANNOTATIONS
+
+#include "llvm/Support/CommandLine.h"
+
+/// Enable/disable ARC sequence annotations.
+static cl::opt<bool>
+EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false),
+                     cl::desc("Enable emission of arc data flow analysis "
+                              "annotations"));
+static cl::opt<bool>
+DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false),
+                          cl::desc("Disable check for cfg hazards when "
+                                   "annotating"));
+static cl::opt<std::string>
+ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier",
+                              cl::init(""),
+                              cl::desc("filter out all data flow annotations "
+                                       "but those that apply to the given "
+                                       "target llvm identifier."));
+
+/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an
+/// instruction so that we can track backwards when post processing via the llvm
+/// arc annotation processor tool. If the function is an
+static MDString *AppendMDNodeToSourcePtr(unsigned NodeId,
+                                         Value *Ptr) {
+  MDString *Hash = 0;
+
+  // If pointer is a result of an instruction and it does not have a source
+  // MDNode it, attach a new MDNode onto it. If pointer is a result of
+  // an instruction and does have a source MDNode attached to it, return a
+  // reference to said Node. Otherwise just return 0.
+  if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) {
+    MDNode *Node;
+    if (!(Node = Inst->getMetadata(NodeId))) {
+      // We do not have any node. Generate and attatch the hash MDString to the
+      // instruction.
+
+      // We just use an MDString to ensure that this metadata gets written out
+      // of line at the module level and to provide a very simple format
+      // encoding the information herein. Both of these makes it simpler to
+      // parse the annotations by a simple external program.
+      std::string Str;
+      raw_string_ostream os(Str);
+      os << "(" << Inst->getParent()->getParent()->getName() << ",%"
+         << Inst->getName() << ")";
+
+      Hash = MDString::get(Inst->getContext(), os.str());
+      Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash));
+    } else {
+      // We have a node. Grab its hash and return it.
+      assert(Node->getNumOperands() == 1 &&
+        "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand.");
+      Hash = cast<MDString>(Node->getOperand(0));
+    }
+  } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
+    std::string str;
+    raw_string_ostream os(str);
+    os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName()
+       << ")";
+    Hash = MDString::get(Arg->getContext(), os.str());
+  }
+
+  return Hash;
+}
+
+static std::string SequenceToString(Sequence A) {
+  std::string str;
+  raw_string_ostream os(str);
+  os << A;
+  return os.str();
+}
+
+/// Helper function to change a Sequence into a String object using our overload
+/// for raw_ostream so we only have printing code in one location.
+static MDString *SequenceToMDString(LLVMContext &Context,
+                                    Sequence A) {
+  return MDString::get(Context, SequenceToString(A));
+}
+
+/// A simple function to generate a MDNode which describes the change in state
+/// for Value *Ptr caused by Instruction *Inst.
+static void AppendMDNodeToInstForPtr(unsigned NodeId,
+                                     Instruction *Inst,
+                                     Value *Ptr,
+                                     MDString *PtrSourceMDNodeID,
+                                     Sequence OldSeq,
+                                     Sequence NewSeq) {
+  MDNode *Node = 0;
+  Value *tmp[3] = {PtrSourceMDNodeID,
+                   SequenceToMDString(Inst->getContext(),
+                                      OldSeq),
+                   SequenceToMDString(Inst->getContext(),
+                                      NewSeq)};
+  Node = MDNode::get(Inst->getContext(),
+                     ArrayRef<Value*>(tmp, 3));
+
+  Inst->setMetadata(NodeId, Node);
+}
+
+/// Add to the beginning of the basic block llvm.ptr.annotations which show the
+/// state of a pointer at the entrance to a basic block.
+static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
+                                            Value *Ptr, Sequence Seq) {
+  // If we have a target identifier, make sure that we match it before
+  // continuing.
+  if(!ARCAnnotationTargetIdentifier.empty() &&
+     !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+    return;
+
+  Module *M = BB->getParent()->getParent();
+  LLVMContext &C = M->getContext();
+  Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+  Type *I8XX = PointerType::getUnqual(I8X);
+  Type *Params[] = {I8XX, I8XX};
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
+                                        ArrayRef<Type*>(Params, 2),
+                                        /*isVarArg=*/false);
+  Constant *Callee = M->getOrInsertFunction(Name, FTy);
+
+  IRBuilder<> Builder(BB, BB->getFirstInsertionPt());
+
+  Value *PtrName;
+  StringRef Tmp = Ptr->getName();
+  if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+    Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
+                                                         Tmp + "_STR");
+    PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+                                 cast<Constant>(ActualPtrName), Tmp);
+  }
+
+  Value *S;
+  std::string SeqStr = SequenceToString(Seq);
+  if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+    Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
+                                                         SeqStr + "_STR");
+    S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+                           cast<Constant>(ActualPtrName), SeqStr);
+  }
+
+  Builder.CreateCall2(Callee, PtrName, S);
+}
+
+/// Add to the end of the basic block llvm.ptr.annotations which show the state
+/// of the pointer at the bottom of the basic block.
+static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
+                                              Value *Ptr, Sequence Seq) {
+  // If we have a target identifier, make sure that we match it before emitting
+  // an annotation.
+  if(!ARCAnnotationTargetIdentifier.empty() &&
+     !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+    return;
+
+  Module *M = BB->getParent()->getParent();
+  LLVMContext &C = M->getContext();
+  Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+  Type *I8XX = PointerType::getUnqual(I8X);
+  Type *Params[] = {I8XX, I8XX};
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
+                                        ArrayRef<Type*>(Params, 2),
+                                        /*isVarArg=*/false);
+  Constant *Callee = M->getOrInsertFunction(Name, FTy);
+
+  IRBuilder<> Builder(BB, llvm::prior(BB->end()));
+
+  Value *PtrName;
+  StringRef Tmp = Ptr->getName();
+  if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+    Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
+                                                         Tmp + "_STR");
+    PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+                                 cast<Constant>(ActualPtrName), Tmp);
+  }
+
+  Value *S;
+  std::string SeqStr = SequenceToString(Seq);
+  if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+    Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
+                                                         SeqStr + "_STR");
+    S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+                           cast<Constant>(ActualPtrName), SeqStr);
+  }
+  Builder.CreateCall2(Callee, PtrName, S);
+}
+
+/// Adds a source annotation to pointer and a state change annotation to Inst
+/// referencing the source annotation and the old/new state of pointer.
+static void GenerateARCAnnotation(unsigned InstMDId,
+                                  unsigned PtrMDId,
+                                  Instruction *Inst,
+                                  Value *Ptr,
+                                  Sequence OldSeq,
+                                  Sequence NewSeq) {
+  if (EnableARCAnnotations) {
+    // If we have a target identifier, make sure that we match it before
+    // emitting an annotation.
+    if(!ARCAnnotationTargetIdentifier.empty() &&
+       !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+      return;
+
+    // First generate the source annotation on our pointer. This will return an
+    // MDString* if Ptr actually comes from an instruction implying we can put
+    // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL),
+    // then we know that our pointer is from an Argument so we put a reference
+    // to the argument number.
+    //
+    // The point of this is to make it easy for the
+    // llvm-arc-annotation-processor tool to cross reference where the source
+    // pointer is in the LLVM IR since the LLVM IR parser does not submit such
+    // information via debug info for backends to use (since why would anyone
+    // need such a thing from LLVM IR besides in non standard cases
+    // [i.e. this]).
+    MDString *SourcePtrMDNode =
+      AppendMDNodeToSourcePtr(PtrMDId, Ptr);
+    AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq,
+                             NewSeq);
+  }
+}
+
+// The actual interface for accessing the above functionality is defined via
+// some simple macros which are defined below. We do this so that the user does
+// not need to pass in what metadata id is needed resulting in cleaner code and
+// additionally since it provides an easy way to conditionally no-op all
+// annotation support in a non-debug build.
+
+/// Use this macro to annotate a sequence state change when processing
+/// instructions bottom up,
+#define ANNOTATE_BOTTOMUP(inst, ptr, old, new)                          \
+  GenerateARCAnnotation(ARCAnnotationBottomUpMDKind,                    \
+                        ARCAnnotationProvenanceSourceMDKind, (inst),    \
+                        const_cast<Value*>(ptr), (old), (new))
+/// Use this macro to annotate a sequence state change when processing
+/// instructions top down.
+#define ANNOTATE_TOPDOWN(inst, ptr, old, new)                           \
+  GenerateARCAnnotation(ARCAnnotationTopDownMDKind,                     \
+                        ARCAnnotationProvenanceSourceMDKind, (inst),    \
+                        const_cast<Value*>(ptr), (old), (new))
+
+#define ANNOTATE_BB(_states, _bb, _name, _type, _direction)                   \
+  do {                                                                        \
+    if (EnableARCAnnotations) {                                               \
+      for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
+          E = (_states)._direction##_ptr_end(); I != E; ++I) {                \
+        Value *Ptr = const_cast<Value*>(I->first);                            \
+        Sequence Seq = I->second.GetSeq();                                    \
+        GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq);         \
+      }                                                                       \
+    }                                                                         \
+  } while (0)
+
+#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock)                       \
+    ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \
+                Entrance, bottom_up)
+#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock)                         \
+    ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend",   \
+                Terminator, bottom_up)
+#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock)                        \
+    ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart",  \
+                Entrance, top_down)
+#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock)                          \
+    ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend",    \
+                Terminator, top_down)
+
+#else // !ARC_ANNOTATION
+// If annotations are off, noop.
+#define ANNOTATE_BOTTOMUP(inst, ptr, old, new)
+#define ANNOTATE_TOPDOWN(inst, ptr, old, new)
+#define ANNOTATE_BOTTOMUP_BBSTART(states, basicblock)
+#define ANNOTATE_BOTTOMUP_BBEND(states, basicblock)
+#define ANNOTATE_TOPDOWN_BBSTART(states, basicblock)
+#define ANNOTATE_TOPDOWN_BBEND(states, basicblock)
+#endif // !ARC_ANNOTATION
+
 namespace {
   /// \brief The main ARC optimization pass.
   class ObjCARCOpt : public FunctionPass {
@@ -738,6 +1015,15 @@ namespace {
     /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
     unsigned NoObjCARCExceptionsMDKind;
 
+#ifdef ARC_ANNOTATIONS
+    /// The Metadata Kind for llvm.arc.annotation.bottomup metadata.
+    unsigned ARCAnnotationBottomUpMDKind;
+    /// The Metadata Kind for llvm.arc.annotation.topdown metadata.
+    unsigned ARCAnnotationTopDownMDKind;
+    /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata.
+    unsigned ARCAnnotationProvenanceSourceMDKind;
+#endif // ARC_ANNOATIONS
+
     Constant *getRetainRVCallee(Module *M);
     Constant *getAutoreleaseRVCallee(Module *M);
     Constant *getReleaseCallee(Module *M);
@@ -751,6 +1037,8 @@ namespace {
     bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
     void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
                                    InstructionClass &Class);
+    bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock,
+                                 InstructionClass &Class);
     void OptimizeIndividualCalls(Function &F);
 
     void CheckForCFGHazards(const BasicBlock *BB,
@@ -958,7 +1246,7 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
   // Check that the call is next to the retain.
   BasicBlock::const_iterator I = Call;
   ++I;
-  while (isNoopInstruction(I)) ++I;
+  while (IsNoopInstruction(I)) ++I;
   if (&*I != Retain)
     return;
 
@@ -966,16 +1254,13 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
   Changed = true;
   ++NumPeeps;
 
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming "
-                  "objc_retain => objc_retainAutoreleasedReturnValue"
-                  " since the operand is a return value.\n"
-                  "                                Old: "
-               << *Retain << "\n");
+  DEBUG(dbgs() << "Transforming objc_retain => "
+                  "objc_retainAutoreleasedReturnValue since the operand is a "
+                  "return value.\nOld: "<< *Retain << "\n");
 
   cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
 
-  DEBUG(dbgs() << "                                New: "
-               << *Retain << "\n");
+  DEBUG(dbgs() << "New: " << *Retain << "\n");
 }
 
 /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
@@ -990,14 +1275,14 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
     if (Call->getParent() == RetainRV->getParent()) {
       BasicBlock::const_iterator I = Call;
       ++I;
-      while (isNoopInstruction(I)) ++I;
+      while (IsNoopInstruction(I)) ++I;
       if (&*I == RetainRV)
         return false;
     } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
       BasicBlock *RetainRVParent = RetainRV->getParent();
       if (II->getNormalDest() == RetainRVParent) {
         BasicBlock::const_iterator I = RetainRVParent->begin();
-        while (isNoopInstruction(I)) ++I;
+        while (IsNoopInstruction(I)) ++I;
         if (&*I == RetainRV)
           return false;
       }
@@ -1008,15 +1293,14 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
   // pointer. In this case, we can delete the pair.
   BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
   if (I != Begin) {
-    do --I; while (I != Begin && isNoopInstruction(I));
+    do --I; while (I != Begin && IsNoopInstruction(I));
     if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
         GetObjCArg(I) == Arg) {
       Changed = true;
       ++NumPeeps;
 
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n"
-                   << "                                  Erasing " << *RetainRV
-                   << "\n");
+      DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n"
+                   << "Erasing " << *RetainRV << "\n");
 
       EraseInstruction(I);
       EraseInstruction(RetainRV);
@@ -1028,16 +1312,13 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
   Changed = true;
   ++NumPeeps;
 
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming "
-                  "objc_retainAutoreleasedReturnValue => "
+  DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => "
                   "objc_retain since the operand is not a return value.\n"
-                  "                                  Old: "
-               << *RetainRV << "\n");
+                  "Old = " << *RetainRV << "\n");
 
   cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
 
-  DEBUG(dbgs() << "                                  New: "
-               << *RetainRV << "\n");
+  DEBUG(dbgs() << "New = " << *RetainRV << "\n");
 
   return false;
 }
@@ -1066,12 +1347,10 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
   Changed = true;
   ++NumPeeps;
 
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming "
-                  "objc_autoreleaseReturnValue => "
+  DEBUG(dbgs() << "Transforming objc_autoreleaseReturnValue => "
                   "objc_autorelease since its operand is not used as a return "
                   "value.\n"
-                  "                                       Old: "
-               << *AutoreleaseRV << "\n");
+                  "Old = " << *AutoreleaseRV << "\n");
 
   CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
   AutoreleaseRVCI->
@@ -1079,14 +1358,43 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
   AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
   Class = IC_Autorelease;
 
-  DEBUG(dbgs() << "                                       New: "
-               << *AutoreleaseRV << "\n");
+  DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
+
+}
+
+// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain
+// calls.
+//
+// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and
+// does not escape (following the rules of block escaping), strength reduce the
+// objc_retainBlock to an objc_retain.
+//
+// TODO: If an objc_retainBlock call is dominated period by a previous
+// objc_retainBlock call, strength reduce the objc_retainBlock to an
+// objc_retain.
+bool
+ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst,
+                                    InstructionClass &Class) {
+  assert(GetBasicInstructionClass(Inst) == Class);
+  assert(IC_RetainBlock == Class);
+
+  // If we can not optimize Inst, return false.
+  if (!IsRetainBlockOptimizable(Inst))
+    return false;
+
+  CallInst *RetainBlock = cast<CallInst>(Inst);
+  RetainBlock->setCalledFunction(getRetainCallee(F.getParent()));
+  // Remove copy_on_escape metadata.
+  RetainBlock->setMetadata(CopyOnEscapeMDKind, 0);
+  Class = IC_Retain;
 
+  return true;
 }
 
 /// Visit each call, one at a time, and make simplifications without doing any
 /// additional analysis.
 void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+  DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n");
   // Reset all the flags in preparation for recomputing them.
   UsedInThisFunction = 0;
 
@@ -1096,8 +1404,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
 
     InstructionClass Class = GetBasicInstructionClass(Inst);
 
-    DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: "
-          << Class << "; " << *Inst << "\n");
+    DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
 
     switch (Class) {
     default: break;
@@ -1113,8 +1420,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     case IC_NoopCast:
       Changed = true;
       ++NumNoops;
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:"
-                   " " << *Inst << "\n");
+      DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
       EraseInstruction(Inst);
       continue;
 
@@ -1125,18 +1431,15 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     case IC_InitWeak:
     case IC_DestroyWeak: {
       CallInst *CI = cast<CallInst>(Inst);
-      if (isNullOrUndef(CI->getArgOperand(0))) {
+      if (IsNullOrUndef(CI->getArgOperand(0))) {
         Changed = true;
         Type *Ty = CI->getArgOperand(0)->getType();
         new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
                       Constant::getNullValue(Ty),
                       CI);
         llvm::Value *NewValue = UndefValue::get(CI->getType());
-        DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
-                        "pointer-to-weak-pointer is undefined behavior.\n"
-                        "                                     Old = " << *CI <<
-                        "\n                                     New = " <<
-                        *NewValue << "\n");
+        DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+                       "\nOld = " << *CI << "\nNew = " << *NewValue << "\n");
         CI->replaceAllUsesWith(NewValue);
         CI->eraseFromParent();
         continue;
@@ -1146,8 +1449,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     case IC_CopyWeak:
     case IC_MoveWeak: {
       CallInst *CI = cast<CallInst>(Inst);
-      if (isNullOrUndef(CI->getArgOperand(0)) ||
-          isNullOrUndef(CI->getArgOperand(1))) {
+      if (IsNullOrUndef(CI->getArgOperand(0)) ||
+          IsNullOrUndef(CI->getArgOperand(1))) {
         Changed = true;
         Type *Ty = CI->getArgOperand(0)->getType();
         new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
@@ -1155,11 +1458,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
                       CI);
 
         llvm::Value *NewValue = UndefValue::get(CI->getType());
-        DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
-                        "pointer-to-weak-pointer is undefined behavior.\n"
-                        "                                     Old = " << *CI <<
-                        "\n                                     New = " <<
-                        *NewValue << "\n");
+        DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+                        "\nOld = " << *CI << "\nNew = " << *NewValue << "\n");
 
         CI->replaceAllUsesWith(NewValue);
         CI->eraseFromParent();
@@ -1167,6 +1467,12 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
       }
       break;
     }
+    case IC_RetainBlock:
+      // If we strength reduce an objc_retainBlock to amn objc_retain, continue
+      // onto the objc_retain peephole optimizations. Otherwise break.
+      if (!OptimizeRetainBlockCall(F, Inst, Class))
+        break;
+      // FALLTHROUGH
     case IC_Retain:
       OptimizeRetainCall(F, Inst);
       break;
@@ -1196,12 +1502,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
         NewCall->setMetadata(ImpreciseReleaseMDKind,
                              MDNode::get(C, ArrayRef<Value *>()));
 
-        DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing "
-                        "objc_autorelease(x) with objc_release(x) since x is "
-                        "otherwise unused.\n"
-                        "                                     Old: " << *Call <<
-                        "\n                                     New: " <<
-                        *NewCall << "\n");
+        DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
+              "since x is otherwise unused.\nOld: " << *Call << "\nNew: "
+              << *NewCall << "\n");
 
         EraseInstruction(Call);
         Inst = NewCall;
@@ -1213,9 +1516,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     // a tail keyword.
     if (IsAlwaysTail(Class)) {
       Changed = true;
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword"
-            " to function since it can never be passed stack args: " << *Inst <<
-            "\n");
+      DEBUG(dbgs() << "Adding tail keyword to function since it can never be "
+                      "passed stack args: " << *Inst << "\n");
       cast<CallInst>(Inst)->setTailCall();
     }
 
@@ -1223,8 +1525,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     // semantics of ARC truly do not do so.
     if (IsNeverTail(Class)) {
       Changed = true;
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail "
-            "keyword from function: " << *Inst <<
+      DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst <<
             "\n");
       cast<CallInst>(Inst)->setTailCall(false);
     }
@@ -1232,8 +1533,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     // Set nounwind as needed.
     if (IsNoThrow(Class)) {
       Changed = true;
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw"
-            " class. Setting nounwind on: " << *Inst << "\n");
+      DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst
+                   << "\n");
       cast<CallInst>(Inst)->setDoesNotThrow();
     }
 
@@ -1245,11 +1546,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     const Value *Arg = GetObjCArg(Inst);
 
     // ARC calls with null are no-ops. Delete them.
-    if (isNullOrUndef(Arg)) {
+    if (IsNullOrUndef(Arg)) {
       Changed = true;
       ++NumNoops;
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with "
-            " null are no-ops. Erasing: " << *Inst << "\n");
+      DEBUG(dbgs() << "ARC calls with  null are no-ops. Erasing: " << *Inst
+            << "\n");
       EraseInstruction(Inst);
       continue;
     }
@@ -1280,7 +1581,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
         Value *Incoming =
           StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
-        if (isNullOrUndef(Incoming))
+        if (IsNullOrUndef(Incoming))
           HasNull = true;
         else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
                    .getNumSuccessors() != 1) {
@@ -1334,7 +1635,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
           for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
             Value *Incoming =
               StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
-            if (!isNullOrUndef(Incoming)) {
+            if (!IsNullOrUndef(Incoming)) {
               CallInst *Clone = cast<CallInst>(CInst->clone());
               Value *Op = PN->getIncomingValue(i);
               Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
@@ -1343,10 +1644,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
               Clone->setArgOperand(0, Op);
               Clone->insertBefore(InsertPos);
 
-              DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning "
+              DEBUG(dbgs() << "Cloning "
                            << *CInst << "\n"
-                           "                                     And inserting "
-                           "clone at " << *InsertPos << "\n");
+                           "And inserting clone at " << *InsertPos << "\n");
               Worklist.push_back(std::make_pair(Clone, Incoming));
             }
           }
@@ -1358,7 +1658,65 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
       }
     } while (!Worklist.empty());
   }
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n");
+}
+
+/// If we have a top down pointer in the S_Use state, make sure that there are
+/// no CFG hazards by checking the states of various bottom up pointers.
+static void CheckForUseCFGHazard(const Sequence SuccSSeq,
+                                 const bool SuccSRRIKnownSafe,
+                                 PtrState &S,
+                                 bool &SomeSuccHasSame,
+                                 bool &AllSuccsHaveSame,
+                                 bool &ShouldContinue) {
+  switch (SuccSSeq) {
+  case S_CanRelease: {
+    if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+      S.ClearSequenceProgress();
+      break;
+    }
+    ShouldContinue = true;
+    break;
+  }
+  case S_Use:
+    SomeSuccHasSame = true;
+    break;
+  case S_Stop:
+  case S_Release:
+  case S_MovableRelease:
+    if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+      AllSuccsHaveSame = false;
+    break;
+  case S_Retain:
+    llvm_unreachable("bottom-up pointer in retain state!");
+  case S_None:
+    llvm_unreachable("This should have been handled earlier.");
+  }
+}
+
+/// If we have a Top Down pointer in the S_CanRelease state, make sure that
+/// there are no CFG hazards by checking the states of various bottom up
+/// pointers.
+static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
+                                        const bool SuccSRRIKnownSafe,
+                                        PtrState &S,
+                                        bool &SomeSuccHasSame,
+                                        bool &AllSuccsHaveSame) {
+  switch (SuccSSeq) {
+  case S_CanRelease:
+    SomeSuccHasSame = true;
+    break;
+  case S_Stop:
+  case S_Release:
+  case S_MovableRelease:
+  case S_Use:
+    if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+      AllSuccsHaveSame = false;
+    break;
+  case S_Retain:
+    llvm_unreachable("bottom-up pointer in retain state!");
+  case S_None:
+    llvm_unreachable("This should have been handled earlier.");
+  }
 }
 
 /// Check for critical edges, loop boundaries, irreducible control flow, or
@@ -1371,106 +1729,82 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
   // If any top-down local-use or possible-dec has a succ which is earlier in
   // the sequence, forget it.
   for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
-       E = MyStates.top_down_ptr_end(); I != E; ++I)
-    switch (I->second.GetSeq()) {
-    default: break;
-    case S_Use: {
-      const Value *Arg = I->first;
-      const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
-      bool SomeSuccHasSame = false;
-      bool AllSuccsHaveSame = true;
-      PtrState &S = I->second;
-      succ_const_iterator SI(TI), SE(TI, false);
-
-      for (; SI != SE; ++SI) {
-        Sequence SuccSSeq = S_None;
-        bool SuccSRRIKnownSafe = false;
-        // If VisitBottomUp has pointer information for this successor, take
-        // what we know about it.
-        DenseMap<const BasicBlock *, BBState>::iterator BBI =
-          BBStates.find(*SI);
-        assert(BBI != BBStates.end());
-        const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
-        SuccSSeq = SuccS.GetSeq();
-        SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
-        switch (SuccSSeq) {
-        case S_None:
-        case S_CanRelease: {
-          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
-            S.ClearSequenceProgress();
-            break;
-          }
-          continue;
-        }
-        case S_Use:
-          SomeSuccHasSame = true;
-          break;
-        case S_Stop:
-        case S_Release:
-        case S_MovableRelease:
-          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
-            AllSuccsHaveSame = false;
-          break;
-        case S_Retain:
-          llvm_unreachable("bottom-up pointer in retain state!");
-        }
-      }
-      // If the state at the other end of any of the successor edges
-      // matches the current state, require all edges to match. This
-      // guards against loops in the middle of a sequence.
-      if (SomeSuccHasSame && !AllSuccsHaveSame)
+         E = MyStates.top_down_ptr_end(); I != E; ++I) {
+    PtrState &S = I->second;
+    const Sequence Seq = I->second.GetSeq();
+
+    // We only care about S_Retain, S_CanRelease, and S_Use.
+    if (Seq == S_None)
+      continue;
+
+    // Make sure that if extra top down states are added in the future that this
+    // code is updated to handle it.
+    assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) &&
+           "Unknown top down sequence state.");
+
+    const Value *Arg = I->first;
+    const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+    bool SomeSuccHasSame = false;
+    bool AllSuccsHaveSame = true;
+
+    succ_const_iterator SI(TI), SE(TI, false);
+
+    for (; SI != SE; ++SI) {
+      // If VisitBottomUp has pointer information for this successor, take
+      // what we know about it.
+      const DenseMap<const BasicBlock *, BBState>::iterator BBI =
+        BBStates.find(*SI);
+      assert(BBI != BBStates.end());
+      const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+      const Sequence SuccSSeq = SuccS.GetSeq();
+
+      // If bottom up, the pointer is in an S_None state, clear the sequence
+      // progress since the sequence in the bottom up state finished
+      // suggesting a mismatch in between retains/releases. This is true for
+      // all three cases that we are handling here: S_Retain, S_Use, and
+      // S_CanRelease.
+      if (SuccSSeq == S_None) {
         S.ClearSequenceProgress();
-      break;
-    }
-    case S_CanRelease: {
-      const Value *Arg = I->first;
-      const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
-      bool SomeSuccHasSame = false;
-      bool AllSuccsHaveSame = true;
-      PtrState &S = I->second;
-      succ_const_iterator SI(TI), SE(TI, false);
-
-      for (; SI != SE; ++SI) {
-        Sequence SuccSSeq = S_None;
-        bool SuccSRRIKnownSafe = false;
-        // If VisitBottomUp has pointer information for this successor, take
-        // what we know about it.
-        DenseMap<const BasicBlock *, BBState>::iterator BBI =
-          BBStates.find(*SI);
-        assert(BBI != BBStates.end());
-        const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
-        SuccSSeq = SuccS.GetSeq();
-        SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
-        switch (SuccSSeq) {
-        case S_None: {
-          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
-            S.ClearSequenceProgress();
-            break;
-          }
+        continue;
+      }
+
+      // If we have S_Use or S_CanRelease, perform our check for cfg hazard
+      // checks.
+      const bool SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+
+      // *NOTE* We do not use Seq from above here since we are allowing for
+      // S.GetSeq() to change while we are visiting basic blocks.
+      switch(S.GetSeq()) {
+      case S_Use: {
+        bool ShouldContinue = false;
+        CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
+                             SomeSuccHasSame, AllSuccsHaveSame,
+                             ShouldContinue);
+        if (ShouldContinue)
           continue;
-        }
-        case S_CanRelease:
-          SomeSuccHasSame = true;
-          break;
-        case S_Stop:
-        case S_Release:
-        case S_MovableRelease:
-        case S_Use:
-          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
-            AllSuccsHaveSame = false;
-          break;
-        case S_Retain:
-          llvm_unreachable("bottom-up pointer in retain state!");
-        }
+        break;
+      }
+      case S_CanRelease: {
+        CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe,
+                                    S, SomeSuccHasSame,
+                                    AllSuccsHaveSame);
+        break;
+      }
+      case S_Retain:
+      case S_None:
+      case S_Stop:
+      case S_Release:
+      case S_MovableRelease:
+        break;
       }
-      // If the state at the other end of any of the successor edges
-      // matches the current state, require all edges to match. This
-      // guards against loops in the middle of a sequence.
-      if (SomeSuccHasSame && !AllSuccsHaveSame)
-        S.ClearSequenceProgress();
-      break;
-    }
     }
+
+    // If the state at the other end of any of the successor edges
+    // matches the current state, require all edges to match. This
+    // guards against loops in the middle of a sequence.
+    if (SomeSuccHasSame && !AllSuccsHaveSame)
+      S.ClearSequenceProgress();
+  }
 }
 
 bool
@@ -1482,6 +1816,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
   InstructionClass Class = GetInstructionClass(Inst);
   const Value *Arg = 0;
 
+  DEBUG(dbgs() << "Class: " << Class << "\n");
+
   switch (Class) {
   case IC_Release: {
     Arg = GetObjCArg(Inst);
@@ -1496,27 +1832,26 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
     // pairs by making PtrState hold a stack of states, but this is
     // simple and avoids adding overhead for the non-nested case.
     if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
-      DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested "
-                      "releases (i.e. a release pair)\n");
+      DEBUG(dbgs() << "Found nested releases (i.e. a release pair)\n");
       NestingDetected = true;
     }
 
     MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
-    S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
+    Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
+    ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq);
+    S.ResetSequenceProgress(NewSeq);
     S.RRI.ReleaseMetadata = ReleaseMetadata;
-    S.RRI.KnownSafe = S.IsKnownIncremented();
+    S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
     S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
     S.RRI.Calls.insert(Inst);
-
     S.SetKnownPositiveRefCount();
     break;
   }
   case IC_RetainBlock:
-    // An objc_retainBlock call with just a use may need to be kept,
-    // because it may be copying a block from the stack to the heap.
-    if (!IsRetainBlockOptimizable(Inst))
-      break;
-    // FALLTHROUGH
+    // In OptimizeIndividualCalls, we have strength reduced all optimizable
+    // objc_retainBlocks to objc_retains. Thus at this point any
+    // objc_retainBlocks that we see are not optimizable.
+    break;
   case IC_Retain:
   case IC_RetainRV: {
     Arg = GetObjCArg(Inst);
@@ -1524,20 +1859,22 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
     PtrState &S = MyStates.getPtrBottomUpState(Arg);
     S.SetKnownPositiveRefCount();
 
-    switch (S.GetSeq()) {
+    Sequence OldSeq = S.GetSeq();
+    switch (OldSeq) {
     case S_Stop:
     case S_Release:
     case S_MovableRelease:
     case S_Use:
-      S.RRI.ReverseInsertPts.clear();
+      // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an
+      // imprecise release, clear our reverse insertion points.
+      if (OldSeq != S_Use || S.RRI.IsTrackingImpreciseReleases())
+        S.RRI.ReverseInsertPts.clear();
       // FALL THROUGH
     case S_CanRelease:
       // Don't do retain+release tracking for IC_RetainRV, because it's
       // better to let it remain as the first instruction after a call.
-      if (Class != IC_RetainRV) {
-        S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+      if (Class != IC_RetainRV)
         Retains[Inst] = S.RRI;
-      }
       S.ClearSequenceProgress();
       break;
     case S_None:
@@ -1545,7 +1882,9 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
     case S_Retain:
       llvm_unreachable("bottom-up pointer in retain state!");
     }
-    return NestingDetected;
+    ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq());
+    // A retain moving bottom up can be a use.
+    break;
   }
   case IC_AutoreleasepoolPop:
     // Conservatively, clear MyStates for all known pointers.
@@ -1571,10 +1910,13 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
 
     // Check for possible releases.
     if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
-      S.ClearRefCount();
+      DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
+            << "\n");
+      S.ClearKnownPositiveRefCount();
       switch (Seq) {
       case S_Use:
         S.SetSeq(S_CanRelease);
+        ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq());
         continue;
       case S_CanRelease:
       case S_Release:
@@ -1592,6 +1934,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
     case S_Release:
     case S_MovableRelease:
       if (CanUse(Inst, Ptr, PA, Class)) {
+        DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
+              << "\n");
         assert(S.RRI.ReverseInsertPts.empty());
         // If this is an invoke instruction, we're scanning it as part of
         // one of its successor blocks, since we can't insert code after it
@@ -1601,10 +1945,13 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
         else
           S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
         S.SetSeq(S_Use);
-      } else if (Seq == S_Release &&
-                 (Class == IC_User || Class == IC_CallOrUser)) {
+        ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
+      } else if (Seq == S_Release && IsUser(Class)) {
+        DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr
+              << "\n");
         // Non-movable releases depend on any possible objc pointer use.
         S.SetSeq(S_Stop);
+        ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
         assert(S.RRI.ReverseInsertPts.empty());
         // As above; handle invoke specially.
         if (isa<InvokeInst>(Inst))
@@ -1614,8 +1961,12 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
       }
       break;
     case S_Stop:
-      if (CanUse(Inst, Ptr, PA, Class))
+      if (CanUse(Inst, Ptr, PA, Class)) {
+        DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr
+              << "\n");
         S.SetSeq(S_Use);
+        ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
+      }
       break;
     case S_CanRelease:
     case S_Use:
@@ -1633,6 +1984,9 @@ bool
 ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
                           DenseMap<const BasicBlock *, BBState> &BBStates,
                           MapVector<Value *, RRInfo> &Retains) {
+
+  DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");
+
   bool NestingDetected = false;
   BBState &MyStates = BBStates[BB];
 
@@ -1654,6 +2008,10 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
     }
   }
 
+  // If ARC Annotations are enabled, output the current state of pointers at the
+  // bottom of the basic block.
+  ANNOTATE_BOTTOMUP_BBEND(MyStates, BB);
+
   // Visit all the instructions, bottom-up.
   for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
     Instruction *Inst = llvm::prior(I);
@@ -1662,7 +2020,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
     if (isa<InvokeInst>(Inst))
       continue;
 
-    DEBUG(dbgs() << "ObjCARCOpt::VisitButtonUp: Visiting " << *Inst << "\n");
+    DEBUG(dbgs() << "Visiting " << *Inst << "\n");
 
     NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
   }
@@ -1677,6 +2035,10 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
       NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
   }
 
+  // If ARC Annotations are enabled, output the current state of pointers at the
+  // top of the basic block.
+  ANNOTATE_BOTTOMUP_BBSTART(MyStates, BB);
+
   return NestingDetected;
 }
 
@@ -1690,11 +2052,10 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
 
   switch (Class) {
   case IC_RetainBlock:
-    // An objc_retainBlock call with just a use may need to be kept,
-    // because it may be copying a block from the stack to the heap.
-    if (!IsRetainBlockOptimizable(Inst))
-      break;
-    // FALLTHROUGH
+    // In OptimizeIndividualCalls, we have strength reduced all optimizable
+    // objc_retainBlocks to objc_retains. Thus at this point any
+    // objc_retainBlocks that we see are not optimizable.
+    break;
   case IC_Retain:
   case IC_RetainRV: {
     Arg = GetObjCArg(Inst);
@@ -1714,9 +2075,9 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
       if (S.GetSeq() == S_Retain)
         NestingDetected = true;
 
+      ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain);
       S.ResetSequenceProgress(S_Retain);
-      S.RRI.IsRetainBlock = Class == IC_RetainBlock;
-      S.RRI.KnownSafe = S.IsKnownIncremented();
+      S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
       S.RRI.Calls.insert(Inst);
     }
 
@@ -1730,17 +2091,23 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
     Arg = GetObjCArg(Inst);
 
     PtrState &S = MyStates.getPtrTopDownState(Arg);
-    S.ClearRefCount();
+    S.ClearKnownPositiveRefCount();
+
+    Sequence OldSeq = S.GetSeq();
 
-    switch (S.GetSeq()) {
+    MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+
+    switch (OldSeq) {
     case S_Retain:
     case S_CanRelease:
-      S.RRI.ReverseInsertPts.clear();
+      if (OldSeq == S_Retain || ReleaseMetadata != 0)
+        S.RRI.ReverseInsertPts.clear();
       // FALL THROUGH
     case S_Use:
-      S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+      S.RRI.ReleaseMetadata = ReleaseMetadata;
       S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
       Releases[Inst] = S.RRI;
+      ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
       S.ClearSequenceProgress();
       break;
     case S_None:
@@ -1776,10 +2143,13 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
 
     // Check for possible releases.
     if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
-      S.ClearRefCount();
+      DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
+            << "\n");
+      S.ClearKnownPositiveRefCount();
       switch (Seq) {
       case S_Retain:
         S.SetSeq(S_CanRelease);
+        ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease);
         assert(S.RRI.ReverseInsertPts.empty());
         S.RRI.ReverseInsertPts.insert(Inst);
 
@@ -1801,8 +2171,12 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
     // Check for possible direct uses.
     switch (Seq) {
     case S_CanRelease:
-      if (CanUse(Inst, Ptr, PA, Class))
+      if (CanUse(Inst, Ptr, PA, Class)) {
+        DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
+              << "\n");
         S.SetSeq(S_Use);
+        ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use);
+      }
       break;
     case S_Retain:
     case S_Use:
@@ -1822,6 +2196,7 @@ bool
 ObjCARCOpt::VisitTopDown(BasicBlock *BB,
                          DenseMap<const BasicBlock *, BBState> &BBStates,
                          DenseMap<Value *, RRInfo> &Releases) {
+  DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
   bool NestingDetected = false;
   BBState &MyStates = BBStates[BB];
 
@@ -1843,15 +2218,26 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
     }
   }
 
+  // If ARC Annotations are enabled, output the current state of pointers at the
+  // top of the basic block.
+  ANNOTATE_TOPDOWN_BBSTART(MyStates, BB);
+
   // Visit all the instructions, top-down.
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
     Instruction *Inst = I;
 
-    DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n");
+    DEBUG(dbgs() << "Visiting " << *Inst << "\n");
 
     NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
   }
 
+  // If ARC Annotations are enabled, output the current state of pointers at the
+  // bottom of the basic block.
+  ANNOTATE_TOPDOWN_BBEND(MyStates, BB);
+
+#ifdef ARC_ANNOTATIONS
+  if (!(EnableARCAnnotations && DisableCheckForCFGHazards))
+#endif
   CheckForCFGHazards(BB, BBStates, MyStates);
   return NestingDetected;
 }
@@ -1983,6 +2369,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
   Type *ArgTy = Arg->getType();
   Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
 
+  DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");
+
   // Insert the new retain and release calls.
   for (SmallPtrSet<Instruction *, 2>::const_iterator
        PI = ReleasesToMove.ReverseInsertPts.begin(),
@@ -1991,20 +2379,12 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
     Value *MyArg = ArgTy == ParamTy ? Arg :
                    new BitCastInst(Arg, ParamTy, "", InsertPt);
     CallInst *Call =
-      CallInst::Create(RetainsToMove.IsRetainBlock ?
-                         getRetainBlockCallee(M) : getRetainCallee(M),
-                       MyArg, "", InsertPt);
+      CallInst::Create(getRetainCallee(M), MyArg, "", InsertPt);
     Call->setDoesNotThrow();
-    if (RetainsToMove.IsRetainBlock)
-      Call->setMetadata(CopyOnEscapeMDKind,
-                        MDNode::get(M->getContext(), ArrayRef<Value *>()));
-    else
-      Call->setTailCall();
+    Call->setTailCall();
 
-    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call
-                 << "\n"
-                    "                       At insertion point: " << *InsertPt
-                 << "\n");
+    DEBUG(dbgs() << "Inserting new Release: " << *Call << "\n"
+                    "At insertion point: " << *InsertPt << "\n");
   }
   for (SmallPtrSet<Instruction *, 2>::const_iterator
        PI = RetainsToMove.ReverseInsertPts.begin(),
@@ -2021,10 +2401,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
     if (ReleasesToMove.IsTailCallRelease)
       Call->setTailCall();
 
-    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call
-                 << "\n"
-                    "                       At insertion point: " << *InsertPt
-                 << "\n");
+    DEBUG(dbgs() << "Inserting new Release: " << *Call << "\n"
+                    "At insertion point: " << *InsertPt << "\n");
   }
 
   // Delete the original retain and release calls.
@@ -2034,8 +2412,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
     Instruction *OrigRetain = *AI;
     Retains.blot(OrigRetain);
     DeadInsts.push_back(OrigRetain);
-    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain <<
-                    "\n");
+    DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
   }
   for (SmallPtrSet<Instruction *, 2>::const_iterator
        AI = ReleasesToMove.Calls.begin(),
@@ -2043,9 +2420,9 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
     Instruction *OrigRelease = *AI;
     Releases.erase(OrigRelease);
     DeadInsts.push_back(OrigRelease);
-    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease
-                 << "\n");
+    DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
   }
+
 }
 
 bool
@@ -2075,7 +2452,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
   unsigned OldCount = 0;
   unsigned NewCount = 0;
   bool FirstRelease = true;
-  bool FirstRetain = true;
   for (;;) {
     for (SmallVectorImpl<Instruction *>::const_iterator
            NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
@@ -2156,16 +2532,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
           OldDelta += PathCount;
           OldCount += PathCount;
 
-          // Merge the IsRetainBlock values.
-          if (FirstRetain) {
-            RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
-            FirstRetain = false;
-          } else if (ReleasesToMove.IsRetainBlock !=
-                     NewReleaseRetainRRI.IsRetainBlock)
-            // It's not possible to merge the sequences if one uses
-            // objc_retain and the other uses objc_retainBlock.
-            return false;
-
           // Collect the optimal insertion points.
           if (!KnownSafe)
             for (SmallPtrSet<Instruction *, 2>::const_iterator
@@ -2228,6 +2594,8 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
                                  MapVector<Value *, RRInfo> &Retains,
                                  DenseMap<Value *, RRInfo> &Releases,
                                  Module *M) {
+  DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
+
   bool AnyPairsCompletelyEliminated = false;
   RRInfo RetainsToMove;
   RRInfo ReleasesToMove;
@@ -2243,8 +2611,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
 
     Instruction *Retain = cast<Instruction>(V);
 
-    DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain
-          << "\n");
+    DEBUG(dbgs() << "Visiting: " << *Retain << "\n");
 
     Value *Arg = GetObjCArg(Retain);
 
@@ -2271,6 +2638,12 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
                             ReleasesToMove, Arg, KnownSafe,
                             AnyPairsCompletelyEliminated);
 
+#ifdef ARC_ANNOTATIONS
+    // Do not move calls if ARC annotations are requested. If we were to move
+    // calls in this case, we would not be able
+    PerformMoveCalls = PerformMoveCalls && !EnableARCAnnotations;
+#endif // ARC_ANNOTATIONS
+
     if (PerformMoveCalls) {
       // Ok, everything checks out and we're all set. Let's move/delete some
       // code!
@@ -2295,14 +2668,15 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
 
 /// Weak pointer optimizations.
 void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+  DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n");
+
   // First, do memdep-style RLE and S2L optimizations. We can't use memdep
   // itself because it uses AliasAnalysis and we need to do provenance
   // queries instead.
   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
     Instruction *Inst = &*I++;
 
-    DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst <<
-          "\n");
+    DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
 
     InstructionClass Class = GetBasicInstructionClass(Inst);
     if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
@@ -2392,6 +2766,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
         goto clobbered;
       case IC_AutoreleasepoolPush:
       case IC_None:
+      case IC_IntrinsicUser:
       case IC_User:
         // Weak pointers are only modified through the weak entry points
         // (and arbitrary calls, which could call the weak entry points).
@@ -2449,9 +2824,6 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
     done:;
     }
   }
-
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
-
 }
 
 /// Identify program paths which execute sequences of retains and releases which
@@ -2476,6 +2848,88 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
          NestingDetected;
 }
 
+/// Check if there is a dependent call earlier that does not have anything in
+/// between the Retain and the call that can affect the reference count of their
+/// shared pointer argument. Note that Retain need not be in BB.
+static bool
+HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
+                             SmallPtrSet<Instruction *, 4> &DepInsts,
+                             SmallPtrSet<const BasicBlock *, 4> &Visited,
+                             ProvenanceAnalysis &PA) {
+  FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
+                   DepInsts, Visited, PA);
+  if (DepInsts.size() != 1)
+    return false;
+
+  CallInst *Call =
+    dyn_cast_or_null<CallInst>(*DepInsts.begin());
+
+  // Check that the pointer is the return value of the call.
+  if (!Call || Arg != Call)
+    return false;
+
+  // Check that the call is a regular call.
+  InstructionClass Class = GetBasicInstructionClass(Call);
+  if (Class != IC_CallOrUser && Class != IC_Call)
+    return false;
+
+  return true;
+}
+
+/// Find a dependent retain that precedes the given autorelease for which there
+/// is nothing in between the two instructions that can affect the ref count of
+/// Arg.
+static CallInst *
+FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
+                                  Instruction *Autorelease,
+                                  SmallPtrSet<Instruction *, 4> &DepInsts,
+                                  SmallPtrSet<const BasicBlock *, 4> &Visited,
+                                  ProvenanceAnalysis &PA) {
+  FindDependencies(CanChangeRetainCount, Arg,
+                   BB, Autorelease, DepInsts, Visited, PA);
+  if (DepInsts.size() != 1)
+    return 0;
+
+  CallInst *Retain =
+    dyn_cast_or_null<CallInst>(*DepInsts.begin());
+
+  // Check that we found a retain with the same argument.
+  if (!Retain ||
+      !IsRetain(GetBasicInstructionClass(Retain)) ||
+      GetObjCArg(Retain) != Arg) {
+    return 0;
+  }
+
+  return Retain;
+}
+
+/// Look for an ``autorelease'' instruction dependent on Arg such that there are
+/// no instructions dependent on Arg that need a positive ref count in between
+/// the autorelease and the ret.
+static CallInst *
+FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
+                                       ReturnInst *Ret,
+                                       SmallPtrSet<Instruction *, 4> &DepInsts,
+                                       SmallPtrSet<const BasicBlock *, 4> &V,
+                                       ProvenanceAnalysis &PA) {
+  FindDependencies(NeedsPositiveRetainCount, Arg,
+                   BB, Ret, DepInsts, V, PA);
+  if (DepInsts.size() != 1)
+    return 0;
+
+  CallInst *Autorelease =
+    dyn_cast_or_null<CallInst>(*DepInsts.begin());
+  if (!Autorelease)
+    return 0;
+  InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
+  if (!IsAutorelease(AutoreleaseClass))
+    return 0;
+  if (GetObjCArg(Autorelease) != Arg)
+    return 0;
+
+  return Autorelease;
+}
+
 /// Look for this pattern:
 /// \code
 ///    %call = call i8* @something(...)
@@ -2484,107 +2938,51 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
 ///    ret i8* %3
 /// \endcode
 /// And delete the retain and autorelease.
-///
-/// Otherwise if it's just this:
-/// \code
-///    %3 = call i8* @objc_autorelease(i8* %2)
-///    ret i8* %3
-/// \endcode
-/// convert the autorelease to autoreleaseRV.
 void ObjCARCOpt::OptimizeReturns(Function &F) {
   if (!F.getReturnType()->isPointerTy())
     return;
 
+  DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n");
+
   SmallPtrSet<Instruction *, 4> DependingInstructions;
   SmallPtrSet<const BasicBlock *, 4> Visited;
   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
     BasicBlock *BB = FI;
     ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
 
-    DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
+    DEBUG(dbgs() << "Visiting: " << *Ret << "\n");
 
-    if (!Ret) continue;
+    if (!Ret)
+      continue;
 
     const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
-    FindDependencies(NeedsPositiveRetainCount, Arg,
-                     BB, Ret, DependingInstructions, Visited, PA);
-    if (DependingInstructions.size() != 1)
-      goto next_block;
-
-    {
-      CallInst *Autorelease =
-        dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
-      if (!Autorelease)
-        goto next_block;
-      InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
-      if (!IsAutorelease(AutoreleaseClass))
-        goto next_block;
-      if (GetObjCArg(Autorelease) != Arg)
-        goto next_block;
 
+    // Look for an ``autorelease'' instruction that is a predecssor of Ret and
+    // dependent on Arg such that there are no instructions dependent on Arg
+    // that need a positive ref count in between the autorelease and Ret.
+    CallInst *Autorelease =
+      FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret,
+                                             DependingInstructions, Visited,
+                                             PA);
+    if (Autorelease) {
       DependingInstructions.clear();
       Visited.clear();
 
-      // Check that there is nothing that can affect the reference
-      // count between the autorelease and the retain.
-      FindDependencies(CanChangeRetainCount, Arg,
-                       BB, Autorelease, DependingInstructions, Visited, PA);
-      if (DependingInstructions.size() != 1)
-        goto next_block;
-
-      {
-        CallInst *Retain =
-          dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
-
-        // Check that we found a retain with the same argument.
-        if (!Retain ||
-            !IsRetain(GetBasicInstructionClass(Retain)) ||
-            GetObjCArg(Retain) != Arg)
-          goto next_block;
-
+      CallInst *Retain =
+        FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
+                                          DependingInstructions, Visited, PA);
+      if (Retain) {
         DependingInstructions.clear();
         Visited.clear();
 
-        // Convert the autorelease to an autoreleaseRV, since it's
-        // returning the value.
-        if (AutoreleaseClass == IC_Autorelease) {
-          DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Converting autorelease "
-                          "=> autoreleaseRV since it's returning a value.\n"
-                          "                             In: " << *Autorelease
-                       << "\n");
-          Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
-          DEBUG(dbgs() << "                             Out: " << *Autorelease
-                       << "\n");
-          Autorelease->setTailCall(); // Always tail call autoreleaseRV.
-          AutoreleaseClass = IC_AutoreleaseRV;
-        }
-
-        // Check that there is nothing that can affect the reference
-        // count between the retain and the call.
-        // Note that Retain need not be in BB.
-        FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
-                         DependingInstructions, Visited, PA);
-        if (DependingInstructions.size() != 1)
-          goto next_block;
-
-        {
-          CallInst *Call =
-            dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
-
-          // Check that the pointer is the return value of the call.
-          if (!Call || Arg != Call)
-            goto next_block;
-
-          // Check that the call is a regular call.
-          InstructionClass Class = GetBasicInstructionClass(Call);
-          if (Class != IC_CallOrUser && Class != IC_Call)
-            goto next_block;
-
+        // Check that there is nothing that can affect the reference count
+        // between the retain and the call.  Note that Retain need not be in BB.
+        if (HasSafePathToPredecessorCall(Arg, Retain, DependingInstructions,
+                                         Visited, PA)) {
           // If so, we can zap the retain and autorelease.
           Changed = true;
           ++NumRets;
-          DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
-                       << "\n                             Erasing: "
+          DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: "
                        << *Autorelease << "\n");
           EraseInstruction(Retain);
           EraseInstruction(Autorelease);
@@ -2592,13 +2990,9 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
       }
     }
 
-  next_block:
     DependingInstructions.clear();
     Visited.clear();
   }
-
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
-
 }
 
 bool ObjCARCOpt::doInitialization(Module &M) {
@@ -2617,6 +3011,14 @@ bool ObjCARCOpt::doInitialization(Module &M) {
     M.getContext().getMDKindID("clang.arc.copy_on_escape");
   NoObjCARCExceptionsMDKind =
     M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
+#ifdef ARC_ANNOTATIONS
+  ARCAnnotationBottomUpMDKind =
+    M.getContext().getMDKindID("llvm.arc.annotation.bottomup");
+  ARCAnnotationTopDownMDKind =
+    M.getContext().getMDKindID("llvm.arc.annotation.topdown");
+  ARCAnnotationProvenanceSourceMDKind =
+    M.getContext().getMDKindID("llvm.arc.annotation.provenancesource");
+#endif // ARC_ANNOTATIONS
 
   // Intuitively, objc_retain and others are nocapture, however in practice
   // they are not, because they return their argument value. And objc_release
@@ -2643,7 +3045,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
 
   Changed = false;
 
-  DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n");
+  DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>"
+        "\n");
 
   PA.setAA(&getAnalysis<AliasAnalysis>());
 
diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
index a841c64a9f52..03e12d4fd763 100644
--- a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
@@ -72,6 +72,8 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
     return OS << "IC_Call";
   case IC_User:
     return OS << "IC_User";
+  case IC_IntrinsicUser:
+    return OS << "IC_IntrinsicUser";
   case IC_None:
     return OS << "IC_None";
   }
@@ -81,10 +83,11 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
 InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
   Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
 
-  // No arguments.
+  // No (mandatory) arguments.
   if (AI == AE)
     return StringSwitch<InstructionClass>(F->getName())
       .Case("objc_autoreleasePoolPush",  IC_AutoreleasepoolPush)
+      .Case("clang.arc.use", IC_IntrinsicUser)
       .Default(IC_CallOrUser);
 
   // One argument.
@@ -142,6 +145,14 @@ InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
                 return StringSwitch<InstructionClass>(F->getName())
                   .Case("objc_moveWeak",              IC_MoveWeak)
                   .Case("objc_copyWeak",              IC_CopyWeak)
+                  // Ignore annotation calls. This is important to stop the
+                  // optimizer from treating annotations as uses which would
+                  // make the state of the pointers they are attempting to
+                  // elucidate to be incorrect.
+                  .Case("llvm.arc.annotation.topdown.bbstart", IC_None)
+                  .Case("llvm.arc.annotation.topdown.bbend", IC_None)
+                  .Case("llvm.arc.annotation.bottomup.bbstart", IC_None)
+                  .Case("llvm.arc.annotation.bottomup.bbend", IC_None)
                   .Default(IC_CallOrUser);
           }
 
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 015fd2e6e6fc..615c51744926 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -1761,7 +1761,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
   if (!DefIsLiveOut)
     return false;
 
-  // Make sure non of the uses are PHI nodes.
+  // Make sure none of the uses are PHI nodes.
   for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
        UI != E; ++UI) {
     Instruction *User = cast<Instruction>(*UI);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 1601a8d64665..5d02c68a7a47 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -53,6 +53,7 @@
 
 #define DEBUG_TYPE "global-merge"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Constants.h"
@@ -64,10 +65,16 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 using namespace llvm;
 
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+                  	cl::desc("Enable global merge pass on constants"),
+                  	cl::init(false));
+
 STATISTIC(NumMerged      , "Number of globals merged");
 namespace {
   class GlobalMerge : public FunctionPass {
@@ -78,6 +85,23 @@ namespace {
     bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
                  Module &M, bool isConst, unsigned AddrSpace) const;
 
+    /// \brief Check if the given variable has been identified as must keep
+    /// \pre setMustKeepGlobalVariables must have been called on the Module that
+    ///      contains GV
+    bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+      return MustKeepGlobalVariables.count(GV);
+    }
+
+    /// Collect every variables marked as "used" or used in a landing pad
+    /// instruction for this Module.
+    void setMustKeepGlobalVariables(Module &M);
+
+    /// Collect every variables marked as "used"
+    void collectUsedGlobalVariables(Module &M);
+
+    /// Keep track of the GlobalVariable that must not be merged away
+    SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
   public:
     static char ID;             // Pass identification, replacement for typeid.
     explicit GlobalMerge(const TargetLowering *tli = 0)
@@ -87,6 +111,7 @@ namespace {
 
     virtual bool doInitialization(Module &M);
     virtual bool runOnFunction(Function &F);
+    virtual bool doFinalization(Module &M);
 
     const char *getPassName() const {
       return "Merge internal globals";
@@ -169,6 +194,43 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
   return true;
 }
 
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+  // Extract global variables from llvm.used array
+  const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+  if (!GV || !GV->hasInitializer()) return;
+
+  // Should be an array of 'i8*'.
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (InitList == 0) return;
+ 
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+    if (const GlobalVariable *G =
+        dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+      MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+  collectUsedGlobalVariables(M);
+
+  for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+       ++IFn) {
+    for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+         IBB != IEndBB; ++IBB) {
+      // Follow the inwoke link to find the landing pad instruction
+      const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+      if (!II) continue;
+
+      const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+      // Look for globals in the clauses of the landing pad instruction
+      for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+           Idx != NumClauses; ++Idx)
+        if (const GlobalVariable *GV =
+            dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+                                     ->stripPointerCasts()))
+          MustKeepGlobalVariables.insert(GV);
+    }
+  }
+}
 
 bool GlobalMerge::doInitialization(Module &M) {
   DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
@@ -176,6 +238,7 @@ bool GlobalMerge::doInitialization(Module &M) {
   const DataLayout *TD = TLI->getDataLayout();
   unsigned MaxOffset = TLI->getMaximalGlobalOffset();
   bool Changed = false;
+  setMustKeepGlobalVariables(M);
 
   // Grab all non-const globals.
   for (Module::global_iterator I = M.global_begin(),
@@ -200,6 +263,10 @@ bool GlobalMerge::doInitialization(Module &M) {
         I->getName().startswith(".llvm."))
       continue;
 
+    // Ignore all "required" globals:
+    if (isMustKeepGlobalVariable(I))
+      continue;
+
     if (TD->getTypeAllocSize(Ty) < MaxOffset) {
       if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
           .isBSSLocal())
@@ -221,11 +288,11 @@ bool GlobalMerge::doInitialization(Module &M) {
     if (I->second.size() > 1)
       Changed |= doMerge(I->second, M, false, I->first);
 
-  // FIXME: This currently breaks the EH processing due to way how the
-  // typeinfo detection works. We might want to detect the TIs and ignore
-  // them in the future.
-  // if (ConstGlobals.size() > 1)
-  //  Changed |= doMerge(ConstGlobals, M, true);
+  if (EnableGlobalMergeOnConst)
+    for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+         I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+      if (I->second.size() > 1)
+        Changed |= doMerge(I->second, M, true, I->first);
 
   return Changed;
 }
@@ -234,6 +301,11 @@ bool GlobalMerge::runOnFunction(Function &F) {
   return false;
 }
 
+bool GlobalMerge::doFinalization(Module &M) {
+  MustKeepGlobalVariables.clear();
+  return false;
+}
+
 Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
   return new GlobalMerge(tli);
 }
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 97fff7e78265..8e76c78f5ac3 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -535,6 +535,45 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
         if (!SE->isLoopInvariant(ExitValue, L))
           continue;
 
+        // Computing the value outside of the loop brings no benefit if :
+        //  - it is definitely used inside the loop in a way which can not be
+        //    optimized away.
+        //  - no use outside of the loop can take advantage of hoisting the
+        //    computation out of the loop
+        if (ExitValue->getSCEVType()>=scMulExpr) {
+          unsigned NumHardInternalUses = 0;
+          unsigned NumSoftExternalUses = 0;
+          unsigned NumUses = 0;
+          for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end();
+               IB!=IE && NumUses<=6 ; ++IB) {
+            Instruction *UseInstr = cast<Instruction>(*IB);
+            unsigned Opc = UseInstr->getOpcode();
+            NumUses++;
+            if (L->contains(UseInstr)) {
+              if (Opc == Instruction::Call || Opc == Instruction::Ret)
+                NumHardInternalUses++;
+            } else {
+              if (Opc == Instruction::PHI) {
+                // Do not count the Phi as a use. LCSSA may have inserted
+                // plenty of trivial ones.
+                NumUses--;
+                for (Value::use_iterator PB=UseInstr->use_begin(),
+                                         PE=UseInstr->use_end();
+                     PB!=PE && NumUses<=6 ; ++PB, ++NumUses) {
+                  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
+                  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
+                    NumSoftExternalUses++;
+                }
+                continue;
+              }
+              if (Opc != Instruction::Call && Opc != Instruction::Ret)
+                NumSoftExternalUses++;
+            }
+          }
+          if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
+            continue;
+        }
+
         Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
 
         DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 9c67e327e26d..0b62050b17a0 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -34,13 +34,9 @@ namespace {
     }
 
     // Possibly eliminate loop L if it is dead.
-    bool runOnLoop(Loop* L, LPPassManager& LPM);
+    bool runOnLoop(Loop *L, LPPassManager &LPM);
 
-    bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
-                    SmallVector<BasicBlock*, 4>& exitBlocks,
-                    bool &Changed, BasicBlock *Preheader);
-
-    virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<DominatorTree>();
       AU.addRequired<LoopInfo>();
       AU.addRequired<ScalarEvolution>();
@@ -53,6 +49,12 @@ namespace {
       AU.addPreservedID(LoopSimplifyID);
       AU.addPreservedID(LCSSAID);
     }
+
+  private:
+    bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks,
+                    SmallVector<BasicBlock*, 4> &exitBlocks,
+                    bool &Changed, BasicBlock *Preheader);
+
   };
 }
 
@@ -67,18 +69,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
 INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
                 "Delete dead loops", false, false)
 
-Pass* llvm::createLoopDeletionPass() {
+Pass *llvm::createLoopDeletionPass() {
   return new LoopDeletion();
 }
 
-/// IsLoopDead - Determined if a loop is dead.  This assumes that we've already
+/// isLoopDead - Determined if a loop is dead.  This assumes that we've already
 /// checked for unique exit and exiting blocks, and that the code is in LCSSA
 /// form.
-bool LoopDeletion::IsLoopDead(Loop* L,
-                              SmallVector<BasicBlock*, 4>& exitingBlocks,
-                              SmallVector<BasicBlock*, 4>& exitBlocks,
+bool LoopDeletion::isLoopDead(Loop *L,
+                              SmallVector<BasicBlock*, 4> &exitingBlocks,
+                              SmallVector<BasicBlock*, 4> &exitBlocks,
                               bool &Changed, BasicBlock *Preheader) {
-  BasicBlock* exitBlock = exitBlocks[0];
+  BasicBlock *exitBlock = exitBlocks[0];
 
   // Make sure that all PHI entries coming from the loop are loop invariant.
   // Because the code is in LCSSA form, any values used outside of the loop
@@ -86,19 +88,19 @@ bool LoopDeletion::IsLoopDead(Loop* L,
   // sufficient to guarantee that no loop-variant values are used outside
   // of the loop.
   BasicBlock::iterator BI = exitBlock->begin();
-  while (PHINode* P = dyn_cast<PHINode>(BI)) {
-    Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+  while (PHINode *P = dyn_cast<PHINode>(BI)) {
+    Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
 
     // Make sure all exiting blocks produce the same incoming value for the exit
     // block.  If there are different incoming values for different exiting
     // blocks, then it is impossible to statically determine which value should
     // be used.
-    for (unsigned i = 1; i < exitingBlocks.size(); ++i) {
+    for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) {
       if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
         return false;
     }
 
-    if (Instruction* I = dyn_cast<Instruction>(incoming))
+    if (Instruction *I = dyn_cast<Instruction>(incoming))
       if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
         return false;
 
@@ -127,10 +129,10 @@ bool LoopDeletion::IsLoopDead(Loop* L,
 /// so could change the halting/non-halting nature of a program.
 /// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
 /// in order to make various safety checks work.
-bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
   // We can only remove the loop if there is a preheader that we can
   // branch from after removing it.
-  BasicBlock* preheader = L->getLoopPreheader();
+  BasicBlock *preheader = L->getLoopPreheader();
   if (!preheader)
     return false;
 
@@ -158,19 +160,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
 
   // Finally, we have to check that the loop really is dead.
   bool Changed = false;
-  if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+  if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
     return Changed;
 
   // Don't remove loops for which we can't solve the trip count.
   // They could be infinite, in which case we'd be changing program behavior.
-  ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
+  ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
   const SCEV *S = SE.getMaxBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(S))
     return Changed;
 
   // Now that we know the removal is safe, remove the loop by changing the
   // branch from the preheader to go to the single exit block.
-  BasicBlock* exitBlock = exitBlocks[0];
+  BasicBlock *exitBlock = exitBlocks[0];
 
   // Because we're deleting a large chunk of code at once, the sequence in which
   // we remove things is very important to avoid invalidation issues.  Don't
@@ -182,14 +184,14 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   SE.forgetLoop(L);
 
   // Connect the preheader directly to the exit block.
-  TerminatorInst* TI = preheader->getTerminator();
+  TerminatorInst *TI = preheader->getTerminator();
   TI->replaceUsesOfWith(L->getHeader(), exitBlock);
 
   // Rewrite phis in the exit block to get their inputs from
   // the preheader instead of the exiting block.
-  BasicBlock* exitingBlock = exitingBlocks[0];
+  BasicBlock *exitingBlock = exitingBlocks[0];
   BasicBlock::iterator BI = exitBlock->begin();
-  while (PHINode* P = dyn_cast<PHINode>(BI)) {
+  while (PHINode *P = dyn_cast<PHINode>(BI)) {
     int j = P->getBasicBlockIndex(exitingBlock);
     assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
     P->setIncomingBlock(j, preheader);
@@ -200,7 +202,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
 
   // Update the dominator tree and remove the instructions and blocks that will
   // be deleted from the reference counting scheme.
-  DominatorTree& DT = getAnalysis<DominatorTree>();
+  DominatorTree &DT = getAnalysis<DominatorTree>();
   SmallVector<DomTreeNode*, 8> ChildNodes;
   for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
        LI != LE; ++LI) {
@@ -230,7 +232,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
 
   // Finally, the blocks from loopinfo.  This has to happen late because
   // otherwise our loop iterators won't work.
-  LoopInfo& loopInfo = getAnalysis<LoopInfo>();
+  LoopInfo &loopInfo = getAnalysis<LoopInfo>();
   SmallPtrSet<BasicBlock*, 8> blocks;
   blocks.insert(L->block_begin(), L->block_end());
   for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4e4cb8646414..73e44d7edf5e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -895,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
   }
   if (Regs.insert(Reg)) {
     RateRegister(Reg, Regs, L, SE, DT);
-    if (isLoser())
+    if (LoserRegs && isLoser())
       LoserRegs->insert(Reg);
   }
 }
@@ -1895,15 +1895,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
   if (ICmpInst::isTrueWhenEqual(Pred)) {
     // Look for n+1, and grab n.
     if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
-      if (isa<ConstantInt>(BO->getOperand(1)) &&
-          cast<ConstantInt>(BO->getOperand(1))->isOne() &&
-          SE.getSCEV(BO->getOperand(0)) == MaxRHS)
-        NewRHS = BO->getOperand(0);
+      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+         if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+           NewRHS = BO->getOperand(0);
     if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
-      if (isa<ConstantInt>(BO->getOperand(1)) &&
-          cast<ConstantInt>(BO->getOperand(1))->isOne() &&
-          SE.getSCEV(BO->getOperand(0)) == MaxRHS)
-        NewRHS = BO->getOperand(0);
+      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+          NewRHS = BO->getOperand(0);
     if (!NewRHS)
       return Cond;
   } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
@@ -2716,6 +2714,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
   // by LSR.
   const IVInc &Head = Chain.Incs[0];
   User::op_iterator IVOpEnd = Head.UserInst->op_end();
+  // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
   User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
                                              IVOpEnd, L, SE);
   Value *IVSrc = 0;
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 0da37469505b..fc3b38ee53be 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -110,6 +110,51 @@ namespace {
       }
     };
   };
+  
+  /// Utility class representing a non-constant Xor-operand. We classify
+  /// non-constant Xor-Operands into two categories:
+  ///  C1) The operand is in the form "X & C", where C is a constant and C != ~0
+  ///  C2)
+  ///    C2.1) The operand is in the form of "X | C", where C is a non-zero
+  ///          constant.
+  ///    C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
+  ///          operand as "E | 0"
+  class XorOpnd {
+  public:
+    XorOpnd(Value *V);
+    const XorOpnd &operator=(const XorOpnd &That);
+
+    bool isInvalid() const { return SymbolicPart == 0; }
+    bool isOrExpr() const { return isOr; }
+    Value *getValue() const { return OrigVal; }
+    Value *getSymbolicPart() const { return SymbolicPart; }
+    unsigned getSymbolicRank() const { return SymbolicRank; }
+    const APInt &getConstPart() const { return ConstPart; }
+
+    void Invalidate() { SymbolicPart = OrigVal = 0; }
+    void setSymbolicRank(unsigned R) { SymbolicRank = R; }
+
+    // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank.
+    // The purpose is twofold:
+    // 1) Cluster together the operands sharing the same symbolic-value.
+    // 2) Operand having smaller symbolic-value-rank is permuted earlier, which 
+    //   could potentially shorten crital path, and expose more loop-invariants.
+    //   Note that values' rank are basically defined in RPO order (FIXME). 
+    //   So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier 
+    //   than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
+    //   "z" in the order of X-Y-Z is better than any other orders.
+    struct PtrSortFunctor {
+      bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) {
+        return LHS->getSymbolicRank() < RHS->getSymbolicRank();
+      }
+    };
+  private:
+    Value *OrigVal;
+    Value *SymbolicPart;
+    APInt ConstPart;
+    unsigned SymbolicRank;
+    bool isOr;
+  };
 }
 
 namespace {
@@ -137,6 +182,11 @@ namespace {
     Value *OptimizeExpression(BinaryOperator *I,
                               SmallVectorImpl<ValueEntry> &Ops);
     Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+    Value *OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+    bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd,
+                        Value *&Res);
+    bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
+                        APInt &ConstOpnd, Value *&Res);
     bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
                                 SmallVectorImpl<Factor> &Factors);
     Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder,
@@ -148,6 +198,42 @@ namespace {
   };
 }
 
+XorOpnd::XorOpnd(Value *V) {
+  assert(!isa<ConstantInt>(V) && "No ConstantInt");
+  OrigVal = V;
+  Instruction *I = dyn_cast<Instruction>(V);
+  SymbolicRank = 0;
+
+  if (I && (I->getOpcode() == Instruction::Or ||
+            I->getOpcode() == Instruction::And)) {
+    Value *V0 = I->getOperand(0);
+    Value *V1 = I->getOperand(1);
+    if (isa<ConstantInt>(V0))
+      std::swap(V0, V1);
+
+    if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) {
+      ConstPart = C->getValue();
+      SymbolicPart = V0;
+      isOr = (I->getOpcode() == Instruction::Or);
+      return;
+    }
+  }
+
+  // view the operand as "V | 0"
+  SymbolicPart = V;
+  ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth());
+  isOr = true;
+}
+
+const XorOpnd &XorOpnd::operator=(const XorOpnd &That) {
+  OrigVal = That.OrigVal;
+  SymbolicPart = That.SymbolicPart;
+  ConstPart = That.ConstPart;
+  SymbolicRank = That.SymbolicRank;
+  isOr = That.isOr;
+  return *this;
+}
+
 char Reassociate::ID = 0;
 INITIALIZE_PASS(Reassociate, "reassociate",
                 "Reassociate expressions", false, false)
@@ -1040,6 +1126,247 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
   return 0;
 }
 
+/// Helper funciton of CombineXorOpnd(). It creates a bitwise-and
+/// instruction with the given two operands, and return the resulting
+/// instruction. There are two special cases: 1) if the constant operand is 0,
+/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
+/// be returned.
+static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, 
+                             const APInt &ConstOpnd) {
+  if (ConstOpnd != 0) {
+    if (!ConstOpnd.isAllOnesValue()) {
+      LLVMContext &Ctx = Opnd->getType()->getContext();
+      Instruction *I;
+      I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd),
+                                    "and.ra", InsertBefore);
+      I->setDebugLoc(InsertBefore->getDebugLoc());
+      return I;
+    }
+    return Opnd;
+  }
+  return 0;
+}
+
+// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
+// into "R ^ C", where C would be 0, and R is a symbolic value.
+//
+// If it was successful, true is returned, and the "R" and "C" is returned
+// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
+// and both "Res" and "ConstOpnd" remain unchanged.
+//  
+bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
+                                 APInt &ConstOpnd, Value *&Res) {
+  // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 
+  //                       = ((x | c1) ^ c1) ^ (c1 ^ c2)
+  //                       = (x & ~c1) ^ (c1 ^ c2)
+  // It is useful only when c1 == c2.
+  if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) {
+    if (!Opnd1->getValue()->hasOneUse())
+      return false;
+
+    const APInt &C1 = Opnd1->getConstPart();
+    if (C1 != ConstOpnd)
+      return false;
+
+    Value *X = Opnd1->getSymbolicPart();
+    Res = createAndInstr(I, X, ~C1);
+    // ConstOpnd was C2, now C1 ^ C2.
+    ConstOpnd ^= C1;
+
+    if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+      RedoInsts.insert(T);
+    return true;
+  }
+  return false;
+}
+
+                           
+// Helper function of OptimizeXor(). It tries to simplify
+// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
+// symbolic value. 
+// 
+// If it was successful, true is returned, and the "R" and "C" is returned 
+// via "Res" and "ConstOpnd", respectively (If the entire expression is
+// evaluated to a constant, the Res is set to NULL); otherwise, false is
+// returned, and both "Res" and "ConstOpnd" remain unchanged.
+bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
+                                 APInt &ConstOpnd, Value *&Res) {
+  Value *X = Opnd1->getSymbolicPart();
+  if (X != Opnd2->getSymbolicPart())
+    return false;
+
+  const APInt &C1 = Opnd1->getConstPart();
+  const APInt &C2 = Opnd2->getConstPart();
+
+  // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
+  int DeadInstNum = 1;
+  if (Opnd1->getValue()->hasOneUse())
+    DeadInstNum++;
+  if (Opnd2->getValue()->hasOneUse())
+    DeadInstNum++;
+
+  // Xor-Rule 2:
+  //  (x | c1) ^ (x & c2)
+  //   = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
+  //   = (x & ~c1) ^ (x & c2) ^ c1               // Xor-Rule 1
+  //   = (x & c3) ^ c1, where c3 = ~c1 ^ c2      // Xor-rule 3
+  //
+  if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
+    if (Opnd2->isOrExpr())
+      std::swap(Opnd1, Opnd2);
+
+    APInt C3((~C1) ^ C2);
+
+    // Do not increase code size!
+    if (C3 != 0 && !C3.isAllOnesValue()) {
+      int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+      if (NewInstNum > DeadInstNum)
+        return false;
+    }
+
+    Res = createAndInstr(I, X, C3);
+    ConstOpnd ^= C1;
+
+  } else if (Opnd1->isOrExpr()) {
+    // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
+    //
+    APInt C3 = C1 ^ C2;
+    
+    // Do not increase code size
+    if (C3 != 0 && !C3.isAllOnesValue()) {
+      int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+      if (NewInstNum > DeadInstNum)
+        return false;
+    }
+
+    Res = createAndInstr(I, X, C3);
+    ConstOpnd ^= C3;
+  } else {
+    // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
+    //
+    APInt C3 = C1 ^ C2;
+    Res = createAndInstr(I, X, C3);
+  }
+
+  // Put the original operands in the Redo list; hope they will be deleted
+  // as dead code.
+  if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+    RedoInsts.insert(T);
+  if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
+    RedoInsts.insert(T);
+
+  return true;
+}
+
+/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
+/// to a single Value, it is returned, otherwise the Ops list is mutated as
+/// necessary.
+Value *Reassociate::OptimizeXor(Instruction *I,
+                                SmallVectorImpl<ValueEntry> &Ops) {
+  if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
+    return V;
+      
+  if (Ops.size() == 1)
+    return 0;
+
+  SmallVector<XorOpnd, 8> Opnds;
+  SmallVector<XorOpnd*, 8> OpndPtrs;
+  Type *Ty = Ops[0].Op->getType();
+  APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
+
+  // Step 1: Convert ValueEntry to XorOpnd
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    Value *V = Ops[i].Op;
+    if (!isa<ConstantInt>(V)) {
+      XorOpnd O(V);
+      O.setSymbolicRank(getRank(O.getSymbolicPart()));
+      Opnds.push_back(O);
+    } else
+      ConstOpnd ^= cast<ConstantInt>(V)->getValue();
+  }
+
+  // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
+  //  It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
+  //  the "OpndPtrs" as well. For the similar reason, do not fuse this loop
+  //  with the previous loop --- the iterator of the "Opnds" may be invalidated
+  //  when new elements are added to the vector.
+  for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
+    OpndPtrs.push_back(&Opnds[i]);
+
+  // Step 2: Sort the Xor-Operands in a way such that the operands containing
+  //  the same symbolic value cluster together. For instance, the input operand
+  //  sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
+  //  ("x | 123", "x & 789", "y & 456").
+  std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
+
+  // Step 3: Combine adjacent operands
+  XorOpnd *PrevOpnd = 0;
+  bool Changed = false;
+  for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
+    XorOpnd *CurrOpnd = OpndPtrs[i];
+    // The combined value
+    Value *CV;
+
+    // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
+    if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
+      Changed = true;
+      if (CV)
+        *CurrOpnd = XorOpnd(CV);
+      else {
+        CurrOpnd->Invalidate();
+        continue;
+      }
+    }
+
+    if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
+      PrevOpnd = CurrOpnd;
+      continue;
+    }
+
+    // step 3.2: When previous and current operands share the same symbolic
+    //  value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" 
+    //    
+    if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
+      // Remove previous operand
+      PrevOpnd->Invalidate();
+      if (CV) {
+        *CurrOpnd = XorOpnd(CV);
+        PrevOpnd = CurrOpnd;
+      } else {
+        CurrOpnd->Invalidate();
+        PrevOpnd = 0;
+      }
+      Changed = true;
+    }
+  }
+
+  // Step 4: Reassemble the Ops
+  if (Changed) {
+    Ops.clear();
+    for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
+      XorOpnd &O = Opnds[i];
+      if (O.isInvalid())
+        continue;
+      ValueEntry VE(getRank(O.getValue()), O.getValue());
+      Ops.push_back(VE);
+    }
+    if (ConstOpnd != 0) {
+      Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd);
+      ValueEntry VE(getRank(C), C);
+      Ops.push_back(VE);
+    }
+    int Sz = Ops.size();
+    if (Sz == 1)
+      return Ops.back().Op;
+    else if (Sz == 0) {
+      assert(ConstOpnd == 0);
+      return ConstantInt::get(Ty->getContext(), ConstOpnd);
+    }
+  }
+
+  return 0;
+}
+
 /// OptimizeAdd - Optimize a series of operands to an 'add' instruction.  This
 /// optimizes based on identities.  If it can be reduced to a single Value, it
 /// is returned, otherwise the Ops list is mutated as necessary.
@@ -1431,11 +1758,15 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
   default: break;
   case Instruction::And:
   case Instruction::Or:
-  case Instruction::Xor:
     if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
       return Result;
     break;
 
+  case Instruction::Xor:
+    if (Value *Result = OptimizeXor(I, Ops))
+      return Result;
+    break;
+
   case Instruction::Add:
     if (Value *Result = OptimizeAdd(I, Ops))
       return Result;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 0e57e5cc12ff..1e18873950d3 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -57,17 +57,58 @@
 using namespace llvm;
 
 STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
-STATISTIC(NumNewAllocas,      "Number of new, smaller allocas introduced");
-STATISTIC(NumPromoted,        "Number of allocas promoted to SSA values");
+STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
+STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions");
+STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found");
+STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses");
+STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
+STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
 STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
-STATISTIC(NumDeleted,         "Number of instructions deleted");
-STATISTIC(NumVectorized,      "Number of vectorized aggregates");
+STATISTIC(NumDeleted, "Number of instructions deleted");
+STATISTIC(NumVectorized, "Number of vectorized aggregates");
 
 /// Hidden option to force the pass to not use DomTree and mem2reg, instead
 /// forming SSA values through the SSAUpdater infrastructure.
 static cl::opt<bool>
 ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
 
+namespace {
+/// \brief A custom IRBuilder inserter which prefixes all names if they are
+/// preserved.
+template <bool preserveNames = true>
+class IRBuilderPrefixedInserter :
+    public IRBuilderDefaultInserter<preserveNames> {
+  std::string Prefix;
+
+public:
+  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
+
+protected:
+  void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+                    BasicBlock::iterator InsertPt) const {
+    IRBuilderDefaultInserter<preserveNames>::InsertHelper(
+        I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
+  }
+};
+
+// Specialization for not preserving the name is trivial.
+template <>
+class IRBuilderPrefixedInserter<false> :
+    public IRBuilderDefaultInserter<false> {
+public:
+  void SetNamePrefix(const Twine &P) {}
+};
+
+/// \brief Provide a typedef for IRBuilder that drops names in release builds.
+#ifndef NDEBUG
+typedef llvm::IRBuilder<true, ConstantFolder,
+                        IRBuilderPrefixedInserter<true> > IRBuilderTy;
+#else
+typedef llvm::IRBuilder<false, ConstantFolder,
+                        IRBuilderPrefixedInserter<false> > IRBuilderTy;
+#endif
+}
+
 namespace {
 /// \brief A common base class for representing a half-open byte range.
 struct ByteRange {
@@ -888,7 +929,7 @@ class AllocaPartitioning::UseBuilder : public PtrUseVisitor<UseBuilder> {
     uint64_t Size = Length ? Length->getLimitedValue()
                            : AllocSize - Offset.getLimitedValue();
 
-    MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
+    const MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
     if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd &&
         Offsets.DestBegin == Offsets.SourceBegin)
       return markAsDead(II); // Skip identity transfers without side-effects.
@@ -1097,6 +1138,10 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
     splitAndMergePartitions();
   }
 
+  // Record how many partitions we end up with.
+  NumAllocaPartitions += Partitions.size();
+  MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca);
+
   // Now build up the user lists for each of these disjoint partitions by
   // re-walking the recursive users of the alloca.
   Uses.resize(Partitions.size());
@@ -1104,6 +1149,14 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
   PtrI = UB.visitPtr(AI);
   assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!");
   assert(!PtrI.isAborted() && "Early aborted the visit of the pointer.");
+
+  unsigned NumUses = 0;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+  for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx)
+    NumUses += Uses[Idx].size();
+#endif
+  NumAllocaPartitionUses += NumUses;
+  MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca);
 }
 
 Type *AllocaPartitioning::getCommonType(iterator I) const {
@@ -1265,12 +1318,12 @@ class AllocaPromoter : public LoadAndStorePromoter {
         // may be zapped by an optimization pass in future.
         if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
           Arg = dyn_cast<Argument>(ZExt->getOperand(0));
-        if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+        else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
           Arg = dyn_cast<Argument>(SExt->getOperand(0));
         if (!Arg)
-          Arg = SI->getOperand(0);
+          Arg = SI->getValueOperand();
       } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-        Arg = LI->getOperand(0);
+        Arg = LI->getPointerOperand();
       } else {
         continue;
       }
@@ -1494,7 +1547,7 @@ class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
     assert(!Loads.empty());
 
     Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
-    IRBuilder<> PHIBuilder(&PN);
+    IRBuilderTy PHIBuilder(&PN);
     PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
                                           PN.getName() + ".sroa.speculated");
 
@@ -1517,7 +1570,7 @@ class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
       TerminatorInst *TI = Pred->getTerminator();
       Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
       Value *InVal = PN.getIncomingValue(Idx);
-      IRBuilder<> PredBuilder(TI);
+      IRBuilderTy PredBuilder(TI);
 
       LoadInst *Load
         = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +
@@ -1598,7 +1651,7 @@ class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
     if (!isSafeSelectToSpeculate(SI, Loads))
       return;
 
-    IRBuilder<> IRB(&SI);
+    IRBuilderTy IRB(&SI);
     Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
     AllocaPartitioning::iterator PIs[2];
     PartitionUse PUs[2];
@@ -1662,9 +1715,8 @@ class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
 ///
 /// This will return the BasePtr if that is valid, or build a new GEP
 /// instruction using the IRBuilder if GEP-ing is needed.
-static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
-                       SmallVectorImpl<Value *> &Indices,
-                       const Twine &Prefix) {
+static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
+                       SmallVectorImpl<Value *> &Indices) {
   if (Indices.empty())
     return BasePtr;
 
@@ -1673,7 +1725,7 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
   if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
     return BasePtr;
 
-  return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
+  return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
 }
 
 /// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1685,12 +1737,11 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
 /// TargetTy. If we can't find one with the same type, we at least try to use
 /// one with the same size. If none of that works, we just produce the GEP as
 /// indicated by Indices to have the correct offset.
-static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD,
                                     Value *BasePtr, Type *Ty, Type *TargetTy,
-                                    SmallVectorImpl<Value *> &Indices,
-                                    const Twine &Prefix) {
+                                    SmallVectorImpl<Value *> &Indices) {
   if (Ty == TargetTy)
-    return buildGEP(IRB, BasePtr, Indices, Prefix);
+    return buildGEP(IRB, BasePtr, Indices);
 
   // See if we can descend into a struct and locate a field with the correct
   // type.
@@ -1717,20 +1768,19 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
   if (ElementTy != TargetTy)
     Indices.erase(Indices.end() - NumLayers, Indices.end());
 
-  return buildGEP(IRB, BasePtr, Indices, Prefix);
+  return buildGEP(IRB, BasePtr, Indices);
 }
 
 /// \brief Recursively compute indices for a natural GEP.
 ///
 /// This is the recursive step for getNaturalGEPWithOffset that walks down the
 /// element types adding appropriate indices for the GEP.
-static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
                                        Value *Ptr, Type *Ty, APInt &Offset,
                                        Type *TargetTy,
-                                       SmallVectorImpl<Value *> &Indices,
-                                       const Twine &Prefix) {
+                                       SmallVectorImpl<Value *> &Indices) {
   if (Offset == 0)
-    return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);
+    return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices);
 
   // We can't recurse through pointer types.
   if (Ty->isPointerTy())
@@ -1750,7 +1800,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
     Offset -= NumSkippedElements * ElementSize;
     Indices.push_back(IRB.getInt(NumSkippedElements));
     return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),
-                                    Offset, TargetTy, Indices, Prefix);
+                                    Offset, TargetTy, Indices);
   }
 
   if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -1763,7 +1813,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
     Offset -= NumSkippedElements * ElementSize;
     Indices.push_back(IRB.getInt(NumSkippedElements));
     return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
-                                    Indices, Prefix);
+                                    Indices);
   }
 
   StructType *STy = dyn_cast<StructType>(Ty);
@@ -1782,7 +1832,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
 
   Indices.push_back(IRB.getInt32(Index));
   return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
-                                  Indices, Prefix);
+                                  Indices);
 }
 
 /// \brief Get a natural GEP from a base pointer to a particular offset and
@@ -1795,10 +1845,9 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
 /// Indices, and setting Ty to the result subtype.
 ///
 /// If no natural GEP can be constructed, this function returns null.
-static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD,
                                       Value *Ptr, APInt Offset, Type *TargetTy,
-                                      SmallVectorImpl<Value *> &Indices,
-                                      const Twine &Prefix) {
+                                      SmallVectorImpl<Value *> &Indices) {
   PointerType *Ty = cast<PointerType>(Ptr->getType());
 
   // Don't consider any GEPs through an i8* as natural unless the TargetTy is
@@ -1817,7 +1866,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
   Offset -= NumSkippedElements * ElementSize;
   Indices.push_back(IRB.getInt(NumSkippedElements));
   return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
-                                  Indices, Prefix);
+                                  Indices);
 }
 
 /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1835,9 +1884,8 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
 /// properties. The algorithm tries to fold as many constant indices into
 /// a single GEP as possible, thus making each GEP more independent of the
 /// surrounding code.
-static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
-                             Value *Ptr, APInt Offset, Type *PointerTy,
-                             const Twine &Prefix) {
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD,
+                             Value *Ptr, APInt Offset, Type *PointerTy) {
   // Even though we don't look through PHI nodes, we could be called on an
   // instruction in an unreachable block, which may be on a cycle.
   SmallPtrSet<Value *, 4> Visited;
@@ -1871,7 +1919,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
     // See if we can perform a natural GEP here.
     Indices.clear();
     if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,
-                                           Indices, Prefix)) {
+                                           Indices)) {
       if (P->getType() == PointerTy) {
         // Zap any offset pointer that we ended up computing in previous rounds.
         if (OffsetPtr && OffsetPtr->use_empty())
@@ -1906,19 +1954,19 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
   if (!OffsetPtr) {
     if (!Int8Ptr) {
       Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
-                                  Prefix + ".raw_cast");
+                                  "raw_cast");
       Int8PtrOffset = Offset;
     }
 
     OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
       IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
-                            Prefix + ".raw_idx");
+                            "raw_idx");
   }
   Ptr = OffsetPtr;
 
   // On the off chance we were targeting i8*, guard the bitcast here.
   if (Ptr->getType() != PointerTy)
-    Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
+    Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
 
   return Ptr;
 }
@@ -1958,7 +2006,7 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
 /// This will try various different casting techniques, such as bitcasts,
 /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
 /// two types for viability with this routine.
-static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                            Type *Ty) {
   assert(canConvertValue(DL, V->getType(), Ty) &&
          "Value not convertable to type");
@@ -2156,7 +2204,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
   return WholeAllocaOp;
 }
 
-static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                              IntegerType *Ty, uint64_t Offset,
                              const Twine &Name) {
   DEBUG(dbgs() << "       start: " << *V << "\n");
@@ -2179,7 +2227,7 @@ static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
   return V;
 }
 
-static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
+static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
                             Value *V, uint64_t Offset, const Twine &Name) {
   IntegerType *IntTy = cast<IntegerType>(Old->getType());
   IntegerType *Ty = cast<IntegerType>(V->getType());
@@ -2210,7 +2258,7 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
   return V;
 }
 
-static Value *extractVector(IRBuilder<> &IRB, Value *V,
+static Value *extractVector(IRBuilderTy &IRB, Value *V,
                             unsigned BeginIndex, unsigned EndIndex,
                             const Twine &Name) {
   VectorType *VecTy = cast<VectorType>(V->getType());
@@ -2238,7 +2286,7 @@ static Value *extractVector(IRBuilder<> &IRB, Value *V,
   return V;
 }
 
-static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V,
+static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
                            unsigned BeginIndex, const Twine &Name) {
   VectorType *VecTy = cast<VectorType>(Old->getType());
   assert(VecTy && "Can only insert a vector into a vector");
@@ -2332,8 +2380,9 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
   Use *OldUse;
   Instruction *OldPtr;
 
-  // The name prefix to use when rewriting instructions for this alloca.
-  std::string NamePrefix;
+  // Utility IR builder, whose name prefix is setup for each visited use, and
+  // the insertion point is set to point to the user.
+  IRBuilderTy IRB;
 
 public:
   AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P,
@@ -2346,7 +2395,8 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
       NewAllocaEndOffset(NewEndOffset),
       NewAllocaTy(NewAI.getAllocatedType()),
       VecTy(), ElementTy(), ElementSize(), IntTy(),
-      BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr() {
+      BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(),
+      IRB(NewAI.getContext(), ConstantFolder()) {
   }
 
   /// \brief Visit the users of the alloca partition and rewrite them.
@@ -2375,7 +2425,13 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
       IsSplit = I->isSplit();
       OldUse = I->getUse();
       OldPtr = cast<Instruction>(OldUse->get());
-      NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
+
+      Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
+      IRB.SetInsertPoint(OldUserI);
+      IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
+      IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+                        ".");
+
       CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
     }
     if (VecTy) {
@@ -2398,14 +2454,10 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
     llvm_unreachable("No rewrite rule for this instruction!");
   }
 
-  Twine getName(const Twine &Suffix) {
-    return NamePrefix + Suffix;
-  }
-
-  Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
+  Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) {
     assert(BeginOffset >= NewAllocaBeginOffset);
     APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
-    return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
+    return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy);
   }
 
   /// \brief Compute suitable alignment to access an offset into the new alloca.
@@ -2455,27 +2507,27 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
       Pass.DeadInsts.insert(I);
   }
 
-  Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) {
+  Value *rewriteVectorizedLoadInst() {
     unsigned BeginIndex = getIndex(BeginOffset);
     unsigned EndIndex = getIndex(EndOffset);
     assert(EndIndex > BeginIndex && "Empty vector!");
 
     Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                     getName(".load"));
-    return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec"));
+                                     "load");
+    return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
   }
 
-  Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+  Value *rewriteIntegerLoad(LoadInst &LI) {
     assert(IntTy && "We cannot insert an integer to the alloca");
     assert(!LI.isVolatile());
     Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                     getName(".load"));
+                                     "load");
     V = convertValue(TD, IRB, V, IntTy);
     assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
     uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
     if (Offset > 0 || EndOffset < NewAllocaEndOffset)
       V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
-                         getName(".extract"));
+                         "extract");
     return V;
   }
 
@@ -2486,24 +2538,23 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
 
     uint64_t Size = EndOffset - BeginOffset;
 
-    IRBuilder<> IRB(&LI);
     Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
                              : LI.getType();
     bool IsPtrAdjusted = false;
     Value *V;
     if (VecTy) {
-      V = rewriteVectorizedLoadInst(IRB);
+      V = rewriteVectorizedLoadInst();
     } else if (IntTy && LI.getType()->isIntegerTy()) {
-      V = rewriteIntegerLoad(IRB, LI);
+      V = rewriteIntegerLoad(LI);
     } else if (BeginOffset == NewAllocaBeginOffset &&
                canConvertValue(TD, NewAllocaTy, LI.getType())) {
       V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                LI.isVolatile(), getName(".load"));
+                                LI.isVolatile(), "load");
     } else {
       Type *LTy = TargetTy->getPointerTo();
       V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
                                 getPartitionTypeAlign(TargetTy),
-                                LI.isVolatile(), getName(".load"));
+                                LI.isVolatile(), "load");
       IsPtrAdjusted = true;
     }
     V = convertValue(TD, IRB, V, TargetTy);
@@ -2526,7 +2577,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
       Value *Placeholder
         = new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
       V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
-                        getName(".insert"));
+                        "insert");
       LI.replaceAllUsesWith(V);
       Placeholder->replaceAllUsesWith(&LI);
       delete Placeholder;
@@ -2540,7 +2591,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
     return !LI.isVolatile() && !IsPtrAdjusted;
   }
 
-  bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+  bool rewriteVectorizedStoreInst(Value *V,
                                   StoreInst &SI, Value *OldOp) {
     unsigned BeginIndex = getIndex(BeginOffset);
     unsigned EndIndex = getIndex(EndOffset);
@@ -2555,8 +2606,8 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
 
     // Mix in the existing elements.
     Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                       getName(".load"));
-    V = insertVector(IRB, Old, V, BeginIndex, getName(".vec"));
+                                       "load");
+    V = insertVector(IRB, Old, V, BeginIndex, "vec");
 
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
     Pass.DeadInsts.insert(&SI);
@@ -2566,17 +2617,17 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
     return true;
   }
 
-  bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
+  bool rewriteIntegerStore(Value *V, StoreInst &SI) {
     assert(IntTy && "We cannot extract an integer from the alloca");
     assert(!SI.isVolatile());
     if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
       Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                         getName(".oldload"));
+                                         "oldload");
       Old = convertValue(TD, IRB, Old, IntTy);
       assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
       uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
       V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
-                        getName(".insert"));
+                        "insert");
     }
     V = convertValue(TD, IRB, V, NewAllocaTy);
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
@@ -2590,7 +2641,6 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
     DEBUG(dbgs() << "    original: " << SI << "\n");
     Value *OldOp = SI.getOperand(1);
     assert(OldOp == OldPtr);
-    IRBuilder<> IRB(&SI);
 
     Value *V = SI.getValueOperand();
 
@@ -2611,16 +2661,17 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
              "Non-byte-multiple bit width");
       IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
       V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
-                         getName(".extract"));
+                         "extract");
     }
 
     if (VecTy)
-      return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+      return rewriteVectorizedStoreInst(V, SI, OldOp);
     if (IntTy && V->getType()->isIntegerTy())
-      return rewriteIntegerStore(IRB, V, SI);
+      return rewriteIntegerStore(V, SI);
 
     StoreInst *NewSI;
     if (BeginOffset == NewAllocaBeginOffset &&
+        EndOffset == NewAllocaEndOffset &&
         canConvertValue(TD, V->getType(), NewAllocaTy)) {
       V = convertValue(TD, IRB, V, NewAllocaTy);
       NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
@@ -2648,7 +2699,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
   ///
   /// \param V The i8 value to splat.
   /// \param Size The number of bytes in the output (assuming i8 is one byte)
-  Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) {
+  Value *getIntegerSplat(Value *V, unsigned Size) {
     assert(Size > 0 && "Expected a positive number of bytes.");
     IntegerType *VTy = cast<IntegerType>(V->getType());
     assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
@@ -2656,26 +2707,25 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
       return V;
 
     Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
-    V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
+    V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"),
                       ConstantExpr::getUDiv(
                         Constant::getAllOnesValue(SplatIntTy),
                         ConstantExpr::getZExt(
                           Constant::getAllOnesValue(V->getType()),
                           SplatIntTy)),
-                      getName(".isplat"));
+                      "isplat");
     return V;
   }
 
   /// \brief Compute a vector splat for a given element value.
-  Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) {
-    V = IRB.CreateVectorSplat(NumElements, V, NamePrefix);
+  Value *getVectorSplat(Value *V, unsigned NumElements) {
+    V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
     DEBUG(dbgs() << "       splat: " << *V << "\n");
     return V;
   }
 
   bool visitMemSetInst(MemSetInst &II) {
     DEBUG(dbgs() << "    original: " << II << "\n");
-    IRBuilder<> IRB(&II);
     assert(II.getRawDest() == OldPtr);
 
     // If the memset has a variable size, it cannot be split, just adjust the
@@ -2732,31 +2782,31 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
       unsigned NumElements = EndIndex - BeginIndex;
       assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
 
-      Value *Splat = getIntegerSplat(IRB, II.getValue(),
-                                     TD.getTypeSizeInBits(ElementTy)/8);
+      Value *Splat =
+          getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8);
       Splat = convertValue(TD, IRB, Splat, ElementTy);
       if (NumElements > 1)
-        Splat = getVectorSplat(IRB, Splat, NumElements);
+        Splat = getVectorSplat(Splat, NumElements);
 
       Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                         getName(".oldload"));
-      V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec"));
+                                         "oldload");
+      V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
     } else if (IntTy) {
       // If this is a memset on an alloca where we can widen stores, insert the
       // set integer.
       assert(!II.isVolatile());
 
       uint64_t Size = EndOffset - BeginOffset;
-      V = getIntegerSplat(IRB, II.getValue(), Size);
+      V = getIntegerSplat(II.getValue(), Size);
 
       if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                     EndOffset != NewAllocaBeginOffset)) {
         Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                           getName(".oldload"));
+                                           "oldload");
         Old = convertValue(TD, IRB, Old, IntTy);
         assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
         uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
-        V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
+        V = insertInteger(TD, IRB, Old, V, Offset, "insert");
       } else {
         assert(V->getType() == IntTy &&
                "Wrong type for an alloca wide integer!");
@@ -2767,10 +2817,9 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
       assert(BeginOffset == NewAllocaBeginOffset);
       assert(EndOffset == NewAllocaEndOffset);
 
-      V = getIntegerSplat(IRB, II.getValue(),
-                          TD.getTypeSizeInBits(ScalarTy)/8);
+      V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8);
       if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
-        V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements());
+        V = getVectorSplat(V, AllocaVecTy->getNumElements());
 
       V = convertValue(TD, IRB, V, AllocaTy);
     }
@@ -2787,7 +2836,6 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
     // them into two categories: split intrinsics and unsplit intrinsics.
 
     DEBUG(dbgs() << "    original: " << II << "\n");
-    IRBuilder<> IRB(&II);
 
     assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
     bool IsDest = II.getRawDest() == OldPtr;
@@ -2871,8 +2919,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
 
       // Compute the other pointer, folding as much as possible to produce
       // a single, simple GEP in most cases.
-      OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
-                                getName("." + OtherPtr->getName()));
+      OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
 
       Value *OurPtr
         = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
@@ -2915,8 +2962,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
       OtherPtrTy = SubIntTy->getPointerTo();
     }
 
-    Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
-                                   getName("." + OtherPtr->getName()));
+    Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
     Value *DstPtr = &NewAI;
     if (!IsDest)
       std::swap(SrcPtr, DstPtr);
@@ -2924,31 +2970,31 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
     Value *Src;
     if (VecTy && !IsWholeAlloca && !IsDest) {
       Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                  getName(".load"));
-      Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec"));
+                                  "load");
+      Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
     } else if (IntTy && !IsWholeAlloca && !IsDest) {
       Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                  getName(".load"));
+                                  "load");
       Src = convertValue(TD, IRB, Src, IntTy);
       assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
       uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
-      Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract"));
+      Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract");
     } else {
       Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
-                                  getName(".copyload"));
+                                  "copyload");
     }
 
     if (VecTy && !IsWholeAlloca && IsDest) {
       Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                         getName(".oldload"));
-      Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec"));
+                                         "oldload");
+      Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
     } else if (IntTy && !IsWholeAlloca && IsDest) {
       Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                         getName(".oldload"));
+                                         "oldload");
       Old = convertValue(TD, IRB, Old, IntTy);
       assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
       uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
-      Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert"));
+      Src = insertInteger(TD, IRB, Old, Src, Offset, "insert");
       Src = convertValue(TD, IRB, Src, NewAllocaTy);
     }
 
@@ -2963,7 +3009,6 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
     assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
            II.getIntrinsicID() == Intrinsic::lifetime_end);
     DEBUG(dbgs() << "    original: " << II << "\n");
-    IRBuilder<> IRB(&II);
     assert(II.getArgOperand(1) == OldPtr);
 
     // Record this instruction for deletion.
@@ -2991,7 +3036,9 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
     // as local as possible to the PHI. To do that, we re-use the location of
     // the old pointer, which necessarily must be in the right position to
     // dominate the PHI.
-    IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr));
+    IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr));
+    PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+                             ".");
 
     Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
     // Replace the operands which were using the old pointer.
@@ -3004,7 +3051,6 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
 
   bool visitSelectInst(SelectInst &SI) {
     DEBUG(dbgs() << "    original: " << SI << "\n");
-    IRBuilder<> IRB(&SI);
 
     // Find the operand we need to rewrite here.
     bool IsTrueVal = SI.getTrueValue() == OldPtr;
@@ -3079,7 +3125,7 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
   class OpSplitter {
   protected:
     /// The builder used to form new instructions.
-    IRBuilder<> IRB;
+    IRBuilderTy IRB;
     /// The indices which to be used with insert- or extractvalue to select the
     /// appropriate value within the aggregate.
     SmallVector<unsigned, 4> Indices;
@@ -3291,12 +3337,13 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
     Type *ElementTy = SeqTy->getElementType();
     uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
     uint64_t NumSkippedElements = Offset / ElementSize;
-    if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy))
+    if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
       if (NumSkippedElements >= ArrTy->getNumElements())
         return 0;
-    if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy))
+    } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
       if (NumSkippedElements >= VecTy->getNumElements())
         return 0;
+    }
     Offset -= NumSkippedElements * ElementSize;
 
     // First check if we need to recurse.
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e590a374eac2..bfde334c36c3 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1462,8 +1462,8 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
 }
 
 // performScalarRepl - This algorithm is a simple worklist driven algorithm,
-// which runs on all of the alloca instructions in the function, removing them
-// if they are only used by getelementptr instructions.
+// which runs on all of the alloca instructions in the entry block, removing
+// them if they are only used by getelementptr instructions.
 //
 bool SROA::performScalarRepl(Function &F) {
   std::vector<AllocaInst*> WorkList;
@@ -1724,17 +1724,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
       continue;
 
     ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
-    if (!IdxVal) {
-      // Non constant GEPs are only a problem on arrays, structs, and pointers
-      // Vectors can be dynamically indexed.
-      // FIXME: Add support for dynamic indexing on arrays.  This should be
-      // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0]
-      // isn't.
-      if (!(*GEPIt)->isVectorTy())
-        return MarkUnsafe(Info, GEPI);
-      NonConstant = true;
-      NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
-    }
+    if (!IdxVal)
+      return MarkUnsafe(Info, GEPI);
   }
 
   // Compute the offset due to this GEP and check if the alloca has a
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 916b37d4a8ba..3514e6c2aadc 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/config.h"            // FIXME: Shouldn't depend on host!
@@ -35,7 +34,6 @@
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 using namespace llvm;
 
-STATISTIC(NumAnnotated, "Number of attributes added to library functions");
 
 //===----------------------------------------------------------------------===//
 // Optimizer Base Class
@@ -91,8 +89,6 @@ namespace {
     TargetLibraryInfo *TLI;
 
     StringMap<LibCallOptimization*> Optimizations;
-
-    bool Modified;  // This is only used by doInitialization.
   public:
     static char ID; // Pass identification
     SimplifyLibCalls() : FunctionPass(ID) {
@@ -104,14 +100,6 @@ namespace {
     void InitOptimizations();
     bool runOnFunction(Function &F);
 
-    void setDoesNotAccessMemory(Function &F);
-    void setOnlyReadsMemory(Function &F);
-    void setDoesNotThrow(Function &F);
-    void setDoesNotCapture(Function &F, unsigned n);
-    void setDoesNotAlias(Function &F, unsigned n);
-    bool doInitialization(Module &M);
-
-    void inferPrototypeAttributes(Function &F);
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<TargetLibraryInfo>();
     }
@@ -208,697 +196,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
   return Changed;
 }
 
-// Utility methods for doInitialization.
-
-void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) {
-  if (!F.doesNotAccessMemory()) {
-    F.setDoesNotAccessMemory();
-    ++NumAnnotated;
-    Modified = true;
-  }
-}
-void SimplifyLibCalls::setOnlyReadsMemory(Function &F) {
-  if (!F.onlyReadsMemory()) {
-    F.setOnlyReadsMemory();
-    ++NumAnnotated;
-    Modified = true;
-  }
-}
-void SimplifyLibCalls::setDoesNotThrow(Function &F) {
-  if (!F.doesNotThrow()) {
-    F.setDoesNotThrow();
-    ++NumAnnotated;
-    Modified = true;
-  }
-}
-void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) {
-  if (!F.doesNotCapture(n)) {
-    F.setDoesNotCapture(n);
-    ++NumAnnotated;
-    Modified = true;
-  }
-}
-void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
-  if (!F.doesNotAlias(n)) {
-    F.setDoesNotAlias(n);
-    ++NumAnnotated;
-    Modified = true;
-  }
-}
-
-
-void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
-  FunctionType *FTy = F.getFunctionType();
-
-  StringRef Name = F.getName();
-  switch (Name[0]) {
-  case 's':
-    if (Name == "strlen") {
-      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setOnlyReadsMemory(F);
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "strchr" ||
-               Name == "strrchr") {
-      if (FTy->getNumParams() != 2 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isIntegerTy())
-        return;
-      setOnlyReadsMemory(F);
-      setDoesNotThrow(F);
-    } else if (Name == "strcpy" ||
-               Name == "stpcpy" ||
-               Name == "strcat" ||
-               Name == "strtol" ||
-               Name == "strtod" ||
-               Name == "strtof" ||
-               Name == "strtoul" ||
-               Name == "strtoll" ||
-               Name == "strtold" ||
-               Name == "strncat" ||
-               Name == "strncpy" ||
-               Name == "stpncpy" ||
-               Name == "strtoull") {
-      if (FTy->getNumParams() < 2 ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "strxfrm") {
-      if (FTy->getNumParams() != 3 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "strcmp" ||
-               Name == "strspn" ||
-               Name == "strncmp" ||
-               Name == "strcspn" ||
-               Name == "strcoll" ||
-               Name == "strcasecmp" ||
-               Name == "strncasecmp") {
-      if (FTy->getNumParams() < 2 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setOnlyReadsMemory(F);
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "strstr" ||
-               Name == "strpbrk") {
-      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
-        return;
-      setOnlyReadsMemory(F);
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "strtok" ||
-               Name == "strtok_r") {
-      if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "scanf" ||
-               Name == "setbuf" ||
-               Name == "setvbuf") {
-      if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "strdup" ||
-               Name == "strndup") {
-      if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
-          !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "stat" ||
-               Name == "sscanf" ||
-               Name == "sprintf" ||
-               Name == "statvfs") {
-      if (FTy->getNumParams() < 2 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "snprintf") {
-      if (FTy->getNumParams() != 3 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(2)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 3);
-    } else if (Name == "setitimer") {
-      if (FTy->getNumParams() != 3 ||
-          !FTy->getParamType(1)->isPointerTy() ||
-          !FTy->getParamType(2)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-      setDoesNotCapture(F, 3);
-    } else if (Name == "system") {
-      if (FTy->getNumParams() != 1 ||
-          !FTy->getParamType(0)->isPointerTy())
-        return;
-      // May throw; "system" is a valid pthread cancellation point.
-      setDoesNotCapture(F, 1);
-    }
-    break;
-  case 'm':
-    if (Name == "malloc") {
-      if (FTy->getNumParams() != 1 ||
-          !FTy->getReturnType()->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-    } else if (Name == "memcmp") {
-      if (FTy->getNumParams() != 3 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setOnlyReadsMemory(F);
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "memchr" ||
-               Name == "memrchr") {
-      if (FTy->getNumParams() != 3)
-        return;
-      setOnlyReadsMemory(F);
-      setDoesNotThrow(F);
-    } else if (Name == "modf" ||
-               Name == "modff" ||
-               Name == "modfl" ||
-               Name == "memcpy" ||
-               Name == "memccpy" ||
-               Name == "memmove") {
-      if (FTy->getNumParams() < 2 ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "memalign") {
-      if (!FTy->getReturnType()->isPointerTy())
-        return;
-      setDoesNotAlias(F, 0);
-    } else if (Name == "mkdir" ||
-               Name == "mktime") {
-      if (FTy->getNumParams() == 0 ||
-          !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    }
-    break;
-  case 'r':
-    if (Name == "realloc") {
-      if (FTy->getNumParams() != 2 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getReturnType()->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "read") {
-      if (FTy->getNumParams() != 3 ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      // May throw; "read" is a valid pthread cancellation point.
-      setDoesNotCapture(F, 2);
-    } else if (Name == "rmdir" ||
-               Name == "rewind" ||
-               Name == "remove" ||
-               Name == "realpath") {
-      if (FTy->getNumParams() < 1 ||
-          !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "rename" ||
-               Name == "readlink") {
-      if (FTy->getNumParams() < 2 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    }
-    break;
-  case 'w':
-    if (Name == "write") {
-      if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
-        return;
-      // May throw; "write" is a valid pthread cancellation point.
-      setDoesNotCapture(F, 2);
-    }
-    break;
-  case 'b':
-    if (Name == "bcopy") {
-      if (FTy->getNumParams() != 3 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "bcmp") {
-      if (FTy->getNumParams() != 3 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setOnlyReadsMemory(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "bzero") {
-      if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    }
-    break;
-  case 'c':
-    if (Name == "calloc") {
-      if (FTy->getNumParams() != 2 ||
-          !FTy->getReturnType()->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-    } else if (Name == "chmod" ||
-               Name == "chown" ||
-               Name == "ctermid" ||
-               Name == "clearerr" ||
-               Name == "closedir") {
-      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    }
-    break;
-  case 'a':
-    if (Name == "atoi" ||
-        Name == "atol" ||
-        Name == "atof" ||
-        Name == "atoll") {
-      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setOnlyReadsMemory(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "access") {
-      if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    }
-    break;
-  case 'f':
-    if (Name == "fopen") {
-      if (FTy->getNumParams() != 2 ||
-          !FTy->getReturnType()->isPointerTy() ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "fdopen") {
-      if (FTy->getNumParams() != 2 ||
-          !FTy->getReturnType()->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "feof" ||
-               Name == "free" ||
-               Name == "fseek" ||
-               Name == "ftell" ||
-               Name == "fgetc" ||
-               Name == "fseeko" ||
-               Name == "ftello" ||
-               Name == "fileno" ||
-               Name == "fflush" ||
-               Name == "fclose" ||
-               Name == "fsetpos" ||
-               Name == "flockfile" ||
-               Name == "funlockfile" ||
-               Name == "ftrylockfile") {
-      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "ferror") {
-      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setOnlyReadsMemory(F);
-    } else if (Name == "fputc" ||
-               Name == "fstat" ||
-               Name == "frexp" ||
-               Name == "frexpf" ||
-               Name == "frexpl" ||
-               Name == "fstatvfs") {
-      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "fgets") {
-      if (FTy->getNumParams() != 3 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(2)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 3);
-    } else if (Name == "fread" ||
-               Name == "fwrite") {
-      if (FTy->getNumParams() != 4 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(3)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 4);
-    } else if (Name == "fputs" ||
-               Name == "fscanf" ||
-               Name == "fprintf" ||
-               Name == "fgetpos") {
-      if (FTy->getNumParams() < 2 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    }
-    break;
-  case 'g':
-    if (Name == "getc" ||
-        Name == "getlogin_r" ||
-        Name == "getc_unlocked") {
-      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "getenv") {
-      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setOnlyReadsMemory(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "gets" ||
-               Name == "getchar") {
-      setDoesNotThrow(F);
-    } else if (Name == "getitimer") {
-      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "getpwnam") {
-      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    }
-    break;
-  case 'u':
-    if (Name == "ungetc") {
-      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "uname" ||
-               Name == "unlink" ||
-               Name == "unsetenv") {
-      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "utime" ||
-               Name == "utimes") {
-      if (FTy->getNumParams() != 2 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    }
-    break;
-  case 'p':
-    if (Name == "putc") {
-      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "puts" ||
-               Name == "printf" ||
-               Name == "perror") {
-      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "pread" ||
-               Name == "pwrite") {
-      if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
-        return;
-      // May throw; these are valid pthread cancellation points.
-      setDoesNotCapture(F, 2);
-    } else if (Name == "putchar") {
-      setDoesNotThrow(F);
-    } else if (Name == "popen") {
-      if (FTy->getNumParams() != 2 ||
-          !FTy->getReturnType()->isPointerTy() ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "pclose") {
-      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    }
-    break;
-  case 'v':
-    if (Name == "vscanf") {
-      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "vsscanf" ||
-               Name == "vfscanf") {
-      if (FTy->getNumParams() != 3 ||
-          !FTy->getParamType(1)->isPointerTy() ||
-          !FTy->getParamType(2)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "valloc") {
-      if (!FTy->getReturnType()->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-    } else if (Name == "vprintf") {
-      if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "vfprintf" ||
-               Name == "vsprintf") {
-      if (FTy->getNumParams() != 3 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "vsnprintf") {
-      if (FTy->getNumParams() != 4 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(2)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 3);
-    }
-    break;
-  case 'o':
-    if (Name == "open") {
-      if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      // May throw; "open" is a valid pthread cancellation point.
-      setDoesNotCapture(F, 1);
-    } else if (Name == "opendir") {
-      if (FTy->getNumParams() != 1 ||
-          !FTy->getReturnType()->isPointerTy() ||
-          !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-      setDoesNotCapture(F, 1);
-    }
-    break;
-  case 't':
-    if (Name == "tmpfile") {
-      if (!FTy->getReturnType()->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-    } else if (Name == "times") {
-      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    }
-    break;
-  case 'h':
-    if (Name == "htonl" ||
-        Name == "htons") {
-      setDoesNotThrow(F);
-      setDoesNotAccessMemory(F);
-    }
-    break;
-  case 'n':
-    if (Name == "ntohl" ||
-        Name == "ntohs") {
-      setDoesNotThrow(F);
-      setDoesNotAccessMemory(F);
-    }
-    break;
-  case 'l':
-    if (Name == "lstat") {
-      if (FTy->getNumParams() != 2 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "lchown") {
-      if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    }
-    break;
-  case 'q':
-    if (Name == "qsort") {
-      if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
-        return;
-      // May throw; places call through function pointer.
-      setDoesNotCapture(F, 4);
-    }
-    break;
-  case '_':
-    if (Name == "__strdup" ||
-        Name == "__strndup") {
-      if (FTy->getNumParams() < 1 ||
-          !FTy->getReturnType()->isPointerTy() ||
-          !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "__strtok_r") {
-      if (FTy->getNumParams() != 3 ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "_IO_getc") {
-      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "_IO_putc") {
-      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-    }
-    break;
-  case 1:
-    if (Name == "\1__isoc99_scanf") {
-      if (FTy->getNumParams() < 1 ||
-          !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "\1stat64" ||
-               Name == "\1lstat64" ||
-               Name == "\1statvfs64" ||
-               Name == "\1__isoc99_sscanf") {
-      if (FTy->getNumParams() < 1 ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "\1fopen64") {
-      if (FTy->getNumParams() != 2 ||
-          !FTy->getReturnType()->isPointerTy() ||
-          !FTy->getParamType(0)->isPointerTy() ||
-          !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-      setDoesNotCapture(F, 1);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "\1fseeko64" ||
-               Name == "\1ftello64") {
-      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 1);
-    } else if (Name == "\1tmpfile64") {
-      if (!FTy->getReturnType()->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotAlias(F, 0);
-    } else if (Name == "\1fstat64" ||
-               Name == "\1fstatvfs64") {
-      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
-        return;
-      setDoesNotThrow(F);
-      setDoesNotCapture(F, 2);
-    } else if (Name == "\1open64") {
-      if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
-        return;
-      // May throw; "open" is a valid pthread cancellation point.
-      setDoesNotCapture(F, 1);
-    }
-    break;
-  }
-}
-
-/// doInitialization - Add attributes to well-known functions.
-///
-bool SimplifyLibCalls::doInitialization(Module &M) {
-  Modified = false;
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
-    Function &F = *I;
-    if (F.isDeclaration() && F.hasName())
-      inferPrototypeAttributes(F);
-  }
-  return Modified;
-}
-
 // TODO:
 //   Additional cases that we need to add to this file:
 //
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 63d7a1d52aa5..be8d39e128a5 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -87,29 +87,26 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
     assert(VMap.count(I) && "No mapping from source argument specified!");
 #endif
 
-  // Clone any attributes.
-  if (NewFunc->arg_size() == OldFunc->arg_size())
-    NewFunc->copyAttributesFrom(OldFunc);
-  else {
-    //Some arguments were deleted with the VMap. Copy arguments one by one
-    for (Function::const_arg_iterator I = OldFunc->arg_begin(), 
-           E = OldFunc->arg_end(); I != E; ++I)
-      if (Argument* Anew = dyn_cast<Argument>(VMap[I])) {
-        AttributeSet attrs = OldFunc->getAttributes()
-          .getParamAttributes(I->getArgNo() + 1);
-        if (attrs.getNumSlots() > 0)
-          Anew->addAttr(attrs);
-      }
-    NewFunc->setAttributes(NewFunc->getAttributes()
-                           .addAttributes(NewFunc->getContext(),
-                                          AttributeSet::ReturnIndex,
-                                          OldFunc->getAttributes()));
-    NewFunc->setAttributes(NewFunc->getAttributes()
-                           .addAttributes(NewFunc->getContext(),
-                                          AttributeSet::FunctionIndex,
-                                          OldFunc->getAttributes()));
+  AttributeSet OldAttrs = OldFunc->getAttributes();
+  // Clone any argument attributes that are present in the VMap.
+  for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+                                    E = OldFunc->arg_end();
+       I != E; ++I)
+    if (Argument *Anew = dyn_cast<Argument>(VMap[I])) {
+      AttributeSet attrs =
+          OldAttrs.getParamAttributes(I->getArgNo() + 1);
+      if (attrs.getNumSlots() > 0)
+        Anew->addAttr(attrs);
+    }
 
-  }
+  NewFunc->setAttributes(NewFunc->getAttributes()
+                         .addAttributes(NewFunc->getContext(),
+                                        AttributeSet::ReturnIndex,
+                                        OldAttrs.getRetAttributes()));
+  NewFunc->setAttributes(NewFunc->getAttributes()
+                         .addAttributes(NewFunc->getContext(),
+                                        AttributeSet::FunctionIndex,
+                                        OldAttrs.getFnAttributes()));
 
   // Loop over all of the basic blocks in the function, cloning them as
   // appropriate.  Note that we save BE this way in order to handle cloning of
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 0d2598a221ab..e9828d60cd55 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -82,7 +82,8 @@ namespace {
     /// a simple branch. When there is more than one predecessor, we need to
     /// split the landing pad block after the landingpad instruction and jump
     /// to there.
-    void forwardResume(ResumeInst *RI);
+    void forwardResume(ResumeInst *RI,
+                       SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);
 
     /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
     /// destination block for the given basic block, using the values for the
@@ -140,8 +141,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
 /// block. When the landing pad block has only one predecessor, this is a simple
 /// branch. When there is more than one predecessor, we need to split the
 /// landing pad block after the landingpad instruction and jump to there.
-void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
+void InvokeInliningInfo::forwardResume(ResumeInst *RI,
+                               SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
   BasicBlock *Dest = getInnerResumeDest();
+  LandingPadInst *OuterLPad = getLandingPadInst();
   BasicBlock *Src = RI->getParent();
 
   BranchInst::Create(Dest, Src);
@@ -152,6 +155,16 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
 
   InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
   RI->eraseFromParent();
+
+  // Append the clauses from the outer landing pad instruction into the inlined
+  // landing pad instructions.
+  for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+         E = InlinedLPads.end(); I != E; ++I) {
+    LandingPadInst *InlinedLPad = *I;
+    for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
+         OuterIdx != OuterNum; ++OuterIdx)
+      InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+  }
 }
 
 /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
@@ -229,19 +242,15 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
 
   // The inlined code is currently at the end of the function, scan from the
   // start of the inlined code to its end, checking for stuff we need to
-  // rewrite.  If the code doesn't have calls or unwinds, we know there is
-  // nothing to rewrite.
-  if (!InlinedCodeInfo.ContainsCalls) {
-    // Now that everything is happy, we have one final detail.  The PHI nodes in
-    // the exception destination block still have entries due to the original
-    // invoke instruction.  Eliminate these entries (which might even delete the
-    // PHI node) now.
-    InvokeDest->removePredecessor(II->getParent());
-    return;
-  }
-
+  // rewrite.
   InvokeInliningInfo Invoke(II);
-  
+
+  // Get all of the inlined landing pad instructions.
+  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+  for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+    if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
+      InlinedLPads.insert(II->getLandingPadInst());
+
   for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
     if (InlinedCodeInfo.ContainsCalls)
       if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
@@ -250,13 +259,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
         continue;
       }
 
+    // Forward any resumes that are remaining here.
     if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
-      Invoke.forwardResume(RI);
+      Invoke.forwardResume(RI, InlinedLPads);
   }
 
   // Now that everything is happy, we have one final detail.  The PHI nodes in
   // the exception destination block still have entries due to the original
-  // invoke instruction.  Eliminate these entries (which might even delete the
+  // invoke instruction. Eliminate these entries (which might even delete the
   // PHI node) now.
   InvokeDest->removePredecessor(II->getParent());
 }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a54ee08b676f..be80d34d960f 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -985,22 +985,17 @@ bool llvm::removeUnreachableBlocks(Function &F) {
     if (Reachable.count(I))
       continue;
 
-    // Remove the block as predecessor of all its reachable successors.
-    // Unreachable successors don't matter as they'll soon be removed, too.
     for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI)
       if (Reachable.count(*SI))
         (*SI)->removePredecessor(I);
+    I->dropAllReferences();
+  }
 
-    // Zap all instructions in this basic block.
-    while (!I->empty()) {
-      Instruction &Inst = I->back();
-      if (!Inst.use_empty())
-        Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
-      I->getInstList().pop_back();
-    }
+  for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;)
+    if (!Reachable.count(I))
+      I = F.getBasicBlockList().erase(I);
+    else
+      ++I;
 
-    --I;
-    llvm::next(I)->eraseFromParent();
-  }
   return true;
 }
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 681bf9c2b7a4..6de602e4c3e4 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3073,7 +3073,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
   Value *Sub = SI->getCondition();
   if (!Offset->isNullValue())
     Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
-  Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+  Value *Cmp;
+  // If NumCases overflowed, then all possible values jump to the successor.
+  if (NumCases->isNullValue() && SI->getNumCases() != 0)
+    Cmp = ConstantInt::getTrue(SI->getContext());
+  else
+    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
   BranchInst *NewBI = Builder.CreateCondBr(
       Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
 
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index c231704414fc..6bea2ddd2014 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1518,6 +1518,12 @@ struct FPrintFOpt : public LibCallOptimization {
     if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
       return 0;
 
+    // Do not do any of the following transformations if the fprintf return
+    // value is used, in general the fprintf return value is not compatible
+    // with fwrite(), fputc() or fputs().
+    if (!CI->use_empty())
+      return 0;
+
     // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
     if (CI->getNumArgOperands() == 2) {
       for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
@@ -1527,11 +1533,10 @@ struct FPrintFOpt : public LibCallOptimization {
       // These optimizations require DataLayout.
       if (!TD) return 0;
 
-      Value *NewCI = EmitFWrite(CI->getArgOperand(1),
-                                ConstantInt::get(TD->getIntPtrType(*Context),
-                                                 FormatStr.size()),
-                                CI->getArgOperand(0), B, TD, TLI);
-      return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
+      return EmitFWrite(CI->getArgOperand(1),
+                        ConstantInt::get(TD->getIntPtrType(*Context),
+                                         FormatStr.size()),
+                        CI->getArgOperand(0), B, TD, TLI);
     }
 
     // The remaining optimizations require the format string to be "%s" or "%c"
@@ -1544,14 +1549,12 @@ struct FPrintFOpt : public LibCallOptimization {
     if (FormatStr[1] == 'c') {
       // fprintf(F, "%c", chr) --> fputc(chr, F)
       if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
-      Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
-                               TD, TLI);
-      return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+      return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
     }
 
     if (FormatStr[1] == 's') {
       // fprintf(F, "%s", str) --> fputs(str, F)
-      if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+      if (!CI->getArgOperand(2)->getType()->isPointerTy())
         return 0;
       return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
     }
diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt
index e64034ab26b4..7ae082f55e08 100644
--- a/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/lib/Transforms/Vectorize/CMakeLists.txt
@@ -2,6 +2,8 @@ add_llvm_library(LLVMVectorize
   BBVectorize.cpp
   Vectorize.cpp
   LoopVectorize.cpp
+  SLPVectorizer.cpp
+  VecUtils.cpp
   )
 
 add_dependencies(LLVMVectorize intrinsics_gen)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 930d9c412f2c..0c88ba7835d7 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -78,6 +78,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Scalar.h"
@@ -87,6 +88,7 @@
 #include <map>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 static cl::opt<unsigned>
 VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
@@ -343,6 +345,7 @@ class LoopVectorizationLegality {
     RK_IntegerOr,   ///< Bitwise or logical OR of numbers.
     RK_IntegerAnd,  ///< Bitwise or logical AND of numbers.
     RK_IntegerXor,  ///< Bitwise or logical XOR of numbers.
+    RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
     RK_FloatAdd,    ///< Sum of floats.
     RK_FloatMult    ///< Product of floats.
   };
@@ -356,13 +359,23 @@ class LoopVectorizationLegality {
     IK_ReversePtrInduction  ///< Reverse ptr indvar. Step = - sizeof(elem).
   };
 
+  // This enum represents the kind of minmax reduction.
+  enum MinMaxReductionKind {
+    MRK_Invalid,
+    MRK_UIntMin,
+    MRK_UIntMax,
+    MRK_SIntMin,
+    MRK_SIntMax
+  };
+
   /// This POD struct holds information about reduction variables.
   struct ReductionDescriptor {
     ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
-      Kind(RK_NoReduction) {}
+      Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
 
-    ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K)
-        : StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
+    ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
+                        MinMaxReductionKind MK)
+        : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {}
 
     // The starting value of the reduction.
     // It does not have to be zero!
@@ -371,6 +384,25 @@ class LoopVectorizationLegality {
     Instruction *LoopExitInstr;
     // The kind of the reduction.
     ReductionKind Kind;
+    // If this a min/max reduction the kind of reduction.
+    MinMaxReductionKind MinMaxKind;
+  };
+
+  /// This POD struct holds information about a potential reduction operation.
+  struct ReductionInstDesc {
+    ReductionInstDesc(bool IsRedux, Instruction *I) :
+      IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
+
+    ReductionInstDesc(Instruction *I, MinMaxReductionKind K) :
+      IsReduction(true), PatternLastInst(I), MinMaxKind(K) {}
+
+    // Is this instruction a reduction candidate.
+    bool IsReduction;
+    // The last instruction in a min/max pattern (select of the select(icmp())
+    // pattern), or the current reduction instruction otherwise.
+    Instruction *PatternLastInst;
+    // If this is a min/max pattern the comparison predicate.
+    MinMaxReductionKind MinMaxKind;
   };
 
   // This POD struct holds information about the memory runtime legality
@@ -461,6 +493,11 @@ class LoopVectorizationLegality {
 
   /// Returns the information that we collected about runtime memory check.
   RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
+
+  /// This function returns the identity element (or neutral element) for
+  /// the operation K.
+  static Constant *getReductionIdentity(ReductionKind K, Type *Tp,
+                                        MinMaxReductionKind MinMaxK);
 private:
   /// Check if a single basic block loop is vectorizable.
   /// At this point we know that this is a loop with a constant trip count
@@ -487,9 +524,17 @@ class LoopVectorizationLegality {
   /// Returns True, if 'Phi' is the kind of reduction variable for type
   /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
   bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
-  /// Returns true if the instruction I can be a reduction variable of type
-  /// 'Kind'.
-  bool isReductionInstr(Instruction *I, ReductionKind Kind);
+  /// Returns a struct describing if the instruction 'I' can be a reduction
+  /// variable of type 'Kind'. If the reduction is a min/max pattern of
+  /// select(icmp()) this function advances the instruction pointer 'I' from the
+  /// compare instruction to the select instruction and stores this pointer in
+  /// 'PatternLastInst' member of the returned struct.
+  ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind,
+                                     ReductionInstDesc &Desc);
+  /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
+  /// pattern corresponding to a min(X, Y) or max(X, Y).
+  static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
+                                                    ReductionInstDesc &Prev);
   /// Returns the induction kind of Phi. This function may return NoInduction
   /// if the PHI is not an induction variable.
   InductionKind isInductionVariable(PHINode *Phi);
@@ -1436,26 +1481,45 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
 
 /// This function returns the identity element (or neutral element) for
 /// the operation K.
-static Constant*
-getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) {
+Constant*
+LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp,
+                                                MinMaxReductionKind MinMaxK) {
   switch (K) {
-  case LoopVectorizationLegality:: RK_IntegerXor:
-  case LoopVectorizationLegality:: RK_IntegerAdd:
-  case LoopVectorizationLegality:: RK_IntegerOr:
+  case RK_IntegerXor:
+  case RK_IntegerAdd:
+  case RK_IntegerOr:
     // Adding, Xoring, Oring zero to a number does not change it.
     return ConstantInt::get(Tp, 0);
-  case LoopVectorizationLegality:: RK_IntegerMult:
+  case RK_IntegerMult:
     // Multiplying a number by 1 does not change it.
     return ConstantInt::get(Tp, 1);
-  case LoopVectorizationLegality:: RK_IntegerAnd:
+  case RK_IntegerAnd:
     // AND-ing a number with an all-1 value does not change it.
     return ConstantInt::get(Tp, -1, true);
-  case LoopVectorizationLegality:: RK_FloatMult:
+  case  RK_FloatMult:
     // Multiplying a number by 1 does not change it.
     return ConstantFP::get(Tp, 1.0L);
-  case LoopVectorizationLegality:: RK_FloatAdd:
+  case  RK_FloatAdd:
     // Adding zero to a number does not change it.
     return ConstantFP::get(Tp, 0.0L);
+  case  RK_IntegerMinMax:
+    switch(MinMaxK) {
+    default: llvm_unreachable("Unknown min/max predicate");
+    case MRK_UIntMin:
+      return ConstantInt::getAllOnesValue(Tp);
+    case MRK_UIntMax:
+      return ConstantInt::get(Tp, 0);
+    case MRK_SIntMin: {
+      unsigned BitWidth = Tp->getPrimitiveSizeInBits();
+      return ConstantInt::get(Tp->getContext(),
+                              APInt::getSignedMaxValue(BitWidth));
+    }
+    case LoopVectorizationLegality::MRK_SIntMax: {
+      unsigned BitWidth = Tp->getPrimitiveSizeInBits();
+      return ConstantInt::get(Tp->getContext(),
+                              APInt::getSignedMinValue(BitWidth));
+    }
+    }
   default:
     llvm_unreachable("Unknown reduction kind");
   }
@@ -1566,7 +1630,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
 }
 
 /// This function translates the reduction kind to an LLVM binary operator.
-static Instruction::BinaryOps
+static unsigned
 getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
   switch (Kind) {
     case LoopVectorizationLegality::RK_IntegerAdd:
@@ -1583,11 +1647,38 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
       return Instruction::FMul;
     case LoopVectorizationLegality::RK_FloatAdd:
       return Instruction::FAdd;
+    case LoopVectorizationLegality::RK_IntegerMinMax:
+      return Instruction::ICmp;
     default:
       llvm_unreachable("Unknown reduction operation");
   }
 }
 
+Value *createMinMaxOp(IRBuilder<> &Builder,
+                      LoopVectorizationLegality::MinMaxReductionKind RK,
+                      Value *Left,
+                      Value *Right) {
+  CmpInst::Predicate P = CmpInst::ICMP_NE;
+  switch (RK) {
+  default:
+    llvm_unreachable("Unknown min/max reduction kind");
+  case LoopVectorizationLegality::MRK_UIntMin:
+    P = CmpInst::ICMP_ULT;
+    break;
+  case LoopVectorizationLegality::MRK_UIntMax:
+    P = CmpInst::ICMP_UGT;
+    break;
+  case LoopVectorizationLegality::MRK_SIntMin:
+    P = CmpInst::ICMP_SLT;
+    break;
+  case LoopVectorizationLegality::MRK_SIntMax:
+    P = CmpInst::ICMP_SGT;
+  }
+  Value *Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+  return Select;
+}
+
 void
 InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
   //===------------------------------------------------===//
@@ -1651,7 +1742,10 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 
     // Find the reduction identity variable. Zero for addition, or, xor,
     // one for multiplication, -1 for And.
-    Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType());
+    Constant *Iden =
+      LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
+                                                      VecTy->getScalarType(),
+                                                      RdxDesc.MinMaxKind);
     Constant *Identity = ConstantVector::getSplat(VF, Iden);
 
     // This vector is the Identity vector where the first element is the
@@ -1699,10 +1793,15 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 
     // Reduce all of the unrolled parts into a single vector.
     Value *ReducedPartRdx = RdxParts[0];
+    unsigned Op = getReductionBinOp(RdxDesc.Kind);
     for (unsigned part = 1; part < UF; ++part) {
-      Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
-      ReducedPartRdx = Builder.CreateBinOp(Op, RdxParts[part], ReducedPartRdx,
-                                           "bin.rdx");
+      if (Op != Instruction::ICmp)
+        ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
+                                             RdxParts[part], ReducedPartRdx,
+                                             "bin.rdx");
+      else
+        ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
+                                        ReducedPartRdx, RdxParts[part]);
     }
 
     // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
@@ -1727,8 +1826,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
                                     ConstantVector::get(ShuffleMask),
                                     "rdx.shuf");
 
-      Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
-      TmpVec = Builder.CreateBinOp(Op, TmpVec, Shuf, "bin.rdx");
+      if (Op != Instruction::ICmp)
+        TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+                                     "bin.rdx");
+      else
+        TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
     }
 
     // The result is in the first element of the vector.
@@ -2315,6 +2417,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
           continue;
         }
+        if (AddReductionVar(Phi, RK_IntegerMinMax)) {
+          DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
         if (AddReductionVar(Phi, RK_FloatMult)) {
           DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n");
           continue;
@@ -2733,7 +2839,18 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
   // used as reduction variables (such as ADD). We may have a single
   // out-of-block user. The cycle must end with the original PHI.
   Instruction *Iter = Phi;
-  while (true) {
+
+  // To recognize min/max patterns formed by a icmp select sequence, we store
+  // the number of instruction we saw from the recognized min/max pattern,
+  // such that we don't stop when we see the phi has two uses (one by the select
+  // and one by the icmp) and to make sure we only see exactly the two
+  // instructions.
+  unsigned NumICmpSelectPatternInst = 0;
+  ReductionInstDesc ReduxDesc(false, 0);
+
+  // Avoid cycles in the chain.
+  SmallPtrSet<Instruction *, 8> VisitedInsts;
+  while (VisitedInsts.insert(Iter)) {
     // If the instruction has no users then this is a broken
     // chain and can't be a reduction variable.
     if (Iter->use_empty())
@@ -2747,9 +2864,6 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
     // Is this a bin op ?
     FoundBinOp |= !isa<PHINode>(Iter);
 
-    // Remember the current instruction.
-    Instruction *OldIter = Iter;
-
     // For each of the *users* of iter.
     for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
          it != e; ++it) {
@@ -2778,25 +2892,33 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
           Iter->hasNUsesOrMore(2))
         continue;
 
-      // We can't have multiple inside users.
-      if (FoundInBlockUser)
+      // We can't have multiple inside users except for a combination of
+      // icmp/select both using the phi.
+      if (FoundInBlockUser && !NumICmpSelectPatternInst)
         return false;
       FoundInBlockUser = true;
 
       // Any reduction instr must be of one of the allowed kinds.
-      if (!isReductionInstr(U, Kind))
+      ReduxDesc = isReductionInstr(U, Kind, ReduxDesc);
+      if (!ReduxDesc.IsReduction)
         return false;
 
+      if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) ||
+                                       isa<SelectInst>(U)))
+          ++NumICmpSelectPatternInst;
+
       // Reductions of instructions such as Div, and Sub is only
       // possible if the LHS is the reduction variable.
-      if (!U->isCommutative() && !isa<PHINode>(U) && U->getOperand(0) != Iter)
+      if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
+          !isa<ICmpInst>(U) && U->getOperand(0) != Iter)
         return false;
 
-      Iter = U;
+      Iter = ReduxDesc.PatternLastInst;
     }
 
-    // If all uses were skipped this can't be a reduction variable.
-    if (Iter == OldIter)
+    // This means we have seen one but not the other instruction of the
+    // pattern or more than just a select and cmp.
+    if (Kind == RK_IntegerMinMax && NumICmpSelectPatternInst != 2)
       return false;
 
     // We found a reduction var if we have reached the original
@@ -2807,47 +2929,94 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
       AllowedExit.insert(ExitInstruction);
 
       // Save the description of this reduction variable.
-      ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
+      ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
+                             ReduxDesc.MinMaxKind);
       Reductions[Phi] = RD;
       // We've ended the cycle. This is a reduction variable if we have an
       // outside user and it has a binary op.
       return FoundBinOp && ExitInstruction;
     }
   }
+
+  return false;
 }
 
-bool
+/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
+/// pattern corresponding to a min(X, Y) or max(X, Y).
+LoopVectorizationLegality::ReductionInstDesc
+LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionInstDesc &Prev) {
+
+  assert((isa<ICmpInst>(I) || isa<SelectInst>(I)) &&
+         "Expect a select instruction");
+  ICmpInst *Cmp = 0;
+  SelectInst *Select = 0;
+
+  // We must handle the select(cmp()) as a single instruction. Advance to the
+  // select.
+  if ((Cmp = dyn_cast<ICmpInst>(I))) {
+    if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
+      return ReductionInstDesc(false, I);
+    return ReductionInstDesc(Select, Prev.MinMaxKind);
+  }
+
+  // Only handle single use cases for now.
+  if (!(Select = dyn_cast<SelectInst>(I)))
+    return ReductionInstDesc(false, I);
+  if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))))
+    return ReductionInstDesc(false, I);
+  if (!Cmp->hasOneUse())
+    return ReductionInstDesc(false, I);
+
+  Value *CmpLeft = Cmp->getOperand(0);
+  Value *CmpRight = Cmp->getOperand(1);
+
+  // Look for a min/max pattern.
+  if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_UIntMin);
+  else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_UIntMax);
+  else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_SIntMax);
+  else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_SIntMin);
+
+  return ReductionInstDesc(false, I);
+}
+
+LoopVectorizationLegality::ReductionInstDesc
 LoopVectorizationLegality::isReductionInstr(Instruction *I,
-                                            ReductionKind Kind) {
+                                            ReductionKind Kind,
+                                            ReductionInstDesc &Prev) {
   bool FP = I->getType()->isFloatingPointTy();
   bool FastMath = (FP && I->isCommutative() && I->isAssociative());
-
   switch (I->getOpcode()) {
   default:
-    return false;
+    return ReductionInstDesc(false, I);
   case Instruction::PHI:
       if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
-        return false;
-    // possibly.
-    return true;
+        return ReductionInstDesc(false, I);
+    return ReductionInstDesc(I, Prev.MinMaxKind);
   case Instruction::Sub:
   case Instruction::Add:
-    return Kind == RK_IntegerAdd;
-  case Instruction::SDiv:
-  case Instruction::UDiv:
+    return ReductionInstDesc(Kind == RK_IntegerAdd, I);
   case Instruction::Mul:
-    return Kind == RK_IntegerMult;
+    return ReductionInstDesc(Kind == RK_IntegerMult, I);
   case Instruction::And:
-    return Kind == RK_IntegerAnd;
+    return ReductionInstDesc(Kind == RK_IntegerAnd, I);
   case Instruction::Or:
-    return Kind == RK_IntegerOr;
+    return ReductionInstDesc(Kind == RK_IntegerOr, I);
   case Instruction::Xor:
-    return Kind == RK_IntegerXor;
+    return ReductionInstDesc(Kind == RK_IntegerXor, I);
   case Instruction::FMul:
-    return Kind == RK_FloatMult && FastMath;
+    return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
   case Instruction::FAdd:
-    return Kind == RK_FloatAdd && FastMath;
-   }
+    return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
+  case Instruction::ICmp:
+  case Instruction::Select:
+    if (Kind != RK_IntegerMinMax)
+      return ReductionInstDesc(false, I);
+    return isMinMaxSelectCmpPattern(I, Prev);
+  }
 }
 
 LoopVectorizationLegality::InductionKind
@@ -3331,8 +3500,19 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   case Instruction::AShr:
   case Instruction::And:
   case Instruction::Or:
-  case Instruction::Xor:
-    return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy);
+  case Instruction::Xor: {
+    // Certain instructions can be cheaper to vectorize if they have a constant
+    // second vector operand. One example of this are shifts on x86.
+    TargetTransformInfo::OperandValueKind Op1VK =
+      TargetTransformInfo::OK_AnyValue;
+    TargetTransformInfo::OperandValueKind Op2VK =
+      TargetTransformInfo::OK_AnyValue;
+
+    if (isa<ConstantInt>(I->getOperand(1)))
+      Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+
+    return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK);
+  }
   case Instruction::Select: {
     SelectInst *SI = cast<SelectInst>(I);
     const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
new file mode 100644
index 000000000000..6d4c36aacdc5
--- /dev/null
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -0,0 +1,330 @@
+//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
+// stores that can be put together into vector-stores. Next, it attempts to
+// construct vectorizable tree using the use-def chains. If a profitable tree
+// was found, the SLP vectorizer performs vectorization on the tree.
+//
+// The pass is inspired by the work described in the paper:
+//  "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
+//
+//===----------------------------------------------------------------------===//
+#define SV_NAME "slp-vectorizer"
+#define DEBUG_TYPE SV_NAME
+
+#include "VecUtils.h"
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+
+static cl::opt<int>
+SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
+                 cl::desc("Only vectorize trees if the gain is above this "
+                          "number. (gain = -cost of vectorization)"));
+namespace {
+
+/// The SLPVectorizer Pass.
+struct SLPVectorizer : public FunctionPass {
+  typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
+
+  /// Pass identification, replacement for typeid
+  static char ID;
+
+  explicit SLPVectorizer() : FunctionPass(ID) {
+    initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
+  }
+
+  ScalarEvolution *SE;
+  DataLayout *DL;
+  TargetTransformInfo *TTI;
+  AliasAnalysis *AA;
+  LoopInfo *LI;
+
+  virtual bool runOnFunction(Function &F) {
+    SE = &getAnalysis<ScalarEvolution>();
+    DL = getAnalysisIfAvailable<DataLayout>();
+    TTI = &getAnalysis<TargetTransformInfo>();
+    AA = &getAnalysis<AliasAnalysis>();
+    LI = &getAnalysis<LoopInfo>();
+
+    StoreRefs.clear();
+    bool Changed = false;
+
+    // Must have DataLayout. We can't require it because some tests run w/o
+    // triple.
+    if (!DL)
+      return false;
+
+    for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
+      BasicBlock *BB = it;
+      bool BBChanged = false;
+
+      // Use the bollom up slp vectorizer to construct chains that start with
+      // he store instructions.
+      BoUpSLP R(BB, SE, DL, TTI, AA);
+
+      // Vectorize trees that end at reductions.
+      BBChanged |= vectorizeReductions(BB, R);
+
+      // Vectorize trees that end at stores.
+      if (collectStores(BB, R)) {
+        DEBUG(dbgs()<<"SLP: Found stores to vectorize.\n");
+        BBChanged |= vectorizeStoreChains(R);
+      }
+
+      // Try to hoist some of the scalarization code to the preheader.
+      if (BBChanged) hoistGatherSequence(LI, BB, R);
+
+      Changed |= BBChanged;
+    }
+
+    if (Changed) {
+      DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n");
+      DEBUG(verifyFunction(F));
+    }
+    return Changed;
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    FunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<ScalarEvolution>();
+    AU.addRequired<AliasAnalysis>();
+    AU.addRequired<TargetTransformInfo>();
+    AU.addRequired<LoopInfo>();
+  }
+
+private:
+
+  /// \brief Collect memory references and sort them according to their base
+  /// object. We sort the stores to their base objects to reduce the cost of the
+  /// quadratic search on the stores. TODO: We can further reduce this cost
+  /// if we flush the chain creation every time we run into a memory barrier.
+  bool collectStores(BasicBlock *BB, BoUpSLP &R);
+
+  /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
+  bool tryToVectorizePair(Value *A, Value *B,  BoUpSLP &R);
+
+  /// \brief Try to vectorize a chain that may start at the operands of \V;
+  bool tryToVectorize(BinaryOperator *V,  BoUpSLP &R);
+
+  /// \brief Vectorize the stores that were collected in StoreRefs.
+  bool vectorizeStoreChains(BoUpSLP &R);
+
+  /// \brief Try to hoist gather sequences outside of the loop in cases where
+  /// all of the sources are loop invariant.
+  void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
+
+  /// \brief Scan the basic block and look for reductions that may start a
+  /// vectorization chain.
+  bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R);
+
+private:
+  StoreListMap StoreRefs;
+};
+
+bool SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
+  StoreRefs.clear();
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+    StoreInst *SI = dyn_cast<StoreInst>(it);
+    if (!SI)
+      continue;
+
+    // Check that the pointer points to scalars.
+    if (SI->getValueOperand()->getType()->isAggregateType())
+      return false;
+
+    // Find the base of the GEP.
+    Value *Ptr = SI->getPointerOperand();
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+      Ptr = GEP->getPointerOperand();
+
+    // Save the store locations.
+    StoreRefs[Ptr].push_back(SI);
+  }
+  return true;
+}
+
+bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B,  BoUpSLP &R) {
+  if (!A || !B) return false;
+  BoUpSLP::ValueList VL;
+  VL.push_back(A);
+  VL.push_back(B);
+  int Cost = R.getTreeCost(VL);
+  int ExtrCost = R.getScalarizationCost(VL);
+  DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
+        " Cost of extract:" << ExtrCost << ".\n");
+  if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
+  DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
+  R.vectorizeArith(VL);
+  return true;
+}
+
+bool SLPVectorizer::tryToVectorize(BinaryOperator *V,  BoUpSLP &R) {
+  if (!V) return false;
+  // Try to vectorize V.
+  if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+    return true;
+
+  BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+  BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
+  // Try to skip B.
+  if (B && B->hasOneUse()) {
+    BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+    BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+    if (tryToVectorizePair(A, B0, R)) {
+      B->moveBefore(V);
+      return true;
+    }
+    if (tryToVectorizePair(A, B1, R)) {
+      B->moveBefore(V);
+      return true;
+    }
+  }
+
+  // Try to slip A.
+  if (A && A->hasOneUse()) {
+    BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+    BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+    if (tryToVectorizePair(A0, B, R)) {
+      A->moveBefore(V);
+      return true;
+    }
+    if (tryToVectorizePair(A1, B, R)) {
+      A->moveBefore(V);
+      return true;
+    }
+  }
+  return 0;
+}
+
+bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) {
+  bool Changed = false;
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+    if (isa<DbgInfoIntrinsic>(it)) continue;
+
+    // Try to vectorize reductions that use PHINodes.
+    if (PHINode *P = dyn_cast<PHINode>(it)) {
+      // Check that the PHI is a reduction PHI.
+      if (P->getNumIncomingValues() != 2) return Changed;
+      Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
+                    (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
+                     0));
+      // Check if this is a Binary Operator.
+      BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
+      if (!BI)
+        continue;
+
+      Value *Inst = BI->getOperand(0);
+      if (Inst == P) Inst = BI->getOperand(1);
+      Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
+      continue;
+    }
+
+    // Try to vectorize trees that start at compare instructions.
+    if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
+      if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
+        Changed |= true;
+        continue;
+      }
+      for (int i = 0; i < 2; ++i)
+        if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
+          Changed |= tryToVectorize(BI, R);
+      continue;
+    }
+  }
+
+  return Changed;
+}
+
+bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
+  bool Changed = false;
+  // Attempt to sort and vectorize each of the store-groups.
+  for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
+       it != e; ++it) {
+    if (it->second.size() < 2)
+      continue;
+
+    DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
+          it->second.size() << ".\n");
+
+    Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
+  }
+  return Changed;
+}
+
+void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
+                                        BoUpSLP &R) {
+  // Check if this block is inside a loop.
+  Loop *L = LI->getLoopFor(BB);
+  if (!L)
+    return;
+
+  // Check if it has a preheader.
+  BasicBlock *PreHeader = L->getLoopPreheader();
+  if (!PreHeader)
+    return;
+
+  // Mark the insertion point for the block.
+  Instruction *Location = PreHeader->getTerminator();
+
+  BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
+  for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();
+       it != e; ++it) {
+    InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
+
+    // The InsertElement sequence can be simplified into a constant.
+    if (!Insert)
+      continue;
+
+    // If the vector or the element that we insert into it are
+    // instructions that are defined in this basic block then we can't
+    // hoist this instruction.
+    Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
+    Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
+    if (CurrVec && L->contains(CurrVec)) continue;
+    if (NewElem && L->contains(NewElem)) continue;
+
+    // We can hoist this instruction. Move it to the pre-header.
+    Insert->moveBefore(Location);
+  }
+}
+
+} // end anonymous namespace
+
+char SLPVectorizer::ID = 0;
+static const char lv_name[] = "SLP Vectorizer";
+INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
+
+namespace llvm {
+  Pass *createSLPVectorizerPass() {
+    return new SLPVectorizer();
+  }
+}
+
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp
new file mode 100644
index 000000000000..a69646336c8a
--- /dev/null
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -0,0 +1,645 @@
+//===- VecUtils.h --- Vectorization Utilities -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "SLP"
+
+#include "VecUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <map>
+
+using namespace llvm;
+
+static const unsigned MinVecRegSize = 128;
+
+static const unsigned RecursionMaxDepth = 6;
+
+namespace llvm {
+
+BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
+                             TargetTransformInfo *Tti, AliasAnalysis *Aa) :
+                             BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa) {
+  numberInstructions();
+}
+
+void BoUpSLP::numberInstructions() {
+  int Loc = 0;
+  InstrIdx.clear();
+  InstrVec.clear();
+  // Number the instructions in the block.
+  for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
+    InstrIdx[it] = Loc++;
+    InstrVec.push_back(it);
+    assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
+  }
+}
+
+Value *BoUpSLP::getPointerOperand(Value *I) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
+  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
+  return 0;
+}
+
+unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
+  if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
+  if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
+  return -1;
+}
+
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+  Value *PtrA = getPointerOperand(A);
+  Value *PtrB = getPointerOperand(B);
+  unsigned ASA = getAddressSpaceOperand(A);
+  unsigned ASB = getAddressSpaceOperand(B);
+
+  // Check that the address spaces match and that the pointers are valid.
+  if (!PtrA || !PtrB || (ASA != ASB)) return false;
+
+  // Check that A and B are of the same type.
+  if (PtrA->getType() != PtrB->getType()) return false;
+
+  // Calculate the distance.
+  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
+  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
+  const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
+  const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
+
+  // Non constant distance.
+  if (!ConstOffSCEV) return false;
+
+  unsigned Offset = ConstOffSCEV->getValue()->getSExtValue();
+  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+  // The Instructions are connsecutive if the size of the first load/store is
+  // the same as the offset.
+  unsigned Sz = DL->getTypeStoreSize(Ty);
+  return ((-Offset) == Sz);
+}
+
+bool BoUpSLP::vectorizeStoreChain(ValueList &Chain, int CostThreshold) {
+  Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
+  unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+  unsigned VF = MinVecRegSize / Sz;
+
+  if (!isPowerOf2_32(Sz) || VF < 2) return false;
+
+  bool Changed = false;
+  // Look for profitable vectorizable trees at all offsets, starting at zero.
+  for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
+    if (i + VF > e) return Changed;
+    DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
+    ValueList Operands(&Chain[i], &Chain[i] + VF);
+
+    int Cost = getTreeCost(Operands);
+    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
+    if (Cost < CostThreshold) {
+      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+      vectorizeTree(Operands, VF);
+      i += VF;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+bool BoUpSLP::vectorizeStores(StoreList &Stores, int costThreshold) {
+  ValueSet Heads, Tails;
+  SmallDenseMap<Value*, Value*> ConsecutiveChain;
+
+  // We may run into multiple chains that merge into a single chain. We mark the
+  // stores that we vectorized so that we don't visit the same store twice.
+  ValueSet VectorizedStores;
+  bool Changed = false;
+
+  // Do a quadratic search on all of the given stores and find
+  // all of the pairs of loads that follow each other.
+  for (unsigned i = 0, e = Stores.size(); i < e; ++i)
+    for (unsigned j = 0; j < e; ++j) {
+      if (i == j) continue;
+      if (isConsecutiveAccess(Stores[i], Stores[j])) {
+        Tails.insert(Stores[j]);
+        Heads.insert(Stores[i]);
+        ConsecutiveChain[Stores[i]] = Stores[j];
+      }
+    }
+
+  // For stores that start but don't end a link in the chain:
+  for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
+    if (Tails.count(*it)) continue;
+
+    // We found a store instr that starts a chain. Now follow the chain and try
+    // to vectorize it.
+    ValueList Operands;
+    Value *I = *it;
+    // Collect the chain into a list.
+    while (Tails.count(I) || Heads.count(I)) {
+      if (VectorizedStores.count(I)) break;
+      Operands.push_back(I);
+      // Move to the next value in the chain.
+      I = ConsecutiveChain[I];
+    }
+
+    bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
+
+    // Mark the vectorized stores so that we don't vectorize them again.
+    if (Vectorized)
+      VectorizedStores.insert(Operands.begin(), Operands.end());
+    Changed |= Vectorized;
+  }
+
+  return Changed;
+}
+
+int BoUpSLP::getScalarizationCost(ValueList &VL) {
+  // Find the type of the operands in VL.
+  Type *ScalarTy = VL[0]->getType();
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+    ScalarTy = SI->getValueOperand()->getType();
+  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+  // Find the cost of inserting/extracting values from the vector.
+  return getScalarizationCost(VecTy);
+}
+
+int BoUpSLP::getScalarizationCost(Type *Ty) {
+  int Cost = 0;
+  for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
+    Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+  return Cost;
+}
+
+AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
+  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
+  return AliasAnalysis::Location();
+}
+
+Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
+  assert(Src->getParent() == Dst->getParent() && "Not the same BB");
+  BasicBlock::iterator I = Src, E = Dst;
+  /// Scan all of the instruction from SRC to DST and check if
+  /// the source may alias.
+  for (++I; I != E; ++I) {
+    // Ignore store instructions that are marked as 'ignore'.
+    if (MemBarrierIgnoreList.count(I)) continue;
+    if (Src->mayWriteToMemory()) /* Write */ {
+      if (!I->mayReadOrWriteMemory()) continue;
+    } else /* Read */ {
+      if (!I->mayWriteToMemory()) continue;
+    }
+    AliasAnalysis::Location A = getLocation(&*I);
+    AliasAnalysis::Location B = getLocation(Src);
+
+    if (!A.Ptr || !B.Ptr || AA->alias(A, B))
+      return I;
+  }
+  return 0;
+}
+
+void BoUpSLP::vectorizeArith(ValueList &Operands) {
+  Value *Vec = vectorizeTree(Operands, Operands.size());
+  BasicBlock::iterator Loc = cast<Instruction>(Vec);
+  IRBuilder<> Builder(++Loc);
+  // After vectorizing the operands we need to generate extractelement
+  // instructions and replace all of the uses of the scalar values with
+  // the values that we extracted from the vectorized tree.
+  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+    Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
+    Operands[i]->replaceAllUsesWith(S);
+  }
+}
+
+int BoUpSLP::getTreeCost(ValueList &VL) {
+  // Get rid of the list of stores that were removed, and from the
+  // lists of instructions with multiple users.
+  MemBarrierIgnoreList.clear();
+  LaneMap.clear();
+  MultiUserVals.clear();
+  MustScalarize.clear();
+
+  // Scan the tree and find which value is used by which lane, and which values
+  // must be scalarized.
+  getTreeUses_rec(VL, 0);
+
+  // Check that instructions with multiple users can be vectorized. Mark unsafe
+  // instructions.
+  for (ValueSet::iterator it = MultiUserVals.begin(),
+       e = MultiUserVals.end(); it != e; ++it) {
+    // Check that all of the users of this instr are within the tree
+    // and that they are all from the same lane.
+    int Lane = -1;
+    for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
+         I != E; ++I) {
+      if (LaneMap.find(*I) == LaneMap.end()) {
+        MustScalarize.insert(*it);
+        DEBUG(dbgs()<<"SLP: Adding " << **it <<
+              " to MustScalarize because of an out of tree usage.\n");
+        break;
+      }
+      if (Lane == -1) Lane = LaneMap[*I];
+      if (Lane != LaneMap[*I]) {
+        MustScalarize.insert(*it);
+        DEBUG(dbgs()<<"Adding " << **it <<
+              " to MustScalarize because multiple lane use it: "
+              << Lane << " and " << LaneMap[*I] << ".\n");
+        break;
+      }
+    }
+  }
+
+  // Now calculate the cost of vectorizing the tree.
+  return getTreeCost_rec(VL, 0);
+}
+
+void BoUpSLP::getTreeUses_rec(ValueList &VL, unsigned Depth) {
+  if (Depth == RecursionMaxDepth) return;
+
+  // Don't handle vectors.
+  if (VL[0]->getType()->isVectorTy()) return;
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+    if (SI->getValueOperand()->getType()->isVectorTy()) return;
+
+  // Check if all of the operands are constants.
+  bool AllConst = true;
+  bool AllSameScalar = true;
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    AllConst &= isa<Constant>(VL[i]);
+    AllSameScalar &= (VL[0] == VL[i]);
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // If one of the instructions is out of this BB, we need to scalarize all.
+    if (I && I->getParent() != BB) return;
+  }
+
+  // If all of the operands are identical or constant we have a simple solution.
+  if (AllConst || AllSameScalar) return;
+
+  // Scalarize unknown structures.
+  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+  if (!VL0) return;
+
+  unsigned Opcode = VL0->getOpcode();
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // If not all of the instructions are identical then we have to scalarize.
+    if (!I || Opcode != I->getOpcode()) return;
+  }
+
+  // Mark instructions with multiple users.
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // Remember to check if all of the users of this instr are vectorized
+    // within our tree.
+    if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
+  }
+
+  for (int i = 0, e = VL.size(); i < e; ++i) {
+    // Check that the instruction is only used within
+    // one lane.
+    if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
+    // Make this instruction as 'seen' and remember the lane.
+    LaneMap[VL[i]] = i;
+  }
+
+  switch (Opcode) {
+    case Instruction::Add:
+    case Instruction::FAdd:
+    case Instruction::Sub:
+    case Instruction::FSub:
+    case Instruction::Mul:
+    case Instruction::FMul:
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::FDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+    case Instruction::FRem:
+    case Instruction::Shl:
+    case Instruction::LShr:
+    case Instruction::AShr:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor: {
+      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+        ValueList Operands;
+        // Prepare the operand vector.
+        for (unsigned j = 0; j < VL.size(); ++j)
+          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+        getTreeUses_rec(Operands, Depth+1);
+      }
+    }
+    case Instruction::Store: {
+      ValueList Operands;
+      for (unsigned j = 0; j < VL.size(); ++j)
+        Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+      getTreeUses_rec(Operands, Depth+1);
+      return;
+    }
+    default:
+    return;
+  }
+}
+
+int BoUpSLP::getTreeCost_rec(ValueList &VL, unsigned Depth) {
+  Type *ScalarTy = VL[0]->getType();
+
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+    ScalarTy = SI->getValueOperand()->getType();
+
+  /// Don't mess with vectors.
+  if (ScalarTy->isVectorTy()) return max_cost;
+  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+
+  if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
+
+  // Check if all of the operands are constants.
+  bool AllConst = true;
+  bool AllSameScalar = true;
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    AllConst &= isa<Constant>(VL[i]);
+    AllSameScalar &= (VL[0] == VL[i]);
+    // Must have a single use.
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // This instruction is outside the basic block or if it is a known hazard.
+    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
+      return getScalarizationCost(VecTy);
+  }
+
+  // Is this a simple vector constant.
+  if (AllConst) return 0;
+
+  // If all of the operands are identical we can broadcast them.
+  if (AllSameScalar)
+    return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
+
+  // Scalarize unknown structures.
+  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+  if (!VL0) return getScalarizationCost(VecTy);
+  assert(VL0->getParent() == BB && "Wrong BB");
+
+  unsigned Opcode = VL0->getOpcode();
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // If not all of the instructions are identical then we have to scalarize.
+    if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
+  }
+
+  // Check if it is safe to sink the loads or the stores.
+  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
+    int MaxIdx = InstrIdx[VL0];
+    for (unsigned i = 1, e = VL.size(); i < e; ++i )
+      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
+
+    Instruction *Last = InstrVec[MaxIdx];
+    for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
+      if (VL[i] == Last) continue;
+      Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
+      if (Barrier) {
+        DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
+              *Last << "\n because of " << *Barrier << "\n");
+        return max_cost;
+      }
+    }
+  }
+
+  switch (Opcode) {
+  case Instruction::Add:
+  case Instruction::FAdd:
+  case Instruction::Sub:
+  case Instruction::FSub:
+  case Instruction::Mul:
+  case Instruction::FMul:
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::FDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+  case Instruction::FRem:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor: {
+    int Cost = 0;
+    // Calculate the cost of all of the operands.
+    for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+      ValueList Operands;
+      // Prepare the operand vector.
+      for (unsigned j = 0; j < VL.size(); ++j)
+        Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+      Cost += getTreeCost_rec(Operands, Depth+1);
+      if (Cost >= max_cost) return max_cost;
+    }
+
+    // Calculate the cost of this instruction.
+    int ScalarCost = VecTy->getNumElements() *
+      TTI->getArithmeticInstrCost(Opcode, ScalarTy);
+
+    int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
+    Cost += (VecCost - ScalarCost);
+    return Cost;
+  }
+  case Instruction::Load: {
+    // If we are scalarize the loads, add the cost of forming the vector.
+    for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
+      if (!isConsecutiveAccess(VL[i], VL[i+1]))
+        return getScalarizationCost(VecTy);
+
+    // Cost of wide load - cost of scalar loads.
+    int ScalarLdCost = VecTy->getNumElements() *
+      TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+    int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+    return VecLdCost - ScalarLdCost;
+  }
+  case Instruction::Store: {
+    // We know that we can merge the stores. Calculate the cost.
+    int ScalarStCost = VecTy->getNumElements() *
+      TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
+    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
+    int StoreCost = VecStCost - ScalarStCost;
+
+    ValueList Operands;
+    for (unsigned j = 0; j < VL.size(); ++j) {
+      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+      MemBarrierIgnoreList.insert(VL[j]);
+    }
+
+    int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
+    return TotalCost;
+  }
+  default:
+    // Unable to vectorize unknown instructions.
+    return getScalarizationCost(VecTy);
+  }
+}
+
+Instruction *BoUpSLP::GetLastInstr(ValueList &VL, unsigned VF) {
+  int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
+  for (unsigned i = 0; i < VF; ++i )
+    MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
+  return InstrVec[MaxIdx + 1];
+}
+
+Value *BoUpSLP::Scalarize(ValueList &VL, VectorType *Ty) {
+  IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
+  Value *Vec = UndefValue::get(Ty);
+  for (unsigned i=0; i < Ty->getNumElements(); ++i) {
+    // Generate the 'InsertElement' instruction.
+    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
+    // Remember that this instruction is used as part of a 'gather' sequence.
+    // The caller of the bottom-up slp vectorizer can try to hoist the sequence
+    // if the users are outside of the basic block.
+    GatherInstructions.push_back(Vec);
+  }
+
+  return Vec;
+}
+
+Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) {
+  Value *V = vectorizeTree_rec(VL, VF);
+  // We moved some instructions around. We have to number them again
+  // before we can do any analysis.
+  numberInstructions();
+  MustScalarize.clear();
+  return V;
+}
+
+Value *BoUpSLP::vectorizeTree_rec(ValueList &VL, int VF) {
+  Type *ScalarTy = VL[0]->getType();
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+    ScalarTy = SI->getValueOperand()->getType();
+  VectorType *VecTy = VectorType::get(ScalarTy, VF);
+
+  // Check if all of the operands are constants or identical.
+  bool AllConst = true;
+  bool AllSameScalar = true;
+  for (unsigned i = 0, e = VF; i < e; ++i) {
+    AllConst &= !!dyn_cast<Constant>(VL[i]);
+    AllSameScalar &= (VL[0] == VL[i]);
+    // The instruction must be in the same BB, and it must be vectorizable.
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
+      return Scalarize(VL, VecTy);
+  }
+
+  // Check that this is a simple vector constant.
+  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
+
+  // Scalarize unknown structures.
+  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+  if (!VL0) return Scalarize(VL, VecTy);
+
+  if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
+
+  unsigned Opcode = VL0->getOpcode();
+  for (unsigned i = 0, e = VF; i < e; ++i) {
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // If not all of the instructions are identical then we have to scalarize.
+    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
+  }
+
+  switch (Opcode) {
+  case Instruction::Add:
+  case Instruction::FAdd:
+  case Instruction::Sub:
+  case Instruction::FSub:
+  case Instruction::Mul:
+  case Instruction::FMul:
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::FDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+  case Instruction::FRem:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor: {
+    ValueList LHSVL, RHSVL;
+    for (int i = 0; i < VF; ++i) {
+      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
+    }
+
+    Value *RHS = vectorizeTree_rec(RHSVL, VF);
+    Value *LHS = vectorizeTree_rec(LHSVL, VF);
+    IRBuilder<> Builder(GetLastInstr(VL, VF));
+    BinaryOperator *BinOp = dyn_cast<BinaryOperator>(VL0);
+    Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
+    VectorizedValues[VL0] = V;
+    return V;
+  }
+  case Instruction::Load: {
+    LoadInst *LI = dyn_cast<LoadInst>(VL0);
+    unsigned Alignment = LI->getAlignment();
+
+    // Check if all of the loads are consecutive.
+    for (unsigned i = 1, e = VF; i < e; ++i)
+      if (!isConsecutiveAccess(VL[i-1], VL[i]))
+        return Scalarize(VL, VecTy);
+
+    IRBuilder<> Builder(GetLastInstr(VL, VF));
+    Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
+                                          VecTy->getPointerTo());
+    LI = Builder.CreateLoad(VecPtr);
+    LI->setAlignment(Alignment);
+    VectorizedValues[VL0] = LI;
+    return LI;
+  }
+  case Instruction::Store: {
+    StoreInst *SI = dyn_cast<StoreInst>(VL0);
+    unsigned Alignment = SI->getAlignment();
+
+    ValueList ValueOp;
+    for (int i = 0; i < VF; ++i)
+      ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
+
+    Value *VecValue = vectorizeTree_rec(ValueOp, VF);
+
+    IRBuilder<> Builder(GetLastInstr(VL, VF));
+    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
+                                          VecTy->getPointerTo());
+    Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
+
+    for (int i = 0; i < VF; ++i)
+      cast<Instruction>(VL[i])->eraseFromParent();
+    return 0;
+  }
+  default:
+    Value *S = Scalarize(VL, VecTy);
+    VectorizedValues[VL0] = S;
+    return S;
+  }
+}
+
+} // end of namespace
diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h
new file mode 100644
index 000000000000..fed5178b8028
--- /dev/null
+++ b/lib/Transforms/Vectorize/VecUtils.h
@@ -0,0 +1,162 @@
+//===- VecUtils.cpp - Vectorization Utilities -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of classes and functions manipulate vectors and chains of
+// vectors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
+#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include <vector>
+
+namespace llvm {
+
+class BasicBlock; class Instruction; class Type;
+class VectorType; class StoreInst; class Value;
+class ScalarEvolution; class DataLayout;
+class TargetTransformInfo; class AliasAnalysis;
+
+/// Bottom Up SLP vectorization utility class.
+struct BoUpSLP  {
+  typedef SmallVector<Value*, 8> ValueList;
+  typedef SmallPtrSet<Value*, 16> ValueSet;
+  typedef SmallVector<StoreInst*, 8> StoreList;
+  static const int max_cost = 1<<20;
+
+  // \brief C'tor.
+  BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,
+         TargetTransformInfo *Tti, AliasAnalysis *Aa);
+
+  /// \brief Take the pointer operand from the Load/Store instruction.
+  /// \returns NULL if this is not a valid Load/Store instruction.
+  static Value *getPointerOperand(Value *I);
+
+  /// \brief Take the address space operand from the Load/Store instruction.
+  /// \returns -1 if this is not a valid Load/Store instruction.
+  static unsigned getAddressSpaceOperand(Value *I);
+
+  /// \returns true if the memory operations A and B are consecutive.
+  bool isConsecutiveAccess(Value *A, Value *B);
+
+  /// \brief Vectorize the tree that starts with the elements in \p VL.
+  /// \returns the vectorized value.
+  Value *vectorizeTree(ValueList &VL, int VF);
+
+  /// \returns the vectorization cost of the subtree that starts at \p VL.
+  /// A negative number means that this is profitable.
+  int getTreeCost(ValueList &VL);
+
+  /// \returns the scalarization cost for this ValueList. Assuming that this
+  /// subtree gets vectorized, we may need to extract the values from the
+  /// roots. This method calculates the cost of extracting the values.
+  int getScalarizationCost(ValueList &VL);
+
+  /// \brief Attempts to order and vectorize a sequence of stores. This
+  /// function does a quadratic scan of the given stores.
+  /// \returns true if the basic block was modified.
+  bool vectorizeStores(StoreList &Stores, int costThreshold);
+
+  /// \brief Vectorize a group of scalars into a vector tree.
+  void vectorizeArith(ValueList &Operands);
+
+  /// \returns the list of new instructions that were added in order to collect
+  /// scalars into vectors. This list can be used to further optimize the gather
+  /// sequences.
+  ValueList &getGatherSeqInstructions() {return GatherInstructions; }
+
+private:
+  /// \brief This method contains the recursive part of getTreeCost.
+  int getTreeCost_rec(ValueList &VL, unsigned Depth);
+
+  /// \brief This recursive method looks for vectorization hazards such as
+  /// values that are used by multiple users and checks that values are used
+  /// by only one vector lane. It updates the variables LaneMap, MultiUserVals.
+  void getTreeUses_rec(ValueList &VL, unsigned Depth);
+
+  /// \brief This method contains the recursive part of vectorizeTree.
+  Value *vectorizeTree_rec(ValueList &VL, int VF);
+
+  /// \brief Number all of the instructions in the block.
+  void numberInstructions();
+
+  ///  \brief Vectorize a sorted sequence of stores.
+  bool vectorizeStoreChain(ValueList &Chain, int CostThreshold);
+
+  /// \returns the scalarization cost for this type. Scalarization in this
+  /// context means the creation of vectors from a group of scalars.
+  int getScalarizationCost(Type *Ty);
+
+  /// \returns the AA location that is being access by the instruction.
+  AliasAnalysis::Location getLocation(Instruction *I);
+
+  /// \brief Checks if it is possible to sink an instruction from
+  /// \p Src to \p Dst.
+  /// \returns the pointer to the barrier instruction if we can't sink.
+  Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
+
+  /// \returns the instruction that appears last in the BB from \p VL.
+  /// Only consider the first \p VF elements.
+  Instruction *GetLastInstr(ValueList &VL, unsigned VF);
+
+  /// \returns a vector from a collection of scalars in \p VL.
+  Value *Scalarize(ValueList &VL, VectorType *Ty);
+  
+private:
+  /// Maps instructions to numbers and back.
+  SmallDenseMap<Value*, int> InstrIdx;
+  /// Maps integers to Instructions.
+  std::vector<Instruction*> InstrVec;
+
+  // -- containers that are used during getTreeCost -- //
+
+  /// Contains values that must be scalarized because they are used
+  /// by multiple lanes, or by users outside the tree.
+  /// NOTICE: The vectorization methods also use this set.
+  ValueSet MustScalarize;
+
+  /// Contains a list of values that are used outside the current tree. This
+  /// set must be reset between runs.
+  ValueSet MultiUserVals;
+  /// Maps values in the tree to the vector lanes that uses them. This map must
+  /// be reset between runs of getCost.
+  std::map<Value*, int> LaneMap;
+  /// A list of instructions to ignore while sinking
+  /// memory instructions. This map must be reset between runs of getCost.
+  SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
+
+  // -- Containers that are used during vectorizeTree -- //
+
+  /// Maps between the first scalar to the vector. This map must be reset
+  ///between runs.
+  DenseMap<Value*, Value*> VectorizedValues;
+
+  // -- Containers that are used after vectorization by the caller -- //
+
+  /// A list of instructions that are used when gathering scalars into vectors.
+  /// In many cases these instructions can be hoisted outside of the BB.
+  /// Iterating over this list is faster than calling LICM.
+  ValueList GatherInstructions;
+
+  // Analysis and block reference.
+  BasicBlock *BB;
+  ScalarEvolution *SE;
+  DataLayout *DL;
+  TargetTransformInfo *TTI;
+  AliasAnalysis *AA;
+};
+
+} // end of namespace
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 19eefd2f87e0..a927fe145171 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,4 +1,4 @@
-   //===-- Vectorize.cpp -----------------------------------------------------===//
+//===-- Vectorize.cpp -----------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -28,6 +28,7 @@ using namespace llvm;
 void llvm::initializeVectorization(PassRegistry &Registry) {
   initializeBBVectorizePass(Registry);
   initializeLoopVectorizePass(Registry);
+  initializeSLPVectorizerPass(Registry);
 }
 
 void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
@@ -41,3 +42,7 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
 void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createLoopVectorizePass());
 }
+
+void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createSLPVectorizerPass());
+}
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
index 2b58f6ce38c5..ba9d84cf3e23 100644
--- a/test/Analysis/CostModel/ARM/cast.ll
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
+; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a8 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios6.0.0"
 
@@ -152,15 +152,29 @@ define i32 @casts() {
   ; CHECK: cost of 10 {{.*}} uitofp
   %r69 = uitofp i64 undef to double
 
-  ; Vector cast cost of instructions lowering the cast to the stack.
-  ; CHECK: cost of 24 {{.*}} sext
+  ; CHECK: cost of 3 {{.*}} sext
   %r70 = sext <8 x i8> undef to <8 x i32>
-  ; CHECK: cost of 48 {{.*}} sext
+  ; CHECK: cost of 6 {{.*}} sext
   %r71 = sext <16 x i8> undef to <16 x i32>
-  ; CHECK: cost of 22 {{.*}} zext
+  ; CHECK: cost of 3 {{.*}} zext
   %r72 = zext <8 x i8> undef to <8 x i32>
-  ; CHECK: cost of 44 {{.*}} zext
+  ; CHECK: cost of 6 {{.*}} zext
   %r73 = zext <16 x i8> undef to <16 x i32>
+
+  ; CHECK: cost of 7 {{.*}} sext
+  %rext_0 = sext <8 x i8> undef to <8 x i64>
+  ; CHECK: cost of 7 {{.*}} zext
+  %rext_1 = zext <8 x i8> undef to <8 x i64>
+  ; CHECK: cost of 6 {{.*}} sext
+  %rext_2 = sext <8 x i16> undef to <8 x i64>
+  ; CHECK: cost of 6 {{.*}} zext
+  %rext_3 = zext <8 x i16> undef to <8 x i64>
+  ; CHECK: cost of 3 {{.*}} sext
+  %rext_4 = sext <4 x i16> undef to <4 x i64>
+  ; CHECK: cost of 3 {{.*}} zext
+  %rext_5 = zext <4 x i16> undef to <4 x i64>
+
+  ; Vector cast cost of instructions lowering the cast to the stack.
   ; CHECK: cost of 19 {{.*}} trunc
   %r74 = trunc <8 x i32> undef to <8 x i8>
   ; CHECK: cost of 38 {{.*}} trunc
@@ -190,6 +204,343 @@ define i32 @casts() {
   ; CHECK: cost of 16 {{.*}} fpext <16 x float
   %r89 = fpext <16 x float> undef to <16 x double>
 
+  ;; Floating point to integer vector casts.
+  ; CHECK: cost of 1 {{.*}} fptoui
+  %r90 = fptoui <2 x float> undef to <2 x i1>
+  ; CHECK: cost of 1 {{.*}} fptosi
+  %r91 = fptosi <2 x float> undef to <2 x i1>
+  ; CHECK: cost of 1 {{.*}} fptoui
+  %r92 = fptoui <2 x float> undef to <2 x i8>
+  ; CHECK: cost of 1 {{.*}} fptosi
+  %r93 = fptosi <2 x float> undef to <2 x i8>
+  ; CHECK: cost of 1 {{.*}} fptoui
+  %r94 = fptoui <2 x float> undef to <2 x i16>
+  ; CHECK: cost of 1 {{.*}} fptosi
+  %r95 = fptosi <2 x float> undef to <2 x i16>
+  ; CHECK: cost of 1 {{.*}} fptoui
+  %r96 = fptoui <2 x float> undef to <2 x i32>
+  ; CHECK: cost of 1 {{.*}} fptosi
+  %r97 = fptosi <2 x float> undef to <2 x i32>
+  ; CHECK: cost of 24 {{.*}} fptoui
+  %r98 = fptoui <2 x float> undef to <2 x i64>
+  ; CHECK: cost of 24 {{.*}} fptosi
+  %r99 = fptosi <2 x float> undef to <2 x i64>
+
+  ; CHECK: cost of 8 {{.*}} fptoui
+  %r100 = fptoui <2 x double> undef to <2 x i1>
+  ; CHECK: cost of 8 {{.*}} fptosi
+  %r101 = fptosi <2 x double> undef to <2 x i1>
+  ; CHECK: cost of 8 {{.*}} fptoui
+  %r102 = fptoui <2 x double> undef to <2 x i8>
+  ; CHECK: cost of 8 {{.*}} fptosi
+  %r103 = fptosi <2 x double> undef to <2 x i8>
+  ; CHECK: cost of 8 {{.*}} fptoui
+  %r104 = fptoui <2 x double> undef to <2 x i16>
+  ; CHECK: cost of 8 {{.*}} fptosi
+  %r105 = fptosi <2 x double> undef to <2 x i16>
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r106 = fptoui <2 x double> undef to <2 x i32>
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r107 = fptosi <2 x double> undef to <2 x i32>
+  ; CHECK: cost of 24 {{.*}} fptoui
+  %r108 = fptoui <2 x double> undef to <2 x i64>
+  ; CHECK: cost of 24 {{.*}} fptosi
+  %r109 = fptosi <2 x double> undef to <2 x i64>
+
+  ; CHECK: cost of 16 {{.*}} fptoui
+  %r110 = fptoui <4 x float> undef to <4 x i1>
+  ; CHECK: cost of 16 {{.*}} fptosi
+  %r111 = fptosi <4 x float> undef to <4 x i1>
+  ; CHECK: cost of 3 {{.*}} fptoui
+  %r112 = fptoui <4 x float> undef to <4 x i8>
+  ; CHECK: cost of 3 {{.*}} fptosi
+  %r113 = fptosi <4 x float> undef to <4 x i8>
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r114 = fptoui <4 x float> undef to <4 x i16>
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r115 = fptosi <4 x float> undef to <4 x i16>
+  ; CHECK: cost of 1 {{.*}} fptoui
+  %r116 = fptoui <4 x float> undef to <4 x i32>
+  ; CHECK: cost of 1 {{.*}} fptosi
+  %r117 = fptosi <4 x float> undef to <4 x i32>
+  ; CHECK: cost of 48 {{.*}} fptoui
+  %r118 = fptoui <4 x float> undef to <4 x i64>
+  ; CHECK: cost of 48 {{.*}} fptosi
+  %r119 = fptosi <4 x float> undef to <4 x i64>
+
+  ; CHECK: cost of 16 {{.*}} fptoui
+  %r120 = fptoui <4 x double> undef to <4 x i1>
+  ; CHECK: cost of 16 {{.*}} fptosi
+  %r121 = fptosi <4 x double> undef to <4 x i1>
+  ; CHECK: cost of 16 {{.*}} fptoui
+  %r122 = fptoui <4 x double> undef to <4 x i8>
+  ; CHECK: cost of 16 {{.*}} fptosi
+  %r123 = fptosi <4 x double> undef to <4 x i8>
+  ; CHECK: cost of 16 {{.*}} fptoui
+  %r124 = fptoui <4 x double> undef to <4 x i16>
+  ; CHECK: cost of 16 {{.*}} fptosi
+  %r125 = fptosi <4 x double> undef to <4 x i16>
+  ; CHECK: cost of 16 {{.*}} fptoui
+  %r126 = fptoui <4 x double> undef to <4 x i32>
+  ; CHECK: cost of 16 {{.*}} fptosi
+  %r127 = fptosi <4 x double> undef to <4 x i32>
+  ; CHECK: cost of 48 {{.*}} fptoui
+  %r128 = fptoui <4 x double> undef to <4 x i64>
+  ; CHECK: cost of 48 {{.*}} fptosi
+  %r129 = fptosi <4 x double> undef to <4 x i64>
+
+  ; CHECK: cost of 32 {{.*}} fptoui
+  %r130 = fptoui <8 x float> undef to <8 x i1>
+  ; CHECK: cost of 32 {{.*}} fptosi
+  %r131 = fptosi <8 x float> undef to <8 x i1>
+  ; CHECK: cost of 32 {{.*}} fptoui
+  %r132 = fptoui <8 x float> undef to <8 x i8>
+  ; CHECK: cost of 32 {{.*}} fptosi
+  %r133 = fptosi <8 x float> undef to <8 x i8>
+  ; CHECK: cost of 4 {{.*}} fptoui
+  %r134 = fptoui <8 x float> undef to <8 x i16>
+  ; CHECK: cost of 4 {{.*}} fptosi
+  %r135 = fptosi <8 x float> undef to <8 x i16>
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r136 = fptoui <8 x float> undef to <8 x i32>
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r137 = fptosi <8 x float> undef to <8 x i32>
+  ; CHECK: cost of 96 {{.*}} fptoui
+  %r138 = fptoui <8 x float> undef to <8 x i64>
+  ; CHECK: cost of 96 {{.*}} fptosi
+  %r139 = fptosi <8 x float> undef to <8 x i64>
+
+  ; CHECK: cost of 32 {{.*}} fptoui
+  %r140 = fptoui <8 x double> undef to <8 x i1>
+  ; CHECK: cost of 32 {{.*}} fptosi
+  %r141 = fptosi <8 x double> undef to <8 x i1>
+  ; CHECK: cost of 32 {{.*}} fptoui
+  %r142 = fptoui <8 x double> undef to <8 x i8>
+  ; CHECK: cost of 32 {{.*}} fptosi
+  %r143 = fptosi <8 x double> undef to <8 x i8>
+  ; CHECK: cost of 32 {{.*}} fptoui
+  %r144 = fptoui <8 x double> undef to <8 x i16>
+  ; CHECK: cost of 32 {{.*}} fptosi
+  %r145 = fptosi <8 x double> undef to <8 x i16>
+  ; CHECK: cost of 32 {{.*}} fptoui
+  %r146 = fptoui <8 x double> undef to <8 x i32>
+  ; CHECK: cost of 32 {{.*}} fptosi
+  %r147 = fptosi <8 x double> undef to <8 x i32>
+  ; CHECK: cost of 96 {{.*}} fptoui
+  %r148 = fptoui <8 x double> undef to <8 x i64>
+  ; CHECK: cost of 96 {{.*}} fptosi
+  %r149 = fptosi <8 x double> undef to <8 x i64>
+
+  ; CHECK: cost of 64 {{.*}} fptoui
+  %r150 = fptoui <16 x float> undef to <16 x i1>
+  ; CHECK: cost of 64 {{.*}} fptosi
+  %r151 = fptosi <16 x float> undef to <16 x i1>
+  ; CHECK: cost of 64 {{.*}} fptoui
+  %r152 = fptoui <16 x float> undef to <16 x i8>
+  ; CHECK: cost of 64 {{.*}} fptosi
+  %r153 = fptosi <16 x float> undef to <16 x i8>
+  ; CHECK: cost of 8 {{.*}} fptoui
+  %r154 = fptoui <16 x float> undef to <16 x i16>
+  ; CHECK: cost of 8 {{.*}} fptosi
+  %r155 = fptosi <16 x float> undef to <16 x i16>
+  ; CHECK: cost of 4 {{.*}} fptoui
+  %r156 = fptoui <16 x float> undef to <16 x i32>
+  ; CHECK: cost of 4 {{.*}} fptosi
+  %r157 = fptosi <16 x float> undef to <16 x i32>
+  ; CHECK: cost of 192 {{.*}} fptoui
+  %r158 = fptoui <16 x float> undef to <16 x i64>
+  ; CHECK: cost of 192 {{.*}} fptosi
+  %r159 = fptosi <16 x float> undef to <16 x i64>
+
+  ; CHECK: cost of 64 {{.*}} fptoui
+  %r160 = fptoui <16 x double> undef to <16 x i1>
+  ; CHECK: cost of 64 {{.*}} fptosi
+  %r161 = fptosi <16 x double> undef to <16 x i1>
+  ; CHECK: cost of 64 {{.*}} fptoui
+  %r162 = fptoui <16 x double> undef to <16 x i8>
+  ; CHECK: cost of 64 {{.*}} fptosi
+  %r163 = fptosi <16 x double> undef to <16 x i8>
+  ; CHECK: cost of 64 {{.*}} fptoui
+  %r164 = fptoui <16 x double> undef to <16 x i16>
+  ; CHECK: cost of 64 {{.*}} fptosi
+  %r165 = fptosi <16 x double> undef to <16 x i16>
+  ; CHECK: cost of 64 {{.*}} fptoui
+  %r166 = fptoui <16 x double> undef to <16 x i32>
+  ; CHECK: cost of 64 {{.*}} fptosi
+  %r167 = fptosi <16 x double> undef to <16 x i32>
+  ; CHECK: cost of 192 {{.*}} fptoui
+  %r168 = fptoui <16 x double> undef to <16 x i64>
+  ; CHECK: cost of 192 {{.*}} fptosi
+  %r169 = fptosi <16 x double> undef to <16 x i64>
+
+  ; CHECK: cost of 8 {{.*}} uitofp
+  %r170 = uitofp <2 x i1> undef to <2 x float>
+  ; CHECK: cost of 8 {{.*}} sitofp
+  %r171 = sitofp <2 x i1> undef to <2 x float>
+  ; CHECK: cost of 3 {{.*}} uitofp
+  %r172 = uitofp <2 x i8> undef to <2 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %r173 = sitofp <2 x i8> undef to <2 x float>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r174 = uitofp <2 x i16> undef to <2 x float>
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r175 = sitofp <2 x i16> undef to <2 x float>
+  ; CHECK: cost of 1 {{.*}} uitofp
+  %r176 = uitofp <2 x i32> undef to <2 x float>
+  ; CHECK: cost of 1 {{.*}} sitofp
+  %r177 = sitofp <2 x i32> undef to <2 x float>
+  ; CHECK: cost of 24 {{.*}} uitofp
+  %r178 = uitofp <2 x i64> undef to <2 x float>
+  ; CHECK: cost of 24 {{.*}} sitofp
+  %r179 = sitofp <2 x i64> undef to <2 x float>
+
+  ; CHECK: cost of 8 {{.*}} uitofp
+  %r180 = uitofp <2 x i1> undef to <2 x double>
+  ; CHECK: cost of 8 {{.*}} sitofp
+  %r181 = sitofp <2 x i1> undef to <2 x double>
+  ; CHECK: cost of 4 {{.*}} uitofp
+  %r182 = uitofp <2 x i8> undef to <2 x double>
+  ; CHECK: cost of 4 {{.*}} sitofp
+  %r183 = sitofp <2 x i8> undef to <2 x double>
+  ; CHECK: cost of 3 {{.*}} uitofp
+  %r184 = uitofp <2 x i16> undef to <2 x double>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %r185 = sitofp <2 x i16> undef to <2 x double>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r186 = uitofp <2 x i32> undef to <2 x double>
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r187 = sitofp <2 x i32> undef to <2 x double>
+  ; CHECK: cost of 24 {{.*}} uitofp
+  %r188 = uitofp <2 x i64> undef to <2 x double>
+  ; CHECK: cost of 24 {{.*}} sitofp
+  %r189 = sitofp <2 x i64> undef to <2 x double>
+
+  ; CHECK: cost of 3 {{.*}} uitofp
+  %r190 = uitofp <4 x i1> undef to <4 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %r191 = sitofp <4 x i1> undef to <4 x float>
+  ; CHECK: cost of 3 {{.*}} uitofp
+  %r192 = uitofp <4 x i8> undef to <4 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %r193 = sitofp <4 x i8> undef to <4 x float>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r194 = uitofp <4 x i16> undef to <4 x float>
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r195 = sitofp <4 x i16> undef to <4 x float>
+  ; CHECK: cost of 1 {{.*}} uitofp
+  %r196 = uitofp <4 x i32> undef to <4 x float>
+  ; CHECK: cost of 1 {{.*}} sitofp
+  %r197 = sitofp <4 x i32> undef to <4 x float>
+  ; CHECK: cost of 48 {{.*}} uitofp
+  %r198 = uitofp <4 x i64> undef to <4 x float>
+  ; CHECK: cost of 48 {{.*}} sitofp
+  %r199 = sitofp <4 x i64> undef to <4 x float>
+
+  ; CHECK: cost of 16 {{.*}} uitofp
+  %r200 = uitofp <4 x i1> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} sitofp
+  %r201 = sitofp <4 x i1> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} uitofp
+  %r202 = uitofp <4 x i8> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} sitofp
+  %r203 = sitofp <4 x i8> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} uitofp
+  %r204 = uitofp <4 x i16> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} sitofp
+  %r205 = sitofp <4 x i16> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} uitofp
+  %r206 = uitofp <4 x i32> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} sitofp
+  %r207 = sitofp <4 x i32> undef to <4 x double>
+  ; CHECK: cost of 48 {{.*}} uitofp
+  %r208 = uitofp <4 x i64> undef to <4 x double>
+  ; CHECK: cost of 48 {{.*}} sitofp
+  %r209 = sitofp <4 x i64> undef to <4 x double>
+
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r210 = uitofp <8 x i1> undef to <8 x float>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r211 = sitofp <8 x i1> undef to <8 x float>
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r212 = uitofp <8 x i8> undef to <8 x float>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r213 = sitofp <8 x i8> undef to <8 x float>
+  ; CHECK: cost of 4 {{.*}} uitofp
+  %r214 = uitofp <8 x i16> undef to <8 x float>
+  ; CHECK: cost of 4 {{.*}} sitofp
+  %r215 = sitofp <8 x i16> undef to <8 x float>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r216 = uitofp <8 x i32> undef to <8 x float>
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r217 = sitofp <8 x i32> undef to <8 x float>
+  ; CHECK: cost of 96 {{.*}} uitofp
+  %r218 = uitofp <8 x i64> undef to <8 x float>
+  ; CHECK: cost of 96 {{.*}} sitofp
+  %r219 = sitofp <8 x i64> undef to <8 x float>
+
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r220 = uitofp <8 x i1> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r221 = sitofp <8 x i1> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r222 = uitofp <8 x i8> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r223 = sitofp <8 x i8> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r224 = uitofp <8 x i16> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r225 = sitofp <8 x i16> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r226 = uitofp <8 x i16> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r227 = sitofp <8 x i16> undef to <8 x double>
+  ; CHECK: cost of 96 {{.*}} uitofp
+  %r228 = uitofp <8 x i64> undef to <8 x double>
+  ; CHECK: cost of 96 {{.*}} sitofp
+  %r229 = sitofp <8 x i64> undef to <8 x double>
+
+  ; CHECK: cost of 64 {{.*}} uitofp
+  %r230 = uitofp <16 x i1> undef to <16 x float>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r231 = sitofp <16 x i1> undef to <16 x float>
+  ; CHECK: cost of 64 {{.*}} uitofp
+  %r232 = uitofp <16 x i8> undef to <16 x float>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r233 = sitofp <16 x i8> undef to <16 x float>
+  ; CHECK: cost of 8 {{.*}} uitofp
+  %r234 = uitofp <16 x i16> undef to <16 x float>
+  ; CHECK: cost of 8 {{.*}} sitofp
+  %r235 = sitofp <16 x i16> undef to <16 x float>
+  ; CHECK: cost of 4 {{.*}} uitofp
+  %r236 = uitofp <16 x i32> undef to <16 x float>
+  ; CHECK: cost of 4 {{.*}} sitofp
+  %r237 = sitofp <16 x i32> undef to <16 x float>
+  ; CHECK: cost of 192 {{.*}} uitofp
+  %r238 = uitofp <16 x i64> undef to <16 x float>
+  ; CHECK: cost of 192 {{.*}} sitofp
+  %r239 = sitofp <16 x i64> undef to <16 x float>
+
+  ; CHECK: cost of 64 {{.*}} uitofp
+  %r240 = uitofp <16 x i1> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r241 = sitofp <16 x i1> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} uitofp
+  %r242 = uitofp <16 x i8> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r243 = sitofp <16 x i8> undef to <16 x double>
+  ; C4ECK: cost of 64 {{.*}} uitofp
+  %r244 = uitofp <16 x i16> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r245 = sitofp <16 x i16> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} uitofp
+  %r246 = uitofp <16 x i16> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r247 = sitofp <16 x i16> undef to <16 x double>
+  ; CHECK: cost of 192 {{.*}} uitofp
+  %r248 = uitofp <16 x i64> undef to <16 x double>
+  ; CHECK: cost of 192 {{.*}} sitofp
+  %r249 = sitofp <16 x i64> undef to <16 x double>
+
   ;CHECK: cost of 0 {{.*}} ret
   ret i32 undef
 }
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll
index f0521bae48be..92f5a1ec3a00 100644
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -66,9 +66,63 @@ define void @avx2mull() {
 
 ; CHECK: fmul
 define i32 @fmul(i32 %arg) {
-  ;CHECK: cost of 1 {{.*}} fmul
+  ;CHECK: cost of 2 {{.*}} fmul
   %A = fmul <4 x float> undef, undef
-  ;CHECK: cost of 1 {{.*}} fmul
+  ;CHECK: cost of 2 {{.*}} fmul
   %B = fmul <8 x float> undef, undef
   ret i32 undef
 }
+
+; AVX: shift
+; AVX2: shift
+define void @shift() {
+  ; AVX: cost of 2 {{.*}} shl
+  ; AVX2: cost of 1 {{.*}} shl
+  %A0 = shl <4 x i32> undef, undef
+  ; AVX: cost of 2 {{.*}} shl
+  ; AVX2: cost of 1 {{.*}} shl
+  %A1 = shl <2 x i64> undef, undef
+
+  ; AVX: cost of 2 {{.*}} lshr
+  ; AVX2: cost of 1 {{.*}} lshr
+  %B0 = lshr <4 x i32> undef, undef
+  ; AVX: cost of 2 {{.*}} lshr
+  ; AVX2: cost of 1 {{.*}} lshr
+  %B1 = lshr <2 x i64> undef, undef
+
+  ; AVX: cost of 2 {{.*}} ashr
+  ; AVX2: cost of 1 {{.*}} ashr
+  %C0 = ashr <4 x i32> undef, undef
+  ; AVX: cost of 6 {{.*}} ashr
+  ; AVX2: cost of 20 {{.*}} ashr
+  %C1 = ashr <2 x i64> undef, undef
+
+  ret void
+}
+
+; AVX: avx2shift
+; AVX2: avx2shift
+define void @avx2shift() {
+  ; AVX: cost of 2 {{.*}} shl
+  ; AVX2: cost of 1 {{.*}} shl
+  %A0 = shl <8 x i32> undef, undef
+  ; AVX: cost of 2 {{.*}} shl
+  ; AVX2: cost of 1 {{.*}} shl
+  %A1 = shl <4 x i64> undef, undef
+
+  ; AVX: cost of 2 {{.*}} lshr
+  ; AVX2: cost of 1 {{.*}} lshr
+  %B0 = lshr <8 x i32> undef, undef
+  ; AVX: cost of 2 {{.*}} lshr
+  ; AVX2: cost of 1 {{.*}} lshr
+  %B1 = lshr <4 x i64> undef, undef
+
+  ; AVX: cost of 2 {{.*}} ashr
+  ; AVX2: cost of 1 {{.*}} ashr
+  %C0 = ashr <8 x i32> undef, undef
+  ; AVX: cost of 12 {{.*}} ashr
+  ; AVX2: cost of 40 {{.*}} ashr
+  %C1 = ashr <4 x i64> undef, undef
+
+  ret void
+}
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index bacc77869170..b69b3bf6304c 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -44,9 +44,9 @@ define i32 @zext_sext(<8 x i1> %in) {
   %B = zext <8 x i16> undef to <8 x i32>
   ;CHECK: cost of 1 {{.*}} sext
   %C = sext <4 x i32> undef to <4 x i64>
-  ;CHECK: cost of 8 {{.*}} sext
+  ;CHECK: cost of 6 {{.*}} sext
   %C1 = sext <4 x i8> undef to <4 x i64>
-  ;CHECK: cost of 8 {{.*}} sext
+  ;CHECK: cost of 6 {{.*}} sext
   %C2 = sext <4 x i16> undef to <4 x i64>
 
   ;CHECK: cost of 1 {{.*}} zext
@@ -77,3 +77,78 @@ define i32 @masks4(<4 x i1> %in) {
   ret i32 undef
 }
 
+define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %A1 = sitofp <4 x i1> %a to <4 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %A2 = sitofp <4 x i1> %a to <4 x double>
+
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %B1 = sitofp <4 x i8> %b to <4 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %B2 = sitofp <4 x i8> %b to <4 x double>
+
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %C1 = sitofp <4 x i16> %c to <4 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %C2 = sitofp <4 x i16> %c to <4 x double>
+
+  ; CHECK: cost of 1 {{.*}} sitofp
+  %D1 = sitofp <4 x i32> %d to <4 x float>
+  ; CHECK: cost of 1 {{.*}} sitofp
+  %D2 = sitofp <4 x i32> %d to <4 x double>
+  ret void
+}
+
+define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
+  ; CHECK: cost of 8 {{.*}} sitofp
+  %A1 = sitofp <8 x i1> %a to <8 x float>
+
+  ; CHECK: cost of 8 {{.*}} sitofp
+  %B1 = sitofp <8 x i8> %b to <8 x float>
+
+  ; CHECK: cost of 5 {{.*}} sitofp
+  %C1 = sitofp <8 x i16> %c to <8 x float>
+
+  ; CHECK: cost of 1 {{.*}} sitofp
+  %D1 = sitofp <8 x i32> %d to <8 x float>
+  ret void
+}
+
+define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
+  ; CHECK: cost of 7 {{.*}} uitofp
+  %A1 = uitofp <4 x i1> %a to <4 x float>
+  ; CHECK: cost of 7 {{.*}} uitofp
+  %A2 = uitofp <4 x i1> %a to <4 x double>
+
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %B1 = uitofp <4 x i8> %b to <4 x float>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %B2 = uitofp <4 x i8> %b to <4 x double>
+
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %C1 = uitofp <4 x i16> %c to <4 x float>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %C2 = uitofp <4 x i16> %c to <4 x double>
+
+  ; CHECK: cost of 6 {{.*}} uitofp
+  %D1 = uitofp <4 x i32> %d to <4 x float>
+  ; CHECK: cost of 6 {{.*}} uitofp
+  %D2 = uitofp <4 x i32> %d to <4 x double>
+  ret void
+}
+
+define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
+  ; CHECK: cost of 6 {{.*}} uitofp
+  %A1 = uitofp <8 x i1> %a to <8 x float>
+
+  ; CHECK: cost of 5 {{.*}} uitofp
+  %B1 = uitofp <8 x i8> %b to <8 x float>
+
+  ; CHECK: cost of 5 {{.*}} uitofp
+  %C1 = uitofp <8 x i16> %c to <8 x float>
+
+  ; CHECK: cost of 9 {{.*}} uitofp
+  %D1 = uitofp <8 x i32> %d to <8 x float>
+  ret void
+}
diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll
new file mode 100644
index 000000000000..338d97416526
--- /dev/null
+++ b/test/Analysis/CostModel/X86/sitofp.ll
@@ -0,0 +1,281 @@
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+
+define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) {
+  ; SSE2: sitofpv2i8v2double
+  ; SSE2: cost of 20 {{.*}} sitofp
+  %1 = sitofp <2 x i8> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @sitofpv4i8v4double(<4 x i8> %a) {
+  ; SSE2: sitofpv4i8v4double
+  ; SSE2: cost of 40 {{.*}} sitofp
+  %1 = sitofp <4 x i8> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @sitofpv8i8v8double(<8 x i8> %a) {
+  ; SSE2: sitofpv8i8v8double
+  ; SSE2: cost of 80 {{.*}} sitofp
+%1 = sitofp <8 x i8> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @sitofpv16i8v16double(<16 x i8> %a) {
+  ; SSE2: sitofpv16i8v16double
+  ; SSE2: cost of 160 {{.*}} sitofp
+  %1 = sitofp <16 x i8> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @sitofpv32i8v32double(<32 x i8> %a) {
+  ; SSE2: sitofpv32i8v32double
+  ; SSE2: cost of 320 {{.*}} sitofp
+  %1 = sitofp <32 x i8> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @sitofpv2i16v2double(<2 x i16> %a) {
+  ; SSE2: sitofpv2i16v2double
+  ; SSE2: cost of 20 {{.*}} sitofp
+  %1 = sitofp <2 x i16> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @sitofpv4i16v4double(<4 x i16> %a) {
+  ; SSE2: sitofpv4i16v4double
+  ; SSE2: cost of 40 {{.*}} sitofp
+  %1 = sitofp <4 x i16> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @sitofpv8i16v8double(<8 x i16> %a) {
+  ; SSE2: sitofpv8i16v8double
+  ; SSE2: cost of 80 {{.*}} sitofp
+  %1 = sitofp <8 x i16> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @sitofpv16i16v16double(<16 x i16> %a) {
+  ; SSE2: sitofpv16i16v16double
+  ; SSE2: cost of 160 {{.*}} sitofp
+  %1 = sitofp <16 x i16> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @sitofpv32i16v32double(<32 x i16> %a) {
+  ; SSE2: sitofpv32i16v32double
+  ; SSE2: cost of 320 {{.*}} sitofp
+  %1 = sitofp <32 x i16> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @sitofpv2i32v2double(<2 x i32> %a) {
+  ; SSE2: sitofpv2i32v2double
+  ; SSE2: cost of 20 {{.*}} sitofp
+  %1 = sitofp <2 x i32> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @sitofpv4i32v4double(<4 x i32> %a) {
+  ; SSE2: sitofpv4i32v4double
+  ; SSE2: cost of 40 {{.*}} sitofp
+  %1 = sitofp <4 x i32> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @sitofpv8i32v8double(<8 x i32> %a) {
+  ; SSE2: sitofpv8i32v8double
+  ; SSE2: cost of 80 {{.*}} sitofp
+  %1 = sitofp <8 x i32> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @sitofpv16i32v16double(<16 x i32> %a) {
+  ; SSE2: sitofpv16i32v16double
+  ; SSE2: cost of 160 {{.*}} sitofp
+  %1 = sitofp <16 x i32> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @sitofpv32i32v32double(<32 x i32> %a) {
+  ; SSE2: sitofpv32i32v32double
+  ; SSE2: cost of 320 {{.*}} sitofp
+  %1 = sitofp <32 x i32> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @sitofpv2i64v2double(<2 x i64> %a) {
+  ; SSE2: sitofpv2i64v2double
+  ; SSE2: cost of 20 {{.*}} sitofp
+  %1 = sitofp <2 x i64> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @sitofpv4i64v4double(<4 x i64> %a) {
+  ; SSE2: sitofpv4i64v4double
+  ; SSE2: cost of 40 {{.*}} sitofp
+  %1 = sitofp <4 x i64> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @sitofpv8i64v8double(<8 x i64> %a) {
+  %1 = sitofp <8 x i64> %a to <8 x double>
+  ; SSE2: sitofpv8i64v8double
+  ; SSE2: cost of 80 {{.*}} sitofp
+  ret <8 x double> %1
+}
+
+define <16 x double> @sitofpv16i64v16double(<16 x i64> %a) {
+  ; SSE2: sitofpv16i64v16double
+  ; SSE2: cost of 160 {{.*}} sitofp
+  %1 = sitofp <16 x i64> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @sitofpv32i64v32double(<32 x i64> %a) {
+  ; SSE2: sitofpv32i64v32double
+  ; SSE2: cost of 320 {{.*}} sitofp
+  %1 = sitofp <32 x i64> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x float> @sitofpv2i8v2float(<2 x i8> %a) {
+  ; SSE2: sitofpv2i8v2float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <2 x i8> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) {
+  ; SSE2: sitofpv4i8v4float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <4 x i8> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @sitofpv8i8v8float(<8 x i8> %a) {
+  ; SSE2: sitofpv8i8v8float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <8 x i8> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @sitofpv16i8v16float(<16 x i8> %a) {
+  ; SSE2: sitofpv16i8v16float
+  ; SSE2: cost of 8 {{.*}} sitofp
+  %1 = sitofp <16 x i8> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @sitofpv32i8v32float(<32 x i8> %a) {
+  ; SSE2: sitofpv32i8v32float
+  ; SSE2: cost of 16 {{.*}} sitofp
+  %1 = sitofp <32 x i8> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @sitofpv2i16v2float(<2 x i16> %a) {
+  ; SSE2: sitofpv2i16v2float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <2 x i16> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) {
+  ; SSE2: sitofpv4i16v4float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <4 x i16> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @sitofpv8i16v8float(<8 x i16> %a) {
+  ; SSE2: sitofpv8i16v8float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <8 x i16> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @sitofpv16i16v16float(<16 x i16> %a) {
+  ; SSE2: sitofpv16i16v16float
+  ; SSE2: cost of 30 {{.*}} sitofp
+  %1 = sitofp <16 x i16> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @sitofpv32i16v32float(<32 x i16> %a) {
+  ; SSE2: sitofpv32i16v32float
+  ; SSE2: cost of 60 {{.*}} sitofp
+  %1 = sitofp <32 x i16> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @sitofpv2i32v2float(<2 x i32> %a) {
+  ; SSE2: sitofpv2i32v2float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <2 x i32> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
+  ; SSE2: sitofpv4i32v4float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
+  ; SSE2: sitofpv8i32v8float
+  ; SSE2: cost of 30 {{.*}} sitofp
+  %1 = sitofp <8 x i32> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
+  ; SSE2: sitofpv16i32v16float
+  ; SSE2: cost of 60 {{.*}} sitofp
+  %1 = sitofp <16 x i32> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
+  ; SSE2: sitofpv32i32v32float
+  ; SSE2: cost of 120 {{.*}} sitofp
+  %1 = sitofp <32 x i32> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @sitofpv2i64v2float(<2 x i64> %a) {
+  ; SSE2: sitofpv2i64v2float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <2 x i64> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @sitofpv4i64v4float(<4 x i64> %a) {
+  ; SSE2: sitofpv4i64v4float
+  ; SSE2: cost of 30 {{.*}} sitofp
+  %1 = sitofp <4 x i64> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @sitofpv8i64v8float(<8 x i64> %a) {
+  ; SSE2: sitofpv8i64v8float
+  ; SSE2: cost of 60 {{.*}} sitofp
+  %1 = sitofp <8 x i64> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @sitofpv16i64v16float(<16 x i64> %a) {
+  ; SSE2: sitofpv16i64v16float
+  ; SSE2: cost of 120 {{.*}} sitofp
+  %1 = sitofp <16 x i64> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
+  ; SSE2: sitofpv32i64v32float
+  ; SSE2: cost of 240 {{.*}} sitofp
+  %1 = sitofp <32 x i64> %a to <32 x float>
+  ret <32 x float> %1
+}
diff --git a/test/Analysis/CostModel/X86/testshiftashr.ll b/test/Analysis/CostModel/X86/testshiftashr.ll
new file mode 100644
index 000000000000..d96a92fe2a8a
--- /dev/null
+++ b/test/Analysis/CostModel/X86/testshiftashr.ll
@@ -0,0 +1,531 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+
+%shifttype = type <2 x i16>
+define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
+entry:
+  ; SSE2: shift2i16
+  ; SSE2: cost of 20 {{.*}} ashr
+  ; SSE2-CODEGEN: shift2i16
+  ; SSE2-CODEGEN: sarq %cl
+
+  %0 = ashr %shifttype %a , %b
+  ret %shifttype %0
+}
+
+%shifttype4i16 = type <4 x i16>
+define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
+entry:
+  ; SSE2: shift4i16
+  ; SSE2: cost of 40 {{.*}} ashr
+  ; SSE2-CODEGEN: shift4i16
+  ; SSE2-CODEGEN: sarl %cl
+
+  %0 = ashr %shifttype4i16 %a , %b
+  ret %shifttype4i16 %0
+}
+
+%shifttype8i16 = type <8 x i16>
+define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
+entry:
+  ; SSE2: shift8i16
+  ; SSE2: cost of 80 {{.*}} ashr
+  ; SSE2-CODEGEN: shift8i16
+  ; SSE2-CODEGEN: sarw %cl
+
+  %0 = ashr %shifttype8i16 %a , %b
+  ret %shifttype8i16 %0
+}
+
+%shifttype16i16 = type <16 x i16>
+define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
+entry:
+  ; SSE2: shift16i16
+  ; SSE2: cost of 160 {{.*}} ashr
+  ; SSE2-CODEGEN: shift16i16
+  ; SSE2-CODEGEN: sarw %cl
+
+  %0 = ashr %shifttype16i16 %a , %b
+  ret %shifttype16i16 %0
+}
+
+%shifttype32i16 = type <32 x i16>
+define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
+entry:
+  ; SSE2: shift32i16
+  ; SSE2: cost of 320 {{.*}} ashr
+  ; SSE2-CODEGEN: shift32i16
+  ; SSE2-CODEGEN: sarw %cl
+
+  %0 = ashr %shifttype32i16 %a , %b
+  ret %shifttype32i16 %0
+}
+
+%shifttype2i32 = type <2 x i32>
+define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
+entry:
+  ; SSE2: shift2i32
+  ; SSE2: cost of 20 {{.*}} ashr
+  ; SSE2-CODEGEN: shift2i32
+  ; SSE2-CODEGEN: sarq %cl
+
+  %0 = ashr %shifttype2i32 %a , %b
+  ret %shifttype2i32 %0
+}
+
+%shifttype4i32 = type <4 x i32>
+define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
+entry:
+  ; SSE2: shift4i32
+  ; SSE2: cost of 40 {{.*}} ashr
+  ; SSE2-CODEGEN: shift4i32
+  ; SSE2-CODEGEN: sarl %cl
+
+  %0 = ashr %shifttype4i32 %a , %b
+  ret %shifttype4i32 %0
+}
+
+%shifttype8i32 = type <8 x i32>
+define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
+entry:
+  ; SSE2: shift8i32
+  ; SSE2: cost of 80 {{.*}} ashr
+  ; SSE2-CODEGEN: shift8i32
+  ; SSE2-CODEGEN: sarl %cl
+
+  %0 = ashr %shifttype8i32 %a , %b
+  ret %shifttype8i32 %0
+}
+
+%shifttype16i32 = type <16 x i32>
+define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
+entry:
+  ; SSE2: shift16i32
+  ; SSE2: cost of 160 {{.*}} ashr
+  ; SSE2-CODEGEN: shift16i32
+  ; SSE2-CODEGEN: sarl %cl
+
+  %0 = ashr %shifttype16i32 %a , %b
+  ret %shifttype16i32 %0
+}
+
+%shifttype32i32 = type <32 x i32>
+define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
+entry:
+  ; SSE2: shift32i32
+  ; SSE2: cost of 320 {{.*}} ashr
+  ; SSE2-CODEGEN: shift32i32
+  ; SSE2-CODEGEN: sarl %cl
+
+  %0 = ashr %shifttype32i32 %a , %b
+  ret %shifttype32i32 %0
+}
+
+%shifttype2i64 = type <2 x i64>
+define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
+entry:
+  ; SSE2: shift2i64
+  ; SSE2: cost of 20 {{.*}} ashr
+  ; SSE2-CODEGEN: shift2i64
+  ; SSE2-CODEGEN: sarq %cl
+
+  %0 = ashr %shifttype2i64 %a , %b
+  ret %shifttype2i64 %0
+}
+
+%shifttype4i64 = type <4 x i64>
+define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
+entry:
+  ; SSE2: shift4i64
+  ; SSE2: cost of 40 {{.*}} ashr
+  ; SSE2-CODEGEN: shift4i64
+  ; SSE2-CODEGEN: sarq %cl
+
+  %0 = ashr %shifttype4i64 %a , %b
+  ret %shifttype4i64 %0
+}
+
+%shifttype8i64 = type <8 x i64>
+define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
+entry:
+  ; SSE2: shift8i64
+  ; SSE2: cost of 80 {{.*}} ashr
+  ; SSE2-CODEGEN: shift8i64
+  ; SSE2-CODEGEN: sarq %cl
+
+  %0 = ashr %shifttype8i64 %a , %b
+  ret %shifttype8i64 %0
+}
+
+%shifttype16i64 = type <16 x i64>
+define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
+entry:
+  ; SSE2: shift16i64
+  ; SSE2: cost of 160 {{.*}} ashr
+  ; SSE2-CODEGEN: shift16i64
+  ; SSE2-CODEGEN: sarq %cl
+
+  %0 = ashr %shifttype16i64 %a , %b
+  ret %shifttype16i64 %0
+}
+
+%shifttype32i64 = type <32 x i64>
+define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
+entry:
+  ; SSE2: shift32i64
+  ; SSE2: cost of 320 {{.*}} ashr
+  ; SSE2-CODEGEN: shift32i64
+  ; SSE2-CODEGEN: sarq %cl
+
+  %0 = ashr %shifttype32i64 %a , %b
+  ret %shifttype32i64 %0
+}
+
+%shifttype2i8 = type <2 x i8>
+define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
+entry:
+  ; SSE2: shift2i8
+  ; SSE2: cost of 20 {{.*}} ashr
+  ; SSE2-CODEGEN: shift2i8
+  ; SSE2-CODEGEN: sarq %cl
+
+  %0 = ashr %shifttype2i8 %a , %b
+  ret %shifttype2i8 %0
+}
+
+%shifttype4i8 = type <4 x i8>
+define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
+entry:
+  ; SSE2: shift4i8
+  ; SSE2: cost of 40 {{.*}} ashr
+  ; SSE2-CODEGEN: shift4i8
+  ; SSE2-CODEGEN: sarl %cl
+
+  %0 = ashr %shifttype4i8 %a , %b
+  ret %shifttype4i8 %0
+}
+
+%shifttype8i8 = type <8 x i8>
+define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
+entry:
+  ; SSE2: shift8i8
+  ; SSE2: cost of 80 {{.*}} ashr
+  ; SSE2-CODEGEN: shift8i8
+  ; SSE2-CODEGEN: sarw %cl
+
+  %0 = ashr %shifttype8i8 %a , %b
+  ret %shifttype8i8 %0
+}
+
+%shifttype16i8 = type <16 x i8>
+define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
+entry:
+  ; SSE2: shift16i8
+  ; SSE2: cost of 160 {{.*}} ashr
+  ; SSE2-CODEGEN: shift16i8
+  ; SSE2-CODEGEN: sarb %cl
+
+  %0 = ashr %shifttype16i8 %a , %b
+  ret %shifttype16i8 %0
+}
+
+%shifttype32i8 = type <32 x i8>
+define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
+entry:
+  ; SSE2: shift32i8
+  ; SSE2: cost of 320 {{.*}} ashr
+  ; SSE2-CODEGEN: shift32i8
+  ; SSE2-CODEGEN: sarb %cl
+
+  %0 = ashr %shifttype32i8 %a , %b
+  ret %shifttype32i8 %0
+}
+
+; Test shift by a constant a value.
+
+%shifttypec = type <2 x i16>
+define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
+entry:
+  ; SSE2: shift2i16const
+  ; SSE2: cost of 20 {{.*}} ashr
+  ; SSE2-CODEGEN: shift2i16const
+  ; SSE2-CODEGEN: sarq $
+
+  %0 = ashr %shifttypec %a , <i16 3, i16 3>
+  ret %shifttypec %0
+}
+
+%shifttypec4i16 = type <4 x i16>
+define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
+entry:
+  ; SSE2: shift4i16const
+  ; SSE2: cost of 1 {{.*}} ashr
+  ; SSE2-CODEGEN: shift4i16const
+  ; SSE2-CODEGEN: psrad $3
+
+  %0 = ashr %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec4i16 %0
+}
+
+%shifttypec8i16 = type <8 x i16>
+define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
+entry:
+  ; SSE2: shift8i16const
+  ; SSE2: cost of 1 {{.*}} ashr
+  ; SSE2-CODEGEN: shift8i16const
+  ; SSE2-CODEGEN: psraw $3
+
+  %0 = ashr %shifttypec8i16 %a , <i16 3, i16 3, i16 3, i16 3,
+                                  i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec8i16 %0
+}
+
+%shifttypec16i16 = type <16 x i16>
+define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
+                                         %shifttypec16i16 %b) {
+entry:
+  ; SSE2: shift16i16const
+  ; SSE2: cost of 2 {{.*}} ashr
+  ; SSE2-CODEGEN: shift16i16const
+  ; SSE2-CODEGEN: psraw $3
+
+  %0 = ashr %shifttypec16i16 %a , <i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec16i16 %0
+}
+
+%shifttypec32i16 = type <32 x i16>
+define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
+                                        %shifttypec32i16 %b) {
+entry:
+  ; SSE2: shift32i16const
+  ; SSE2: cost of 4 {{.*}} ashr
+  ; SSE2-CODEGEN: shift32i16const
+  ; SSE2-CODEGEN: psraw $3
+
+  %0 = ashr %shifttypec32i16 %a , <i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec32i16 %0
+}
+
+%shifttypec2i32 = type <2 x i32>
+define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
+entry:
+  ; SSE2: shift2i32c
+  ; SSE2: cost of 20 {{.*}} ashr
+  ; SSE2-CODEGEN: shift2i32c
+  ; SSE2-CODEGEN: sarq $3
+
+  %0 = ashr %shifttypec2i32 %a , <i32 3, i32 3>
+  ret %shifttypec2i32 %0
+}
+
+%shifttypec4i32 = type <4 x i32>
+define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
+entry:
+  ; SSE2: shift4i32c
+  ; SSE2: cost of 1 {{.*}} ashr
+  ; SSE2-CODEGEN: shift4i32c
+  ; SSE2-CODEGEN: psrad $3
+
+  %0 = ashr %shifttypec4i32 %a , <i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec4i32 %0
+}
+
+%shifttypec8i32 = type <8 x i32>
+define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
+entry:
+  ; SSE2: shift8i32c
+  ; SSE2: cost of 2 {{.*}} ashr
+  ; SSE2-CODEGEN: shift8i32c
+  ; SSE2-CODEGEN: psrad $3
+
+  %0 = ashr %shifttypec8i32 %a , <i32 3, i32 3, i32 3, i32 3,
+                                  i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec8i32 %0
+}
+
+%shifttypec16i32 = type <16 x i32>
+define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
+entry:
+  ; SSE2: shift16i32c
+  ; SSE2: cost of 4 {{.*}} ashr
+  ; SSE2-CODEGEN: shift16i32c
+  ; SSE2-CODEGEN: psrad $3
+
+  %0 = ashr %shifttypec16i32 %a , <i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec16i32 %0
+}
+
+%shifttypec32i32 = type <32 x i32>
+define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
+entry:
+  ; SSE2: shift32i32c
+  ; getTypeConversion fails here and promotes this to a i64.
+  ; SSE2: cost of 8 {{.*}} ashr
+  ; SSE2-CODEGEN: shift32i32c
+  ; SSE2-CODEGEN: psrad $3
+  %0 = ashr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec32i32 %0
+}
+
+%shifttypec2i64 = type <2 x i64>
+define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
+entry:
+  ; SSE2: shift2i64c
+  ; SSE2: cost of 20 {{.*}} ashr
+  ; SSE2-CODEGEN: shift2i64c
+  ; SSE2-CODEGEN: sarq $3
+
+  %0 = ashr %shifttypec2i64 %a , <i64 3, i64 3>
+  ret %shifttypec2i64 %0
+}
+
+%shifttypec4i64 = type <4 x i64>
+define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
+entry:
+  ; SSE2: shift4i64c
+  ; SSE2: cost of 40 {{.*}} ashr
+  ; SSE2-CODEGEN: shift4i64c
+  ; SSE2-CODEGEN: sarq $3
+
+  %0 = ashr %shifttypec4i64 %a , <i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec4i64 %0
+}
+
+%shifttypec8i64 = type <8 x i64>
+define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
+entry:
+  ; SSE2: shift8i64c
+  ; SSE2: cost of 80 {{.*}} ashr
+  ; SSE2-CODEGEN: shift8i64c
+  ; SSE2-CODEGEN: sarq $3
+
+ %0 = ashr %shifttypec8i64 %a , <i64 3, i64 3, i64 3, i64 3,
+                                 i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec8i64 %0
+}
+
+%shifttypec16i64 = type <16 x i64>
+define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
+entry:
+  ; SSE2: shift16i64c
+  ; SSE2: cost of 160 {{.*}} ashr
+  ; SSE2-CODEGEN: shift16i64c
+  ; SSE2-CODEGEN: sarq $3
+
+  %0 = ashr %shifttypec16i64 %a , <i64 3, i64 3, i64 3, i64 3,
+                                   i64 3, i64 3, i64 3, i64 3,
+                                   i64 3, i64 3, i64 3, i64 3,
+                                   i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec16i64 %0
+}
+
+%shifttypec32i64 = type <32 x i64>
+define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
+entry:
+  ; SSE2: shift32i64c
+  ; SSE2: cost of 320 {{.*}} ashr
+  ; SSE2-CODEGEN: shift32i64c
+  ; SSE2-CODEGEN: sarq $3
+
+  %0 = ashr %shifttypec32i64 %a ,<i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec32i64 %0
+}
+
+%shifttypec2i8 = type <2 x i8>
+define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
+entry:
+  ; SSE2: shift2i8c
+  ; SSE2: cost of 20 {{.*}} ashr
+  ; SSE2-CODEGEN: shift2i8c
+  ; SSE2-CODEGEN: sarq $3
+
+  %0 = ashr %shifttypec2i8 %a , <i8 3, i8 3>
+  ret %shifttypec2i8 %0
+}
+
+%shifttypec4i8 = type <4 x i8>
+define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
+entry:
+  ; SSE2: shift4i8c
+  ; SSE2: cost of 1 {{.*}} ashr
+  ; SSE2-CODEGEN: shift4i8c
+  ; SSE2-CODEGEN: psrad $3
+
+  %0 = ashr %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec4i8 %0
+}
+
+%shifttypec8i8 = type <8 x i8>
+define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
+entry:
+  ; SSE2: shift8i8c
+  ; SSE2: cost of 1 {{.*}} ashr
+  ; SSE2-CODEGEN: shift8i8c
+  ; SSE2-CODEGEN: psraw $3
+
+  %0 = ashr %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3,
+                                 i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec8i8 %0
+}
+
+%shifttypec16i8 = type <16 x i8>
+define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
+entry:
+  ; SSE2: shift16i8c
+  ; SSE2: cost of 4 {{.*}} ashr
+  ; SSE2-CODEGEN: shift16i8c
+  ; SSE2-CODEGEN: psrlw $3
+
+  %0 = ashr %shifttypec16i8 %a , <i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec16i8 %0
+}
+
+%shifttypec32i8 = type <32 x i8>
+define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
+entry:
+  ; SSE2: shift32i8c
+  ; SSE2: cost of 8 {{.*}} ashr
+  ; SSE2-CODEGEN: shift32i8c
+  ; SSE2-CODEGEN: psrlw $3
+
+  %0 = ashr %shifttypec32i8 %a , <i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec32i8 %0
+}
+
diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll
new file mode 100644
index 000000000000..7bc8d89e4ad8
--- /dev/null
+++ b/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -0,0 +1,529 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+
+%shifttype = type <2 x i16>
+define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
+entry:
+  ; SSE2: shift2i16
+  ; SSE2: cost of 20 {{.*}} lshr
+  ; SSE2-CODEGEN: shift2i16
+  ; SSE2-CODEGEN: shrq %cl
+
+  %0 = lshr %shifttype %a , %b
+  ret %shifttype %0
+}
+
+%shifttype4i16 = type <4 x i16>
+define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
+entry:
+  ; SSE2: shift4i16
+  ; SSE2: cost of 40 {{.*}} lshr
+  ; SSE2-CODEGEN: shift4i16
+  ; SSE2-CODEGEN: shrl %cl
+
+  %0 = lshr %shifttype4i16 %a , %b
+  ret %shifttype4i16 %0
+}
+
+%shifttype8i16 = type <8 x i16>
+define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
+entry:
+  ; SSE2: shift8i16
+  ; SSE2: cost of 80 {{.*}} lshr
+  ; SSE2-CODEGEN: shift8i16
+  ; SSE2-CODEGEN: shrl %cl
+
+  %0 = lshr %shifttype8i16 %a , %b
+  ret %shifttype8i16 %0
+}
+
+%shifttype16i16 = type <16 x i16>
+define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
+entry:
+  ; SSE2: shift16i16
+  ; SSE2: cost of 160 {{.*}} lshr
+  ; SSE2-CODEGEN: shift16i16
+  ; SSE2-CODEGEN: shrl %cl
+
+  %0 = lshr %shifttype16i16 %a , %b
+  ret %shifttype16i16 %0
+}
+
+%shifttype32i16 = type <32 x i16>
+define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
+entry:
+  ; SSE2: shift32i16
+  ; SSE2: cost of 320 {{.*}} lshr
+  ; SSE2-CODEGEN: shift32i16
+  ; SSE2-CODEGEN: shrl %cl
+
+  %0 = lshr %shifttype32i16 %a , %b
+  ret %shifttype32i16 %0
+}
+
+%shifttype2i32 = type <2 x i32>
+define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
+entry:
+  ; SSE2: shift2i32
+  ; SSE2: cost of 20 {{.*}} lshr
+  ; SSE2-CODEGEN: shift2i32
+  ; SSE2-CODEGEN: shrq %cl
+
+  %0 = lshr %shifttype2i32 %a , %b
+  ret %shifttype2i32 %0
+}
+
+%shifttype4i32 = type <4 x i32>
+define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
+entry:
+  ; SSE2: shift4i32
+  ; SSE2: cost of 40 {{.*}} lshr
+  ; SSE2-CODEGEN: shift4i32
+  ; SSE2-CODEGEN: shrl %cl
+
+  %0 = lshr %shifttype4i32 %a , %b
+  ret %shifttype4i32 %0
+}
+
+%shifttype8i32 = type <8 x i32>
+define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
+entry:
+  ; SSE2: shift8i32
+  ; SSE2: cost of 80 {{.*}} lshr
+  ; SSE2-CODEGEN: shift8i32
+  ; SSE2-CODEGEN: shrl %cl
+
+  %0 = lshr %shifttype8i32 %a , %b
+  ret %shifttype8i32 %0
+}
+
+%shifttype16i32 = type <16 x i32>
+define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
+entry:
+  ; SSE2: shift16i32
+  ; SSE2: cost of 160 {{.*}} lshr
+  ; SSE2-CODEGEN: shift16i32
+  ; SSE2-CODEGEN: shrl %cl
+
+  %0 = lshr %shifttype16i32 %a , %b
+  ret %shifttype16i32 %0
+}
+
+%shifttype32i32 = type <32 x i32>
+define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
+entry:
+  ; SSE2: shift32i32
+  ; SSE2: cost of 320 {{.*}} lshr
+  ; SSE2-CODEGEN: shift32i32
+  ; SSE2-CODEGEN: shrl %cl
+
+  %0 = lshr %shifttype32i32 %a , %b
+  ret %shifttype32i32 %0
+}
+
+%shifttype2i64 = type <2 x i64>
+define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
+entry:
+  ; SSE2: shift2i64
+  ; SSE2: cost of 20 {{.*}} lshr
+  ; SSE2-CODEGEN: shift2i64
+  ; SSE2-CODEGEN: shrq %cl
+
+  %0 = lshr %shifttype2i64 %a , %b
+  ret %shifttype2i64 %0
+}
+
+%shifttype4i64 = type <4 x i64>
+define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
+entry:
+  ; SSE2: shift4i64
+  ; SSE2: cost of 40 {{.*}} lshr
+  ; SSE2-CODEGEN: shift4i64
+  ; SSE2-CODEGEN: shrq %cl
+
+  %0 = lshr %shifttype4i64 %a , %b
+  ret %shifttype4i64 %0
+}
+
+%shifttype8i64 = type <8 x i64>
+define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
+entry:
+  ; SSE2: shift8i64
+  ; SSE2: cost of 80 {{.*}} lshr
+  ; SSE2-CODEGEN: shift8i64
+  ; SSE2-CODEGEN: shrq %cl
+
+  %0 = lshr %shifttype8i64 %a , %b
+  ret %shifttype8i64 %0
+}
+
+%shifttype16i64 = type <16 x i64>
+define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
+entry:
+  ; SSE2: shift16i64
+  ; SSE2: cost of 160 {{.*}} lshr
+  ; SSE2-CODEGEN: shift16i64
+  ; SSE2-CODEGEN: shrq %cl
+
+  %0 = lshr %shifttype16i64 %a , %b
+  ret %shifttype16i64 %0
+}
+
+%shifttype32i64 = type <32 x i64>
+define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
+entry:
+  ; SSE2: shift32i64
+  ; SSE2: cost of 320 {{.*}} lshr
+  ; SSE2-CODEGEN: shift32i64
+  ; SSE2-CODEGEN: shrq %cl
+
+  %0 = lshr %shifttype32i64 %a , %b
+  ret %shifttype32i64 %0
+}
+
+%shifttype2i8 = type <2 x i8>
+define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
+entry:
+  ; SSE2: shift2i8
+  ; SSE2: cost of 20 {{.*}} lshr
+  ; SSE2-CODEGEN: shift2i8
+  ; SSE2-CODEGEN: shrq %cl
+
+  %0 = lshr %shifttype2i8 %a , %b
+  ret %shifttype2i8 %0
+}
+
+%shifttype4i8 = type <4 x i8>
+define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
+entry:
+  ; SSE2: shift4i8
+  ; SSE2: cost of 40 {{.*}} lshr
+  ; SSE2-CODEGEN: shift4i8
+  ; SSE2-CODEGEN: shrl %cl
+
+  %0 = lshr %shifttype4i8 %a , %b
+  ret %shifttype4i8 %0
+}
+
+%shifttype8i8 = type <8 x i8>
+define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
+entry:
+  ; SSE2: shift8i8
+  ; SSE2: cost of 80 {{.*}} lshr
+  ; SSE2-CODEGEN: shift8i8
+  ; SSE2-CODEGEN: shrl %cl
+
+  %0 = lshr %shifttype8i8 %a , %b
+  ret %shifttype8i8 %0
+}
+
+%shifttype16i8 = type <16 x i8>
+define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
+entry:
+  ; SSE2: shift16i8
+  ; SSE2: cost of 160 {{.*}} lshr
+  ; SSE2-CODEGEN: shift16i8
+  ; SSE2-CODEGEN: shrb %cl
+
+  %0 = lshr %shifttype16i8 %a , %b
+  ret %shifttype16i8 %0
+}
+
+%shifttype32i8 = type <32 x i8>
+define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
+entry:
+  ; SSE2: shift32i8
+  ; SSE2: cost of 320 {{.*}} lshr
+  ; SSE2-CODEGEN: shift32i8
+  ; SSE2-CODEGEN: shrb %cl
+
+  %0 = lshr %shifttype32i8 %a , %b
+  ret %shifttype32i8 %0
+}
+
+; Test shift by a constant vector.
+
+%shifttypec = type <2 x i16>
+define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
+entry:
+  ; SSE2: shift2i16const
+  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2-CODEGEN: shift2i16const
+  ; SSE2-CODEGEN: psrlq $3
+
+  %0 = lshr %shifttypec %a , <i16 3, i16 3>
+  ret %shifttypec %0
+}
+
+%shifttypec4i16 = type <4 x i16>
+define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
+entry:
+  ; SSE2: shift4i16const
+  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2-CODEGEN: shift4i16const
+  ; SSE2-CODEGEN: psrld $3
+
+  %0 = lshr %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec4i16 %0
+}
+
+%shifttypec8i16 = type <8 x i16>
+define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
+entry:
+  ; SSE2: shift8i16const
+  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2-CODEGEN: shift8i16const
+  ; SSE2-CODEGEN: psrlw $3
+
+  %0 = lshr %shifttypec8i16 %a , <i16 3, i16 3, i16 3, i16 3,
+                                  i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec8i16 %0
+}
+
+%shifttypec16i16 = type <16 x i16>
+define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
+                                         %shifttypec16i16 %b) {
+entry:
+  ; SSE2: shift16i16const
+  ; SSE2: cost of 2 {{.*}} lshr
+  ; SSE2-CODEGEN: shift16i16const
+  ; SSE2-CODEGEN: psrlw $3
+
+  %0 = lshr %shifttypec16i16 %a , <i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec16i16 %0
+}
+
+%shifttypec32i16 = type <32 x i16>
+define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
+                                        %shifttypec32i16 %b) {
+entry:
+  ; SSE2: shift32i16const
+  ; SSE2: cost of 4 {{.*}} lshr
+  ; SSE2-CODEGEN: shift32i16const
+  ; SSE2-CODEGEN: psrlw $3
+
+  %0 = lshr %shifttypec32i16 %a , <i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec32i16 %0
+}
+
+%shifttypec2i32 = type <2 x i32>
+define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
+entry:
+  ; SSE2: shift2i32c
+  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2-CODEGEN: shift2i32c
+  ; SSE2-CODEGEN: psrlq $3
+
+  %0 = lshr %shifttypec2i32 %a , <i32 3, i32 3>
+  ret %shifttypec2i32 %0
+}
+
+%shifttypec4i32 = type <4 x i32>
+define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
+entry:
+  ; SSE2: shift4i32c
+  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2-CODEGEN: shift4i32c
+  ; SSE2-CODEGEN: psrld $3
+
+  %0 = lshr %shifttypec4i32 %a , <i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec4i32 %0
+}
+
+%shifttypec8i32 = type <8 x i32>
+define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
+entry:
+  ; SSE2: shift8i32c
+  ; SSE2: cost of 2 {{.*}} lshr
+  ; SSE2-CODEGEN: shift8i32c
+  ; SSE2-CODEGEN: psrld $3
+
+  %0 = lshr %shifttypec8i32 %a , <i32 3, i32 3, i32 3, i32 3,
+                                  i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec8i32 %0
+}
+
+%shifttypec16i32 = type <16 x i32>
+define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
+entry:
+  ; SSE2: shift16i32c
+  ; SSE2: cost of 4 {{.*}} lshr
+  ; SSE2-CODEGEN: shift16i32c
+  ; SSE2-CODEGEN: psrld $3
+
+  %0 = lshr %shifttypec16i32 %a , <i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec16i32 %0
+}
+
+%shifttypec32i32 = type <32 x i32>
+define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
+entry:
+  ; SSE2: shift32i32c
+  ; SSE2: cost of 8 {{.*}} lshr
+  ; SSE2-CODEGEN: shift32i32c
+  ; SSE2-CODEGEN: psrld $3
+  %0 = lshr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec32i32 %0
+}
+
+%shifttypec2i64 = type <2 x i64>
+define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
+entry:
+  ; SSE2: shift2i64c
+  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2-CODEGEN: shift2i64c
+  ; SSE2-CODEGEN: psrlq $3
+
+  %0 = lshr %shifttypec2i64 %a , <i64 3, i64 3>
+  ret %shifttypec2i64 %0
+}
+
+%shifttypec4i64 = type <4 x i64>
+define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
+entry:
+  ; SSE2: shift4i64c
+  ; SSE2: cost of 2 {{.*}} lshr
+  ; SSE2-CODEGEN: shift4i64c
+  ; SSE2-CODEGEN: psrlq $3
+
+  %0 = lshr %shifttypec4i64 %a , <i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec4i64 %0
+}
+
+%shifttypec8i64 = type <8 x i64>
+define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
+entry:
+  ; SSE2: shift8i64c
+  ; SSE2: cost of 4 {{.*}} lshr
+  ; SSE2-CODEGEN: shift8i64c
+  ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec8i64 %a , <i64 3, i64 3, i64 3, i64 3,
+                                 i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec8i64 %0
+}
+
+%shifttypec16i64 = type <16 x i64>
+define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
+entry:
+  ; SSE2: shift16i64c
+  ; SSE2: cost of 8 {{.*}} lshr
+  ; SSE2-CODEGEN: shift16i64c
+  ; SSE2-CODEGEN: psrlq $3
+
+  %0 = lshr %shifttypec16i64 %a , <i64 3, i64 3, i64 3, i64 3,
+                                   i64 3, i64 3, i64 3, i64 3,
+                                   i64 3, i64 3, i64 3, i64 3,
+                                   i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec16i64 %0
+}
+
+%shifttypec32i64 = type <32 x i64>
+define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
+entry:
+  ; SSE2: shift32i64c
+  ; SSE2: cost of 16 {{.*}} lshr
+  ; SSE2-CODEGEN: shift32i64c
+  ; SSE2-CODEGEN: psrlq $3
+
+  %0 = lshr %shifttypec32i64 %a ,<i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec32i64 %0
+}
+
+%shifttypec2i8 = type <2 x i8>
+define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
+entry:
+  ; SSE2: shift2i8c
+  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2-CODEGEN: shift2i8c
+  ; SSE2-CODEGEN: psrlq $3
+
+  %0 = lshr %shifttypec2i8 %a , <i8 3, i8 3>
+  ret %shifttypec2i8 %0
+}
+
+%shifttypec4i8 = type <4 x i8>
+define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
+entry:
+  ; SSE2: shift4i8c
+  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2-CODEGEN: shift4i8c
+  ; SSE2-CODEGEN: psrld $3
+
+  %0 = lshr %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec4i8 %0
+}
+
+%shifttypec8i8 = type <8 x i8>
+define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
+entry:
+  ; SSE2: shift8i8c
+  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2-CODEGEN: shift8i8c
+  ; SSE2-CODEGEN: psrlw $3
+
+  %0 = lshr %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3,
+                                 i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec8i8 %0
+}
+
+%shifttypec16i8 = type <16 x i8>
+define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
+entry:
+  ; SSE2: shift16i8c
+  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2-CODEGEN: shift16i8c
+  ; SSE2-CODEGEN: psrlw $3
+
+  %0 = lshr %shifttypec16i8 %a , <i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec16i8 %0
+}
+
+%shifttypec32i8 = type <32 x i8>
+define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
+entry:
+  ; SSE2: shift32i8c
+  ; SSE2: cost of 2 {{.*}} lshr
+  ; SSE2-CODEGEN: shift32i8c
+  ; SSE2-CODEGEN: psrlw $3
+
+  %0 = lshr %shifttypec32i8 %a , <i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec32i8 %0
+}
diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll
new file mode 100644
index 000000000000..40effd02922c
--- /dev/null
+++ b/test/Analysis/CostModel/X86/testshiftshl.ll
@@ -0,0 +1,529 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+
+%shifttype = type <2 x i16>
+define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
+entry:
+  ; SSE2: shift2i16
+  ; SSE2: cost of 20 {{.*}} shl
+  ; SSE2-CODEGEN: shift2i16
+  ; SSE2-CODEGEN: shlq %cl
+
+  %0 = shl %shifttype %a , %b
+  ret %shifttype %0
+}
+
+%shifttype4i16 = type <4 x i16>
+define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
+entry:
+  ; SSE2: shift4i16
+  ; SSE2: cost of 10 {{.*}} shl
+  ; SSE2-CODEGEN: shift4i16
+  ; SSE2-CODEGEN: pmuludq
+
+  %0 = shl %shifttype4i16 %a , %b
+  ret %shifttype4i16 %0
+}
+
+%shifttype8i16 = type <8 x i16>
+define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
+entry:
+  ; SSE2: shift8i16
+  ; SSE2: cost of 80 {{.*}} shl
+  ; SSE2-CODEGEN: shift8i16
+  ; SSE2-CODEGEN: shll %cl
+
+  %0 = shl %shifttype8i16 %a , %b
+  ret %shifttype8i16 %0
+}
+
+%shifttype16i16 = type <16 x i16>
+define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
+entry:
+  ; SSE2: shift16i16
+  ; SSE2: cost of 160 {{.*}} shl
+  ; SSE2-CODEGEN: shift16i16
+  ; SSE2-CODEGEN: shll %cl
+
+  %0 = shl %shifttype16i16 %a , %b
+  ret %shifttype16i16 %0
+}
+
+%shifttype32i16 = type <32 x i16>
+define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
+entry:
+  ; SSE2: shift32i16
+  ; SSE2: cost of 320 {{.*}} shl
+  ; SSE2-CODEGEN: shift32i16
+  ; SSE2-CODEGEN: shll %cl
+
+  %0 = shl %shifttype32i16 %a , %b
+  ret %shifttype32i16 %0
+}
+
+%shifttype2i32 = type <2 x i32>
+define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
+entry:
+  ; SSE2: shift2i32
+  ; SSE2: cost of 20 {{.*}} shl
+  ; SSE2-CODEGEN: shift2i32
+  ; SSE2-CODEGEN: shlq %cl
+
+  %0 = shl %shifttype2i32 %a , %b
+  ret %shifttype2i32 %0
+}
+
+%shifttype4i32 = type <4 x i32>
+define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
+entry:
+  ; SSE2: shift4i32
+  ; SSE2: cost of 10 {{.*}} shl
+  ; SSE2-CODEGEN: shift4i32
+  ; SSE2-CODEGEN: pmuludq
+
+  %0 = shl %shifttype4i32 %a , %b
+  ret %shifttype4i32 %0
+}
+
+%shifttype8i32 = type <8 x i32>
+define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
+entry:
+  ; SSE2: shift8i32
+  ; SSE2: cost of 20 {{.*}} shl
+  ; SSE2-CODEGEN: shift8i32
+  ; SSE2-CODEGEN: pmuludq
+
+  %0 = shl %shifttype8i32 %a , %b
+  ret %shifttype8i32 %0
+}
+
+%shifttype16i32 = type <16 x i32>
+define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
+entry:
+  ; SSE2: shift16i32
+  ; SSE2: cost of 40 {{.*}} shl
+  ; SSE2-CODEGEN: shift16i32
+  ; SSE2-CODEGEN: pmuludq
+
+  %0 = shl %shifttype16i32 %a , %b
+  ret %shifttype16i32 %0
+}
+
+%shifttype32i32 = type <32 x i32>
+define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
+entry:
+  ; SSE2: shift32i32
+  ; SSE2: cost of 80 {{.*}} shl
+  ; SSE2-CODEGEN: shift32i32
+  ; SSE2-CODEGEN: pmuludq
+
+  %0 = shl %shifttype32i32 %a , %b
+  ret %shifttype32i32 %0
+}
+
+%shifttype2i64 = type <2 x i64>
+define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
+entry:
+  ; SSE2: shift2i64
+  ; SSE2: cost of 20 {{.*}} shl
+  ; SSE2-CODEGEN: shift2i64
+  ; SSE2-CODEGEN: shlq %cl
+
+  %0 = shl %shifttype2i64 %a , %b
+  ret %shifttype2i64 %0
+}
+
+%shifttype4i64 = type <4 x i64>
+define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
+entry:
+  ; SSE2: shift4i64
+  ; SSE2: cost of 40 {{.*}} shl
+  ; SSE2-CODEGEN: shift4i64
+  ; SSE2-CODEGEN: shlq %cl
+
+  %0 = shl %shifttype4i64 %a , %b
+  ret %shifttype4i64 %0
+}
+
+%shifttype8i64 = type <8 x i64>
+define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
+entry:
+  ; SSE2: shift8i64
+  ; SSE2: cost of 80 {{.*}} shl
+  ; SSE2-CODEGEN: shift8i64
+  ; SSE2-CODEGEN: shlq %cl
+
+  %0 = shl %shifttype8i64 %a , %b
+  ret %shifttype8i64 %0
+}
+
+%shifttype16i64 = type <16 x i64>
+define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
+entry:
+  ; SSE2: shift16i64
+  ; SSE2: cost of 160 {{.*}} shl
+  ; SSE2-CODEGEN: shift16i64
+  ; SSE2-CODEGEN: shlq %cl
+
+  %0 = shl %shifttype16i64 %a , %b
+  ret %shifttype16i64 %0
+}
+
+%shifttype32i64 = type <32 x i64>
+define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
+entry:
+  ; SSE2: shift32i64
+  ; SSE2: cost of 320 {{.*}} shl
+  ; SSE2-CODEGEN: shift32i64
+  ; SSE2-CODEGEN: shlq %cl
+
+  %0 = shl %shifttype32i64 %a , %b
+  ret %shifttype32i64 %0
+}
+
+%shifttype2i8 = type <2 x i8>
+define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
+entry:
+  ; SSE2: shift2i8
+  ; SSE2: cost of 20 {{.*}} shl
+  ; SSE2-CODEGEN: shift2i8
+  ; SSE2-CODEGEN: shlq %cl
+
+  %0 = shl %shifttype2i8 %a , %b
+  ret %shifttype2i8 %0
+}
+
+%shifttype4i8 = type <4 x i8>
+define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
+entry:
+  ; SSE2: shift4i8
+  ; SSE2: cost of 10 {{.*}} shl
+  ; SSE2-CODEGEN: shift4i8
+  ; SSE2-CODEGEN: pmuludq
+
+  %0 = shl %shifttype4i8 %a , %b
+  ret %shifttype4i8 %0
+}
+
+%shifttype8i8 = type <8 x i8>
+define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
+entry:
+  ; SSE2: shift8i8
+  ; SSE2: cost of 80 {{.*}} shl
+  ; SSE2-CODEGEN: shift8i8
+  ; SSE2-CODEGEN: shll
+
+  %0 = shl %shifttype8i8 %a , %b
+  ret %shifttype8i8 %0
+}
+
+%shifttype16i8 = type <16 x i8>
+define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
+entry:
+  ; SSE2: shift16i8
+  ; SSE2: cost of 30 {{.*}} shl
+  ; SSE2-CODEGEN: shift16i8
+  ; SSE2-CODEGEN: cmpeqb
+
+  %0 = shl %shifttype16i8 %a , %b
+  ret %shifttype16i8 %0
+}
+
+%shifttype32i8 = type <32 x i8>
+define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
+entry:
+  ; SSE2: shift32i8
+  ; SSE2: cost of 60 {{.*}} shl
+  ; SSE2-CODEGEN: shift32i8
+  ; SSE2-CODEGEN: cmpeqb
+
+  %0 = shl %shifttype32i8 %a , %b
+  ret %shifttype32i8 %0
+}
+
+; Test shift by a constant vector.
+
+%shifttypec = type <2 x i16>
+define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
+entry:
+  ; SSE2: shift2i16const
+  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2-CODEGEN: shift2i16const
+  ; SSE2-CODEGEN: psllq $3
+
+  %0 = shl %shifttypec %a , <i16 3, i16 3>
+  ret %shifttypec %0
+}
+
+%shifttypec4i16 = type <4 x i16>
+define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
+entry:
+  ; SSE2: shift4i16const
+  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2-CODEGEN: shift4i16const
+  ; SSE2-CODEGEN: pslld $3
+
+  %0 = shl %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec4i16 %0
+}
+
+%shifttypec8i16 = type <8 x i16>
+define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
+entry:
+  ; SSE2: shift8i16const
+  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2-CODEGEN: shift8i16const
+  ; SSE2-CODEGEN: psllw $3
+
+  %0 = shl %shifttypec8i16 %a , <i16 3, i16 3, i16 3, i16 3,
+                                  i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec8i16 %0
+}
+
+%shifttypec16i16 = type <16 x i16>
+define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
+                                         %shifttypec16i16 %b) {
+entry:
+  ; SSE2: shift16i16const
+  ; SSE2: cost of 2 {{.*}} shl
+  ; SSE2-CODEGEN: shift16i16const
+  ; SSE2-CODEGEN: psllw $3
+
+  %0 = shl %shifttypec16i16 %a , <i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec16i16 %0
+}
+
+%shifttypec32i16 = type <32 x i16>
+define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
+                                        %shifttypec32i16 %b) {
+entry:
+  ; SSE2: shift32i16const
+  ; SSE2: cost of 4 {{.*}} shl
+  ; SSE2-CODEGEN: shift32i16const
+  ; SSE2-CODEGEN: psllw $3
+
+  %0 = shl %shifttypec32i16 %a , <i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3,
+                                   i16 3, i16 3, i16 3, i16 3>
+  ret %shifttypec32i16 %0
+}
+
+%shifttypec2i32 = type <2 x i32>
+define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
+entry:
+  ; SSE2: shift2i32c
+  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2-CODEGEN: shift2i32c
+  ; SSE2-CODEGEN: psllq $3
+
+  %0 = shl %shifttypec2i32 %a , <i32 3, i32 3>
+  ret %shifttypec2i32 %0
+}
+
+%shifttypec4i32 = type <4 x i32>
+define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
+entry:
+  ; SSE2: shift4i32c
+  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2-CODEGEN: shift4i32c
+  ; SSE2-CODEGEN: pslld $3
+
+  %0 = shl %shifttypec4i32 %a , <i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec4i32 %0
+}
+
+%shifttypec8i32 = type <8 x i32>
+define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
+entry:
+  ; SSE2: shift8i32c
+  ; SSE2: cost of 2 {{.*}} shl
+  ; SSE2-CODEGEN: shift8i32c
+  ; SSE2-CODEGEN: pslld $3
+
+  %0 = shl %shifttypec8i32 %a , <i32 3, i32 3, i32 3, i32 3,
+                                  i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec8i32 %0
+}
+
+%shifttypec16i32 = type <16 x i32>
+define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
+entry:
+  ; SSE2: shift16i32c
+  ; SSE2: cost of 4 {{.*}} shl
+  ; SSE2-CODEGEN: shift16i32c
+  ; SSE2-CODEGEN: pslld $3
+
+  %0 = shl %shifttypec16i32 %a , <i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec16i32 %0
+}
+
+%shifttypec32i32 = type <32 x i32>
+define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
+entry:
+  ; SSE2: shift32i32c
+  ; SSE2: cost of 8 {{.*}} shl
+  ; SSE2-CODEGEN: shift32i32c
+  ; SSE2-CODEGEN: pslld $3
+  %0 = shl %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3,
+                                   i32 3, i32 3, i32 3, i32 3>
+  ret %shifttypec32i32 %0
+}
+
+%shifttypec2i64 = type <2 x i64>
+define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
+entry:
+  ; SSE2: shift2i64c
+  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2-CODEGEN: shift2i64c
+  ; SSE2-CODEGEN: psllq $3
+
+  %0 = shl %shifttypec2i64 %a , <i64 3, i64 3>
+  ret %shifttypec2i64 %0
+}
+
+%shifttypec4i64 = type <4 x i64>
+define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
+entry:
+  ; SSE2: shift4i64c
+  ; SSE2: cost of 2 {{.*}} shl
+  ; SSE2-CODEGEN: shift4i64c
+  ; SSE2-CODEGEN: psllq $3
+
+  %0 = shl %shifttypec4i64 %a , <i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec4i64 %0
+}
+
+%shifttypec8i64 = type <8 x i64>
+define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
+entry:
+  ; SSE2: shift8i64c
+  ; SSE2: cost of 4 {{.*}} shl
+  ; SSE2-CODEGEN: shift8i64c
+  ; SSE2-CODEGEN: psllq $3
+
+ %0 = shl %shifttypec8i64 %a , <i64 3, i64 3, i64 3, i64 3,
+                                 i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec8i64 %0
+}
+
+%shifttypec16i64 = type <16 x i64>
+define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
+entry:
+  ; SSE2: shift16i64c
+  ; SSE2: cost of 8 {{.*}} shl
+  ; SSE2-CODEGEN: shift16i64c
+  ; SSE2-CODEGEN: psllq $3
+
+  %0 = shl %shifttypec16i64 %a , <i64 3, i64 3, i64 3, i64 3,
+                                   i64 3, i64 3, i64 3, i64 3,
+                                   i64 3, i64 3, i64 3, i64 3,
+                                   i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec16i64 %0
+}
+
+%shifttypec32i64 = type <32 x i64>
+define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
+entry:
+  ; SSE2: shift32i64c
+  ; SSE2: cost of 16 {{.*}} shl
+  ; SSE2-CODEGEN: shift32i64c
+  ; SSE2-CODEGEN: psllq $3
+
+  %0 = shl %shifttypec32i64 %a ,<i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3,
+                                  i64 3, i64 3, i64 3, i64 3>
+  ret %shifttypec32i64 %0
+}
+
+%shifttypec2i8 = type <2 x i8>
+define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
+entry:
+  ; SSE2: shift2i8c
+  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2-CODEGEN: shift2i8c
+  ; SSE2-CODEGEN: psllq $3
+
+  %0 = shl %shifttypec2i8 %a , <i8 3, i8 3>
+  ret %shifttypec2i8 %0
+}
+
+%shifttypec4i8 = type <4 x i8>
+define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
+entry:
+  ; SSE2: shift4i8c
+  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2-CODEGEN: shift4i8c
+  ; SSE2-CODEGEN: pslld $3
+
+  %0 = shl %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec4i8 %0
+}
+
+%shifttypec8i8 = type <8 x i8>
+define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
+entry:
+  ; SSE2: shift8i8c
+  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2-CODEGEN: shift8i8c
+  ; SSE2-CODEGEN: psllw $3
+
+  %0 = shl %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3,
+                                 i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec8i8 %0
+}
+
+%shifttypec16i8 = type <16 x i8>
+define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
+entry:
+  ; SSE2: shift16i8c
+  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2-CODEGEN: shift16i8c
+  ; SSE2-CODEGEN: psllw $3
+
+  %0 = shl %shifttypec16i8 %a , <i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec16i8 %0
+}
+
+%shifttypec32i8 = type <32 x i8>
+define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
+entry:
+  ; SSE2: shift32i8c
+  ; SSE2: cost of 2 {{.*}} shl
+  ; SSE2-CODEGEN: shift32i8c
+  ; SSE2-CODEGEN: psllw $3
+
+  %0 = shl %shifttypec32i8 %a , <i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3,
+                                  i8 3, i8 3, i8 3, i8 3>
+  ret %shifttypec32i8 %0
+}
diff --git a/test/Analysis/CostModel/X86/uitofp.ll b/test/Analysis/CostModel/X86/uitofp.ll
new file mode 100644
index 000000000000..a41a04df84ec
--- /dev/null
+++ b/test/Analysis/CostModel/X86/uitofp.ll
@@ -0,0 +1,368 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+
+; In X86TargetTransformInfo::getCastInstrCost we have code that depends on
+; getSimpleVT on a value type. On AVX2 we execute this code. Make sure we exit
+; early if the type is not a simple value type before we call this function.
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze < %s
+
+define <2 x double> @uitofpv2i8v2double(<2 x i8> %a) {
+  ; SSE2: uitofpv2i8v2double
+  ; SSE2: cost of 20 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv2i8v2double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <2 x i8> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @uitofpv4i8v4double(<4 x i8> %a) {
+  ; SSE2: uitofpv4i8v4double
+  ; SSE2: cost of 40 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv4i8v4double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <4 x i8> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @uitofpv8i8v8double(<8 x i8> %a) {
+  ; SSE2: uitofpv8i8v8double
+  ; SSE2: cost of 80 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv8i8v8double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+%1 = uitofp <8 x i8> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @uitofpv16i8v16double(<16 x i8> %a) {
+  ; SSE2: uitofpv16i8v16double
+  ; SSE2: cost of 160 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv16i8v16double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <16 x i8> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @uitofpv32i8v32double(<32 x i8> %a) {
+  ; SSE2: uitofpv32i8v32double
+  ; SSE2: cost of 320 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv32i8v32double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <32 x i8> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @uitofpv2i16v2double(<2 x i16> %a) {
+  ; SSE2: uitofpv2i16v2double
+  ; SSE2: cost of 20 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv2i16v2double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <2 x i16> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @uitofpv4i16v4double(<4 x i16> %a) {
+  ; SSE2: uitofpv4i16v4double
+  ; SSE2: cost of 40 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv4i16v4double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <4 x i16> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @uitofpv8i16v8double(<8 x i16> %a) {
+  ; SSE2: uitofpv8i16v8double
+  ; SSE2: cost of 80 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv8i16v8double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <8 x i16> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @uitofpv16i16v16double(<16 x i16> %a) {
+  ; SSE2: uitofpv16i16v16double
+  ; SSE2: cost of 160 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv16i16v16double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <16 x i16> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @uitofpv32i16v32double(<32 x i16> %a) {
+  ; SSE2: uitofpv32i16v32double
+  ; SSE2: cost of 320 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv32i16v32double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <32 x i16> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @uitofpv2i32v2double(<2 x i32> %a) {
+  ; SSE2: uitofpv2i32v2double
+  ; SSE2: cost of 20 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv2i32v2double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <2 x i32> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @uitofpv4i32v4double(<4 x i32> %a) {
+  ; SSE2: uitofpv4i32v4double
+  ; SSE2: cost of 40 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv4i32v4double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <4 x i32> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @uitofpv8i32v8double(<8 x i32> %a) {
+  ; SSE2: uitofpv8i32v8double
+  ; SSE2: cost of 80 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv8i32v8double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <8 x i32> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @uitofpv16i32v16double(<16 x i32> %a) {
+  ; SSE2: uitofpv16i32v16double
+  ; SSE2: cost of 160 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv16i32v16double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <16 x i32> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @uitofpv32i32v32double(<32 x i32> %a) {
+  ; SSE2: uitofpv32i32v32double
+  ; SSE2: cost of 320 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv32i32v32double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <32 x i32> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @uitofpv2i64v2double(<2 x i64> %a) {
+  ; SSE2: uitofpv2i64v2double
+  ; SSE2: cost of 20 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv2i64v2double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <2 x i64> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @uitofpv4i64v4double(<4 x i64> %a) {
+  ; SSE2: uitofpv4i64v4double
+  ; SSE2: cost of 40 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv4i64v4double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <4 x i64> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @uitofpv8i64v8double(<8 x i64> %a) {
+  %1 = uitofp <8 x i64> %a to <8 x double>
+  ; SSE2: uitofpv8i64v8double
+  ; SSE2: cost of 80 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv8i64v8double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  ret <8 x double> %1
+}
+
+define <16 x double> @uitofpv16i64v16double(<16 x i64> %a) {
+  ; SSE2: uitofpv16i64v16double
+  ; SSE2: cost of 160 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv16i64v16double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <16 x i64> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @uitofpv32i64v32double(<32 x i64> %a) {
+  ; SSE2: uitofpv32i64v32double
+  ; SSE2: cost of 320 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv32i64v32double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <32 x i64> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x float> @uitofpv2i8v2float(<2 x i8> %a) {
+  ; SSE2: uitofpv2i8v2float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <2 x i8> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @uitofpv4i8v4float(<4 x i8> %a) {
+  ; SSE2: uitofpv4i8v4float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <4 x i8> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @uitofpv8i8v8float(<8 x i8> %a) {
+  ; SSE2: uitofpv8i8v8float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <8 x i8> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @uitofpv16i8v16float(<16 x i8> %a) {
+  ; SSE2: uitofpv16i8v16float
+  ; SSE2: cost of 8 {{.*}} uitofp
+  %1 = uitofp <16 x i8> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @uitofpv32i8v32float(<32 x i8> %a) {
+  ; SSE2: uitofpv32i8v32float
+  ; SSE2: cost of 16 {{.*}} uitofp
+  %1 = uitofp <32 x i8> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @uitofpv2i16v2float(<2 x i16> %a) {
+  ; SSE2: uitofpv2i16v2float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <2 x i16> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @uitofpv4i16v4float(<4 x i16> %a) {
+  ; SSE2: uitofpv4i16v4float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <4 x i16> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @uitofpv8i16v8float(<8 x i16> %a) {
+  ; SSE2: uitofpv8i16v8float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <8 x i16> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @uitofpv16i16v16float(<16 x i16> %a) {
+  ; SSE2: uitofpv16i16v16float
+  ; SSE2: cost of 30 {{.*}} uitofp
+  %1 = uitofp <16 x i16> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @uitofpv32i16v32float(<32 x i16> %a) {
+  ; SSE2: uitofpv32i16v32float
+  ; SSE2: cost of 60 {{.*}} uitofp
+  %1 = uitofp <32 x i16> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @uitofpv2i32v2float(<2 x i32> %a) {
+  ; SSE2: uitofpv2i32v2float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <2 x i32> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @uitofpv4i32v4float(<4 x i32> %a) {
+  ; SSE2: uitofpv4i32v4float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @uitofpv8i32v8float(<8 x i32> %a) {
+  ; SSE2: uitofpv8i32v8float
+  ; SSE2: cost of 30 {{.*}} uitofp
+  %1 = uitofp <8 x i32> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @uitofpv16i32v16float(<16 x i32> %a) {
+  ; SSE2: uitofpv16i32v16float
+  ; SSE2: cost of 60 {{.*}} uitofp
+  %1 = uitofp <16 x i32> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @uitofpv32i32v32float(<32 x i32> %a) {
+  ; SSE2: uitofpv32i32v32float
+  ; SSE2: cost of 120 {{.*}} uitofp
+  %1 = uitofp <32 x i32> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @uitofpv2i64v2float(<2 x i64> %a) {
+  ; SSE2: uitofpv2i64v2float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <2 x i64> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @uitofpv4i64v4float(<4 x i64> %a) {
+  ; SSE2: uitofpv4i64v4float
+  ; SSE2: cost of 30 {{.*}} uitofp
+  %1 = uitofp <4 x i64> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @uitofpv8i64v8float(<8 x i64> %a) {
+  ; SSE2: uitofpv8i64v8float
+  ; SSE2: cost of 60 {{.*}} uitofp
+  %1 = uitofp <8 x i64> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @uitofpv16i64v16float(<16 x i64> %a) {
+  ; SSE2: uitofpv16i64v16float
+  ; SSE2: cost of 120 {{.*}} uitofp
+  %1 = uitofp <16 x i64> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @uitofpv32i64v32float(<32 x i64> %a) {
+  ; SSE2: uitofpv32i64v32float
+  ; SSE2: cost of 240 {{.*}} uitofp
+  %1 = uitofp <32 x i64> %a to <32 x float>
+  ret <32 x float> %1
+}
+
diff --git a/test/Analysis/Profiling/lit.local.cfg b/test/Analysis/Profiling/lit.local.cfg
index d507d3fd71a4..444b7dc27410 100644
--- a/test/Analysis/Profiling/lit.local.cfg
+++ b/test/Analysis/Profiling/lit.local.cfg
@@ -11,3 +11,6 @@ root = getRoot(config)
 # doesn't have any JIT at present so they will fail when run there.
 if root.host_arch in ['AArch64']:
     config.unsupported = True
+
+if 'hexagon' in root.target_triple:
+    config.unsupported = True
diff --git a/test/Analysis/ScalarEvolution/scev-invalid.ll b/test/Analysis/ScalarEvolution/scev-invalid.ll
new file mode 100644
index 000000000000..aac0d319ae84
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/scev-invalid.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -S -indvars -loop-unroll | FileCheck %s
+;
+; PR15570: SEGV: SCEV back-edge info invalid after dead code removal.
+;
+; Indvars creates a SCEV expression for the loop's back edge taken
+; count, then determines that the comparison is always true and
+; removes it.
+;
+; When loop-unroll asks for the expression, it contains a NULL
+; SCEVUnknkown (as a CallbackVH).
+;
+; forgetMemoizedResults should invalidate the backedge taken count expression.
+
+; CHECK: @test
+; CHECK-NOT: phi
+; CHECK-NOT: icmp
+; CHECK: ret void
+define void @test() {
+entry:
+  %xor1 = xor i32 0, 1
+  br label %b17
+
+b17:
+  br i1 undef, label %b22, label %b18
+
+b18:
+  %phi1 = phi i32 [ %add1, %b18 ], [ %xor1, %b17 ]
+  %add1 = add nsw i32 %phi1, -1
+  %cmp1 = icmp sgt i32 %add1, 0
+  br i1 %cmp1, label %b18, label %b22
+
+b22:
+  ret void
+}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
new file mode 100644
index 000000000000..f1edb4482cf1
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
@@ -0,0 +1,104 @@
+; RUN: opt < %s -tbaa -basicaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+; Generated with "clang -cc1 -disable-llvm-optzns -O1 -emit-llvm"
+; #include <new>
+; struct Foo { long i; };
+; struct Bar { void *p; };
+; long foo(int n) {
+;   Foo *f = new Foo;
+;   f->i = 1;
+;   for (int i=0; i<n; ++i) {
+;     Bar *b = new (f) Bar;
+;     b->p = 0;
+;     f = new (f) Foo;
+;     f->i = i;
+;   }
+;   return f->i;
+; }
+
+; Basic AA says MayAlias, TBAA says NoAlias
+; CHECK: MayAlias: i64* %i5, i8** %p
+; CHECK: NoAlias: store i64 %conv, i64* %i5, align 8, !tbaa !4 <->   store i8* null, i8** %p, align 8, !tbaa !3
+
+%struct.Foo = type { i64 }
+%struct.Bar = type { i8* }
+
+define i64 @_Z3fooi(i32 %n) #0 {
+entry:
+  %n.addr = alloca i32, align 4
+  %f = alloca %struct.Foo*, align 8
+  %i1 = alloca i32, align 4
+  %b = alloca %struct.Bar*, align 8
+  store i32 %n, i32* %n.addr, align 4, !tbaa !0
+  %call = call noalias i8* @_Znwm(i64 8)
+  %0 = bitcast i8* %call to %struct.Foo*
+  store %struct.Foo* %0, %struct.Foo** %f, align 8, !tbaa !3
+  %1 = load %struct.Foo** %f, align 8, !tbaa !3
+  %i = getelementptr inbounds %struct.Foo* %1, i32 0, i32 0
+  store i64 1, i64* %i, align 8, !tbaa !4
+  store i32 0, i32* %i1, align 4, !tbaa !0
+  br label %for.cond
+
+for.cond:
+  %2 = load i32* %i1, align 4, !tbaa !0
+  %3 = load i32* %n.addr, align 4, !tbaa !0
+  %cmp = icmp slt i32 %2, %3
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %4 = load %struct.Foo** %f, align 8, !tbaa !3
+  %5 = bitcast %struct.Foo* %4 to i8*
+  %new.isnull = icmp eq i8* %5, null
+  br i1 %new.isnull, label %new.cont, label %new.notnull
+
+new.notnull:
+  %6 = bitcast i8* %5 to %struct.Bar*
+  br label %new.cont
+
+new.cont:
+  %7 = phi %struct.Bar* [ %6, %new.notnull ], [ null, %for.body ]
+  store %struct.Bar* %7, %struct.Bar** %b, align 8, !tbaa !3
+  %8 = load %struct.Bar** %b, align 8, !tbaa !3
+  %p = getelementptr inbounds %struct.Bar* %8, i32 0, i32 0
+  store i8* null, i8** %p, align 8, !tbaa !3
+  %9 = load %struct.Foo** %f, align 8, !tbaa !3
+  %10 = bitcast %struct.Foo* %9 to i8*
+  %new.isnull2 = icmp eq i8* %10, null
+  br i1 %new.isnull2, label %new.cont4, label %new.notnull3
+
+new.notnull3:
+  %11 = bitcast i8* %10 to %struct.Foo*
+  br label %new.cont4
+
+new.cont4:
+  %12 = phi %struct.Foo* [ %11, %new.notnull3 ], [ null, %new.cont ]
+  store %struct.Foo* %12, %struct.Foo** %f, align 8, !tbaa !3
+  %13 = load i32* %i1, align 4, !tbaa !0
+  %conv = sext i32 %13 to i64
+  %14 = load %struct.Foo** %f, align 8, !tbaa !3
+  %i5 = getelementptr inbounds %struct.Foo* %14, i32 0, i32 0
+  store i64 %conv, i64* %i5, align 8, !tbaa !4
+  br label %for.inc
+
+for.inc:
+  %15 = load i32* %i1, align 4, !tbaa !0
+  %inc = add nsw i32 %15, 1
+  store i32 %inc, i32* %i1, align 4, !tbaa !0
+  br label %for.cond
+
+for.end:
+  %16 = load %struct.Foo** %f, align 8, !tbaa !3
+  %i6 = getelementptr inbounds %struct.Foo* %16, i32 0, i32 0
+  %17 = load i64* %i6, align 8, !tbaa !4
+  ret i64 %17
+}
+
+declare noalias i8* @_Znwm(i64)
+
+attributes #0 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
+!4 = metadata !{metadata !"long", metadata !1}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
new file mode 100644
index 000000000000..c63abca38853
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
@@ -0,0 +1,392 @@
+; RUN: opt < %s -tbaa -basicaa -struct-path-tbaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -tbaa -basicaa -struct-path-tbaa -gvn -S | FileCheck %s --check-prefix=OPT
+; Generated from clang/test/CodeGen/tbaa.cpp with "-O1 -struct-path-tbaa -disable-llvm-optzns".
+
+%struct.StructA = type { i16, i32, i16, i32 }
+%struct.StructB = type { i16, %struct.StructA, i32 }
+%struct.StructS = type { i16, i32 }
+%struct.StructS2 = type { i16, i32 }
+%struct.StructC = type { i16, %struct.StructB, i32 }
+%struct.StructD = type { i16, %struct.StructB, i32, i8 }
+
+define i32 @_Z1gPjP7StructAy(i32* %s, %struct.StructA* %A, i64 %count) #0 {
+entry:
+; Access to i32* and &(A->f32).
+; CHECK: Function
+; CHECK: MayAlias:   store i32 4, i32* %f32, align 4, !tbaa !8 <->   store i32 1, i32* %0, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; OPT: %[[RET:.*]] = load i32*
+; OPT: ret i32 %[[RET]]
+  %s.addr = alloca i32*, align 8
+  %A.addr = alloca %struct.StructA*, align 8
+  %count.addr = alloca i64, align 8
+  store i32* %s, i32** %s.addr, align 8, !tbaa !0
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load i32** %s.addr, align 8, !tbaa !0
+  store i32 1, i32* %0, align 4, !tbaa !6
+  %1 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %1, i32 0, i32 1
+  store i32 4, i32* %f32, align 4, !tbaa !8
+  %2 = load i32** %s.addr, align 8, !tbaa !0
+  %3 = load i32* %2, align 4, !tbaa !6
+  ret i32 %3
+}
+
+define i32 @_Z2g2PjP7StructAy(i32* %s, %struct.StructA* %A, i64 %count) #0 {
+entry:
+; Access to i32* and &(A->f16).
+; CHECK: Function
+; CHECK: NoAlias:   store i16 4, i16* %f16, align 2, !tbaa !8 <->   store i32 1, i32* %0, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propogate the value from store.
+; OPT: ret i32 1
+  %s.addr = alloca i32*, align 8
+  %A.addr = alloca %struct.StructA*, align 8
+  %count.addr = alloca i64, align 8
+  store i32* %s, i32** %s.addr, align 8, !tbaa !0
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load i32** %s.addr, align 8, !tbaa !0
+  store i32 1, i32* %0, align 4, !tbaa !6
+  %1 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f16 = getelementptr inbounds %struct.StructA* %1, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !11
+  %2 = load i32** %s.addr, align 8, !tbaa !0
+  %3 = load i32* %2, align 4, !tbaa !6
+  ret i32 %3
+}
+
+define i32 @_Z2g3P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->a.f32).
+; CHECK: Function
+; CHECK: MayAlias:   store i32 4, i32* %f321, align 4, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; OPT: %[[RET:.*]] = load i32*
+; OPT: ret i32 %[[RET]]
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %1, i32 0, i32 1
+  %f321 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 1
+  store i32 4, i32* %f321, align 4, !tbaa !12
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g4P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->a.f16).
+; CHECK: Function
+; CHECK: NoAlias:   store i16 4, i16* %f16, align 2, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propogate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %1, i32 0, i32 1
+  %f16 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !14
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g5P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->f32).
+; CHECK: Function
+; CHECK: NoAlias:   store i32 4, i32* %f321, align 4, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propogate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructB* %1, i32 0, i32 2
+  store i32 4, i32* %f321, align 4, !tbaa !15
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g6P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->a.f32_2).
+; CHECK: Function
+; CHECK: NoAlias:   store i32 4, i32* %f32_2, align 4, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propogate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %1, i32 0, i32 1
+  %f32_2 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 3
+  store i32 4, i32* %f32_2, align 4, !tbaa !16
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g7P7StructAP7StructSy(%struct.StructA* %A, %struct.StructS* %S, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(S->f32).
+; CHECK: Function
+; CHECK: NoAlias:   store i32 4, i32* %f321, align 4, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propogate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %S.addr = alloca %struct.StructS*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructS* %1, i32 0, i32 1
+  store i32 4, i32* %f321, align 4, !tbaa !17
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g8P7StructAP7StructSy(%struct.StructA* %A, %struct.StructS* %S, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(S->f16).
+; CHECK: Function
+; CHECK: NoAlias:   store i16 4, i16* %f16, align 2, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propogate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %S.addr = alloca %struct.StructS*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f16 = getelementptr inbounds %struct.StructS* %1, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !19
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g9P7StructSP8StructS2y(%struct.StructS* %S, %struct.StructS2* %S2, i64 %count) #0 {
+entry:
+; Access to &(S->f32) and &(S2->f32).
+; CHECK: Function
+; CHECK: NoAlias:   store i32 4, i32* %f321, align 4, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propogate the value from store.
+; OPT: ret i32 1
+  %S.addr = alloca %struct.StructS*, align 8
+  %S2.addr = alloca %struct.StructS2*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store %struct.StructS2* %S2, %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructS* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !17
+  %1 = load %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructS2* %1, i32 0, i32 1
+  store i32 4, i32* %f321, align 4, !tbaa !20
+  %2 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructS* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !17
+  ret i32 %3
+}
+
+define i32 @_Z3g10P7StructSP8StructS2y(%struct.StructS* %S, %struct.StructS2* %S2, i64 %count) #0 {
+entry:
+; Access to &(S->f32) and &(S2->f16).
+; CHECK: Function
+; CHECK: NoAlias:   store i16 4, i16* %f16, align 2, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propogate the value from store.
+; OPT: ret i32 1
+  %S.addr = alloca %struct.StructS*, align 8
+  %S2.addr = alloca %struct.StructS2*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store %struct.StructS2* %S2, %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructS* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !17
+  %1 = load %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  %f16 = getelementptr inbounds %struct.StructS2* %1, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !22
+  %2 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructS* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !17
+  ret i32 %3
+}
+
+define i32 @_Z3g11P7StructCP7StructDy(%struct.StructC* %C, %struct.StructD* %D, i64 %count) #0 {
+entry:
+; Access to &(C->b.a.f32) and &(D->b.a.f32).
+; CHECK: Function
+; CHECK: NoAlias:   store i32 4, i32* %f323, align 4, !tbaa !12 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propogate the value from store.
+; OPT: ret i32 1
+  %C.addr = alloca %struct.StructC*, align 8
+  %D.addr = alloca %struct.StructD*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructC* %C, %struct.StructC** %C.addr, align 8, !tbaa !0
+  store %struct.StructD* %D, %struct.StructD** %D.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %b = getelementptr inbounds %struct.StructC* %0, i32 0, i32 1
+  %a = getelementptr inbounds %struct.StructB* %b, i32 0, i32 1
+  %f32 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !23
+  %1 = load %struct.StructD** %D.addr, align 8, !tbaa !0
+  %b1 = getelementptr inbounds %struct.StructD* %1, i32 0, i32 1
+  %a2 = getelementptr inbounds %struct.StructB* %b1, i32 0, i32 1
+  %f323 = getelementptr inbounds %struct.StructA* %a2, i32 0, i32 1
+  store i32 4, i32* %f323, align 4, !tbaa !25
+  %2 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %b4 = getelementptr inbounds %struct.StructC* %2, i32 0, i32 1
+  %a5 = getelementptr inbounds %struct.StructB* %b4, i32 0, i32 1
+  %f326 = getelementptr inbounds %struct.StructA* %a5, i32 0, i32 1
+  %3 = load i32* %f326, align 4, !tbaa !23
+  ret i32 %3
+}
+
+define i32 @_Z3g12P7StructCP7StructDy(%struct.StructC* %C, %struct.StructD* %D, i64 %count) #0 {
+entry:
+; Access to &(b1->a.f32) and &(b2->a.f32).
+; CHECK: Function
+; CHECK: MayAlias:   store i32 4, i32* %f325, align 4, !tbaa !6 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; OPT: %[[RET:.*]] = load i32*
+; OPT: ret i32 %[[RET]]
+  %C.addr = alloca %struct.StructC*, align 8
+  %D.addr = alloca %struct.StructD*, align 8
+  %count.addr = alloca i64, align 8
+  %b1 = alloca %struct.StructB*, align 8
+  %b2 = alloca %struct.StructB*, align 8
+  store %struct.StructC* %C, %struct.StructC** %C.addr, align 8, !tbaa !0
+  store %struct.StructD* %D, %struct.StructD** %D.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %b = getelementptr inbounds %struct.StructC* %0, i32 0, i32 1
+  store %struct.StructB* %b, %struct.StructB** %b1, align 8, !tbaa !0
+  %1 = load %struct.StructD** %D.addr, align 8, !tbaa !0
+  %b3 = getelementptr inbounds %struct.StructD* %1, i32 0, i32 1
+  store %struct.StructB* %b3, %struct.StructB** %b2, align 8, !tbaa !0
+  %2 = load %struct.StructB** %b1, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %2, i32 0, i32 1
+  %f32 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !12
+  %3 = load %struct.StructB** %b2, align 8, !tbaa !0
+  %a4 = getelementptr inbounds %struct.StructB* %3, i32 0, i32 1
+  %f325 = getelementptr inbounds %struct.StructA* %a4, i32 0, i32 1
+  store i32 4, i32* %f325, align 4, !tbaa !12
+  %4 = load %struct.StructB** %b1, align 8, !tbaa !0
+  %a6 = getelementptr inbounds %struct.StructB* %4, i32 0, i32 1
+  %f327 = getelementptr inbounds %struct.StructA* %a6, i32 0, i32 1
+  %5 = load i32* %f327, align 4, !tbaa !12
+  ret i32 %5
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !1, metadata !1, i64 0}
+!1 = metadata !{metadata !"any pointer", i64 0, metadata !2}
+!2 = metadata !{metadata !"omnipotent char", i64 0, metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA"}
+!4 = metadata !{metadata !5, metadata !5, i64 0}
+!5 = metadata !{metadata !"long long", i64 0, metadata !2}
+!6 = metadata !{metadata !7, metadata !7, i64 0}
+!7 = metadata !{metadata !"int", i64 0, metadata !2}
+!8 = metadata !{metadata !9, metadata !7, i64 4}
+!9 = metadata !{metadata !"_ZTS7StructA", i64 0, metadata !10, i64 4, metadata !7, i64 8, metadata !10, i64 12, metadata !7}
+!10 = metadata !{metadata !"short", i64 0, metadata !2}
+!11 = metadata !{metadata !9, metadata !10, i64 0}
+!12 = metadata !{metadata !13, metadata !7, i64 8}
+!13 = metadata !{metadata !"_ZTS7StructB", i64 0, metadata !10, i64 4, metadata !9, i64 20, metadata !7}
+!14 = metadata !{metadata !13, metadata !10, i64 4}
+!15 = metadata !{metadata !13, metadata !7, i64 20}
+!16 = metadata !{metadata !13, metadata !7, i64 16}
+!17 = metadata !{metadata !18, metadata !7, i64 4}
+!18 = metadata !{metadata !"_ZTS7StructS", i64 0, metadata !10, i64 4, metadata !7}
+!19 = metadata !{metadata !18, metadata !10, i64 0}
+!20 = metadata !{metadata !21, metadata !7, i64 4}
+!21 = metadata !{metadata !"_ZTS8StructS2", i64 0, metadata !10, i64 4, metadata !7}
+!22 = metadata !{metadata !21, metadata !10, i64 0}
+!23 = metadata !{metadata !24, metadata !7, i64 12}
+!24 = metadata !{metadata !"_ZTS7StructC", i64 0, metadata !10, i64 4, metadata !13, i64 28, metadata !7}
+!25 = metadata !{metadata !26, metadata !7, i64 12}
+!26 = metadata !{metadata !"_ZTS7StructD", i64 0, metadata !10, i64 4, metadata !13, i64 28, metadata !7, i64 32, metadata !2}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 8faec8be47f9..728213f6130a 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -29,7 +29,7 @@ set(LLVM_TEST_DEPENDS UnitTests
           macho-dump opt
           profile_rt-shared
           FileCheck count not
-          yaml2obj)
+          yaml2obj obj2yaml)
 
 # If Intel JIT events are supported, depend on a tool that tests the listener.
 if( LLVM_USE_INTEL_JITEVENTS )
diff --git a/test/CodeGen/AArch64/adrp-relocation.ll b/test/CodeGen/AArch64/adrp-relocation.ll
index c33b442624a5..cf411166a3a0 100644
--- a/test/CodeGen/AArch64/adrp-relocation.ll
+++ b/test/CodeGen/AArch64/adrp-relocation.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -filetype=obj < %s | elf-dump | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -filetype=obj < %s | llvm-readobj -s -r | FileCheck %s
 
 define i64 @testfn() nounwind {
 entry:
@@ -19,17 +19,9 @@ entry:
 ; relative offsets of testfn and foo) because its value depends on where this
 ; object file's .text section gets relocated in memory.
 
-; CHECK: .rela.text
-
-; CHECK: # Relocation 0
-; CHECK-NEXT: (('r_offset', 0x0000000000000010)
-; CHECK-NEXT:  ('r_sym', 0x00000007)
-; CHECK-NEXT:  ('r_type', 0x00000113)
-; CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-; CHECK-NEXT: ),
-; CHECK-NEXT:  Relocation 1
-; CHECK-NEXT: (('r_offset', 0x0000000000000014)
-; CHECK-NEXT:  ('r_sym', 0x00000007)
-; CHECK-NEXT:  ('r_type', 0x00000115)
-; CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-; CHECK-NEXT: ),
+; CHECK:      Relocations [
+; CHECK-NEXT:   Section (1) .text {
+; CHECK-NEXT:     0x10 R_AARCH64_ADR_PREL_PG_HI21 testfn 0x0
+; CHECK-NEXT:     0x14 R_AARCH64_ADD_ABS_LO12_NC testfn 0x0
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll
index 64217695d810..c62edf6503c6 100644
--- a/test/CodeGen/AArch64/alloca.ll
+++ b/test/CodeGen/AArch64/alloca.ll
@@ -71,8 +71,8 @@ define void @test_variadic_alloca(i64 %n, ...) {
 ; CHECK: sub     sp, sp, #208
 ; CHECK: stp     x29, x30, [sp, #192]
 ; CHECK: add     x29, sp, #192
-; CHECK: sub     x9, x29, #192
-; CHECK: add     x8, x9, #0
+; CHECK: sub     [[TMP:x[0-9]+]], x29, #192
+; CHECK: add     x8, [[TMP]], #0
 ; CHECK: str     q7, [x8, #112]
 ; [...]
 ; CHECK: str     q1, [x8, #16]
@@ -131,4 +131,4 @@ define void @test_scoped_alloca(i64 %n) {
 ; CHECK: mov sp, [[SAVED_SP]]
 
   ret void
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
index 3c03e47147b0..9888a742e32b 100644
--- a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
+++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
 
 define i32 @foo(i32* %var, i1 %cond) {
 ; CHECK: foo:
@@ -9,7 +9,9 @@ simple_ver:
   store i32 %newval, i32* %var
   br label %somewhere
 atomic_ver:
-  %val = atomicrmw add i32* %var, i32 -1 seq_cst
+  fence seq_cst
+  %val = atomicrmw add i32* %var, i32 -1 monotonic
+  fence seq_cst
   br label %somewhere
 ; CHECK: dmb
 ; CHECK: ldxr
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index f3c16171cc83..5e87f21a217d 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -8,18 +8,18 @@
 define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_add_i8:
    %old = atomicrmw add i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -27,19 +27,19 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_add_i16:
-   %old = atomicrmw add i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw add i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -47,8 +47,8 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_add_i32:
-   %old = atomicrmw add i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw add i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
@@ -57,9 +57,9 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -67,8 +67,8 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_add_i64:
-   %old = atomicrmw add i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw add i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -79,7 +79,7 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -87,8 +87,8 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_sub_i8:
-   %old = atomicrmw sub i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw sub i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
@@ -99,7 +99,7 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -107,8 +107,8 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_sub_i16:
-   %old = atomicrmw sub i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw sub i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -117,9 +117,9 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -127,19 +127,19 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_sub_i32:
-   %old = atomicrmw sub i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw sub i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -148,18 +148,18 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_sub_i64:
    %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -167,8 +167,8 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_and_i8:
-   %old = atomicrmw and i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw and i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
@@ -177,9 +177,9 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -187,8 +187,8 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_and_i16:
-   %old = atomicrmw and i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw and i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -199,7 +199,7 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -208,18 +208,18 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_and_i32:
    %old = atomicrmw and i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -227,19 +227,19 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_and_i64:
-   %old = atomicrmw and i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw and i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -248,18 +248,18 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_or_i8:
    %old = atomicrmw or i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -267,8 +267,8 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_or_i16:
-   %old = atomicrmw or i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw or i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -279,7 +279,7 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -287,19 +287,19 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_or_i32:
-   %old = atomicrmw or i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw or i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -307,8 +307,8 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_or_i64:
-   %old = atomicrmw or i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw or i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -317,9 +317,9 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -327,19 +327,19 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_xor_i8:
-   %old = atomicrmw xor i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xor i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -347,8 +347,8 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_xor_i16:
-   %old = atomicrmw xor i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xor i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -357,9 +357,9 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -368,18 +368,18 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_xor_i32:
    %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -387,8 +387,8 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_xor_i64:
-   %old = atomicrmw xor i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xor i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -399,7 +399,7 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -407,8 +407,8 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_xchg_i8:
-   %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xchg i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
@@ -418,7 +418,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
   ; function there.
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -427,17 +427,17 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_xchg_i16:
    %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -445,8 +445,8 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_xchg_i32:
-   %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xchg i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
@@ -454,9 +454,9 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -464,18 +464,18 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_xchg_i64:
-   %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xchg i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -484,20 +484,20 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_min_i8:
-   %old = atomicrmw min i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw min i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -505,8 +505,8 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_min_i16:
-   %old = atomicrmw min i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw min i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -516,9 +516,9 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxth
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
-; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -526,8 +526,8 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_min_i32:
-   %old = atomicrmw min i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw min i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
@@ -539,7 +539,7 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -548,19 +548,19 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_min_i64:
    %old = atomicrmw min i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: cmp x0, x[[OLD]]
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -569,19 +569,19 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_max_i8:
    %old = atomicrmw max i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
-; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -589,20 +589,20 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_max_i16:
-   %old = atomicrmw max i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw max i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxth
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -610,8 +610,8 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_max_i32:
-   %old = atomicrmw max i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw max i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
@@ -621,9 +621,9 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]]
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -631,8 +631,8 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_max_i64:
-   %old = atomicrmw max i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw max i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -644,7 +644,7 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -652,8 +652,8 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_umin_i8:
-   %old = atomicrmw umin i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umin i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
@@ -665,7 +665,7 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -673,20 +673,20 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_umin_i16:
-   %old = atomicrmw umin i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umin i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], uxth
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -695,19 +695,19 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_umin_i32:
    %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]]
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -715,20 +715,20 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_umin_i64:
-   %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: cmp x0, x[[OLD]]
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -736,20 +736,20 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_umax_i8:
-   %old = atomicrmw umax i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
-; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -757,8 +757,8 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_umax_i16:
-   %old = atomicrmw umax i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umax i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -770,7 +770,7 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -779,19 +779,19 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_umax_i32:
    %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]]
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -799,8 +799,8 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_umax_i64:
-   %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umax i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -810,9 +810,9 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
   ; function there.
 ; CHECK-NEXT: cmp x0, x[[OLD]]
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -820,13 +820,13 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK: test_atomic_cmpxchg_i8:
-   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst
-; CHECK: dmb ish
+   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w[[OLD]], w0
@@ -834,7 +834,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
   ; As above, w1 is a reasonable guess.
 ; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -843,20 +843,20 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK: test_atomic_cmpxchg_i16:
    %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
 ; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w[[OLD]], w0
 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
   ; As above, w1 is a reasonable guess.
-; CHECK: stxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -864,8 +864,8 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 
 define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK: test_atomic_cmpxchg_i32:
-   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst
-; CHECK: dmb ish
+   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
@@ -876,9 +876,9 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-NEXT: cmp w[[OLD]], w0
 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
   ; As above, w1 is a reasonable guess.
-; CHECK: stxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -886,8 +886,8 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 
 define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK: test_atomic_cmpxchg_i64:
-   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst
-; CHECK: dmb ish
+   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -900,7 +900,7 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
   ; As above, w1 is a reasonable guess.
 ; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -933,19 +933,26 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
 define i8 @test_atomic_load_acquire_i8() nounwind {
 ; CHECK: test_atomic_load_acquire_i8:
   %val = load atomic i8* @var8 acquire, align 1
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK-NOT: dmb
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
-
+; CHECK-NOT: dmb
 ; CHECK: ldarb w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
   ret i8 %val
 }
 
 define i8 @test_atomic_load_seq_cst_i8() nounwind {
 ; CHECK: test_atomic_load_seq_cst_i8:
   %val = load atomic i8* @var8 seq_cst, align 1
-; CHECK: adrp x[[HIADDR:[0-9]+]], var8
-; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
+; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK-NOT: dmb
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK-NOT: dmb
+; CHECK: ldarb w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
   ret i8 %val
 }
 
@@ -954,6 +961,7 @@ define i16 @test_atomic_load_monotonic_i16() nounwind {
   %val = load atomic i16* @var16 monotonic, align 2
 ; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var16
+; CHECK-NOT: dmb
 ; CHECK: ldrh w0, [x[[HIADDR]], #:lo12:var16]
 ; CHECK-NOT: dmb
 
@@ -976,9 +984,13 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind
 define i64 @test_atomic_load_seq_cst_i64() nounwind {
 ; CHECK: test_atomic_load_seq_cst_i64:
   %val = load atomic i64* @var64 seq_cst, align 8
-; CHECK: adrp x[[HIADDR:[0-9]+]], var64
-; CHECK: ldr x0, [x[[HIADDR]], #:lo12:var64]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
+; CHECK: adrp [[HIADDR:x[0-9]+]], var64
+; CHECK-NOT: dmb
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64
+; CHECK-NOT: dmb
+; CHECK: ldar x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
   ret i64 %val
 }
 
@@ -1005,20 +1017,26 @@ define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val)
 define void @test_atomic_store_release_i8(i8 %val) nounwind {
 ; CHECK: test_atomic_store_release_i8:
   store atomic i8 %val, i8* @var8 release, align 1
+; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK-NOT: dmb
 ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK-NOT: dmb
 ; CHECK: stlrb w0, [x[[ADDR]]]
-
+; CHECK-NOT: dmb
   ret void
 }
 
 define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
 ; CHECK: test_atomic_store_seq_cst_i8:
   store atomic i8 %val, i8* @var8 seq_cst, align 1
+; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK-NOT: dmb
 ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK-NOT: dmb
 ; CHECK: stlrb w0, [x[[ADDR]]]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
   ret void
 }
@@ -1026,9 +1044,11 @@ define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
 define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
 ; CHECK: test_atomic_store_monotonic_i16:
   store atomic i16 %val, i16* @var16 monotonic, align 2
+; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var16
+; CHECK-NOT: dmb
 ; CHECK: strh w0, [x[[HIADDR]], #:lo12:var16]
-
+; CHECK-NOT: dmb
   ret void
 }
 
@@ -1039,7 +1059,9 @@ define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %va
   %addr = inttoptr i64 %addr_int to i32*
 
   store atomic i32 %val, i32* %addr monotonic, align 4
+; CHECK-NOT: dmb
 ; CHECK: str w2, [x0, x1]
+; CHECK-NOT: dmb
 
   ret void
 }
@@ -1047,9 +1069,12 @@ define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %va
 define void @test_atomic_store_release_i64(i64 %val) nounwind {
 ; CHECK: test_atomic_store_release_i64:
   store atomic i64 %val, i64* @var64 release, align 8
+; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var64
+; CHECK-NOT: dmb
 ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64
+; CHECK-NOT: dmb
 ; CHECK: stlr x0, [x[[ADDR]]]
-
+; CHECK-NOT: dmb
   ret void
 }
diff --git a/test/CodeGen/AArch64/elf-extern.ll b/test/CodeGen/AArch64/elf-extern.ll
index ee89d8d94ba4..8bf1b2ff4fa9 100644
--- a/test/CodeGen/AArch64/elf-extern.ll
+++ b/test/CodeGen/AArch64/elf-extern.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s
 
 ; External symbols are a different concept to global variables but should still
 ; get relocations and so on when used.
@@ -10,12 +10,8 @@ define i32 @check_extern() {
   ret i32 0
 }
 
-; CHECK: .rela.text
-; CHECK: ('r_sym', 0x00000009)
-; CHECK-NEXT: ('r_type', 0x0000011b)
-
-; CHECK: .symtab
-; CHECK: Symbol 9
-; CHECK-NEXT: memcpy
-
-
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     0x{{[0-9,A-F]+}} R_AARCH64_CALL26 memcpy
+; CHECK:   }
+; CHECK: ]
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll
index dcf9f4ed455c..d3299e1f7423 100644
--- a/test/CodeGen/AArch64/jump-table.ll
+++ b/test/CodeGen/AArch64/jump-table.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s -check-prefix=CHECK-ELF
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s -check-prefix=CHECK-ELF
 
 define i32 @test_jumptable(i32 %in) {
 ; CHECK: test_jumptable
@@ -44,13 +44,15 @@ lbl4:
 ; ELF tests:
 
 ; First make sure we get a page/lo12 pair in .text to pick up the jump-table
-; CHECK-ELF: .rela.text
-; CHECK-ELF: ('r_sym', 0x00000008)
-; CHECK-ELF-NEXT: ('r_type', 0x00000113)
-; CHECK-ELF: ('r_sym', 0x00000008)
-; CHECK-ELF-NEXT: ('r_type', 0x00000115)
+
+; CHECK-ELF:      Relocations [
+; CHECK-ELF:        Section ({{[0-9]+}}) .text {
+; CHECK-ELF-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 .rodata
+; CHECK-ELF-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ADD_ABS_LO12_NC .rodata
+; CHECK-ELF:        }
 
 ; Also check the targets in .rodata are relocated
-; CHECK-ELF: .rela.rodata
-; CHECK-ELF: ('r_sym', 0x00000005)
-; CHECK-ELF-NEXT: ('r_type', 0x00000101)
\ No newline at end of file
+; CHECK-ELF:        Section ({{[0-9]+}}) .rodata {
+; CHECK-ELF-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ABS64 .text
+; CHECK-ELF:        }
+; CHECK-ELF:      ]
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index 9269d6416b18..91a9903f3852 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -76,27 +76,27 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
 !2 = metadata !{i32 786473, metadata !48} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
 !5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
 !6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !7 = metadata !{i32 786445, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
 !8 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
 !10 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !11 = metadata !{null, metadata !12, metadata !13}
 !12 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
 !13 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !15 = metadata !{null, metadata !12}
-!16 = metadata !{i32 786478, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ]
+!16 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ]
 !18 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !19 = metadata !{metadata !13, metadata !13, metadata !1}
-!20 = metadata !{i32 786478, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
 !21 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !22 = metadata !{metadata !13}
 !23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
@@ -104,7 +104,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !26 = metadata !{i32 786448, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
 !27 = metadata !{i32 17, i32 0, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786443, metadata !2, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
 !29 = metadata !{i32 18, i32 0, metadata !28, null}
 !30 = metadata !{i32 20, i32 0, metadata !28, null}
 !31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
@@ -112,11 +112,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !33 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
 !34 = metadata !{i32 11, i32 0, metadata !16, null}
 !35 = metadata !{i32 11, i32 0, metadata !36, null}
-!36 = metadata !{i32 786443, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
-!37 = metadata !{i32 786443, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!36 = metadata !{i32 786443, metadata !2, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
+!37 = metadata !{i32 786443, metadata !2, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!39 = metadata !{i32 786443, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ]
-!40 = metadata !{i32 786443, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ]
+!39 = metadata !{i32 786443, metadata !2, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 786443, metadata !2, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
 !41 = metadata !{i32 24, i32 0, metadata !39, null}
 !42 = metadata !{i32 25, i32 0, metadata !39, null}
 !43 = metadata !{i32 26, i32 0, metadata !39, null}
diff --git a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
index 36d15757c314..b253fefe87c4 100644
--- a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
+++ b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
@@ -1,36 +1,47 @@
 ; RUN: llc  %s -mtriple=arm-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=BASIC %s 
+; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=BASIC %s 
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -march=arm -mcpu=cortex-a8 \
 ; RUN:    -mattr=-neon,-vfp3,+vfp2 \
 ; RUN:    -arm-reserve-r9 -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=CORTEXA8 %s
+; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=CORTEXA8 %s
 
 
 ; This tests that the extpected ARM attributes are emitted.
 ;
-; BASIC:        .ARM.attributes
-; BASIC-NEXT:         0x70000003
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x0000003c
-; BASIC-NEXT:         0x00000022
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x00000001
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         '41210000 00616561 62690001 17000000 060a0741 08010902 14011501 17031801 1901'
+; BASIC:        Section {
+; BASIC:          Name: .ARM.attributes
+; BASIC-NEXT:     Type: SHT_ARM_ATTRIBUTES
+; BASIC-NEXT:     Flags [ (0x0)
+; BASIC-NEXT:     ]
+; BASIC-NEXT:     Address: 0x0
+; BASIC-NEXT:     Offset: 0x3C
+; BASIC-NEXT:     Size: 34
+; BASIC-NEXT:     Link: 0
+; BASIC-NEXT:     Info: 0
+; BASIC-NEXT:     AddressAlignment: 1
+; BASIC-NEXT:     EntrySize: 0
+; BASIC-NEXT:     SectionData (
+; BASIC-NEXT:       0000: 41210000 00616561 62690001 17000000
+; BASIC-NEXT:       0010: 060A0741 08010902 14011501 17031801
+; BASIC-NEXT:       0020: 1901
+; BASIC-NEXT:     )
 
-; CORTEXA8:        .ARM.attributes
-; CORTEXA8-NEXT:         0x70000003
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x0000003c
-; CORTEXA8-NEXT:         0x0000002f
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x00000001
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         '412e0000 00616561 62690001 24000000 05434f52 5445582d 41380006 0a074108 0109020a 02140115 01170318 011901'
+; CORTEXA8:        Name: .ARM.attributes
+; CORTEXA8-NEXT:     Type: SHT_ARM_ATTRIBUTES
+; CORTEXA8-NEXT:     Flags [ (0x0)
+; CORTEXA8-NEXT:     ]
+; CORTEXA8-NEXT:     Address: 0x0
+; CORTEXA8-NEXT:     Offset: 0x3C
+; CORTEXA8-NEXT:     Size: 47
+; CORTEXA8-NEXT:     Link: 0
+; CORTEXA8-NEXT:     Info: 0
+; CORTEXA8-NEXT:     AddressAlignment: 1
+; CORTEXA8-NEXT:     EntrySize: 0
+; CORTEXA8-NEXT:     SectionData (
+; CORTEXA8-NEXT:       0000: 412E0000 00616561 62690001 24000000
+; CORTEXA8-NEXT:       0010: 05434F52 5445582D 41380006 0A074108
+; CORTEXA8-NEXT:       0020: 0109020A 02140115 01170318 011901
+; CORTEXA8-NEXT:     )
 
 define i32 @f(i64 %z) {
        ret i32 0
diff --git a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
index 94a05412f5d4..9eecd045bfa0 100644
--- a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
+++ b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
@@ -1,5 +1,5 @@
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=OBJ %s
+; RUN:    llvm-readobj -s -sr -sd | FileCheck  -check-prefix=OBJ %s
 
 target triple = "armv7-none-linux-gnueabi"
 
@@ -9,32 +9,17 @@ define arm_aapcs_vfpcc i32 @barf() nounwind {
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @foo(i8* @a) nounwind
   ret i32 %0
-; OBJ:         '.text'
-; OBJ-NEXT:    'sh_type'
-; OBJ-NEXT:    'sh_flags'
-; OBJ-NEXT:    'sh_addr'
-; OBJ-NEXT:    'sh_offset'
-; OBJ-NEXT:    'sh_size'
-; OBJ-NEXT:    'sh_link'
-; OBJ-NEXT:    'sh_info'
-; OBJ-NEXT:    'sh_addralign'
-; OBJ-NEXT:    'sh_entsize'
-; OBJ-NEXT:    '_section_data', '00482de9 000000e3 000040e3 feffffeb 0088bde8'
-
-; OBJ:            Relocation 0
-; OBJ-NEXT:       'r_offset', 0x00000004
-; OBJ-NEXT:       'r_sym', 0x000009
-; OBJ-NEXT:        'r_type', 0x2b
-
-; OBJ:          Relocation 1
-; OBJ-NEXT:       'r_offset', 0x00000008
-; OBJ-NEXT:       'r_sym'
-; OBJ-NEXT:        'r_type', 0x2c
-
-; OBJ:          # Relocation 2
-; OBJ-NEXT:       'r_offset', 0x0000000c
-; OBJ-NEXT:       'r_sym', 0x00000a
-; OBJ-NEXT:       'r_type', 0x1c
+; OBJ:        Section {
+; OBJ:          Name: .text
+; OBJ:          Relocations [
+; OBJ-NEXT:       0x4 R_ARM_MOVW_ABS_NC a
+; OBJ-NEXT:       0x8 R_ARM_MOVT_ABS
+; OBJ-NEXT:       0xC R_ARM_CALL foo
+; OBJ-NEXT:     ]
+; OBJ-NEXT:     SectionData (
+; OBJ-NEXT:       0000: 00482DE9 000000E3 000040E3 FEFFFFEB
+; OBJ-NEXT:       0010: 0088BDE8
+; OBJ-NEXT:     )
 
 }
 
diff --git a/test/CodeGen/ARM/2010-12-08-tpsoft.ll b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
index b8ed8199d398..1351a26756ef 100644
--- a/test/CodeGen/ARM/2010-12-08-tpsoft.ll
+++ b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
@@ -1,9 +1,9 @@
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -o - | \
 ; RUN:    FileCheck  -check-prefix=ELFASM %s 
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=ELFOBJ %s
+; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=ELFOBJ %s
 
-;; Make sure that bl __aeabi_read_tp is materiazlied and fixed up correctly
+;; Make sure that bl __aeabi_read_tp is materialized and fixed up correctly
 ;; in the obj case. 
 
 @i = external thread_local global i32
@@ -24,19 +24,13 @@ bb:                                               ; preds = %entry
 ; ELFASM:       	bl	__aeabi_read_tp
 
 
-; ELFOBJ:   '.text'
-; ELFOBJ-NEXT:  'sh_type'
-; ELFOBJ-NEXT:  'sh_flags'
-; ELFOBJ-NEXT:  'sh_addr'
-; ELFOBJ-NEXT:  'sh_offset'
-; ELFOBJ-NEXT:  'sh_size'
-; ELFOBJ-NEXT:  'sh_link'
-; ELFOBJ-NEXT:  'sh_info'
-; ELFOBJ-NEXT:  'sh_addralign'
-; ELFOBJ-NEXT:  'sh_entsize'
-;;;               BL __aeabi_read_tp is ---+
-;;;                                        V
-; ELFOBJ-NEXT:  00482de9 3c009fe5 00109fe7 feffffeb
+; ELFOBJ:      Sections [
+; ELFOBJ:        Section {
+; ELFOBJ:          Name: .text
+; ELFOBJ:          SectionData (
+;;;                  BL __aeabi_read_tp is ---------+
+;;;                                                 V
+; ELFOBJ-NEXT:     0000: 00482DE9 3C009FE5 00109FE7 FEFFFFEB
 
 
 bb1:                                              ; preds = %entry
diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
index 1272a257931d..f13bc1214a5a 100644
--- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
+++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -1,5 +1,5 @@
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=OBJ %s
+; RUN:    llvm-readobj -s -t | FileCheck  -check-prefix=OBJ %s
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -o - | \
 ; RUN:    FileCheck  -check-prefix=ASM %s
 
@@ -15,17 +15,20 @@
 ; ASM-NEXT:     .type   _MergedGlobals,%object  @ @_MergedGlobals
 
 
-
-; OBJ:          Section 4
-; OBJ-NEXT:     '.bss'
-
-; OBJ:          'array00'
-; OBJ-NEXT:     'st_value', 0x00000000
-; OBJ-NEXT:     'st_size', 0x00000050
-; OBJ-NEXT:     'st_bind', 0x0
-; OBJ-NEXT:     'st_type', 0x1
-; OBJ-NEXT:     'st_other', 0x00
-; OBJ-NEXT:     'st_shndx', 0x0004
+; OBJ:      Sections [
+; OBJ:        Section {
+; OBJ:          Index: 4
+; OBJ-NEXT:     Name: .bss
+
+; OBJ:      Symbols [
+; OBJ:        Symbol {
+; OBJ:          Name: array00
+; OBJ-NEXT:     Value: 0x0
+; OBJ-NEXT:     Size: 80
+; OBJ-NEXT:     Binding: Local
+; OBJ-NEXT:     Type: Object
+; OBJ-NEXT:     Other: 0
+; OBJ-NEXT:     Section: .bss
 
 define i32 @main(i32 %argc) nounwind {
   %1 = load i32* @sum, align 4
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 3c98585042ee..1d1b89a34f9a 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -77,16 +77,16 @@ entry:
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5, metadata !5}
-!5 = metadata !{i32 786468, metadata !1, metadata !"_Bool", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2, null, null, metadata !43, i32 7} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786478, i32 0, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3, null, null, metadata !44, i32 10} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 786478, i32 0, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4, null, null, metadata !45, i32 13} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5, null, null, metadata !46, i32 16} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786468, metadata !1, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2, null, null, metadata !43, i32 7} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786478, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3, null, null, metadata !44, i32 10} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 786478, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4, null, null, metadata !45, i32 13} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5, null, null, metadata !46, i32 16} ; [ DW_TAG_subprogram ]
 !10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
 !12 = metadata !{i32 786443, metadata !0, i32 4, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index 10f85e8b9da7..266609b8ce69 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -74,16 +74,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786453, metadata !2, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, i32 0, metadata !2, metadata !"get2", metadata !"get2", metadata !"", metadata !2, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get2, null, null, metadata !43, i32 8} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786478, i32 0, metadata !2, metadata !"get3", metadata !"get3", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get3, null, null, metadata !44, i32 11} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 786478, i32 0, metadata !2, metadata !"get4", metadata !"get4", metadata !"", metadata !2, i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get4, null, null, metadata !45, i32 14} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 786478, i32 0, metadata !2, metadata !"get5", metadata !"get5", metadata !"", metadata !2, i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get5, null, null, metadata !46, i32 17} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !2, metadata !"get2", metadata !"get2", metadata !"", metadata !2, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get2, null, null, metadata !43, i32 8} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786478, metadata !2, metadata !"get3", metadata !"get3", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get3, null, null, metadata !44, i32 11} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 786478, metadata !2, metadata !"get4", metadata !"get4", metadata !"", metadata !2, i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get4, null, null, metadata !45, i32 14} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786478, metadata !2, metadata !"get5", metadata !"get5", metadata !"", metadata !2, i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get5, null, null, metadata !46, i32 17} ; [ DW_TAG_subprogram ]
 !10 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777221, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !2, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
 !12 = metadata !{i32 786443, metadata !1, i32 5, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
index b5f6d311cb9c..b0644d17431d 100644
--- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
+++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -19,7 +19,7 @@ entry:
 ; CHECK: bfc	[[REG]], #0, #3
   %0 = va_arg i8** %g, double
   call void @llvm.va_end(i8* %g1)
-  
+
   ret void
 }
 
diff --git a/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll b/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
new file mode 100644
index 000000000000..459992818749
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
@@ -0,0 +1,82 @@
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp | FileCheck %s
+; PR14824. The test is presented by Jiangning Liu. If the ld/st optimization algorithm is changed, this test case may fail.
+; Also if the machine code for ld/st optimizor is changed, this test case may fail. If so, remove this test.
+
+define void @sample_test(<8 x i64> * %secondSource, <8 x i64> * %source, <8 x i64> * %dest) nounwind {
+; CHECK: sample_test
+; CHECK-NOT: vldmia
+; CHECK: add
+entry:
+
+; Load %source
+  %s0 = load <8 x i64> * %source, align 64
+  %arrayidx64 = getelementptr inbounds <8 x i64> * %source, i32 6
+  %s120 = load <8 x i64> * %arrayidx64, align 64
+  %s122 = bitcast <8 x i64> %s120 to i512
+  %data.i.i677.48.extract.shift = lshr i512 %s122, 384
+  %data.i.i677.48.extract.trunc = trunc i512 %data.i.i677.48.extract.shift to i64
+  %s123 = insertelement <8 x i64> undef, i64 %data.i.i677.48.extract.trunc, i32 0
+  %data.i.i677.32.extract.shift = lshr i512 %s122, 256
+  %data.i.i677.32.extract.trunc = trunc i512 %data.i.i677.32.extract.shift to i64
+  %s124 = insertelement <8 x i64> %s123, i64 %data.i.i677.32.extract.trunc, i32 1
+  %data.i.i677.16.extract.shift = lshr i512 %s122, 128
+  %data.i.i677.16.extract.trunc = trunc i512 %data.i.i677.16.extract.shift to i64
+  %s125 = insertelement <8 x i64> %s124, i64 %data.i.i677.16.extract.trunc, i32 2
+  %data.i.i677.56.extract.shift = lshr i512 %s122, 448
+  %data.i.i677.56.extract.trunc = trunc i512 %data.i.i677.56.extract.shift to i64
+  %s126 = insertelement <8 x i64> %s125, i64 %data.i.i677.56.extract.trunc, i32 3
+  %data.i.i677.24.extract.shift = lshr i512 %s122, 192
+  %data.i.i677.24.extract.trunc = trunc i512 %data.i.i677.24.extract.shift to i64
+  %s127 = insertelement <8 x i64> %s126, i64 %data.i.i677.24.extract.trunc, i32 4
+  %s128 = insertelement <8 x i64> %s127, i64 %data.i.i677.32.extract.trunc, i32 5
+  %s129 = insertelement <8 x i64> %s128, i64 %data.i.i677.16.extract.trunc, i32 6
+  %s130 = insertelement <8 x i64> %s129, i64 %data.i.i677.56.extract.trunc, i32 7
+
+; Load %secondSource
+  %s1 = load <8 x i64> * %secondSource, align 64
+  %arrayidx67 = getelementptr inbounds <8 x i64> * %secondSource, i32 6
+  %s121 = load <8 x i64> * %arrayidx67, align 64
+  %s131 = bitcast <8 x i64> %s121 to i512
+  %data.i1.i676.48.extract.shift = lshr i512 %s131, 384
+  %data.i1.i676.48.extract.trunc = trunc i512 %data.i1.i676.48.extract.shift to i64
+  %s132 = insertelement <8 x i64> undef, i64 %data.i1.i676.48.extract.trunc, i32 0
+  %data.i1.i676.32.extract.shift = lshr i512 %s131, 256
+  %data.i1.i676.32.extract.trunc = trunc i512 %data.i1.i676.32.extract.shift to i64
+  %s133 = insertelement <8 x i64> %s132, i64 %data.i1.i676.32.extract.trunc, i32 1
+  %data.i1.i676.16.extract.shift = lshr i512 %s131, 128
+  %data.i1.i676.16.extract.trunc = trunc i512 %data.i1.i676.16.extract.shift to i64
+  %s134 = insertelement <8 x i64> %s133, i64 %data.i1.i676.16.extract.trunc, i32 2
+  %data.i1.i676.56.extract.shift = lshr i512 %s131, 448
+  %data.i1.i676.56.extract.trunc = trunc i512 %data.i1.i676.56.extract.shift to i64
+  %s135 = insertelement <8 x i64> %s134, i64 %data.i1.i676.56.extract.trunc, i32 3
+  %data.i1.i676.24.extract.shift = lshr i512 %s131, 192
+  %data.i1.i676.24.extract.trunc = trunc i512 %data.i1.i676.24.extract.shift to i64
+  %s136 = insertelement <8 x i64> %s135, i64 %data.i1.i676.24.extract.trunc, i32 4
+  %s137 = insertelement <8 x i64> %s136, i64 %data.i1.i676.32.extract.trunc, i32 5
+  %s138 = insertelement <8 x i64> %s137, i64 %data.i1.i676.16.extract.trunc, i32 6
+  %s139 = insertelement <8 x i64> %s138, i64 %data.i1.i676.56.extract.trunc, i32 7
+
+; Operations about %Source and %secondSource
+  %vecinit28.i.i699 = shufflevector <8 x i64> %s139, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef>
+  %vecinit35.i.i700 = shufflevector <8 x i64> %vecinit28.i.i699, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef>
+  %vecinit42.i.i701 = shufflevector <8 x i64> %vecinit35.i.i700, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef>
+  %vecinit49.i.i702 = shufflevector <8 x i64> %vecinit42.i.i701, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
+  %arrayidx72 = getelementptr inbounds <8 x i64> * %dest, i32 6
+  store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64
+  %arrayidx78 = getelementptr inbounds <8 x i64> * %secondSource, i32 7
+  %s141 = load <8 x i64> * %arrayidx78, align 64
+  %s151 = bitcast <8 x i64> %s141 to i512
+  %data.i1.i649.32.extract.shift = lshr i512 %s151, 256
+  %data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64
+  %s152 = insertelement <8 x i64> undef, i64 %data.i1.i649.32.extract.trunc, i32 0
+  %s153 = insertelement <8 x i64> %s152, i64 %data.i1.i649.32.extract.trunc, i32 1
+  %data.i1.i649.16.extract.shift = lshr i512 %s151, 128
+  %data.i1.i649.16.extract.trunc = trunc i512 %data.i1.i649.16.extract.shift to i64
+  %s154 = insertelement <8 x i64> %s153, i64 %data.i1.i649.16.extract.trunc, i32 2
+  %data.i1.i649.8.extract.shift = lshr i512 %s151, 64
+  %data.i1.i649.8.extract.trunc = trunc i512 %data.i1.i649.8.extract.shift to i64
+  %s155 = insertelement <8 x i64> %s154, i64 %data.i1.i649.8.extract.trunc, i32 3
+  %arrayidx83 = getelementptr inbounds <8 x i64> * %dest, i32 7
+  store <8 x i64> %s155, <8 x i64> * %arrayidx83, align 64
+  ret void
+}
diff --git a/test/CodeGen/ARM/DbgValueOtherTargets.test b/test/CodeGen/ARM/DbgValueOtherTargets.test
index 8739a4364016..bf90891de0a7 100644
--- a/test/CodeGen/ARM/DbgValueOtherTargets.test
+++ b/test/CodeGen/ARM/DbgValueOtherTargets.test
@@ -1 +1 @@
-; RUN: llc -O0 -march=arm -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=arm -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/ARM/MergeConsecutiveStores.ll b/test/CodeGen/ARM/MergeConsecutiveStores.ll
new file mode 100644
index 000000000000..06c87e986a83
--- /dev/null
+++ b/test/CodeGen/ARM/MergeConsecutiveStores.ll
@@ -0,0 +1,98 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+
+; Make sure that we merge the consecutive load/store sequence below and use a
+; word (16 bit) instead of a byte copy.
+; CHECK: MergeLoadStoreBaseIndexOffset
+; CHECK: ldrh    [[REG:r[0-9]+]], [{{.*}}]
+; CHECK: strh    [[REG]], [r1], #2
+define void @MergeLoadStoreBaseIndexOffset(i32* %a, i8* %b, i8* %c, i32 %n) {
+  br label %1
+
+; <label>:1
+  %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]
+  %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
+  %.0 = phi i32* [ %a, %0 ], [ %2, %1 ]
+  %2 = getelementptr inbounds i32* %.0, i32 1
+  %3 = load i32* %.0, align 1
+  %4 = getelementptr inbounds i8* %c, i32 %3
+  %5 = load i8* %4, align 1
+  %6 = add i32 %3, 1
+  %7 = getelementptr inbounds i8* %c, i32 %6
+  %8 = load i8* %7, align 1
+  store i8 %5, i8* %.08, align 1
+  %9 = getelementptr inbounds i8* %.08, i32 1
+  store i8 %8, i8* %9, align 1
+  %10 = getelementptr inbounds i8* %.08, i32 2
+  %11 = add nsw i32 %.09, -1
+  %12 = icmp eq i32 %11, 0
+  br i1 %12, label %13, label %1
+
+; <label>:13
+  ret void
+}
+
+; Make sure that we merge the consecutive load/store sequence below and use a
+; word (16 bit) instead of a byte copy even if there are intermediate sign
+; extensions.
+; CHECK: MergeLoadStoreBaseIndexOffsetSext
+; CHECK: ldrh    [[REG:r[0-9]+]], [{{.*}}]
+; CHECK: strh    [[REG]], [r1], #2
+define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
+  br label %1
+
+; <label>:1
+  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
+  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
+  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
+  %2 = getelementptr inbounds i8* %.0, i32 1
+  %3 = load i8* %.0, align 1
+  %4 = sext i8 %3 to i32
+  %5 = getelementptr inbounds i8* %c, i32 %4
+  %6 = load i8* %5, align 1
+  %7 = add i32 %4, 1
+  %8 = getelementptr inbounds i8* %c, i32 %7
+  %9 = load i8* %8, align 1
+  store i8 %6, i8* %.08, align 1
+  %10 = getelementptr inbounds i8* %.08, i32 1
+  store i8 %9, i8* %10, align 1
+  %11 = getelementptr inbounds i8* %.08, i32 2
+  %12 = add nsw i32 %.09, -1
+  %13 = icmp eq i32 %12, 0
+  br i1 %13, label %14, label %1
+
+; <label>:14
+  ret void
+}
+
+; However, we can only merge ignore sign extensions when they are on all memory
+; computations;
+; CHECK: loadStoreBaseIndexOffsetSextNoSex
+; CHECK-NOT: ldrh    [[REG:r[0-9]+]], [{{.*}}]
+; CHECK-NOT: strh    [[REG]], [r1], #2
+define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
+  br label %1
+
+; <label>:1
+  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
+  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
+  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
+  %2 = getelementptr inbounds i8* %.0, i32 1
+  %3 = load i8* %.0, align 1
+  %4 = sext i8 %3 to i32
+  %5 = getelementptr inbounds i8* %c, i32 %4
+  %6 = load i8* %5, align 1
+  %7 = add i8 %3, 1
+  %wrap.4 = sext i8 %7 to i32
+  %8 = getelementptr inbounds i8* %c, i32 %wrap.4
+  %9 = load i8* %8, align 1
+  store i8 %6, i8* %.08, align 1
+  %10 = getelementptr inbounds i8* %.08, i32 1
+  store i8 %9, i8* %10, align 1
+  %11 = getelementptr inbounds i8* %.08, i32 2
+  %12 = add nsw i32 %.09, -1
+  %13 = icmp eq i32 %12, 0
+  br i1 %13, label %14, label %1
+
+; <label>:14
+  ret void
+}
diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll
index 17e3eba27235..a52468e5be9e 100644
--- a/test/CodeGen/ARM/a15-SD-dep.ll
+++ b/test/CodeGen/ARM/a15-SD-dep.ll
@@ -5,7 +5,7 @@
 ; CHECK-DISABLED: t1:
 define <2 x float> @t1(float %f) {
   ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
-  ; CHECK-DISABLED: vmov.32 d0[1], r{{.}}
+  ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
   %i1 = insertelement <2 x float> undef, float %f, i32 1
   %i2 = fadd <2 x float> %i1, %i1
   ret <2 x float> %i2
@@ -15,7 +15,7 @@ define <2 x float> @t1(float %f) {
 ; CHECK-DISABLED: t2:
 define <4 x float> @t2(float %g, float %f) {
   ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d0[0]
-  ; CHECK-DISABLED: vmov.32 d0[1], r{{.}}
+  ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
   %i1 = insertelement <4 x float> undef, float %f, i32 1
   %i2 = fadd <4 x float> %i1, %i1
   ret <4 x float> %i2
@@ -25,6 +25,7 @@ define <4 x float> @t2(float %g, float %f) {
 ; CHECK-DISABLED: t3:
 define arm_aapcs_vfpcc <2 x float> @t3(float %f) {
   ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0] 
+  ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
   %i1 = insertelement <2 x float> undef, float %f, i32 1
   %i2 = fadd <2 x float> %i1, %i1
   ret <2 x float> %i2
diff --git a/test/CodeGen/ARM/a15-partial-update.ll b/test/CodeGen/ARM/a15-partial-update.ll
new file mode 100644
index 000000000000..6306790d15f0
--- /dev/null
+++ b/test/CodeGen/ARM/a15-partial-update.ll
@@ -0,0 +1,38 @@
+; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s  | FileCheck %s
+
+; CHECK: t1:
+define <2 x float> @t1(float* %A, <2 x float> %B) {
+; The generated code for this test uses a vld1.32 instruction
+; to write the lane 1 of a D register containing the value of
+; <2 x float> %B. Since the D register is defined, it would
+; be incorrect to fully write it (with a vmov.f64) before the
+; vld1.32 instruction. The test checks that a vmov.f64 was not
+; generated.
+
+; CHECK-NOT: vmov.{{.*}} d{{[0-9]+}},
+  %tmp2 = load float* %A, align 4
+  %tmp3 = insertelement <2 x float> %B, float %tmp2, i32 1
+  ret <2 x float> %tmp3
+}
+
+; CHECK: t2:
+define void @t2(<4 x i8> *%in, <4 x i8> *%out, i32 %n) {
+entry:
+  br label %loop
+loop:
+; The code generated by this test uses a vld1.32 instruction.
+; We check that a dependency breaking vmov* instruction was
+; generated.
+
+; CHECK: vmov.{{.*}} d{{[0-9]+}},
+  %oldcount = phi i32 [0, %entry], [%newcount, %loop]
+  %newcount = add i32 %oldcount, 1
+  %p1 = getelementptr <4 x i8> *%in, i32 %newcount
+  %p2 = getelementptr <4 x i8> *%out, i32 %newcount
+  %tmp1 = load <4 x i8> *%p1, align 4
+  store <4 x i8> %tmp1, <4 x i8> *%p2
+  %cmp = icmp eq i32 %newcount, %n
+  br i1 %cmp, label %loop, label %ret
+ret:
+  ret void
+}
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index d98925ef8ff8..c14f5302d311 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -83,3 +83,34 @@ while.body:
 while.end:
   ret void
 }
+
+; Avoid producing tMOVi8 after a high-latency flag-setting operation.
+; <rdar://problem/13468102>
+define void @t4(i32* nocapture %p, double* nocapture %q) {
+entry:
+; CHECK: t4
+; CHECK: vmrs APSR_nzcv, fpscr
+; CHECK: if.then
+; CHECK-NOT: movs
+  %0 = load double* %q, align 4
+  %cmp = fcmp olt double %0, 1.000000e+01
+  %incdec.ptr1 = getelementptr inbounds i32* %p, i32 1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  store i32 7, i32* %p, align 4
+  %incdec.ptr2 = getelementptr inbounds i32* %p, i32 2
+  store i32 8, i32* %incdec.ptr1, align 4
+  store i32 9, i32* %incdec.ptr2, align 4
+  br label %if.end
+
+if.else:
+  store i32 3, i32* %p, align 4
+  %incdec.ptr5 = getelementptr inbounds i32* %p, i32 2
+  store i32 5, i32* %incdec.ptr1, align 4
+  store i32 6, i32* %incdec.ptr5, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll
index 7316452cd617..769ba55eb9eb 100644
--- a/test/CodeGen/ARM/commute-movcc.ll
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
-; RUN: llc -mtriple=armv7-apple-ios   -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios   -disable-block-placement < %s | FileCheck %s
 
 ; LLVM IR optimizers canonicalize icmp+select this way.
 ; Make sure that TwoAddressInstructionPass can commute the corresponding
diff --git a/test/CodeGen/ARM/dagcombine-concatvector.ll b/test/CodeGen/ARM/dagcombine-concatvector.ll
new file mode 100644
index 000000000000..e9e0fe3239a7
--- /dev/null
+++ b/test/CodeGen/ARM/dagcombine-concatvector.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 | FileCheck %s
+
+; PR15525
+; CHECK: test1:
+; CHECK: ldr.w	[[REG:r[0-9]+]], [sp]
+; CHECK-NEXT: vmov	{{d[0-9]+}}, r1, r2
+; CHECK-NEXT: vmov	{{d[0-9]+}}, r3, [[REG]]
+; CHECK-NEXT: vst1.8	{{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+; CHECK-NEXT: bx	lr
+define void @test1(i8* %arg, [4 x i64] %vec.coerce) {
+bb:
+  %tmp = extractvalue [4 x i64] %vec.coerce, 0
+  %tmp2 = bitcast i64 %tmp to <8 x i8>
+  %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp4 = extractvalue [4 x i64] %vec.coerce, 1
+  %tmp5 = bitcast i64 %tmp4 to <8 x i8>
+  %tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> %tmp3, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  tail call void @llvm.arm.neon.vst1.v16i8(i8* %arg, <16 x i8> %tmp7, i32 2)
+  ret void
+}
+
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32)
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index 168443f17944..33c8e9daae69 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -31,23 +31,23 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null}
 !5 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !2, i32 16777227, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!6 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786451, metadata !0, metadata !"tag_s", metadata !2, i32 5, i64 96, i64 32, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!6 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 786451, metadata !32, metadata !0, metadata !"tag_s", i32 5, i64 96, i64 32, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !8 = metadata !{metadata !9, metadata !11, metadata !12}
-!9 = metadata !{i32 786445, metadata !7, metadata !"x", metadata !2, i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 786445, metadata !7, metadata !"y", metadata !2, i32 7, i64 32, i64 32, i64 32, i32 0, metadata !10} ; [ DW_TAG_member ]
-!12 = metadata !{i32 786445, metadata !7, metadata !"z", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ]
+!9 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"x", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"y", i32 7, i64 32, i64 32, i64 32, i32 0, metadata !10} ; [ DW_TAG_member ]
+!12 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"z", i32 8, i64 32, i64 32, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ]
 !13 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 33554443, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !14 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 50331659, metadata !15, i32 0, null} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 786454, metadata !0, metadata !"UInt64", metadata !2, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ]
-!16 = metadata !{i32 786468, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!15 = metadata !{i32 786454, metadata !32, metadata !0, metadata !"UInt64", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ]
+!16 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !17 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 67108875, metadata !15, i32 0, null} ; [ DW_TAG_arg_variable ]
 !18 = metadata !{i32 786689, metadata !1, metadata !"ptr1", metadata !2, i32 83886091, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !19 = metadata !{i32 786689, metadata !1, metadata !"ptr2", metadata !2, i32 100663307, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index ae59f9f83908..d0bfecc5af41 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -94,134 +94,134 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 16, metadata !40, metadata !"Apple clang version 2.1", i1 false, metadata !"", i32 2, metadata !147, null, metadata !148, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786433, metadata !0, metadata !"", metadata !2, i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!0 = metadata !{i32 786449, i32 16, metadata !40, metadata !"Apple clang version 2.1", i1 false, metadata !"", i32 2, metadata !147, null, metadata !148, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"", i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
 !2 = metadata !{i32 786473, metadata !160} ; [ DW_TAG_file_type ]
 !3 = metadata !{metadata !4}
 !4 = metadata !{i32 786472, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ]
-!5 = metadata !{i32 786433, metadata !0, metadata !"Mode", metadata !6, i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!5 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"Mode", i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
 !6 = metadata !{i32 786473, metadata !161} ; [ DW_TAG_file_type ]
 !7 = metadata !{metadata !8}
 !8 = metadata !{i32 786472, metadata !"One", i64 0} ; [ DW_TAG_enumerator ]
-!9 = metadata !{i32 786433, metadata !0, metadata !"", metadata !10, i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!9 = metadata !{i32 786433, metadata !149, metadata !0, metadata !"", i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
 !10 = metadata !{i32 786473, metadata !149} ; [ DW_TAG_file_type ]
 !11 = metadata !{metadata !12, metadata !13}
 !12 = metadata !{i32 786472, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ]
 !13 = metadata !{i32 786472, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ]
-!14 = metadata !{i32 786433, metadata !0, metadata !"", metadata !15, i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!14 = metadata !{i32 786433, metadata !150, metadata !0, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
 !15 = metadata !{i32 786473, metadata !150} ; [ DW_TAG_file_type ]
 !16 = metadata !{metadata !17, metadata !18}
 !17 = metadata !{i32 786472, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ]
 !18 = metadata !{i32 786472, metadata !"Double", i64 1} ; [ DW_TAG_enumerator ]
-!19 = metadata !{i32 786433, metadata !0, metadata !"", metadata !20, i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!19 = metadata !{i32 786433, metadata !151, metadata !0, metadata !"", i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
 !20 = metadata !{i32 786473, metadata !151} ; [ DW_TAG_file_type ]
 !21 = metadata !{metadata !22}
 !22 = metadata !{i32 786472, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ]
-!23 = metadata !{i32 786478, i32 0, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null, i32 609} ; [ DW_TAG_subprogram ]
+!23 = metadata !{i32 786478, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null, i32 609} ; [ DW_TAG_subprogram ]
 !24 = metadata !{i32 786473, metadata !152} ; [ DW_TAG_file_type ]
-!25 = metadata !{i32 786453, metadata !24, metadata !"", metadata !24, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!25 = metadata !{i32 786453, metadata !152, metadata !24, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !26 = metadata !{null}
 !27 = metadata !{i32 786689, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 64, null} ; [ DW_TAG_arg_variable ]
-!28 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ]
-!29 = metadata !{i32 786451, metadata !24, metadata !"__block_literal_14", metadata !24, i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!28 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ]
+!29 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"__block_literal_14", i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !30 = metadata !{metadata !31, metadata !33, metadata !35, metadata !36, metadata !37, metadata !48, metadata !89, metadata !124}
-!31 = metadata !{i32 786445, metadata !24, metadata !"__isa", metadata !24, i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
-!32 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!33 = metadata !{i32 786445, metadata !24, metadata !"__flags", metadata !24, i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ]
-!34 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!35 = metadata !{i32 786445, metadata !24, metadata !"__reserved", metadata !24, i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
-!36 = metadata !{i32 786445, metadata !24, metadata !"__FuncPtr", metadata !24, i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ]
-!37 = metadata !{i32 786445, metadata !24, metadata !"__descriptor", metadata !24, i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ]
-!38 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ]
-!39 = metadata !{i32 786451, metadata !0, metadata !"__block_descriptor_withcopydispose", metadata !40, i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!31 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
+!32 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!33 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__flags", i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ]
+!34 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!35 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__reserved", i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
+!36 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__FuncPtr", i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ]
+!37 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__descriptor", i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ]
+!38 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ]
+!39 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"__block_descriptor_withcopydispose", i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !40 = metadata !{i32 786473, metadata !153} ; [ DW_TAG_file_type ]
 !41 = metadata !{metadata !42, metadata !44, metadata !45, metadata !47}
-!42 = metadata !{i32 786445, metadata !40, metadata !"reserved", metadata !40, i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ]
-!43 = metadata !{i32 786468, metadata !0, metadata !"long unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!44 = metadata !{i32 786445, metadata !40, metadata !"Size", metadata !40, i32 307, i64 32, i64 32, i64 32, i32 0, metadata !43} ; [ DW_TAG_member ]
-!45 = metadata !{i32 786445, metadata !40, metadata !"CopyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 64, i32 0, metadata !46} ; [ DW_TAG_member ]
-!46 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
-!47 = metadata !{i32 786445, metadata !40, metadata !"DestroyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ]
-!48 = metadata !{i32 786445, metadata !24, metadata !"mydata", metadata !24, i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ]
-!49 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ]
-!50 = metadata !{i32 786451, metadata !24, metadata !"", metadata !24, i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!42 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"reserved", i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ]
+!43 = metadata !{i32 786468, null, metadata !0, metadata !"long unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!44 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"Size", i32 307, i64 32, i64 32, i64 32, i32 0, metadata !43} ; [ DW_TAG_member ]
+!45 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"CopyFuncPtr", i32 307, i64 32, i64 32, i64 64, i32 0, metadata !46} ; [ DW_TAG_member ]
+!46 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
+!47 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"DestroyFuncPtr", i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ]
+!48 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ]
+!49 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ]
+!50 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"", i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !51 = metadata !{metadata !52, metadata !53, metadata !54, metadata !55, metadata !56, metadata !57, metadata !58}
-!52 = metadata !{i32 786445, metadata !24, metadata !"__isa", metadata !24, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
-!53 = metadata !{i32 786445, metadata !24, metadata !"__forwarding", metadata !24, i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ]
-!54 = metadata !{i32 786445, metadata !24, metadata !"__flags", metadata !24, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
-!55 = metadata !{i32 786445, metadata !24, metadata !"__size", metadata !24, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ]
-!56 = metadata !{i32 786445, metadata !24, metadata !"__copy_helper", metadata !24, i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ]
-!57 = metadata !{i32 786445, metadata !24, metadata !"__destroy_helper", metadata !24, i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ]
-!58 = metadata !{i32 786445, metadata !24, metadata !"mydata", metadata !24, i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ]
-!59 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ]
-!60 = metadata !{i32 786451, metadata !24, metadata !"UIMydata", metadata !61, i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!52 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
+!53 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__forwarding", i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ]
+!54 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
+!55 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__size", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ]
+!56 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__copy_helper", i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ]
+!57 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__destroy_helper", i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ]
+!58 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ]
+!59 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ]
+!60 = metadata !{i32 786451, metadata !154, metadata !24, metadata !"UIMydata", i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ]
 !61 = metadata !{i32 786473, metadata !154} ; [ DW_TAG_file_type ]
 !62 = metadata !{metadata !63, metadata !71, metadata !75, metadata !79}
 !63 = metadata !{i32 786460, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
-!64 = metadata !{i32 786451, metadata !40, metadata !"NSO", metadata !65, i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!64 = metadata !{i32 786451, metadata !155, metadata !40, metadata !"NSO", i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ]
 !65 = metadata !{i32 786473, metadata !155} ; [ DW_TAG_file_type ]
 !66 = metadata !{metadata !67}
-!67 = metadata !{i32 786445, metadata !65, metadata !"isa", metadata !65, i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!67 = metadata !{i32 786445, metadata !155, metadata !65, metadata !"isa", i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
 !68 = metadata !{i32 786454, metadata !0, metadata !"Class", metadata !40, i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ]
-!69 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ]
-!70 = metadata !{i32 786451, metadata !0, metadata !"objc_class", metadata !40, i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!71 = metadata !{i32 786445, metadata !61, metadata !"_mydataRef", metadata !61, i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!69 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ]
+!70 = metadata !{i32 786451, metadata !40, metadata !0, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!71 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataRef", i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
 !72 = metadata !{i32 786454, metadata !0, metadata !"CFTypeRef", metadata !24, i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ]
-!73 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ]
-!74 = metadata !{i32 786470, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ]
-!75 = metadata !{i32 786445, metadata !61, metadata !"_scale", metadata !61, i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!73 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ]
+!74 = metadata !{i32 786470, null, metadata !0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ]
+!75 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_scale", i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
 !76 = metadata !{i32 786454, metadata !0, metadata !"Float", metadata !77, i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ]
 !77 = metadata !{i32 786473, metadata !156} ; [ DW_TAG_file_type ]
-!78 = metadata !{i32 786468, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!79 = metadata !{i32 786445, metadata !61, metadata !"_mydataFlags", metadata !61, i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!80 = metadata !{i32 786451, metadata !0, metadata !"", metadata !61, i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!78 = metadata !{i32 786468, null, metadata !0, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!79 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataFlags", i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!80 = metadata !{i32 786451, metadata !154, metadata !0, metadata !"", i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !81 = metadata !{metadata !82, metadata !84, metadata !85, metadata !86, metadata !87, metadata !88}
-!82 = metadata !{i32 786445, metadata !61, metadata !"named", metadata !61, i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ]
-!83 = metadata !{i32 786468, metadata !0, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!84 = metadata !{i32 786445, metadata !61, metadata !"mydataO", metadata !61, i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ]
-!85 = metadata !{i32 786445, metadata !61, metadata !"cached", metadata !61, i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ]
-!86 = metadata !{i32 786445, metadata !61, metadata !"hasBeenCached", metadata !61, i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ]
-!87 = metadata !{i32 786445, metadata !61, metadata !"hasPattern", metadata !61, i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ]
-!88 = metadata !{i32 786445, metadata !61, metadata !"isCIMydata", metadata !61, i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ]
-!89 = metadata !{i32 786445, metadata !24, metadata !"self", metadata !24, i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ]
-!90 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ]
-!91 = metadata !{i32 786451, metadata !40, metadata !"MyWork", metadata !24, i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!82 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"named", i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ]
+!83 = metadata !{i32 786468, null, metadata !0, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!84 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"mydataO", i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ]
+!85 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"cached", i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ]
+!86 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"hasBeenCached", i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ]
+!87 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"hasPattern", i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ]
+!88 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"isCIMydata", i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ]
+!89 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"self", i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ]
+!90 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ]
+!91 = metadata !{i32 786451, metadata !152, metadata !40, metadata !"MyWork", i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ]
 !92 = metadata !{metadata !93, metadata !98, metadata !101, metadata !107, metadata !123}
 !93 = metadata !{i32 786460, metadata !91, null, metadata !24, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ]
-!94 = metadata !{i32 786451, metadata !40, metadata !"twork", metadata !95, i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!94 = metadata !{i32 786451, metadata !157, metadata !40, metadata !"twork", i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ]
 !95 = metadata !{i32 786473, metadata !157} ; [ DW_TAG_file_type ]
 !96 = metadata !{metadata !97}
 !97 = metadata !{i32 786460, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
-!98 = metadata !{i32 786445, metadata !24, metadata !"_itemID", metadata !24, i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!98 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_itemID", i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
 !99 = metadata !{i32 786454, metadata !0, metadata !"uint64_t", metadata !40, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ]
-!100 = metadata !{i32 786468, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!101 = metadata !{i32 786445, metadata !24, metadata !"_library", metadata !24, i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!102 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ]
-!103 = metadata !{i32 786451, metadata !40, metadata !"MyLibrary2", metadata !104, i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!100 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!101 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_library", i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!102 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ]
+!103 = metadata !{i32 786451, metadata !158, metadata !40, metadata !"MyLibrary2", i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ]
 !104 = metadata !{i32 786473, metadata !158} ; [ DW_TAG_file_type ]
 !105 = metadata !{metadata !106}
 !106 = metadata !{i32 786460, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
-!107 = metadata !{i32 786445, metadata !24, metadata !"_bounds", metadata !24, i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!107 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_bounds", i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
 !108 = metadata !{i32 786454, metadata !0, metadata !"CR", metadata !40, i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ]
-!109 = metadata !{i32 786451, metadata !0, metadata !"CR", metadata !77, i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!109 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CR", i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !110 = metadata !{metadata !111, metadata !117}
-!111 = metadata !{i32 786445, metadata !77, metadata !"origin", metadata !77, i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ]
+!111 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"origin", i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ]
 !112 = metadata !{i32 786454, metadata !0, metadata !"CP", metadata !77, i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ]
-!113 = metadata !{i32 786451, metadata !0, metadata !"CP", metadata !77, i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!113 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CP", i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !114 = metadata !{metadata !115, metadata !116}
-!115 = metadata !{i32 786445, metadata !77, metadata !"x", metadata !77, i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
-!116 = metadata !{i32 786445, metadata !77, metadata !"y", metadata !77, i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
-!117 = metadata !{i32 786445, metadata !77, metadata !"size", metadata !77, i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ]
+!115 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"x", i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
+!116 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"y", i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
+!117 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"size", i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ]
 !118 = metadata !{i32 786454, metadata !0, metadata !"Size", metadata !77, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ]
-!119 = metadata !{i32 786451, metadata !0, metadata !"Size", metadata !77, i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!119 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"Size", i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !120 = metadata !{metadata !121, metadata !122}
-!121 = metadata !{i32 786445, metadata !77, metadata !"width", metadata !77, i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
-!122 = metadata !{i32 786445, metadata !77, metadata !"height", metadata !77, i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
-!123 = metadata !{i32 786445, metadata !24, metadata !"_data", metadata !24, i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!124 = metadata !{i32 786445, metadata !24, metadata !"semi", metadata !24, i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ]
+!121 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"width", i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
+!122 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"height", i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
+!123 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_data", i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!124 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"semi", i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ]
 !125 = metadata !{i32 786454, metadata !0, metadata !"d_t", metadata !24, i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ]
-!126 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ]
-!127 = metadata !{i32 786451, metadata !0, metadata !"my_struct", metadata !128, i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!126 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ]
+!127 = metadata !{i32 786451, metadata !159, metadata !0, metadata !"my_struct", i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !128 = metadata !{i32 786473, metadata !159} ; [ DW_TAG_file_type ]
 !129 = metadata !{i32 609, i32 144, metadata !23, null}
 !130 = metadata !{i32 786689, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0, null} ; [ DW_TAG_arg_variable ]
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 6a474c777d59..95e6cf2554a0 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -40,52 +40,52 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786454, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
 !6 = metadata !{i32 786691, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
-!7 = metadata !{i32 786468, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786465, i64 0, i64 4}         ; [ DW_TAG_subrange_type ]
 !10 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null, null, metadata !52, i32 0} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{metadata !13}
-!13 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 786478, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 0} ; [ DW_TAG_subprogram ]
 !15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ]
-!16 = metadata !{i32 786453, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !17 = metadata !{null}
 !18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
 !20 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ]
-!21 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
-!22 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 786468, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !24 = metadata !{i32 786688, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
-!25 = metadata !{i32 786443, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ]
 !26 = metadata !{i32 786688, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
 !27 = metadata !{i32 786688, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
 !28 = metadata !{i32 786688, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
 !29 = metadata !{i32 786688, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
 !30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
-!32 = metadata !{i32 786454, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
-!33 = metadata !{i32 786455, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
+!31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
+!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
 !34 = metadata !{metadata !35, metadata !37}
-!35 = metadata !{i32 786445, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
-!36 = metadata !{i32 786454, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!37 = metadata !{i32 786445, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
-!38 = metadata !{i32 786433, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
+!36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!37 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"A", i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
+!38 = metadata !{i32 786433, null, metadata !2, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !39 = metadata !{i32 79, i32 7, metadata !40, null}
-!40 = metadata !{i32 786443, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ]
-!41 = metadata !{i32 786443, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ]
-!42 = metadata !{i32 786443, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ]
-!43 = metadata !{i32 786443, metadata !25, i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 786443, metadata !1, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ]
+!41 = metadata !{i32 786443, metadata !1, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ]
+!42 = metadata !{i32 786443, metadata !1, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ]
+!43 = metadata !{i32 786443, metadata !1, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ]
 !44 = metadata !{i32 75, i32 5, metadata !42, null}
 !45 = metadata !{i32 42, i32 2, metadata !46, metadata !48}
-!46 = metadata !{i32 786443, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ]
-!47 = metadata !{i32 786443, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ]
+!46 = metadata !{i32 786443, metadata !15, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ]
+!47 = metadata !{i32 786443, metadata !15, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ]
 !48 = metadata !{i32 95, i32 3, metadata !25, null}
 !49 = metadata !{i32 99, i32 3, metadata !25, null}
 !50 = metadata !{metadata !0, metadata !10, metadata !14}
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index 20c4b723ce19..e3e4d068932e 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -58,17 +58,17 @@ declare i32 @puts(i8* nocapture) nounwind
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, null, null, metadata !42, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, null, null, metadata !42, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8}
 !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !7 = metadata !{i32 786468, metadata !1, metadata !"double", metadata !1, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 786468, metadata !1, metadata !"unsigned char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter, null, null, metadata !44, i32 5} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !45, i32 18} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter, null, null, metadata !44, i32 5} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !45, i32 18} ; [ DW_TAG_subprogram ]
 !11 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{metadata !5, metadata !5, metadata !13}
 !13 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
@@ -83,14 +83,14 @@ declare i32 @puts(i8* nocapture) nounwind
 !22 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !23 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
 !24 = metadata !{i32 786688, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!25 = metadata !{i32 786443, metadata !10, i32 18, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 18, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
 !26 = metadata !{i32 4, i32 0, metadata !9, null}
 !27 = metadata !{i32 6, i32 0, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !9, i32 5, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786443, metadata !1, metadata !9, i32 5, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
 !29 = metadata !{i32 7, i32 0, metadata !28, null}
 !30 = metadata !{i32 11, i32 0, metadata !0, null}
 !31 = metadata !{i32 13, i32 0, metadata !32, null}
-!32 = metadata !{i32 786443, metadata !0, i32 12, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!32 = metadata !{i32 786443, metadata !1, metadata !0, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !33 = metadata !{i32 14, i32 0, metadata !32, null}
 !34 = metadata !{i32 17, i32 0, metadata !10, null}
 !35 = metadata !{i32 19, i32 0, metadata !25, null}
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index 59bac05fc4f3..038c2296cdbe 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -37,54 +37,54 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786454, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
 !6 = metadata !{i32 786691, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
-!7 = metadata !{i32 786468, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786465, i64 0, i64 4}         ; [ DW_TAG_subrange_type ]
-!10 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !52, i32 59} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !52, i32 59} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{metadata !13}
-!13 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786478, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 41} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786478, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 41} ; [ DW_TAG_subprogram ]
 !15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ]
-!16 = metadata !{i32 786453, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !17 = metadata !{null}
 !18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
 !20 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ]
-!21 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
-!22 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 786468, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !24 = metadata !{i32 786688, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
-!25 = metadata !{i32 786443, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ]
 !26 = metadata !{i32 786688, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
 !27 = metadata !{i32 786688, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
 !28 = metadata !{i32 786688, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
 !29 = metadata !{i32 786688, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
 !30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
-!32 = metadata !{i32 786454, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
-!33 = metadata !{i32 786455, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
+!31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
+!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
 !34 = metadata !{metadata !35, metadata !37}
-!35 = metadata !{i32 786445, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
-!36 = metadata !{i32 786454, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!37 = metadata !{i32 786445, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
-!38 = metadata !{i32 786433, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
+!36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!37 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"A", i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
+!38 = metadata !{i32 786433, null, metadata !2, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !39 = metadata !{i32 79, i32 7, metadata !40, null}
-!40 = metadata !{i32 786443, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ]
-!41 = metadata !{i32 786443, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ]
-!42 = metadata !{i32 786443, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ]
-!43 = metadata !{i32 786443, metadata !25, i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 786443, metadata !1, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ]
+!41 = metadata !{i32 786443, metadata !1, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ]
+!42 = metadata !{i32 786443, metadata !1, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ]
+!43 = metadata !{i32 786443, metadata !1, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ]
 !44 = metadata !{i32 75, i32 5, metadata !42, null}
 !45 = metadata !{i32 42, i32 2, metadata !46, metadata !48}
-!46 = metadata !{i32 786443, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ]
-!47 = metadata !{i32 786443, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ]
+!46 = metadata !{i32 786443, metadata !15, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ]
+!47 = metadata !{i32 786443, metadata !15, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ]
 !48 = metadata !{i32 95, i32 3, metadata !25, null}
 !49 = metadata !{i32 99, i32 3, metadata !25, null}
 !50 = metadata !{metadata !0, metadata !10, metadata !14}
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index 270a28c6cd16..f3af0b93c69c 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -63,14 +63,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null, null, metadata !49, i32 12} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !50, i32 18} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null, null, metadata !49, i32 12} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !50, i32 18} ; [ DW_TAG_subprogram ]
 !8 = metadata !{i32 786689, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !9 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !10 = metadata !{i32 786689, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0, null} ; [ DW_TAG_arg_variable ]
@@ -86,18 +86,18 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !20 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
 !21 = metadata !{i32 786468, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !22 = metadata !{i32 786688, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0, null} ; [ DW_TAG_auto_variable ]
-!23 = metadata !{i32 786443, metadata !7, i32 18, i32 1, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !1, metadata !7, i32 18, i32 1, i32 2} ; [ DW_TAG_lexical_block ]
 !24 = metadata !{i32 4, i32 22, metadata !0, null}
 !25 = metadata !{i32 4, i32 33, metadata !0, null}
 !26 = metadata !{i32 4, i32 52, metadata !0, null}
 !27 = metadata !{i32 6, i32 3, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !0, i32 5, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786443, metadata !1, metadata !0, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
 !29 = metadata !{i32 7, i32 3, metadata !28, null}
 !30 = metadata !{i32 11, i32 42, metadata !6, null}
 !31 = metadata !{i32 11, i32 53, metadata !6, null}
 !32 = metadata !{i32 11, i32 72, metadata !6, null}
 !33 = metadata !{i32 13, i32 3, metadata !34, null}
-!34 = metadata !{i32 786443, metadata !6, i32 12, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!34 = metadata !{i32 786443, metadata !1, metadata !6, i32 12, i32 1, i32 1} ; [ DW_TAG_lexical_block ]
 !35 = metadata !{i32 14, i32 3, metadata !34, null}
 !36 = metadata !{i32 17, i32 15, metadata !7, null}
 !37 = metadata !{i32 17, i32 28, metadata !7, null}
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index 87514dff1abb..ae02a245b432 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -41,17 +41,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null}
 !5 = metadata !{i32 786688, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!6 = metadata !{i32 786443, metadata !1, i32 5, i32 12, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!6 = metadata !{i32 786443, metadata !2, metadata !1, i32 5, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
 !7 = metadata !{i32 786468, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 786688, metadata !9, metadata !"y", metadata !2, i32 8, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 786443, metadata !10, i32 7, i32 25, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 786443, metadata !6, i32 7, i32 3, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 786443, metadata !2, metadata !10, i32 7, i32 25, i32 2} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786443, metadata !2, metadata !6, i32 7, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
 !11 = metadata !{i32 6, i32 18, metadata !6, null}
 !12 = metadata !{i32 7, i32 3, metadata !6, null}
 !13 = metadata !{i32 8, i32 20, metadata !9, null}
diff --git a/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll b/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll
new file mode 100644
index 000000000000..11f3e6db0fe5
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
+
+define void @_Z4testv() {
+entry:
+  tail call void @_Z15throw_exceptionv()
+  ret void
+}
+
+declare void @_Z15throw_exceptionv()
+
+; CHECK-NOT: section .ARM.extab
+; CHECK: section .text
+; CHECK-NOT: section .ARM.extab
+; CHECK: section .ARM.exidx
+; CHECK-NEXT: 0000 00000000 80849b80
+; CHECK-NOT: section .ARM.extab
+
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .text
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .ARM.exidx
+; CHECK-FP-ELIM-NEXT: 0000 00000000 b0808480
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+
+; CHECK-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
+; CHECK-RELOC-NEXT: 0 R_ARM_PREL31 .text
+; CHECK-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr0
diff --git a/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll b/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll
new file mode 100644
index 000000000000..79dba084c044
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll
@@ -0,0 +1,62 @@
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM-RELOC
+
+define i32 @_Z3addiiiiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
+entry:
+  %add = add nsw i32 %b, %a
+  %add1 = add nsw i32 %add, %c
+  %add2 = add nsw i32 %add1, %d
+  tail call void @_Z15throw_exceptioni(i32 %add2)
+  %add3 = add nsw i32 %f, %e
+  %add4 = add nsw i32 %add3, %g
+  %add5 = add nsw i32 %add4, %h
+  tail call void @_Z15throw_exceptioni(i32 %add5)
+  %add6 = add nsw i32 %add5, %add2
+  ret i32 %add6
+}
+
+declare void @_Z15throw_exceptioni(i32)
+
+; CHECK-NOT: section .ARM.extab
+; CHECK: section .text
+; CHECK: section .ARM.extab
+; CHECK-NEXT: 0000 419b0181 b0b08384
+; CHECK: section .ARM.exidx
+; CHECK-NEXT: 0000 00000000 00000000
+; CHECK-NOT: section .ARM.extab
+
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .text
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .ARM.exidx
+; CHECK-FP-ELIM-NEXT: 0000 00000000 b0838480
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+
+; CHECK-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
+; CHECK-RELOC-NEXT: 0 R_ARM_PREL31 .text
+; CHECK-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr1
+
+; CHECK-FP-ELIM-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
+; CHECK-FP-ELIM-RELOC-NEXT: 0 R_ARM_PREL31 .text
+; CHECK-FP-ELIM-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr0
diff --git a/test/CodeGen/ARM/ehabi-mc-section-group.ll b/test/CodeGen/ARM/ehabi-mc-section-group.ll
index 5e4b5096c494..616aa1ba46e7 100644
--- a/test/CodeGen/ARM/ehabi-mc-section-group.ll
+++ b/test/CodeGen/ARM/ehabi-mc-section-group.ll
@@ -8,7 +8,7 @@
 ; RUN: llc -mtriple arm-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
-; RUN:   | elf-dump --dump-section-data \
+; RUN:   | llvm-readobj -s -sd \
 ; RUN:   | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
@@ -68,12 +68,21 @@ declare void @__cxa_end_catch()
 
 declare void @_ZSt9terminatev()
 
-; CHECK:      # Section 1
-; CHECK-NEXT: (('sh_name', 0x0000002f) # '.group'
-; CHECK:       ('_section_data', '01000000 0a000000 0c000000 0e000000')
-; CHECK:      # Section 10
-; CHECK-NEXT: (('sh_name', 0x000000e1) # '.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
-; CHECK:      # Section 12
-; CHECK-NEXT: (('sh_name', 0x000000d7) # '.ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
-; CHECK:      # Section 14
-; CHECK-NEXT: (('sh_name', 0x00000065) # '.ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
+; CHECK:        Section {
+; CHECK:          Index: 1
+; CHECK-NEXT:     Name: .group (47)
+; CHECK:          SectionData (
+; CHECK-NEXT:       0000: 01000000 09000000 0B000000 0D000000
+; CHECK-NEXT:     )
+
+; CHECK:        Section {
+; CHECK:          Index: 9
+; CHECK-NEXT:     Name: .text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (214)
+
+; CHECK:        Section {
+; CHECK:          Index: 11
+; CHECK-NEXT:     Name: .ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (204)
+
+; CHECK:        Section {
+; CHECK:          Index: 13
+; CHECK-NEXT:     Name: .ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (90)
diff --git a/test/CodeGen/ARM/ehabi-mc-section.ll b/test/CodeGen/ARM/ehabi-mc-section.ll
index fc51b240ff3d..4e6e46829148 100644
--- a/test/CodeGen/ARM/ehabi-mc-section.ll
+++ b/test/CodeGen/ARM/ehabi-mc-section.ll
@@ -1,8 +1,14 @@
-; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
 ; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
 
 define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) section ".test_section" {
 entry:
@@ -54,6 +60,12 @@ declare void @_ZSt9terminatev()
 
 ; CHECK: section .test_section
 ; CHECK: section .ARM.extab.test_section
-; CHECK-NEXT: 0000 00000000 b0b0b000
+; CHECK-NEXT: 0000 00000000 c9409b01 b0818484
 ; CHECK: section .ARM.exidx.test_section
 ; CHECK-NEXT: 0000 00000000 00000000
+
+; CHECK-FP-ELIM: section .test_section
+; CHECK-FP-ELIM: section .ARM.extab.test_section
+; CHECK-FP-ELIM-NEXT: 0000 00000000 84c90501 b0b0b0a8
+; CHECK-FP-ELIM: section .ARM.exidx.test_section
+; CHECK-FP-ELIM-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/ehabi-mc-sh_link.ll b/test/CodeGen/ARM/ehabi-mc-sh_link.ll
index f90e5f384c1e..ac0a0fc9309a 100644
--- a/test/CodeGen/ARM/ehabi-mc-sh_link.ll
+++ b/test/CodeGen/ARM/ehabi-mc-sh_link.ll
@@ -7,7 +7,7 @@
 ; RUN: llc -mtriple arm-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
-; RUN:   | elf-dump --dump-section-data \
+; RUN:   | llvm-readobj -s \
 ; RUN:   | FileCheck %s
 
 define void @test1() nounwind {
@@ -20,28 +20,39 @@ entry:
   ret void
 }
 
-; CHECK: # Section 1
-; CHECK-NEXT: (('sh_name', 0x00000010) # '.text'
-
-; CHECK:      (('sh_name', 0x00000005) # '.ARM.exidx'
-; CHECK-NEXT:  ('sh_type', 0x70000001)
-; CHECK-NEXT:  ('sh_flags', 0x00000082)
-; CHECK-NEXT:  ('sh_addr', 0x00000000)
-; CHECK-NEXT:  ('sh_offset', 0x0000005c)
-; CHECK-NEXT:  ('sh_size', 0x00000008)
-; CHECK-NEXT:  ('sh_link',  0x00000001)
-; CHECK-NEXT:  ('sh_info',  0x00000000)
-; CHECK-NEXT:  ('sh_addralign',  0x00000004)
-
-; CHECK: # Section 7
-; CHECK-NEXT: (('sh_name', 0x00000039) # '.test_section'
-
-; CHECK:      (('sh_name', 0x0000002f) # '.ARM.exidx.test_section'
-; CHECK-NEXT:  ('sh_type', 0x70000001)
-; CHECK-NEXT:  ('sh_flags', 0x00000082)
-; CHECK-NEXT:  ('sh_addr', 0x00000000)
-; CHECK-NEXT:  ('sh_offset', 0x00000068)
-; CHECK-NEXT:  ('sh_size', 0x00000008)
-; CHECK-NEXT:  ('sh_link',  0x00000007)
-; CHECK-NEXT:  ('sh_info',  0x00000000)
-; CHECK-NEXT:  ('sh_addralign',  0x00000004)
+; CHECK:      Sections [
+; CHECK:        Section {
+; CHECK:          Index: 1
+; CHECK-NEXT:     Name: .text (16)
+
+; CHECK:        Section {
+; CHECK:          Name: .ARM.exidx (5)
+; CHECK-NEXT:     Type: SHT_ARM_EXIDX
+; CHECK-NEXT:     Flags [ (0x82)
+; CHECK-NEXT:       SHF_ALLOC
+; CHECK-NEXT:       SHF_LINK_ORDER
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     Address: 0x0
+; CHECK-NEXT:     Offset: 0x5C
+; CHECK-NEXT:     Size: 8
+; CHECK-NEXT:     Link: 1
+; CHECK-NEXT:     Info: 0
+; CHECK-NEXT:     AddressAlignment: 4
+
+; CHECK:        Section {
+; CHECK:          Index: 7
+; CHECK-NEXT:     Name: .test_section (57)
+
+; CHECK:        Section {
+; CHECK:          Name: .ARM.exidx.test_section (47)
+; CHECK-NEXT:     Type: SHT_ARM_EXIDX
+; CHECK-NEXT:     Flags [ (0x82)
+; CHECK-NEXT:       SHF_ALLOC
+; CHECK-NEXT:       SHF_LINK_ORDER
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     Address: 0x0
+; CHECK-NEXT:     Offset: 0x68
+; CHECK-NEXT:     Size: 8
+; CHECK-NEXT:     Link: 7
+; CHECK-NEXT:     Info: 0
+; CHECK-NEXT:     AddressAlignment: 4
diff --git a/test/CodeGen/ARM/ehabi-mc.ll b/test/CodeGen/ARM/ehabi-mc.ll
index 0dc2ef7838f0..83b8425af7c4 100644
--- a/test/CodeGen/ARM/ehabi-mc.ll
+++ b/test/CodeGen/ARM/ehabi-mc.ll
@@ -1,8 +1,14 @@
-; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
 ; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
 
 define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) {
 entry:
@@ -54,6 +60,12 @@ declare void @_ZSt9terminatev()
 
 ; CHECK: section .text
 ; CHECK: section .ARM.extab
-; CHECK-NEXT: 0000 00000000 b0b0b000
+; CHECK-NEXT: 0000 00000000 c9409b01 b0818484
 ; CHECK: section .ARM.exidx
 ; CHECK-NEXT: 0000 00000000 00000000
+
+; CHECK-FP-ELIM: section .text
+; CHECK-FP-ELIM: section .ARM.extab
+; CHECK-FP-ELIM-NEXT: 0000 00000000 84c90501 b0b0b0a8
+; CHECK-FP-ELIM: section .ARM.exidx
+; CHECK-FP-ELIM-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index 48ef5ed88fb0..c7e2f5d094b8 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
 
 define float @test(float %a, float %b) {
@@ -18,6 +20,8 @@ entry:
 ; NFP0: 	vadd.f32	s
 
 ; CORTEXA8: test:
-; CORTEXA8: 	vadd.f32	d
+; CORTEXA8: 	vadd.f32	s
+; CORTEXA8U: test:
+; CORTEXA8U: 	vadd.f32	d
 ; CORTEXA9: test:
-; CORTEXA9: 	vadd.f32	s{{.}}, s{{.}}, s{{.}}
+; CORTEXA9: 	vadd.f32	s
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index 1566a9272db1..f5245c946398 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
 
 define float @test(float %a, float %b) {
@@ -18,9 +20,11 @@ entry:
 ; NFP0: 	vmul.f32	s
 
 ; CORTEXA8: test:
-; CORTEXA8: 	vmul.f32	d
+; CORTEXA8: 	vmul.f32	s
+; CORTEXA8U: test:
+; CORTEXA8U: 	vmul.f32	d
 ; CORTEXA9: test:
-; CORTEXA9: 	vmul.f32	s{{.}}, s{{.}}, s{{.}}
+; CORTEXA9: 	vmul.f32	s
 
 ; VFP2: test2
 define float @test2(float %a) nounwind {
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index 418b59803d30..d84690ba4e4b 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
 
 define float @test1(float* %a) {
@@ -22,7 +24,10 @@ entry:
 ; NFP0: 	vneg.f32	s{{.*}}, s{{.*}}
 
 ; CORTEXA8: test1:
-; CORTEXA8: 	vneg.f32	d{{.*}}, d{{.*}}
+; CORTEXA8: 	vneg.f32	s{{.*}}, s{{.*}}
+
+; CORTEXA8U: test1:
+; CORTEXA8U: 	vneg.f32	d{{.*}}, d{{.*}}
 
 ; CORTEXA9: test1:
 ; CORTEXA9: 	vneg.f32	s{{.*}}, s{{.*}}
@@ -46,7 +51,10 @@ entry:
 ; NFP0: 	vneg.f32	s{{.*}}, s{{.*}}
 
 ; CORTEXA8: test2:
-; CORTEXA8: 	vneg.f32	d{{.*}}, d{{.*}}
+; CORTEXA8: 	vneg.f32	s{{.*}}, s{{.*}}
+
+; CORTEXA8U: test2:
+; CORTEXA8U: 	vneg.f32	d{{.*}}, d{{.*}}
 
 ; CORTEXA9: test2:
 ; CORTEXA9: 	vneg.f32	s{{.*}}, s{{.*}}
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 9ce9b7ae7dac..c30806173428 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -1,7 +1,9 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=A8U
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8U
 
 define float @t1(float %acc, float %a, float %b) nounwind {
 entry:
@@ -11,9 +13,13 @@ entry:
 ; NEON: t1:
 ; NEON: vnmla.f32
 
+; A8U: t1:
+; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
+; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+
 ; A8: t1:
 ; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
-; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
 	%0 = fmul float %a, %b
 	%1 = fsub float -0.0, %0
         %2 = fsub float %1, %acc
@@ -28,9 +34,13 @@ entry:
 ; NEON: t2:
 ; NEON: vnmla.f32
 
+; A8U: t2:
+; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
+; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+
 ; A8: t2:
 ; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
-; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
 	%0 = fmul float %a, %b
 	%1 = fmul float -1.0, %0
         %2 = fsub float %1, %acc
@@ -45,6 +55,10 @@ entry:
 ; NEON: t3:
 ; NEON: vnmla.f64
 
+; A8U: t3:
+; A8U: vnmul.f64 d
+; A8U: vsub.f64 d
+
 ; A8: t3:
 ; A8: vnmul.f64 d
 ; A8: vsub.f64 d
@@ -62,6 +76,10 @@ entry:
 ; NEON: t4:
 ; NEON: vnmla.f64
 
+; A8U: t4:
+; A8U: vnmul.f64 d
+; A8U: vsub.f64 d
+
 ; A8: t4:
 ; A8: vnmul.f64 d
 ; A8: vsub.f64 d
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 44298b9c5d8d..3c47eb580ff1 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
 
 define i32 @test1(float %a, float %b) {
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
index f039e74c8ee6..617b01881a2e 100644
--- a/test/CodeGen/ARM/fsubs.ll
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NFP1U
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1U
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
 
 define float @test(float %a, float %b) {
@@ -9,5 +11,6 @@ entry:
 }
 
 ; VFP2: vsub.f32	s
-; NFP1: vsub.f32	d
+; NFP1U: vsub.f32	d
+; NFP1: vsub.f32	s
 ; NFP0: vsub.f32	s
diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll
index 1732df3fa5ef..f88e92796196 100644
--- a/test/CodeGen/ARM/global-merge.ll
+++ b/test/CodeGen/ARM/global-merge.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-darwin -global-merge-on-const=true | FileCheck %s
 ; Test the ARMGlobalMerge pass.  Use -march=thumb because it has a small
 ; value for the maximum offset (127).
 
@@ -6,6 +6,52 @@
 ; CHECK: g0:
 @g0 = internal global [32 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2 ]
 
+; Global variables marked with "used" attribute must be kept
+; CHECK: g8
+@g8 = internal global i32 0
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @g8 to i8*)], section "llvm.metadata"
+
+; Global used in landing pad instruction must be kept
+; CHECK: ZTIi
+@_ZTIi = internal global i8* null
+
+define i32 @_Z9exceptioni(i32 %arg) {
+bb:
+  %tmp = invoke i32 @_Z14throwSomethingi(i32 %arg)
+          to label %bb9 unwind label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  %tmp3 = extractvalue { i8*, i32 } %tmp2, 1
+  %tmp4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+  %tmp5 = icmp eq i32 %tmp3, %tmp4
+  br i1 %tmp5, label %bb6, label %bb10
+
+bb6:                                              ; preds = %bb1
+  %tmp7 = extractvalue { i8*, i32 } %tmp2, 0
+  %tmp8 = tail call i8* @__cxa_begin_catch(i8* %tmp7)
+  tail call void @__cxa_end_catch()
+  br label %bb9
+
+bb9:                                              ; preds = %bb6, %bb
+  %res.0 = phi i32 [ 0, %bb6 ], [ %tmp, %bb ]
+  ret i32 %res.0
+
+bb10:                                             ; preds = %bb1
+  resume { i8*, i32 } %tmp2
+}
+
+declare i32 @_Z14throwSomethingi(i32)
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
 ; CHECK: _MergedGlobals:
 @g1 = internal global i32 1
 @g2 = internal global i32 2
@@ -21,3 +67,8 @@
 ; CHECK: _MergedGlobals2
 @g4 = internal global i32 0
 @g5 = internal global i32 0
+
+; Global variables that are constant can be merged together
+; CHECK: _MergedGlobals3
+@g6 = internal constant [12 x i32] zeroinitializer, align 4
+@g7 = internal constant [12 x i32] zeroinitializer, align 4
diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg
index cb77b09ef4ad..4d75f581a1d2 100644
--- a/test/CodeGen/ARM/lit.local.cfg
+++ b/test/CodeGen/ARM/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
 
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
diff --git a/test/CodeGen/ARM/lsr-icmp-imm.ll b/test/CodeGen/ARM/lsr-icmp-imm.ll
index 5283f5747d96..248c4bd1beea 100644
--- a/test/CodeGen/ARM/lsr-icmp-imm.ll
+++ b/test/CodeGen/ARM/lsr-icmp-imm.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
-; RUN: llc -mtriple=armv7-apple-ios   -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios   -disable-block-placement < %s | FileCheck %s
 
 ; LSR should compare against the post-incremented induction variable.
 ; In this case, the immediate value is -2 which requires a cmn instruction.
diff --git a/test/CodeGen/ARM/neon-spfp.ll b/test/CodeGen/ARM/neon-spfp.ll
new file mode 100644
index 000000000000..c00f0d17c9f5
--- /dev/null
+++ b/test/CodeGen/ARM/neon-spfp.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=LINUXA5
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=LINUXA8
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 | FileCheck %s -check-prefix=LINUXA9
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=LINUXA15
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=LINUXSWIFT
+
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA5
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA8
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA9
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA15
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFESWIFT
+
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=DARWINA5
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=DARWINA8
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=DARWINA9
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | FileCheck %s -check-prefix=DARWINA15
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=DARWINSWIFT
+
+; This test makes sure we're not lowering VMUL.f32 D* (aka. NEON) for single-prec. FP ops, since
+; NEON is not fully IEEE 754 compliant, unless unsafe-math is selected.
+
+@.str = private unnamed_addr constant [12 x i8] c"S317\09%.5g \0A\00", align 1
+
+; CHECK-LINUXA5: main:
+; CHECK-LINUXA8: main:
+; CHECK-LINUXA9: main:
+; CHECK-LINUXA15: main:
+; CHECK-LINUXSWIFT: main:
+; CHECK-UNSAFEA5: main:
+; CHECK-UNSAFEA8: main:
+; CHECK-UNSAFEA9: main:
+; CHECK-UNSAFEA15: main:
+; CHECK-UNSAFESWIFT: main:
+; CHECK-DARWINA5: main:
+; CHECK-DARWINA8: main:
+; CHECK-DARWINA9: main:
+; CHECK-DARWINA15: main:
+; CHECK-DARWINSWIFT: main:
+define i32 @main() {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %q.03 = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+  %mul = fmul float %q.03, 0x3FEFAE1480000000
+; CHECK-LINUXA5: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA8: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA9: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA15: vmul.f32 s{{[0-9]*}}
+; Swift is *always* unsafe
+; CHECK-LINUXSWIFT: vmul.f32 d{{[0-9]*}}
+
+; CHECK-UNSAFEA5: vmul.f32 d{{[0-9]*}}
+; CHECK-UNSAFEA8: vmul.f32 d{{[0-9]*}}
+; A9 and A15 don't need this
+; CHECK-UNSAFEA9: vmul.f32 s{{[0-9]*}}
+; CHECK-UNSAFEA15: vmul.f32 s{{[0-9]*}}
+; CHECK-UNSAFESWIFT: vmul.f32 d{{[0-9]*}}
+
+; CHECK-DARWINA5: vmul.f32 d{{[0-9]*}}
+; CHECK-DARWINA8: vmul.f32 d{{[0-9]*}}
+; CHECK-DARWINA9: vmul.f32 s{{[0-9]*}}
+; CHECK-DARWINA15: vmul.f32 s{{[0-9]*}}
+; CHECK-DARWINSWIFT: vmul.f32 d{{[0-9]*}}
+  %conv = fpext float %mul to double
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %conv) #1
+  %inc = add nsw i32 %i.04, 1
+  %exitcond = icmp eq i32 %inc, 16000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...)
diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll
index d301c6a4ca90..0a7c8b2b6aae 100644
--- a/test/CodeGen/ARM/neon_minmax.ll
+++ b/test/CodeGen/ARM/neon_minmax.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
 
 define float @fmin_ole(float %x) nounwind {
 ;CHECK: fmin_ole:
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
index f93ffe7b339a..94578d82fddc 100644
--- a/test/CodeGen/ARM/private.ll
+++ b/test/CodeGen/ARM/private.ll
@@ -1,10 +1,11 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llc < %s -mtriple=arm-linux-gnueabi > %t
-; RUN: grep .Lfoo: %t
-; RUN: egrep bl.*\.Lfoo %t
-; RUN: grep .Lbaz: %t
-; RUN: grep long.*\.Lbaz %t
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+; CHECK: .Lfoo:
+; CHECK: bar:
+; CHECK: bl .Lfoo
+; CHECK: .long .Lbaz
+; CHECK: .Lbaz:
 
 define private void @foo() {
         ret void
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index 72d36456fd25..e67b4788a37d 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -165,17 +165,12 @@ declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
 %T1_5 = type <8 x i32>
 ; CHECK: func_cvt5:
 define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
+; CHECK: vmovl.s8
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
   %v0 = load %T0_5* %loadaddr
 ; COST: func_cvt5
-; COST: cost of 24 {{.*}} sext
+; COST: cost of 3 {{.*}} sext
   %r = sext %T0_5 %v0 to %T1_5
   store %T1_5 %r, %T1_5* %storeaddr
   ret void
@@ -186,17 +181,12 @@ define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
 %TA1_5 = type <8 x i32>
 ; CHECK: func_cvt1:
 define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) {
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
+; CHECK: vmovl.u8
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
   %v0 = load %TA0_5* %loadaddr
 ; COST: func_cvt1
-; COST: cost of 22 {{.*}} zext
+; COST: cost of 3 {{.*}} zext
   %r = zext %TA0_5 %v0 to %TA1_5
   store %TA1_5 %r, %TA1_5* %storeaddr
   ret void
@@ -228,25 +218,13 @@ define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) {
 %TT1_5 = type <16 x i32>
 ; CHECK: func_cvt52:
 define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
   %v0 = load %TT0_5* %loadaddr
 ; COST: func_cvt52
-; COST: cost of 48 {{.*}} sext
+; COST: cost of 6 {{.*}} sext
   %r = sext %TT0_5 %v0 to %TT1_5
   store %TT1_5 %r, %TT1_5* %storeaddr
   ret void
@@ -257,25 +235,13 @@ define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
 %TTA1_5 = type <16 x i32>
 ; CHECK: func_cvt12:
 define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) {
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
   %v0 = load %TTA0_5* %loadaddr
 ; COST: func_cvt12
-; COST: cost of 44 {{.*}} zext
+; COST: cost of 6 {{.*}} zext
   %r = zext %TTA0_5 %v0 to %TTA1_5
   store %TTA1_5 %r, %TTA1_5* %storeaddr
   ret void
@@ -309,3 +275,56 @@ define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
   store %TT1_51 %r, %TT1_51* %storeaddr
   ret void
 }
+
+; CHECK: sext_v4i16_v4i64:
+define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+  %v0 = load <4 x i16>* %loadaddr
+; COST: sext_v4i16_v4i64
+; COST: cost of 3 {{.*}} sext
+  %r = sext <4 x i16> %v0 to <4 x i64>
+  store <4 x i64> %r, <4 x i64>* %storeaddr
+  ret void
+}
+
+; CHECK: zext_v4i16_v4i64:
+define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+  %v0 = load <4 x i16>* %loadaddr
+; COST: zext_v4i16_v4i64
+; COST: cost of 3 {{.*}} zext
+  %r = zext <4 x i16> %v0 to <4 x i64>
+  store <4 x i64> %r, <4 x i64>* %storeaddr
+  ret void
+}
+
+; CHECK: sext_v8i16_v8i64:
+define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+  %v0 = load <8 x i16>* %loadaddr
+; COST: sext_v8i16_v8i64
+; COST: cost of 6 {{.*}} sext
+  %r = sext <8 x i16> %v0 to <8 x i64>
+  store <8 x i64> %r, <8 x i64>* %storeaddr
+  ret void
+}
+
+; CHECK: zext_v8i16_v8i64:
+define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+  %v0 = load <8 x i16>* %loadaddr
+; COST: zext_v8i16_v8i64
+; COST: cost of 6 {{.*}} zext
+  %r = zext <8 x i16> %v0 to <8 x i64>
+  store <8 x i64> %r, <8 x i64>* %storeaddr
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll
index 679e3f434733..1efbc73650d8 100644
--- a/test/CodeGen/ARM/widen-vmovs.ll
+++ b/test/CodeGen/ARM/widen-vmovs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-code-place | FileCheck %s
+; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-block-placement | FileCheck %s
 target triple = "thumbv7-apple-ios"
 
 ; The 1.0e+10 constant is loaded from the constant pool and kept in a register.
diff --git a/test/CodeGen/ARM/zextload_demandedbits.ll b/test/CodeGen/ARM/zextload_demandedbits.ll
new file mode 100644
index 000000000000..3d3269cae236
--- /dev/null
+++ b/test/CodeGen/ARM/zextload_demandedbits.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=arm -mtriple="thumbv7-apple-ios3.0.0" | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+
+%struct.eggs = type { %struct.spam, i16 }
+%struct.spam = type { [3 x i32] }
+%struct.barney = type { [2 x i32], [2 x i32] }
+
+; Make sure that the sext op does not get lost due to ComputeMaskedBits.
+; CHECK: quux
+; CHECK: lsl
+; CHECK: asr
+; CHECK: bl
+; CHECK: pop
+define void @quux(%struct.eggs* %arg) {
+bb:
+  %tmp1 = getelementptr inbounds %struct.eggs* %arg, i32 0, i32 1
+  %0 = load i16* %tmp1, align 2
+  %tobool = icmp eq i16 %0, 0
+  br i1 %tobool, label %bb16, label %bb3
+
+bb3:                                              ; preds = %bb
+  %tmp4 = bitcast i16* %tmp1 to i8*
+  %tmp5 = ptrtoint i16* %tmp1 to i32
+  %tmp6 = shl i32 %tmp5, 20
+  %tmp7 = ashr exact i32 %tmp6, 20
+  %tmp14 = getelementptr inbounds %struct.barney* undef, i32 %tmp7
+  %tmp15 = tail call i32 @widget(%struct.barney* %tmp14, i8* %tmp4, i32 %tmp7)
+  br label %bb16
+
+bb16:                                             ; preds = %bb3, %bb
+  ret void
+}
+
+declare i32 @widget(%struct.barney*, i8*, i32)
diff --git a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
index da1aeb556a39..7ffb734c713a 100644
--- a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
+++ b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s
 ; PR1133
+; XFAIL: hexagon
 define void @test(i32* %X) nounwind  {
 entry:
 	%tmp1 = getelementptr i32* %X, i32 10		; <i32*> [#uses=2]
diff --git a/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll b/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll
new file mode 100644
index 000000000000..a1aed0e3a4b6
--- /dev/null
+++ b/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s
+
+define internal i1 @f(float %s) {
+entry:
+  %c = fcmp ogt float %s, 0x41EFFFFFE0000000
+  ret i1 %c
+}
diff --git a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
index e709080bfc5a..a135c625fccc 100644
--- a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
+++ b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s
-
+; XFAIL: hexagon
 declare { i64, double } @wild()
 
 define void @foo(i64* %p, double* %q) nounwind {
diff --git a/test/CodeGen/Generic/select-cc.ll b/test/CodeGen/Generic/select-cc.ll
index b653e2a46dcf..7510f701b147 100644
--- a/test/CodeGen/Generic/select-cc.ll
+++ b/test/CodeGen/Generic/select-cc.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s
 ; PR2504
-
+; XFAIL: hexagon
 define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind  {
 	%x.lo = extractelement <2 x double> %x, i32 0		; <double> [#uses=1]
 	%x.lo.ge = fcmp oge double %x.lo, 0.000000e+00		; <i1> [#uses=1]
diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll
index 84814a1c128f..bc7c7d00a11c 100644
--- a/test/CodeGen/Generic/vector.ll
+++ b/test/CodeGen/Generic/vector.ll
@@ -1,6 +1,6 @@
 ; Test that vectors are scalarized/lowered correctly.
 ; RUN: llc < %s
-
+; XFAIL: hexagon
 
 %d8 = type <8 x double>
 %f1 = type <1 x float>
diff --git a/test/CodeGen/Hexagon/ashift-left-right.ll b/test/CodeGen/Hexagon/ashift-left-right.ll
new file mode 100644
index 000000000000..7c41bc7bbf3b
--- /dev/null
+++ b/test/CodeGen/Hexagon/ashift-left-right.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+define i32 @foo(i32 %a, i32 %b) nounwind readnone {
+; CHECK: lsl
+; CHECK: aslh
+entry:
+  %shl1 = shl i32 16, %a
+  %shl2 = shl i32 %b, 16
+  %ret = mul i32 %shl1, %shl2
+  ret i32 %ret
+}
+
+define i32 @bar(i32 %a, i32 %b) nounwind readnone {
+; CHECK: asrh
+; CHECK: lsr
+entry:
+  %shl1 = ashr i32 16, %a
+  %shl2 = ashr i32 %b, 16
+  %ret = mul i32 %shl1, %shl2
+  ret i32 %ret
+}
diff --git a/test/CodeGen/Hexagon/gp-rel.ll b/test/CodeGen/Hexagon/gp-rel.ll
new file mode 100644
index 000000000000..561869e8ef35
--- /dev/null
+++ b/test/CodeGen/Hexagon/gp-rel.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that gp-relative instructions are being generated.
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+
+define i32 @foo(i32 %p) #0 {
+entry:
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#a)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#b)
+; CHECK: if{{ *}}(p{{[0-3]}}) memw(##c){{ *}}={{ *}}r{{[0-9]+}}
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %add = add nsw i32 %1, %0
+  %cmp = icmp eq i32 %0, %1
+  br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+
+entry.if.end_crit_edge:
+  %.pre = load i32* @c, align 4
+  br label %if.end
+
+if.then:
+  %add1 = add nsw i32 %add, %0
+  store i32 %add1, i32* @c, align 4
+  br label %if.end
+
+if.end:
+  %2 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add1, %if.then ]
+  %cmp2 = icmp eq i32 %add, %2
+  %sel1 = select i1 %cmp2, i32 %2, i32 %1
+  ret i32 %sel1
+}
diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll
index eaffa0797a6c..c2e8153b7dff 100644
--- a/test/CodeGen/Hexagon/hwloop-dbg.ll
+++ b/test/CodeGen/Hexagon/hwloop-dbg.ll
@@ -33,7 +33,6 @@ for.end:                                          ; preds = %for.body
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
-!llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", metadata !"QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99]
 !1 = metadata !{metadata !2}
diff --git a/test/CodeGen/Hexagon/memops.ll b/test/CodeGen/Hexagon/memops.ll
new file mode 100644
index 000000000000..5498848d8560
--- /dev/null
+++ b/test/CodeGen/Hexagon/memops.ll
@@ -0,0 +1,1369 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Generate MemOps for V4 and above.
+
+define void @memop_unsigned_char_add5(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %0 = load i8* %p, align 1, !tbaa !0
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 5
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_add(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv = zext i8 %x to i32
+  %0 = load i8* %p, align 1, !tbaa !0
+  %conv1 = zext i8 %0 to i32
+  %add = add nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %add to i8
+  store i8 %conv2, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_sub(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv = zext i8 %x to i32
+  %0 = load i8* %p, align 1, !tbaa !0
+  %conv1 = zext i8 %0 to i32
+  %sub = sub nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %sub to i8
+  store i8 %conv2, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_or(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %0 = load i8* %p, align 1, !tbaa !0
+  %or3 = or i8 %0, %x
+  store i8 %or3, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_and(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %0 = load i8* %p, align 1, !tbaa !0
+  %and3 = and i8 %0, %x
+  store i8 %and3, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_clrbit(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %0 = load i8* %p, align 1, !tbaa !0
+  %conv = zext i8 %0 to i32
+  %and = and i32 %conv, 223
+  %conv1 = trunc i32 %and to i8
+  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_setbit(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %0 = load i8* %p, align 1, !tbaa !0
+  %conv = zext i8 %0 to i32
+  %or = or i32 %conv, 128
+  %conv1 = trunc i32 %or to i8
+  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_add5_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 5
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_add_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv = zext i8 %x to i32
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv1 = zext i8 %0 to i32
+  %add = add nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %add to i8
+  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_sub_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv = zext i8 %x to i32
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv1 = zext i8 %0 to i32
+  %sub = sub nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %sub to i8
+  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_or_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %or3 = or i8 %0, %x
+  store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_and_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %and3 = and i8 %0, %x
+  store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv = zext i8 %0 to i32
+  %and = and i32 %conv, 223
+  %conv1 = trunc i32 %and to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_setbit_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv = zext i8 %0 to i32
+  %or = or i32 %conv, 128
+  %conv1 = trunc i32 %or to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_add5_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 5
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_add_index5(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv = zext i8 %x to i32
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv1 = zext i8 %0 to i32
+  %add = add nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %add to i8
+  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_sub_index5(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv = zext i8 %x to i32
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv1 = zext i8 %0 to i32
+  %sub = sub nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %sub to i8
+  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_or_index5(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %or3 = or i8 %0, %x
+  store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_and_index5(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %and3 = and i8 %0, %x
+  store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_clrbit_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv = zext i8 %0 to i32
+  %and = and i32 %conv, 223
+  %conv1 = trunc i32 %and to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_char_setbit_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv = zext i8 %0 to i32
+  %or = or i32 %conv, 128
+  %conv1 = trunc i32 %or to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_add5(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %0 = load i8* %p, align 1, !tbaa !0
+  %conv2 = zext i8 %0 to i32
+  %add = add nsw i32 %conv2, 5
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_add(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv4 = zext i8 %x to i32
+  %0 = load i8* %p, align 1, !tbaa !0
+  %conv13 = zext i8 %0 to i32
+  %add = add nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %add to i8
+  store i8 %conv2, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_sub(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv4 = zext i8 %x to i32
+  %0 = load i8* %p, align 1, !tbaa !0
+  %conv13 = zext i8 %0 to i32
+  %sub = sub nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %sub to i8
+  store i8 %conv2, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_or(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %0 = load i8* %p, align 1, !tbaa !0
+  %or3 = or i8 %0, %x
+  store i8 %or3, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_and(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %0 = load i8* %p, align 1, !tbaa !0
+  %and3 = and i8 %0, %x
+  store i8 %and3, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_clrbit(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %0 = load i8* %p, align 1, !tbaa !0
+  %conv2 = zext i8 %0 to i32
+  %and = and i32 %conv2, 223
+  %conv1 = trunc i32 %and to i8
+  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_setbit(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %0 = load i8* %p, align 1, !tbaa !0
+  %conv2 = zext i8 %0 to i32
+  %or = or i32 %conv2, 128
+  %conv1 = trunc i32 %or to i8
+  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_add5_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv2 = zext i8 %0 to i32
+  %add = add nsw i32 %conv2, 5
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_add_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv4 = zext i8 %x to i32
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv13 = zext i8 %0 to i32
+  %add = add nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %add to i8
+  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_sub_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv4 = zext i8 %x to i32
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv13 = zext i8 %0 to i32
+  %sub = sub nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %sub to i8
+  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_or_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %or3 = or i8 %0, %x
+  store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_and_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %and3 = and i8 %0, %x
+  store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv2 = zext i8 %0 to i32
+  %and = and i32 %conv2, 223
+  %conv1 = trunc i32 %and to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_setbit_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i8* %p, i32 %i
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv2 = zext i8 %0 to i32
+  %or = or i32 %conv2, 128
+  %conv1 = trunc i32 %or to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_add5_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv2 = zext i8 %0 to i32
+  %add = add nsw i32 %conv2, 5
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_add_index5(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv4 = zext i8 %x to i32
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv13 = zext i8 %0 to i32
+  %add = add nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %add to i8
+  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_sub_index5(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv4 = zext i8 %x to i32
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv13 = zext i8 %0 to i32
+  %sub = sub nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %sub to i8
+  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_or_index5(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %or3 = or i8 %0, %x
+  store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_and_index5(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %and3 = and i8 %0, %x
+  store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_clrbit_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv2 = zext i8 %0 to i32
+  %and = and i32 %conv2, 223
+  %conv1 = trunc i32 %and to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_signed_char_setbit_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i8* %p, i32 5
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv2 = zext i8 %0 to i32
+  %or = or i32 %conv2, 128
+  %conv1 = trunc i32 %or to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @memop_unsigned_short_add5(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %0 = load i16* %p, align 2, !tbaa !2
+  %conv = zext i16 %0 to i32
+  %add = add nsw i32 %conv, 5
+  %conv1 = trunc i32 %add to i16
+  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_add(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv = zext i16 %x to i32
+  %0 = load i16* %p, align 2, !tbaa !2
+  %conv1 = zext i16 %0 to i32
+  %add = add nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %add to i16
+  store i16 %conv2, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_sub(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv = zext i16 %x to i32
+  %0 = load i16* %p, align 2, !tbaa !2
+  %conv1 = zext i16 %0 to i32
+  %sub = sub nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %sub to i16
+  store i16 %conv2, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_or(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %0 = load i16* %p, align 2, !tbaa !2
+  %or3 = or i16 %0, %x
+  store i16 %or3, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_and(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %0 = load i16* %p, align 2, !tbaa !2
+  %and3 = and i16 %0, %x
+  store i16 %and3, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_clrbit(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %0 = load i16* %p, align 2, !tbaa !2
+  %conv = zext i16 %0 to i32
+  %and = and i32 %conv, 65503
+  %conv1 = trunc i32 %and to i16
+  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_setbit(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %0 = load i16* %p, align 2, !tbaa !2
+  %conv = zext i16 %0 to i32
+  %or = or i32 %conv, 128
+  %conv1 = trunc i32 %or to i16
+  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_add5_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv = zext i16 %0 to i32
+  %add = add nsw i32 %conv, 5
+  %conv1 = trunc i32 %add to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_add_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv = zext i16 %x to i32
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv1 = zext i16 %0 to i32
+  %add = add nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %add to i16
+  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_sub_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv = zext i16 %x to i32
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv1 = zext i16 %0 to i32
+  %sub = sub nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %sub to i16
+  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_or_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %or3 = or i16 %0, %x
+  store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_and_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %and3 = and i16 %0, %x
+  store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv = zext i16 %0 to i32
+  %and = and i32 %conv, 65503
+  %conv1 = trunc i32 %and to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_setbit_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv = zext i16 %0 to i32
+  %or = or i32 %conv, 128
+  %conv1 = trunc i32 %or to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_add5_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv = zext i16 %0 to i32
+  %add = add nsw i32 %conv, 5
+  %conv1 = trunc i32 %add to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_add_index5(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv = zext i16 %x to i32
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv1 = zext i16 %0 to i32
+  %add = add nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %add to i16
+  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_sub_index5(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv = zext i16 %x to i32
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv1 = zext i16 %0 to i32
+  %sub = sub nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %sub to i16
+  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_or_index5(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %or3 = or i16 %0, %x
+  store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_and_index5(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %and3 = and i16 %0, %x
+  store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_clrbit_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv = zext i16 %0 to i32
+  %and = and i32 %conv, 65503
+  %conv1 = trunc i32 %and to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_unsigned_short_setbit_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv = zext i16 %0 to i32
+  %or = or i32 %conv, 128
+  %conv1 = trunc i32 %or to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_add5(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %0 = load i16* %p, align 2, !tbaa !2
+  %conv2 = zext i16 %0 to i32
+  %add = add nsw i32 %conv2, 5
+  %conv1 = trunc i32 %add to i16
+  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_add(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv4 = zext i16 %x to i32
+  %0 = load i16* %p, align 2, !tbaa !2
+  %conv13 = zext i16 %0 to i32
+  %add = add nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %add to i16
+  store i16 %conv2, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_sub(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv4 = zext i16 %x to i32
+  %0 = load i16* %p, align 2, !tbaa !2
+  %conv13 = zext i16 %0 to i32
+  %sub = sub nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %sub to i16
+  store i16 %conv2, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_or(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %0 = load i16* %p, align 2, !tbaa !2
+  %or3 = or i16 %0, %x
+  store i16 %or3, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_and(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %0 = load i16* %p, align 2, !tbaa !2
+  %and3 = and i16 %0, %x
+  store i16 %and3, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_clrbit(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %0 = load i16* %p, align 2, !tbaa !2
+  %conv2 = zext i16 %0 to i32
+  %and = and i32 %conv2, 65503
+  %conv1 = trunc i32 %and to i16
+  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_setbit(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %0 = load i16* %p, align 2, !tbaa !2
+  %conv2 = zext i16 %0 to i32
+  %or = or i32 %conv2, 128
+  %conv1 = trunc i32 %or to i16
+  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_add5_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv2 = zext i16 %0 to i32
+  %add = add nsw i32 %conv2, 5
+  %conv1 = trunc i32 %add to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_add_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv4 = zext i16 %x to i32
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv13 = zext i16 %0 to i32
+  %add = add nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %add to i16
+  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_sub_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv4 = zext i16 %x to i32
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv13 = zext i16 %0 to i32
+  %sub = sub nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %sub to i16
+  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_or_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %or3 = or i16 %0, %x
+  store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_and_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %and3 = and i16 %0, %x
+  store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv2 = zext i16 %0 to i32
+  %and = and i32 %conv2, 65503
+  %conv1 = trunc i32 %and to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_setbit_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i16* %p, i32 %i
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv2 = zext i16 %0 to i32
+  %or = or i32 %conv2, 128
+  %conv1 = trunc i32 %or to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_add5_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv2 = zext i16 %0 to i32
+  %add = add nsw i32 %conv2, 5
+  %conv1 = trunc i32 %add to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_add_index5(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}}
+  %conv4 = zext i16 %x to i32
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv13 = zext i16 %0 to i32
+  %add = add nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %add to i16
+  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_sub_index5(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}}
+  %conv4 = zext i16 %x to i32
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv13 = zext i16 %0 to i32
+  %sub = sub nsw i32 %conv13, %conv4
+  %conv2 = trunc i32 %sub to i16
+  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_or_index5(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %or3 = or i16 %0, %x
+  store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_and_index5(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %and3 = and i16 %0, %x
+  store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_clrbit_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv2 = zext i16 %0 to i32
+  %and = and i32 %conv2, 65503
+  %conv1 = trunc i32 %and to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_short_setbit_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i16* %p, i32 5
+  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %conv2 = zext i16 %0 to i32
+  %or = or i32 %conv2, 128
+  %conv1 = trunc i32 %or to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  ret void
+}
+
+define void @memop_signed_int_add5(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %0 = load i32* %p, align 4, !tbaa !3
+  %add = add i32 %0, 5
+  store i32 %add, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_add(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %0 = load i32* %p, align 4, !tbaa !3
+  %add = add i32 %0, %x
+  store i32 %add, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_sub(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %0 = load i32* %p, align 4, !tbaa !3
+  %sub = sub i32 %0, %x
+  store i32 %sub, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_or(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %0 = load i32* %p, align 4, !tbaa !3
+  %or = or i32 %0, %x
+  store i32 %or, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_and(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %0 = load i32* %p, align 4, !tbaa !3
+  %and = and i32 %0, %x
+  store i32 %and, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_clrbit(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %0 = load i32* %p, align 4, !tbaa !3
+  %and = and i32 %0, -33
+  store i32 %and, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_setbit(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %0 = load i32* %p, align 4, !tbaa !3
+  %or = or i32 %0, 128
+  store i32 %or, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_add5_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %add = add i32 %0, 5
+  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %add = add i32 %0, %x
+  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %sub = sub i32 %0, %x
+  store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %or = or i32 %0, %x
+  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %and = and i32 %0, %x
+  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %and = and i32 %0, -33
+  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_setbit_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %or = or i32 %0, 128
+  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_add5_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %add = add i32 %0, 5
+  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_add_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %add = add i32 %0, %x
+  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_sub_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %sub = sub i32 %0, %x
+  store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_or_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %or = or i32 %0, %x
+  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_and_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %and = and i32 %0, %x
+  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_clrbit_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %and = and i32 %0, -33
+  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_signed_int_setbit_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %or = or i32 %0, 128
+  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_add5(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %0 = load i32* %p, align 4, !tbaa !3
+  %add = add nsw i32 %0, 5
+  store i32 %add, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_add(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %0 = load i32* %p, align 4, !tbaa !3
+  %add = add nsw i32 %0, %x
+  store i32 %add, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_sub(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %0 = load i32* %p, align 4, !tbaa !3
+  %sub = sub nsw i32 %0, %x
+  store i32 %sub, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_or(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %0 = load i32* %p, align 4, !tbaa !3
+  %or = or i32 %0, %x
+  store i32 %or, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_and(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %0 = load i32* %p, align 4, !tbaa !3
+  %and = and i32 %0, %x
+  store i32 %and, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_clrbit(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %0 = load i32* %p, align 4, !tbaa !3
+  %and = and i32 %0, -33
+  store i32 %and, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_setbit(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %0 = load i32* %p, align 4, !tbaa !3
+  %or = or i32 %0, 128
+  store i32 %or, i32* %p, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_add5_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %add = add nsw i32 %0, 5
+  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %add = add nsw i32 %0, %x
+  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %sub = sub nsw i32 %0, %x
+  store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %or = or i32 %0, %x
+  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %and = and i32 %0, %x
+  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %and = and i32 %0, -33
+  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_setbit_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i32* %p, i32 %i
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %or = or i32 %0, 128
+  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_add5_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %add = add nsw i32 %0, 5
+  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_add_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %add = add nsw i32 %0, %x
+  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_sub_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %sub = sub nsw i32 %0, %x
+  store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_or_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %or = or i32 %0, %x
+  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_and_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}}
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %and = and i32 %0, %x
+  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_clrbit_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %and = and i32 %0, -33
+  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+define void @memop_unsigned_int_setbit_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+  %add.ptr = getelementptr inbounds i32* %p, i32 5
+  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %or = or i32 %0, 128
+  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
+!2 = metadata !{metadata !"short", metadata !0}
+!3 = metadata !{metadata !"int", metadata !0}
diff --git a/test/CodeGen/Hexagon/memops1.ll b/test/CodeGen/Hexagon/memops1.ll
new file mode 100644
index 000000000000..2babdc848ddc
--- /dev/null
+++ b/test/CodeGen/Hexagon/memops1.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Generate MemOps for V4 and above.
+
+
+define void @f(i32* %p) nounwind {
+entry:
+; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#40){{ *}}-={{ *}}#1
+  %p.addr = alloca i32*, align 4
+  store i32* %p, i32** %p.addr, align 4
+  %0 = load i32** %p.addr, align 4
+  %add.ptr = getelementptr inbounds i32* %0, i32 10
+  %1 = load i32* %add.ptr, align 4
+  %sub = sub nsw i32 %1, 1
+  store i32 %sub, i32* %add.ptr, align 4
+  ret void
+}
+
+define void @g(i32* %p, i32 %i) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#40){{ *}}-={{ *}}#1
+  %p.addr = alloca i32*, align 4
+  %i.addr = alloca i32, align 4
+  store i32* %p, i32** %p.addr, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32** %p.addr, align 4
+  %1 = load i32* %i.addr, align 4
+  %add.ptr = getelementptr inbounds i32* %0, i32 %1
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 10
+  %2 = load i32* %add.ptr1, align 4
+  %sub = sub nsw i32 %2, 1
+  store i32 %sub, i32* %add.ptr1, align 4
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/memops2.ll b/test/CodeGen/Hexagon/memops2.ll
new file mode 100644
index 000000000000..b1b25445c029
--- /dev/null
+++ b/test/CodeGen/Hexagon/memops2.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Generate MemOps for V4 and above.
+
+
+define void @f(i16* nocapture %p) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1
+  %add.ptr = getelementptr inbounds i16* %p, i32 10
+  %0 = load i16* %add.ptr, align 2, !tbaa !0
+  %conv2 = zext i16 %0 to i32
+  %sub = add nsw i32 %conv2, 65535
+  %conv1 = trunc i32 %sub to i16
+  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !0
+  ret void
+}
+
+define void @g(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1
+  %add.ptr.sum = add i32 %i, 10
+  %add.ptr1 = getelementptr inbounds i16* %p, i32 %add.ptr.sum
+  %0 = load i16* %add.ptr1, align 2, !tbaa !0
+  %conv3 = zext i16 %0 to i32
+  %sub = add nsw i32 %conv3, 65535
+  %conv2 = trunc i32 %sub to i16
+  store i16 %conv2, i16* %add.ptr1, align 2, !tbaa !0
+  ret void
+}
+
+!0 = metadata !{metadata !"short", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/memops3.ll b/test/CodeGen/Hexagon/memops3.ll
new file mode 100644
index 000000000000..5b8bd6c87bfb
--- /dev/null
+++ b/test/CodeGen/Hexagon/memops3.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Generate MemOps for V4 and above.
+
+
+define void @f(i8* nocapture %p) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1
+  %add.ptr = getelementptr inbounds i8* %p, i32 10
+  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %conv = zext i8 %0 to i32
+  %sub = add nsw i32 %conv, 255
+  %conv1 = trunc i32 %sub to i8
+  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  ret void
+}
+
+define void @g(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1
+  %add.ptr.sum = add i32 %i, 10
+  %add.ptr1 = getelementptr inbounds i8* %p, i32 %add.ptr.sum
+  %0 = load i8* %add.ptr1, align 1, !tbaa !0
+  %conv = zext i8 %0 to i32
+  %sub = add nsw i32 %conv, 255
+  %conv2 = trunc i32 %sub to i8
+  store i8 %conv2, i8* %add.ptr1, align 1, !tbaa !0
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Inputs/DbgValueOtherTargets.ll b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
index 3f244f689111..d5162b964a08 100644
--- a/test/CodeGen/Inputs/DbgValueOtherTargets.ll
+++ b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
@@ -12,11 +12,10 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
-!11 = metadata !{metadata !0}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 120996)", i1 false, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
@@ -25,3 +24,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !8 = metadata !{i32 786443, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
 !9 = metadata !{i32 3, i32 11, metadata !8, null}
 !10 = metadata !{i32 4, i32 2, metadata !8, null}
+!11 = metadata !{metadata !0}
+!12 = metadata !{metadata !"/tmp/x.c", metadata !"/Users/manav"}
diff --git a/test/CodeGen/MBlaze/DbgValueOtherTargets.test b/test/CodeGen/MBlaze/DbgValueOtherTargets.test
index 4032d7b81c2f..8b850f51105b 100644
--- a/test/CodeGen/MBlaze/DbgValueOtherTargets.test
+++ b/test/CodeGen/MBlaze/DbgValueOtherTargets.test
@@ -1 +1 @@
-; RUN: llc -O0 -march=mblaze -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=mblaze -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/MBlaze/lit.local.cfg b/test/CodeGen/MBlaze/lit.local.cfg
index e236200d7572..ff4928de4b9c 100644
--- a/test/CodeGen/MBlaze/lit.local.cfg
+++ b/test/CodeGen/MBlaze/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
 
 targets = set(config.root.targets_to_build.split())
 if not 'MBlaze' in targets:
diff --git a/test/CodeGen/MSP430/DbgValueOtherTargets.test b/test/CodeGen/MSP430/DbgValueOtherTargets.test
index f8c747ec92de..7adfbcafa35b 100644
--- a/test/CodeGen/MSP430/DbgValueOtherTargets.test
+++ b/test/CodeGen/MSP430/DbgValueOtherTargets.test
@@ -1 +1 @@
-; RUN: llc -O0 -march=msp430 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=msp430 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg
index 972732ebad30..0ca9fc9c6912 100644
--- a/test/CodeGen/MSP430/lit.local.cfg
+++ b/test/CodeGen/MSP430/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
 
 targets = set(config.root.targets_to_build.split())
 if not 'MSP430' in targets:
diff --git a/test/CodeGen/Mips/DbgValueOtherTargets.test b/test/CodeGen/Mips/DbgValueOtherTargets.test
index 9c351ace680e..da20e7ef5224 100644
--- a/test/CodeGen/Mips/DbgValueOtherTargets.test
+++ b/test/CodeGen/Mips/DbgValueOtherTargets.test
@@ -1 +1 @@
-; RUN: llc -O0 -march=mips -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=mips -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/Mips/dsp-patterns.ll b/test/CodeGen/Mips/dsp-patterns.ll
new file mode 100644
index 000000000000..c2aeab5c10b1
--- /dev/null
+++ b/test/CodeGen/Mips/dsp-patterns.ll
@@ -0,0 +1,205 @@
+; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s -check-prefix=R1
+; RUN: llc -march=mips -mattr=dspr2 < %s | FileCheck %s -check-prefix=R2
+
+; R1: test_lbux:
+; R1: lbux ${{[0-9]+}}
+
+define zeroext i8 @test_lbux(i8* nocapture %b, i32 %i) {
+entry:
+  %add.ptr = getelementptr inbounds i8* %b, i32 %i
+  %0 = load i8* %add.ptr, align 1
+  ret i8 %0
+}
+
+; R1: test_lhx:
+; R1: lhx ${{[0-9]+}}
+
+define signext i16 @test_lhx(i16* nocapture %b, i32 %i) {
+entry:
+  %add.ptr = getelementptr inbounds i16* %b, i32 %i
+  %0 = load i16* %add.ptr, align 2
+  ret i16 %0
+}
+
+; R1: test_lwx:
+; R1: lwx ${{[0-9]+}}
+
+define i32 @test_lwx(i32* nocapture %b, i32 %i) {
+entry:
+  %add.ptr = getelementptr inbounds i32* %b, i32 %i
+  %0 = load i32* %add.ptr, align 4
+  ret i32 %0
+}
+
+; R1: test_add_v2q15_:
+; R1: addq.ph ${{[0-9]+}}
+
+define { i32 } @test_add_v2q15_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>
+  %1 = bitcast i32 %b.coerce to <2 x i16>
+  %add = add <2 x i16> %0, %1
+  %2 = bitcast <2 x i16> %add to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: test_sub_v2q15_:
+; R1: subq.ph ${{[0-9]+}}
+
+define { i32 } @test_sub_v2q15_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>
+  %1 = bitcast i32 %b.coerce to <2 x i16>
+  %sub = sub <2 x i16> %0, %1
+  %2 = bitcast <2 x i16> %sub to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R2: test_mul_v2q15_:
+; R2: mul.ph ${{[0-9]+}}
+
+; mul.ph is an R2 instruction. Check that multiply node gets expanded.
+; R1: test_mul_v2q15_:
+; R1: mul ${{[0-9]+}}
+; R1: mul ${{[0-9]+}}
+
+define { i32 } @test_mul_v2q15_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>
+  %1 = bitcast i32 %b.coerce to <2 x i16>
+  %mul = mul <2 x i16> %0, %1
+  %2 = bitcast <2 x i16> %mul to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: test_add_v4i8_:
+; R1: addu.qb ${{[0-9]+}}
+
+define { i32 } @test_add_v4i8_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8>
+  %1 = bitcast i32 %b.coerce to <4 x i8>
+  %add = add <4 x i8> %0, %1
+  %2 = bitcast <4 x i8> %add to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: test_sub_v4i8_:
+; R1: subu.qb ${{[0-9]+}}
+
+define { i32 } @test_sub_v4i8_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8>
+  %1 = bitcast i32 %b.coerce to <4 x i8>
+  %sub = sub <4 x i8> %0, %1
+  %2 = bitcast <4 x i8> %sub to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; DSP-ASE doesn't have a v4i8 multiply instruction. Check that multiply node gets expanded.
+; R2: test_mul_v4i8_:
+; R2: mul ${{[0-9]+}}
+; R2: mul ${{[0-9]+}}
+; R2: mul ${{[0-9]+}}
+; R2: mul ${{[0-9]+}}
+
+define { i32 } @test_mul_v4i8_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8>
+  %1 = bitcast i32 %b.coerce to <4 x i8>
+  %mul = mul <4 x i8> %0, %1
+  %2 = bitcast <4 x i8> %mul to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: test_addsc:
+; R1: addsc ${{[0-9]+}}
+; R1: addwc ${{[0-9]+}}
+
+define i64 @test_addsc(i64 %a, i64 %b) {
+entry:
+  %add = add nsw i64 %b, %a
+  ret i64 %add
+}
+
+; R1: shift1_v2i16_shl_:
+; R1: shll.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
+
+define { i32 } @shift1_v2i16_shl_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %shl = shl <2 x i16> %0, <i16 15, i16 15>
+  %1 = bitcast <2 x i16> %shl to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: shift1_v2i16_sra_:
+; R1: shra.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
+
+define { i32 } @shift1_v2i16_sra_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %shr = ashr <2 x i16> %0, <i16 15, i16 15>
+  %1 = bitcast <2 x i16> %shr to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: shift1_v2ui16_srl_:
+; R1-NOT: shrl.ph
+; R2: shift1_v2ui16_srl_:
+; R2: shrl.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
+
+define { i32 } @shift1_v2ui16_srl_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %shr = lshr <2 x i16> %0, <i16 15, i16 15>
+  %1 = bitcast <2 x i16> %shr to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: shift1_v4i8_shl_:
+; R1: shll.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
+
+define { i32 } @shift1_v4i8_shl_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %shl = shl <4 x i8> %0, <i8 7, i8 7, i8 7, i8 7>
+  %1 = bitcast <4 x i8> %shl to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: shift1_v4i8_sra_:
+; R1-NOT: shra.qb
+; R2: shift1_v4i8_sra_:
+; R2: shra.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
+
+define { i32 } @shift1_v4i8_sra_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %shr = ashr <4 x i8> %0, <i8 7, i8 7, i8 7, i8 7>
+  %1 = bitcast <4 x i8> %shr to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: shift1_v4ui8_srl_:
+; R1: shrl.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
+
+define { i32 } @shift1_v4ui8_srl_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %shr = lshr <4 x i8> %0, <i8 7, i8 7, i8 7, i8 7>
+  %1 = bitcast <4 x i8> %shr to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
diff --git a/test/CodeGen/Mips/eh-return32.ll b/test/CodeGen/Mips/eh-return32.ll
index fe8a40475c2d..c3003b34b162 100644
--- a/test/CodeGen/Mips/eh-return32.ll
+++ b/test/CodeGen/Mips/eh-return32.ll
@@ -37,7 +37,9 @@ entry:
 ; CHECK:        lw      $7, [[offset3]]($sp)
 
 ; check that stack is adjusted by $v1 and that code returns to address in $v0
+; also check that $25 contains handler value
 ; CHECK:        addiu   $sp, $sp, [[spoffset]]
+; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
 ; CHECK:        jr      $ra
 ; CHECK:        addu    $sp, $sp, $3
@@ -74,7 +76,9 @@ entry:
 ; CHECK:        lw      $7, [[offset3]]($sp)
 
 ; check that stack is adjusted by $v1 and that code returns to address in $v0
+; also check that $25 contains handler value
 ; CHECK:        addiu   $sp, $sp, [[spoffset]]
+; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
 ; CHECK:        jr      $ra
 ; CHECK:        addu    $sp, $sp, $3
diff --git a/test/CodeGen/Mips/eh-return64.ll b/test/CodeGen/Mips/eh-return64.ll
index 0b76b95e24c7..373a9a114453 100644
--- a/test/CodeGen/Mips/eh-return64.ll
+++ b/test/CodeGen/Mips/eh-return64.ll
@@ -37,7 +37,9 @@ entry:
 ; CHECK:        ld      $7, [[offset3]]($sp)
 
 ; check that stack is adjusted by $v1 and that code returns to address in $v0
+; also check that $25 contains handler value
 ; CHECK:        daddiu  $sp, $sp, [[spoffset]]
+; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
 ; CHECK:        jr      $ra
 ; CHECK:        daddu   $sp, $sp, $3
@@ -75,7 +77,9 @@ entry:
 ; CHECK:        ld      $7, [[offset3]]($sp)
 
 ; check that stack is adjusted by $v1 and that code returns to address in $v0
+; also check that $25 contains handler value
 ; CHECK:        daddiu  $sp, $sp, [[spoffset]]
+; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
 ; CHECK:        jr      $ra
 ; CHECK:        daddu   $sp, $sp, $3
diff --git a/test/CodeGen/Mips/fpneeded.ll b/test/CodeGen/Mips/fpneeded.ll
new file mode 100644
index 000000000000..623883a0d5c0
--- /dev/null
+++ b/test/CodeGen/Mips/fpneeded.ll
@@ -0,0 +1,149 @@
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-os16  | FileCheck %s -check-prefix=32
+
+@x = global float 1.000000e+00, align 4
+@y = global float 2.000000e+00, align 4
+@zz = common global float 0.000000e+00, align 4
+@z = common global float 0.000000e+00, align 4
+
+define float @fv() #0 {
+entry:
+  ret float 1.000000e+00
+}
+
+; 32: 	.set	nomips16                  # @fv
+; 32: 	.ent	fv
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	fv
+
+define double @dv() #0 {
+entry:
+  ret double 2.000000e+00
+}
+
+; 32: 	.set	nomips16                  # @dv
+; 32: 	.ent	dv
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	dv
+
+define void @vf(float %x) #0 {
+entry:
+  %x.addr = alloca float, align 4
+  store float %x, float* %x.addr, align 4
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @vf
+; 32: 	.ent	vf
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	vf
+
+define void @vd(double %x) #0 {
+entry:
+  %x.addr = alloca double, align 8
+  store double %x, double* %x.addr, align 8
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @vd
+; 32: 	.ent	vd
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	vd
+
+define void @foo1() #0 {
+entry:
+  store float 1.000000e+00, float* @zz, align 4
+  %0 = load float* @y, align 4
+  %1 = load float* @x, align 4
+  %add = fadd float %0, %1
+  store float %add, float* @z, align 4
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @foo1
+; 32: 	.ent	foo1
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	foo1
+
+define void @foo2() #0 {
+entry:
+  %0 = load float* @x, align 4
+  call void @vf(float %0)
+  ret void
+}
+
+
+; 32: 	.set	nomips16                  # @foo2
+; 32: 	.ent	foo2
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	foo2
+
+define void @foo3() #0 {
+entry:
+  %call = call float @fv()
+  store float %call, float* @x, align 4
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @foo3
+; 32: 	.ent	foo3
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	foo3
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+define void @vv() #0 {
+entry:
+  ret void
+}
+
+; 32: 	.set	mips16                  # @vv
+; 32: 	.ent	vv
+
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	vv
+
+
+
diff --git a/test/CodeGen/Mips/fpnotneeded.ll b/test/CodeGen/Mips/fpnotneeded.ll
new file mode 100644
index 000000000000..dc2ec10817f3
--- /dev/null
+++ b/test/CodeGen/Mips/fpnotneeded.ll
@@ -0,0 +1,77 @@
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-os16  | FileCheck %s -check-prefix=32
+
+@i = global i32 1, align 4
+@f = global float 1.000000e+00, align 4
+
+define void @vv() #0 {
+entry:
+  ret void
+}
+
+; 32: 	.set	mips16                  # @vv
+; 32: 	.ent	vv
+
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	vv
+
+define i32 @iv() #0 {
+entry:
+  %0 = load i32* @i, align 4
+  ret i32 %0
+}
+
+; 32: 	.set	mips16                  # @iv
+; 32: 	.ent	iv
+
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	iv
+
+define void @vif(i32 %i, float %f) #0 {
+entry:
+  %i.addr = alloca i32, align 4
+  %f.addr = alloca float, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store float %f, float* %f.addr, align 4
+  ret void
+}
+
+; 32: 	.set	mips16                  # @vif
+; 32: 	.ent	vif
+
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	vif
+
+define void @foo() #0 {
+entry:
+  store float 2.000000e+00, float* @f, align 4
+  ret void
+}
+
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
+define float @fv() #0 {
+entry:
+  ret float 1.000000e+00
+}
+
+; 32: 	.set	nomips16                  # @fv
+; 32: 	.ent	fv
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	fv
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
index 1c7c4437b892..a08a0243b8b9 100644
--- a/test/CodeGen/Mips/inlineasmmemop.ll
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s
 
+; Simple memory
 @g1 = external global i32
 
 define i32 @f1(i32 %x) nounwind {
@@ -21,3 +22,42 @@ entry:
   ret i32 %0
 }
 
+; "D": Second word of double word. This works for any memory element
+; double or single.
+; CHECK: #APP
+; CHECK-NEXT: lw ${{[0-9]+}},4(${{[0-9]+}});
+; CHECK-NEXT: #NO_APP
+
+; No "D": First word of double word. This works for any memory element 
+; double or single.
+; CHECK: #APP
+; CHECK-NEXT: lw ${{[0-9]+}},0(${{[0-9]+}});
+; CHECK-NEXT: #NO_APP
+
+;int b[8] = {0,1,2,3,4,5,6,7};
+;int main()
+;{
+;  int i;
+; 
+;  // The first word. Notice, no 'D'
+;  { asm (
+;    "lw    %0,%1;\n"
+;    : "=r" (i) : "m" (*(b+4)));}
+; 
+;  // The second word
+;  { asm (
+;    "lw    %0,%D1;\n"
+;    : "=r" (i) "m" (*(b+4)));}
+;}
+
+@b = common global [20 x i32] zeroinitializer, align 4
+
+define void @main() {
+entry:
+  tail call void asm sideeffect "    lw    $0,${1:D};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32]* @b, i32 0, i32 3))
+  tail call void asm sideeffect "    lw    $0,${1};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32]* @b, i32 0, i32 3))
+  ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg
index 0587d3243e6b..e157c540b538 100644
--- a/test/CodeGen/Mips/lit.local.cfg
+++ b/test/CodeGen/Mips/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
 
 targets = set(config.root.targets_to_build.split())
 if not 'Mips' in targets:
diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll
index 0aeabb30e289..0dbb2c27b8f9 100644
--- a/test/CodeGen/Mips/madd-msub.ll
+++ b/test/CodeGen/Mips/madd-msub.ll
@@ -1,6 +1,9 @@
-; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s -check-prefix=DSP
+; RUN: llc -march=mips -mcpu=mips16 < %s
 
-; CHECK: madd 
+; 32: madd ${{[0-9]+}}
+; DSP: madd $ac
 define i64 @madd1(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = sext i32 %a to i64
@@ -11,7 +14,8 @@ entry:
   ret i64 %add
 }
 
-; CHECK: maddu
+; 32: maddu ${{[0-9]+}}
+; DSP: maddu $ac
 define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = zext i32 %a to i64
@@ -22,7 +26,8 @@ entry:
   ret i64 %add
 }
 
-; CHECK: madd
+; 32: madd ${{[0-9]+}}
+; DSP: madd $ac
 define i64 @madd3(i32 %a, i32 %b, i64 %c) nounwind readnone {
 entry:
   %conv = sext i32 %a to i64
@@ -32,7 +37,8 @@ entry:
   ret i64 %add
 }
 
-; CHECK: msub
+; 32: msub ${{[0-9]+}}
+; DSP: msub $ac
 define i64 @msub1(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = sext i32 %c to i64
@@ -43,7 +49,8 @@ entry:
   ret i64 %sub
 }
 
-; CHECK: msubu 
+; 32: msubu ${{[0-9]+}}
+; DSP: msubu $ac
 define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = zext i32 %c to i64
@@ -54,7 +61,8 @@ entry:
   ret i64 %sub
 }
 
-; CHECK: msub 
+; 32: msub ${{[0-9]+}}
+; DSP: msub $ac
 define i64 @msub3(i32 %a, i32 %b, i64 %c) nounwind readnone {
 entry:
   %conv = sext i32 %a to i64
diff --git a/test/CodeGen/Mips/mips16_32_1.ll b/test/CodeGen/Mips/mips16_32_1.ll
new file mode 100644
index 000000000000..6f4826ea9600
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_1.ll
@@ -0,0 +1,14 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s -mips-mixed-16-32  | FileCheck %s 
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=pic -O3 < %s -mips-mixed-16-32  | FileCheck %s 
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; CHECK: 	.set	mips16                  # @foo
+; CHECK:	.ent	foo
+; CHECK:	save	{{.+}}
+; CHECK:	restore	{{.+}} 
+; CHECK:	.end	foo
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_10.ll b/test/CodeGen/Mips/mips16_32_10.ll
new file mode 100644
index 000000000000..330dbfec63b9
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_10.ll
@@ -0,0 +1,59 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+; 16: 	.set	nomips16                  # @foo
+; 16: 	.ent	foo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	foo
+
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @nofoo
+; 16: 	.ent	nofoo
+
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	nofoo
+
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	nomips16                  # @main
+; 16: 	.ent	main
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	main
+
+
+
+
+
+
+
+
+
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false"  "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_3.ll b/test/CodeGen/Mips/mips16_32_3.ll
new file mode 100644
index 000000000000..8874a8872534
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_3.ll
@@ -0,0 +1,70 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @foo
+; 16: 	.ent	foo
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	foo
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.ent	nofoo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	nofoo
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	mips16                  # @main
+; 16: 	.ent	main
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	main
+; 32: 	.set	nomips16                  # @main
+; 32: 	.ent	main
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	addiu	$2, $zero, 0
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	main
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_4.ll b/test/CodeGen/Mips/mips16_32_4.ll
new file mode 100644
index 000000000000..cdaed6c71be0
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_4.ll
@@ -0,0 +1,65 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @foo
+; 16: 	.ent	foo
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	foo
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.ent	nofoo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	nofoo
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	mips16                  # @main
+; 16: 	.ent	main
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	main
+; 32: 	.set	mips16                  # @main
+; 32: 	.ent	main
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	main
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_5.ll b/test/CodeGen/Mips/mips16_32_5.ll
new file mode 100644
index 000000000000..45e0bf49ddd2
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_5.ll
@@ -0,0 +1,80 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @foo
+; 16: 	.ent	foo
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	foo
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.ent	nofoo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	nofoo
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	nomips16                  # @main
+; 16: 	.ent	main
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	addiu	$2, $zero, 0
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	main
+
+; 32: 	.set	nomips16                  # @main
+; 32: 	.ent	main
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	addiu	$2, $zero, 0
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	main
+
+
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_6.ll b/test/CodeGen/Mips/mips16_32_6.ll
new file mode 100644
index 000000000000..f4b8e7a91adc
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_6.ll
@@ -0,0 +1,86 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @foo
+; 16: 	.ent	foo
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	foo
+; 32: 	.set	nomips16                  # @foo
+; 32: 	.ent	foo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end    foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.ent	nofoo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	nofoo
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	nomips16                  # @main
+; 16: 	.ent	main
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	addiu	$2, $zero, 0
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	main
+
+; 32: 	.set	nomips16                  # @main
+; 32: 	.ent	main
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	addiu	$2, $zero, 0
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	main
+
+
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false"  "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_7.ll b/test/CodeGen/Mips/mips16_32_7.ll
new file mode 100644
index 000000000000..f8726eadc70c
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_7.ll
@@ -0,0 +1,76 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @foo
+; 16: 	.ent	foo
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	foo
+; 32: 	.set	nomips16                  # @foo
+; 32: 	.ent	foo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end    foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.ent	nofoo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	nofoo
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	mips16                  # @main
+; 16: 	.ent	main
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	main
+
+; 32: 	.set	mips16                  # @main
+; 32: 	.ent	main
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	main
+
+
+
+
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false"  "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_8.ll b/test/CodeGen/Mips/mips16_32_8.ll
new file mode 100644
index 000000000000..e51f296f9df3
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_8.ll
@@ -0,0 +1,74 @@
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+@x = global float 1.000000e+00, align 4
+@y = global float 0x4007333340000000, align 4
+@i = common global i32 0, align 4
+@f = common global float 0.000000e+00, align 4
+@.str = private unnamed_addr constant [8 x i8] c"f = %f\0A\00", align 1
+@.str1 = private unnamed_addr constant [11 x i8] c"hello %i \0A\00", align 1
+@.str2 = private unnamed_addr constant [13 x i8] c"goodbye %i \0A\00", align 1
+
+define void @foo() #0 {
+entry:
+  store i32 10, i32* @i, align 4
+  ret void
+}
+
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+
+define void @nofoo() #1 {
+entry:
+  store i32 20, i32* @i, align 4
+  %0 = load float* @x, align 4
+  %1 = load float* @y, align 4
+  %add = fadd float %0, %1
+  store float %add, float* @f, align 4
+  %2 = load float* @f, align 4
+  %conv = fpext float %2 to double
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), double %conv)
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	add.s	{{.+}}
+; 32:	mfc1    {{.+}}
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+declare i32 @printf(i8*, ...) #2
+
+define i32 @main() #3 {
+entry:
+  call void @foo()
+  %0 = load i32* @i, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str1, i32 0, i32 0), i32 %0)
+  call void @nofoo()
+  %1 = load i32* @i, align 4
+  %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str2, i32 0, i32 0), i32 %1)
+  ret i32 0
+}
+
+; 32: 	.set	nomips16                  # @main
+; 32: 	.ent	main
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	main
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_9.ll b/test/CodeGen/Mips/mips16_32_9.ll
new file mode 100644
index 000000000000..f5ff36849015
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_9.ll
@@ -0,0 +1,51 @@
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 32: 	.set	mips16                  # @main
+; 32: 	.ent	main
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	main
+
+
+
+
+
+
+
+
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false"  "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/NVPTX/annotations.ll b/test/CodeGen/NVPTX/annotations.ll
index d93f688ef1fd..39d52d382663 100644
--- a/test/CodeGen/NVPTX/annotations.ll
+++ b/test/CodeGen/NVPTX/annotations.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 
diff --git a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll b/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
deleted file mode 100644
index 73c77f56bc9c..000000000000
--- a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
-
-;; These tests should run for all targets
-
-;;===-- Basic instruction selection tests ---------------------------------===;;
-
-
-;;; f64
-
-define double @fadd_f64(double %a, double %b) {
-; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
-; CHECK: ret
-  %ret = fadd double %a, %b
-  ret double %ret
-}
-
-define double @fsub_f64(double %a, double %b) {
-; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
-; CHECK: ret
-  %ret = fsub double %a, %b
-  ret double %ret
-}
-
-define double @fmul_f64(double %a, double %b) {
-; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
-; CHECK: ret
-  %ret = fmul double %a, %b
-  ret double %ret
-}
-
-define double @fdiv_f64(double %a, double %b) {
-; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
-; CHECK: ret
-  %ret = fdiv double %a, %b
-  ret double %ret
-}
-
-;; PTX does not have a floating-point rem instruction
-
-
-;;; f32
-
-define float @fadd_f32(float %a, float %b) {
-; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret
-  %ret = fadd float %a, %b
-  ret float %ret
-}
-
-define float @fsub_f32(float %a, float %b) {
-; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret
-  %ret = fsub float %a, %b
-  ret float %ret
-}
-
-define float @fmul_f32(float %a, float %b) {
-; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret
-  %ret = fmul float %a, %b
-  ret float %ret
-}
-
-define float @fdiv_f32(float %a, float %b) {
-; CHECK: div.full.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret
-  %ret = fdiv float %a, %b
-  ret float %ret
-}
-
-;; PTX does not have a floating-point rem instruction
diff --git a/test/CodeGen/NVPTX/arithmetic-int.ll b/test/CodeGen/NVPTX/arithmetic-int.ll
index 529f84900afd..8d73b7e6c4c6 100644
--- a/test/CodeGen/NVPTX/arithmetic-int.ll
+++ b/test/CodeGen/NVPTX/arithmetic-int.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 
diff --git a/test/CodeGen/NVPTX/calling-conv.ll b/test/CodeGen/NVPTX/calling-conv.ll
index 968203e5f70e..190a1462adbc 100644
--- a/test/CodeGen/NVPTX/calling-conv.ll
+++ b/test/CodeGen/NVPTX/calling-conv.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 
diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll
index 12fc7548212c..16af0a336ddc 100644
--- a/test/CodeGen/NVPTX/compare-int.ll
+++ b/test/CodeGen/NVPTX/compare-int.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 
diff --git a/test/CodeGen/NVPTX/convert-fp.ll b/test/CodeGen/NVPTX/convert-fp.ll
index 21c84379b062..1882121fa724 100644
--- a/test/CodeGen/NVPTX/convert-fp.ll
+++ b/test/CodeGen/NVPTX/convert-fp.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 
diff --git a/test/CodeGen/NVPTX/convert-int-sm10.ll b/test/CodeGen/NVPTX/convert-int-sm10.ll
deleted file mode 100644
index 20716f982e3b..000000000000
--- a/test/CodeGen/NVPTX/convert-int-sm10.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
-
-
-; i16
-
-define i16 @cvt_i16_i32(i32 %x) {
-; CHECK: cvt.u16.u32 %rs{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: ret
-  %a = trunc i32 %x to i16
-  ret i16 %a
-}
-
-define i16 @cvt_i16_i64(i64 %x) {
-; CHECK: cvt.u16.u64 %rs{{[0-9]+}}, %rl{{[0-9]+}}
-; CHECK: ret
-  %a = trunc i64 %x to i16
-  ret i16 %a
-}
-
-
-
-; i32
-
-define i32 @cvt_i32_i16(i16 %x) {
-; CHECK: cvt.u32.u16 %r{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: ret
-  %a = zext i16 %x to i32
-  ret i32 %a
-}
-
-define i32 @cvt_i32_i64(i64 %x) {
-; CHECK: cvt.u32.u64 %r{{[0-9]+}}, %rl{{[0-9]+}}
-; CHECK: ret
-  %a = trunc i64 %x to i32
-  ret i32 %a
-}
-
-
-
-; i64
-
-define i64 @cvt_i64_i16(i16 %x) {
-; CHECK: cvt.u64.u16 %rl{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: ret
-  %a = zext i16 %x to i64
-  ret i64 %a
-}
-
-define i64 @cvt_i64_i32(i32 %x) {
-; CHECK: cvt.u64.u32 %rl{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: ret
-  %a = zext i32 %x to i64
-  ret i64 %a
-}
diff --git a/test/CodeGen/NVPTX/intrinsic-old.ll b/test/CodeGen/NVPTX/intrinsic-old.ll
index 1c9879c4178b..53a28f333798 100644
--- a/test/CodeGen/NVPTX/intrinsic-old.ll
+++ b/test/CodeGen/NVPTX/intrinsic-old.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 
diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll
index afab60ca96a8..8b0357be87cb 100644
--- a/test/CodeGen/NVPTX/intrinsics.ll
+++ b/test/CodeGen/NVPTX/intrinsics.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 
diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll
index d1f5093df223..3265868d3c52 100644
--- a/test/CodeGen/NVPTX/ld-addrspace.ll
+++ b/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
 
 
diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll
new file mode 100644
index 000000000000..0d02194651e3
--- /dev/null
+++ b/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0
+; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1
+
+@str = private addrspace(4) unnamed_addr constant [8 x i8] c"USE_MUL\00"
+
+declare i32 @__nvvm_reflect(i8*)
+declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
+
+define float @foo(float %a, float %b) {
+; USE_MUL_0: define float @foo
+; USE_MUL_0-NOT: call i32 @__nvvm_reflect
+; USE_MUL_1: define float @foo
+; USE_MUL_1-NOT: call i32 @__nvvm_reflect
+  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0))
+  %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
+  %cmp = icmp ugt i32 %reflect, 0
+  br i1 %cmp, label %use_mul, label %use_add
+
+use_mul:
+; USE_MUL_1: fmul float %a, %b
+; USE_MUL_0-NOT: fadd float %a, %b
+  %ret1 = fmul float %a, %b
+  br label %exit
+
+use_add:
+; USE_MUL_0: fadd float %a, %b
+; USE_MUL_1-NOT: fmul float %a, %b
+  %ret2 = fadd float %a, %b
+  br label %exit
+
+exit:
+  %ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
+  ret float %ret
+}
diff --git a/test/CodeGen/NVPTX/sched1.ll b/test/CodeGen/NVPTX/sched1.ll
new file mode 100644
index 000000000000..03ab635e73b9
--- /dev/null
+++ b/test/CodeGen/NVPTX/sched1.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; Ensure source scheduling is working
+
+define void @foo(i32* %a) {
+; CHECK: .func foo
+; CHECK: ld.u32
+; CHECK-NEXT: ld.u32
+; CHECK-NEXT: ld.u32
+; CHECK-NEXT: ld.u32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+  %ptr0 = getelementptr i32* %a, i32 0
+  %val0 = load i32* %ptr0
+  %ptr1 = getelementptr i32* %a, i32 1
+  %val1 = load i32* %ptr1
+  %ptr2 = getelementptr i32* %a, i32 2
+  %val2 = load i32* %ptr2
+  %ptr3 = getelementptr i32* %a, i32 3
+  %val3 = load i32* %ptr3
+
+  %t0 = add i32 %val0, %val1
+  %t1 = add i32 %t0, %val2
+  %t2 = add i32 %t1, %val3
+
+  store i32 %t2, i32* %a
+
+  ret void
+}
+
diff --git a/test/CodeGen/NVPTX/sched2.ll b/test/CodeGen/NVPTX/sched2.ll
new file mode 100644
index 000000000000..71a9a4963faf
--- /dev/null
+++ b/test/CodeGen/NVPTX/sched2.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+define void @foo(<2 x i32>* %a) {
+; CHECK: .func foo
+; CHECK: ld.v2.u32
+; CHECK-NEXT: ld.v2.u32
+; CHECK-NEXT: ld.v2.u32
+; CHECK-NEXT: ld.v2.u32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+  %ptr0 = getelementptr <2 x i32>* %a, i32 0
+  %val0 = load <2 x i32>* %ptr0
+  %ptr1 = getelementptr <2 x i32>* %a, i32 1
+  %val1 = load <2 x i32>* %ptr1
+  %ptr2 = getelementptr <2 x i32>* %a, i32 2
+  %val2 = load <2 x i32>* %ptr2
+  %ptr3 = getelementptr <2 x i32>* %a, i32 3
+  %val3 = load <2 x i32>* %ptr3
+
+  %t0 = add <2 x i32> %val0, %val1
+  %t1 = add <2 x i32> %t0, %val2
+  %t2 = add <2 x i32> %t1, %val3
+
+  store <2 x i32> %t2, <2 x i32>* %a
+
+  ret void
+}
+
diff --git a/test/CodeGen/NVPTX/sm-version-10.ll b/test/CodeGen/NVPTX/sm-version-10.ll
deleted file mode 100644
index 9324a3780986..000000000000
--- a/test/CodeGen/NVPTX/sm-version-10.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
-
-
-; CHECK: .target sm_10
-
diff --git a/test/CodeGen/NVPTX/sm-version-11.ll b/test/CodeGen/NVPTX/sm-version-11.ll
deleted file mode 100644
index 9033a4eba5e4..000000000000
--- a/test/CodeGen/NVPTX/sm-version-11.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_11 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_11 | FileCheck %s
-
-
-; CHECK: .target sm_11
-
diff --git a/test/CodeGen/NVPTX/sm-version-12.ll b/test/CodeGen/NVPTX/sm-version-12.ll
deleted file mode 100644
index d8ee85c9010e..000000000000
--- a/test/CodeGen/NVPTX/sm-version-12.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_12 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_12 | FileCheck %s
-
-
-; CHECK: .target sm_12
-
diff --git a/test/CodeGen/NVPTX/sm-version-13.ll b/test/CodeGen/NVPTX/sm-version-13.ll
deleted file mode 100644
index ad67d642ce30..000000000000
--- a/test/CodeGen/NVPTX/sm-version-13.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_13 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_13 | FileCheck %s
-
-
-; CHECK: .target sm_13
-
diff --git a/test/CodeGen/NVPTX/st-addrspace.ll b/test/CodeGen/NVPTX/st-addrspace.ll
index 54e04ae6106d..0b26d802df84 100644
--- a/test/CodeGen/NVPTX/st-addrspace.ll
+++ b/test/CodeGen/NVPTX/st-addrspace.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
 
 
diff --git a/test/CodeGen/NVPTX/tuple-literal.ll b/test/CodeGen/NVPTX/tuple-literal.ll
index 5c0cb2c15c2b..2b1f2c4b6680 100644
--- a/test/CodeGen/NVPTX/tuple-literal.ll
+++ b/test/CodeGen/NVPTX/tuple-literal.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_13
+; RUN: llc < %s -march=nvptx -mcpu=sm_20
 
 define ptx_device void @test_function({i8, i8}*) {
   ret void
diff --git a/test/CodeGen/NVPTX/vector-args.ll b/test/CodeGen/NVPTX/vector-args.ll
new file mode 100644
index 000000000000..80deae46935a
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-args.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+define float @foo(<2 x float> %a) {
+; CHECK: .func (.param .b32 func_retval0) foo
+; CHECK: .param .align 8 .b8 foo_param_0[8]
+; CHECK: ld.param.f32 %f{{[0-9]+}}
+; CHECK: ld.param.f32 %f{{[0-9]+}}
+  %t1 = fmul <2 x float> %a, %a
+  %t2 = extractelement <2 x float> %t1, i32 0
+  %t3 = extractelement <2 x float> %t1, i32 1
+  %t4 = fadd float %t2, %t3
+  ret float %t4
+}
+
+
+define float @bar(<4 x float> %a) {
+; CHECK: .func (.param .b32 func_retval0) bar
+; CHECK: .param .align 16 .b8 bar_param_0[16]
+; CHECK: ld.param.f32 %f{{[0-9]+}}
+; CHECK: ld.param.f32 %f{{[0-9]+}}
+  %t1 = fmul <4 x float> %a, %a
+  %t2 = extractelement <4 x float> %t1, i32 0
+  %t3 = extractelement <4 x float> %t1, i32 1
+  %t4 = fadd float %t2, %t3
+  ret float %t4
+}
diff --git a/test/CodeGen/NVPTX/vector-loads.ll b/test/CodeGen/NVPTX/vector-loads.ll
index f5a1795e3c28..58882bf16668 100644
--- a/test/CodeGen/NVPTX/vector-loads.ll
+++ b/test/CodeGen/NVPTX/vector-loads.ll
@@ -9,7 +9,7 @@
 
 define void @foo(<2 x float>* %a) {
 ; CHECK: .func foo
-; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
+; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
   %t1 = load <2 x float>* %a
   %t2 = fmul <2 x float> %t1, %t1
   store <2 x float> %t2, <2 x float>* %a
@@ -18,7 +18,7 @@ define void @foo(<2 x float>* %a) {
 
 define void @foo2(<4 x float>* %a) {
 ; CHECK: .func foo2
-; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
+; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
   %t1 = load <4 x float>* %a
   %t2 = fmul <4 x float> %t1, %t1
   store <4 x float> %t2, <4 x float>* %a
@@ -27,8 +27,8 @@ define void @foo2(<4 x float>* %a) {
 
 define void @foo3(<8 x float>* %a) {
 ; CHECK: .func foo3
-; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
-; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}+16];
+; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
   %t1 = load <8 x float>* %a
   %t2 = fmul <8 x float> %t1, %t1
   store <8 x float> %t2, <8 x float>* %a
@@ -39,7 +39,7 @@ define void @foo3(<8 x float>* %a) {
 
 define void @foo4(<2 x i32>* %a) {
 ; CHECK: .func foo4
-; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
+; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}
   %t1 = load <2 x i32>* %a
   %t2 = mul <2 x i32> %t1, %t1
   store <2 x i32> %t2, <2 x i32>* %a
@@ -48,7 +48,7 @@ define void @foo4(<2 x i32>* %a) {
 
 define void @foo5(<4 x i32>* %a) {
 ; CHECK: .func foo5
-; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
+; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
   %t1 = load <4 x i32>* %a
   %t2 = mul <4 x i32> %t1, %t1
   store <4 x i32> %t2, <4 x i32>* %a
@@ -57,8 +57,8 @@ define void @foo5(<4 x i32>* %a) {
 
 define void @foo6(<8 x i32>* %a) {
 ; CHECK: .func foo6
-; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
-; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}+16];
+; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
   %t1 = load <8 x i32>* %a
   %t2 = mul <8 x i32> %t1, %t1
   store <8 x i32> %t2, <8 x i32>* %a
diff --git a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
index ea7de9847ea7..ba5c8c172f11 100644
--- a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
+++ b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -march=ppc64 | grep lwzx
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+; Temporarily XFAIL this test until LSA stops creating single-use
+; virtual base registers.
+; XFAIL: *
 
         %struct.__db_region = type { %struct.__mutex_t, [4 x i8], %struct.anon, i32, [1 x i32] }
         %struct.__mutex_t = type { i32 }
@@ -11,6 +15,10 @@ entry:
         %tmp = load i32* %ttype, align 4                ; <i32> [#uses=1]
         %tmp1 = call i32 (...)* @bork( i32 %tmp )               ; <i32> [#uses=0]
         ret void
+
+; CHECK: @foo
+; CHECK: lwzx
+; CHECK: blr
 }
 
 declare i32 @bork(...)
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index 0da6e4351bd2..097611a7619c 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -2,21 +2,21 @@
 ; ModuleID = 'hh.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
 target triple = "powerpc-apple-darwin9.6"
-; This formerly used R0 for both the stack address and CR.
 
 define void @foo() nounwind {
 entry:
-;CHECK:  mfcr r0
-;CHECK:  lis r2, 1
-;CHECK:  rlwinm r0, r0, 8, 0, 31
-;CHECK:  ori r2, r2, 34540
-;CHECK:  stwx r0, r1, r2
-; Make sure that the register scavenger returns the same temporary register.
-;CHECK:  lis r2, 1
-;CHECK:  mfcr r0
-;CHECK:  ori r2, r2, 34536
-;CHECK:  rlwinm r0, r0, 12, 0, 31
-;CHECK:  stwx r0, r1, r2
+; Note that part of what is being checked here is proper register reuse.
+; CHECK: mfcr [[T1:r[0-9]+]]                         ; cr2
+; CHECK: lis [[T2:r[0-9]+]], 1
+; CHECK: addi r3, r1, 72
+; CHECK: rlwinm [[T1]], [[T1]], 8, 0, 31
+; CHECK: ori [[T2]], [[T2]], 34540
+; CHECK: stwx [[T1]], r1, [[T2]]
+; CHECK: lis [[T3:r[0-9]+]], 1
+; CHECK: mfcr [[T4:r[0-9]+]]                         ; cr3
+; CHECK: ori [[T3]], [[T3]], 34536
+; CHECK: rlwinm [[T4]], [[T4]], 12, 0, 31
+; CHECK: stwx [[T4]], r1, [[T3]]
   %x = alloca [100000 x i8]                       ; <[100000 x i8]*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %x1 = bitcast [100000 x i8]* %x to i8*          ; <i8*> [#uses=1]
@@ -25,11 +25,16 @@ entry:
   br label %return
 
 return:                                           ; preds = %entry
-;CHECK:  lis r2, 1
-;CHECK:  ori r2, r2, 34540
-;CHECK:  lwzx r0, r1, r2
-;CHECK:  rlwinm r0, r0, 24, 0, 31
-;CHECK:  mtcrf 32, r0
+; CHECK: lis [[T1:r[0-9]+]], 1
+; CHECK: ori [[T1]], [[T1]], 34536
+; CHECK: lwzx [[T1]], r1, [[T1]]
+; CHECK: rlwinm [[T1]], [[T1]], 20, 0, 31
+; CHECK: mtcrf 16, [[T1]]
+; CHECK: lis [[T1]], 1
+; CHECK: ori [[T1]], [[T1]], 34540
+; CHECK: lwzx [[T1]], r1, [[T1]]
+; CHECK: rlwinm [[T1]], [[T1]], 24, 0, 31
+; CHECK: mtcrf 32, [[T1]]
   ret void
 }
 
diff --git a/test/CodeGen/PowerPC/DbgValueOtherTargets.test b/test/CodeGen/PowerPC/DbgValueOtherTargets.test
index b1b338776b43..9702934f7e68 100644
--- a/test/CodeGen/PowerPC/DbgValueOtherTargets.test
+++ b/test/CodeGen/PowerPC/DbgValueOtherTargets.test
@@ -1 +1 @@
-; RUN: llc -O0 -march=ppc32 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=ppc32 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/PowerPC/allocate-r0.ll b/test/CodeGen/PowerPC/allocate-r0.ll
new file mode 100644
index 000000000000..1cf4cec07695
--- /dev/null
+++ b/test/CodeGen/PowerPC/allocate-r0.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @foo(i64 %a) nounwind {
+entry:
+  call void asm sideeffect "", "~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12}"() nounwind
+  br label %return
+
+; CHECK: @foo
+; Because r0 is allocatable, we can use it to hold r3 without spilling.
+; CHECK: mr 0, 3
+; CHECK: mr 3, 0
+
+return:                                           ; preds = %entry
+  ret i64 %a
+}
+
diff --git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll
new file mode 100644
index 000000000000..d04a6c98ee19
--- /dev/null
+++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; This tests that the GPRC/GPRC_NOR0 intersection subclass relationship with
+; GPRC is handled correctly. When it was not, this test would assert.
+
+@gen_random.last = external unnamed_addr global i64, align 8
+@.str = external unnamed_addr constant [4 x i8], align 1
+
+declare double @gen_random(double) #0
+
+declare void @benchmark_heapsort(i32 signext, double* nocapture) #0
+
+define signext i32 @main(i32 signext %argc, i8** nocapture %argv) #0 {
+entry:
+  br i1 undef, label %cond.true, label %cond.end
+
+cond.true:                                        ; preds = %entry
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %entry
+  %cond = phi i32 [ 0, %cond.true ], [ 8000000, %entry ]
+  %add = add i32 %cond, 1
+  %conv = sext i32 %add to i64
+  %mul = shl nsw i64 %conv, 3
+  %call1 = tail call noalias i8* @malloc(i64 %mul) #1
+  br i1 undef, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %cond.end
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %add
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %cond.end
+  ret i32 0
+}
+
+declare noalias i8* @malloc(i64) #0
+
+declare signext i32 @printf(i8* nocapture, ...) #0
+
+declare void @free(i8* nocapture) #0
+
+declare i64 @strtol(i8*, i8** nocapture, i32 signext) #0
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index cbfa4094fb4e..838db20ddd1b 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=ppc32 |  FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 |  FileCheck %s
 
 define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
 ; CHECK: exchange_and_add:
-; CHECK: lwarx
+; CHECK: lwarx {{r[0-9]+}}, 0, {{r[0-9]+}}
   %tmp = atomicrmw add i32* %mem, i32 %val monotonic
-; CHECK: stwcx.
+; CHECK: stwcx. {{r[0-9]+}}, 0, {{r[0-9]+}}
   ret i32 %tmp
 }
 
diff --git a/test/CodeGen/PowerPC/bdzlr.ll b/test/CodeGen/PowerPC/bdzlr.ll
new file mode 100644
index 000000000000..656a85860df0
--- /dev/null
+++ b/test/CodeGen/PowerPC/bdzlr.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.lua_TValue.17.692 = type { %union.Value.16.691, i32 }
+%union.Value.16.691 = type { %union.GCObject.15.690* }
+%union.GCObject.15.690 = type { %struct.lua_State.14.689 }
+%struct.lua_State.14.689 = type { %union.GCObject.15.690*, i8, i8, i8, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.global_State.10.685*, %struct.CallInfo.11.686*, i32*, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.CallInfo.11.686*, %struct.CallInfo.11.686*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State.14.689*, %struct.lua_Debug.12.687*)*, %struct.lua_TValue.17.692, %struct.lua_TValue.17.692, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.lua_longjmp.13.688*, i64 }
+%struct.global_State.10.685 = type { %struct.stringtable.0.675, i8* (i8*, i8*, i64, i64)*, i8*, i8, i8, i32, %union.GCObject.15.690*, %union.GCObject.15.690**, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.Mbuffer.1.676, i64, i64, i64, i64, i32, i32, i32 (%struct.lua_State.14.689*)*, %struct.lua_TValue.17.692, %struct.lua_State.14.689*, %struct.UpVal.3.678, [9 x %struct.Table.7.682*], [17 x %union.TString.9.684*] }
+%struct.stringtable.0.675 = type { %union.GCObject.15.690**, i32, i32 }
+%struct.Mbuffer.1.676 = type { i8*, i64, i64 }
+%struct.UpVal.3.678 = type { %union.GCObject.15.690*, i8, i8, %struct.lua_TValue.17.692*, %union.anon.2.677 }
+%union.anon.2.677 = type { %struct.lua_TValue.17.692 }
+%struct.Table.7.682 = type { %union.GCObject.15.690*, i8, i8, i8, i8, %struct.Table.7.682*, %struct.lua_TValue.17.692*, %struct.Node.6.681*, %struct.Node.6.681*, %union.GCObject.15.690*, i32 }
+%struct.Node.6.681 = type { %struct.lua_TValue.17.692, %union.TKey.5.680 }
+%union.TKey.5.680 = type { %struct.anon.0.4.679 }
+%struct.anon.0.4.679 = type { %union.Value.16.691, i32, %struct.Node.6.681* }
+%union.TString.9.684 = type { %struct.anon.1.8.683 }
+%struct.anon.1.8.683 = type { %union.GCObject.15.690*, i8, i8, i8, i32, i64 }
+%struct.CallInfo.11.686 = type { %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, i32*, i32, i32 }
+%struct.lua_Debug.12.687 = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 }
+%struct.lua_longjmp.13.688 = type opaque
+
+define void @lua_xmove(i32 signext %n) #0 {
+entry:
+  br i1 undef, label %for.end, label %if.end
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %if.end
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.for.body_crit_edge, %for.body.lr.ph
+  %0 = phi %struct.lua_TValue.17.692* [ undef, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ]
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
+  %tt = getelementptr inbounds %struct.lua_TValue.17.692* %0, i64 %indvars.iv, i32 1
+  %1 = load i32* %tt, align 4, !tbaa !0
+  store i32 %1, i32* undef, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge
+
+for.body.for.body_crit_edge:                      ; preds = %for.body
+  %.pre = load %struct.lua_TValue.17.692** undef, align 8, !tbaa !3
+  br label %for.body
+
+for.end:                                          ; preds = %for.body, %if.end, %entry
+  ret void
+
+; CHECK: @lua_xmove
+; CHECK: bnelr
+; CHECK: bnelr
+; CHECK: bdzlr
+; CHECK-NOT: blr
+}
+
+attributes #0 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll
index 4f6bfc729913..53bbc52167c4 100644
--- a/test/CodeGen/PowerPC/bswap-load-store.ll
+++ b/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=ppc32 -mcpu=ppc32 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=ppc64 -mcpu=ppc64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=PWR7
+; RUN: llc < %s -march=ppc32 -mcpu=pwr7 | FileCheck %s -check-prefix=X32
 
 
 define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
@@ -34,18 +36,47 @@ define i16 @LHBRX(i8* %ptr, i32 %off) {
         ret i16 %tmp6
 }
 
+define void @STDBRX(i64 %i, i8* %ptr, i64 %off) {
+        %tmp1 = getelementptr i8* %ptr, i64 %off                ; <i8*> [#uses=1]
+        %tmp1.upgrd.1 = bitcast i8* %tmp1 to i64*               ; <i64*> [#uses=1]
+        %tmp13 = tail call i64 @llvm.bswap.i64( i64 %i )                ; <i64> [#uses=1]
+        store i64 %tmp13, i64* %tmp1.upgrd.1
+        ret void
+}
+
+define i64 @LDBRX(i8* %ptr, i64 %off) {
+        %tmp1 = getelementptr i8* %ptr, i64 %off                ; <i8*> [#uses=1]
+        %tmp1.upgrd.2 = bitcast i8* %tmp1 to i64*               ; <i64*> [#uses=1]
+        %tmp = load i64* %tmp1.upgrd.2          ; <i64> [#uses=1]
+        %tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp )              ; <i64> [#uses=1]
+        ret i64 %tmp14
+}
+
 declare i32 @llvm.bswap.i32(i32)
 
 declare i16 @llvm.bswap.i16(i16)
 
+declare i64 @llvm.bswap.i64(i64)
+
 
 ; X32: stwbrx
 ; X32: lwbrx
 ; X32: sthbrx
 ; X32: lhbrx
+; X32-NOT: ldbrx
+; X32-NOT: stdbrx
 
 ; X64: stwbrx
 ; X64: lwbrx
 ; X64: sthbrx
 ; X64: lhbrx
+; X64-NOT: ldbrx
+; X64-NOT: stdbrx
+
+; PWR7: stwbrx
+; PWR7: lwbrx
+; PWR7: sthbrx
+; PWR7: lhbrx
+; PWR7: stdbrx
+; PWR7: ldbrx
 
diff --git a/test/CodeGen/PowerPC/cr-spills.ll b/test/CodeGen/PowerPC/cr-spills.ll
new file mode 100644
index 000000000000..d6df7a237668
--- /dev/null
+++ b/test/CodeGen/PowerPC/cr-spills.ll
@@ -0,0 +1,409 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; This test case triggers several functions related to cr spilling, both in
+; frame lowering and to handle cr register pressure. When the register kill
+; flags were not being set correctly, this would cause the register scavenger to
+; assert.
+
+@SetupFastFullPelSearch.orig_pels = external unnamed_addr global [768 x i16], align 2
+@weight_luma = external global i32
+@offset_luma = external global i32
+@wp_luma_round = external global i32, align 4
+@luma_log_weight_denom = external global i32, align 4
+
+define void @SetupFastFullPelSearch() #0 {
+entry:
+  %mul10 = mul nsw i32 undef, undef
+  br i1 undef, label %land.end, label %land.lhs.true
+
+land.lhs.true:                                    ; preds = %entry
+  switch i32 0, label %land.end [
+    i32 0, label %land.rhs
+    i32 3, label %land.rhs
+  ]
+
+land.rhs:                                         ; preds = %land.lhs.true, %land.lhs.true
+  %tobool21 = icmp ne i32 undef, 0
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %land.lhs.true, %entry
+  %0 = phi i1 [ %tobool21, %land.rhs ], [ false, %land.lhs.true ], [ false, %entry ]
+  %cond = load i32** undef, align 8
+  br i1 undef, label %if.then95, label %for.body.lr.ph
+
+if.then95:                                        ; preds = %land.end
+  %cmp.i4.i1427 = icmp slt i32 undef, undef
+  br label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %if.then95, %land.end
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  br i1 undef, label %for.body, label %for.body252
+
+for.body252:                                      ; preds = %for.inc997, %for.body
+  %shl263 = add i32 undef, 80
+  br i1 %0, label %for.cond286.preheader, label %for.cond713.preheader
+
+for.cond286.preheader:                            ; preds = %for.body252
+  br label %for.cond290.preheader
+
+for.cond290.preheader:                            ; preds = %for.end520, %for.cond286.preheader
+  %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ]
+  %1 = load i32* undef, align 4, !tbaa !0
+  %2 = load i32* @weight_luma, align 4, !tbaa !0
+  %3 = load i32* @wp_luma_round, align 4, !tbaa !0
+  %4 = load i32* @luma_log_weight_denom, align 4, !tbaa !0
+  %5 = load i32* @offset_luma, align 4, !tbaa !0
+  %incdec.ptr502.sum = add i64 undef, 16
+  br label %for.body293
+
+for.body293:                                      ; preds = %for.body293, %for.cond290.preheader
+  %srcptr.41591 = phi i16* [ %srcptr.31595, %for.cond290.preheader ], [ undef, %for.body293 ]
+  %refptr.11590 = phi i16* [ undef, %for.cond290.preheader ], [ %add.ptr517, %for.body293 ]
+  %LineSadBlk0.01588 = phi i32 [ 0, %for.cond290.preheader ], [ %add346, %for.body293 ]
+  %LineSadBlk1.01587 = phi i32 [ 0, %for.cond290.preheader ], [ %add402, %for.body293 ]
+  %LineSadBlk3.01586 = phi i32 [ 0, %for.cond290.preheader ], [ %add514, %for.body293 ]
+  %LineSadBlk2.01585 = phi i32 [ 0, %for.cond290.preheader ], [ %add458, %for.body293 ]
+  %6 = load i16* %refptr.11590, align 2, !tbaa !3
+  %conv294 = zext i16 %6 to i32
+  %mul295 = mul nsw i32 %conv294, %2
+  %add296 = add nsw i32 %mul295, %3
+  %shr = ashr i32 %add296, %4
+  %add297 = add nsw i32 %shr, %5
+  %cmp.i.i1513 = icmp sgt i32 %add297, 0
+  %cond.i.i1514 = select i1 %cmp.i.i1513, i32 %add297, i32 0
+  %cmp.i4.i1515 = icmp slt i32 %cond.i.i1514, %1
+  %cond.i5.i1516 = select i1 %cmp.i4.i1515, i32 %cond.i.i1514, i32 %1
+  %7 = load i16* %srcptr.41591, align 2, !tbaa !3
+  %conv300 = zext i16 %7 to i32
+  %sub301 = sub nsw i32 %cond.i5.i1516, %conv300
+  %idxprom302 = sext i32 %sub301 to i64
+  %arrayidx303 = getelementptr inbounds i32* %cond, i64 %idxprom302
+  %8 = load i32* %arrayidx303, align 4, !tbaa !0
+  %add304 = add nsw i32 %8, %LineSadBlk0.01588
+  %9 = load i32* undef, align 4, !tbaa !0
+  %add318 = add nsw i32 %add304, %9
+  %10 = load i16* undef, align 2, !tbaa !3
+  %conv321 = zext i16 %10 to i32
+  %mul322 = mul nsw i32 %conv321, %2
+  %add323 = add nsw i32 %mul322, %3
+  %shr324 = ashr i32 %add323, %4
+  %add325 = add nsw i32 %shr324, %5
+  %cmp.i.i1505 = icmp sgt i32 %add325, 0
+  %cond.i.i1506 = select i1 %cmp.i.i1505, i32 %add325, i32 0
+  %cmp.i4.i1507 = icmp slt i32 %cond.i.i1506, %1
+  %cond.i5.i1508 = select i1 %cmp.i4.i1507, i32 %cond.i.i1506, i32 %1
+  %sub329 = sub nsw i32 %cond.i5.i1508, 0
+  %idxprom330 = sext i32 %sub329 to i64
+  %arrayidx331 = getelementptr inbounds i32* %cond, i64 %idxprom330
+  %11 = load i32* %arrayidx331, align 4, !tbaa !0
+  %add332 = add nsw i32 %add318, %11
+  %cmp.i.i1501 = icmp sgt i32 undef, 0
+  %cond.i.i1502 = select i1 %cmp.i.i1501, i32 undef, i32 0
+  %cmp.i4.i1503 = icmp slt i32 %cond.i.i1502, %1
+  %cond.i5.i1504 = select i1 %cmp.i4.i1503, i32 %cond.i.i1502, i32 %1
+  %incdec.ptr341 = getelementptr inbounds i16* %srcptr.41591, i64 4
+  %12 = load i16* null, align 2, !tbaa !3
+  %conv342 = zext i16 %12 to i32
+  %sub343 = sub nsw i32 %cond.i5.i1504, %conv342
+  %idxprom344 = sext i32 %sub343 to i64
+  %arrayidx345 = getelementptr inbounds i32* %cond, i64 %idxprom344
+  %13 = load i32* %arrayidx345, align 4, !tbaa !0
+  %add346 = add nsw i32 %add332, %13
+  %incdec.ptr348 = getelementptr inbounds i16* %refptr.11590, i64 5
+  %14 = load i16* null, align 2, !tbaa !3
+  %conv349 = zext i16 %14 to i32
+  %mul350 = mul nsw i32 %conv349, %2
+  %add351 = add nsw i32 %mul350, %3
+  %shr352 = ashr i32 %add351, %4
+  %add353 = add nsw i32 %shr352, %5
+  %cmp.i.i1497 = icmp sgt i32 %add353, 0
+  %cond.i.i1498 = select i1 %cmp.i.i1497, i32 %add353, i32 0
+  %cmp.i4.i1499 = icmp slt i32 %cond.i.i1498, %1
+  %cond.i5.i1500 = select i1 %cmp.i4.i1499, i32 %cond.i.i1498, i32 %1
+  %incdec.ptr355 = getelementptr inbounds i16* %srcptr.41591, i64 5
+  %15 = load i16* %incdec.ptr341, align 2, !tbaa !3
+  %conv356 = zext i16 %15 to i32
+  %sub357 = sub nsw i32 %cond.i5.i1500, %conv356
+  %idxprom358 = sext i32 %sub357 to i64
+  %arrayidx359 = getelementptr inbounds i32* %cond, i64 %idxprom358
+  %16 = load i32* %arrayidx359, align 4, !tbaa !0
+  %add360 = add nsw i32 %16, %LineSadBlk1.01587
+  %incdec.ptr362 = getelementptr inbounds i16* %refptr.11590, i64 6
+  %17 = load i16* %incdec.ptr348, align 2, !tbaa !3
+  %conv363 = zext i16 %17 to i32
+  %mul364 = mul nsw i32 %conv363, %2
+  %add365 = add nsw i32 %mul364, %3
+  %shr366 = ashr i32 %add365, %4
+  %add367 = add nsw i32 %shr366, %5
+  %cmp.i.i1493 = icmp sgt i32 %add367, 0
+  %cond.i.i1494 = select i1 %cmp.i.i1493, i32 %add367, i32 0
+  %cmp.i4.i1495 = icmp slt i32 %cond.i.i1494, %1
+  %cond.i5.i1496 = select i1 %cmp.i4.i1495, i32 %cond.i.i1494, i32 %1
+  %incdec.ptr369 = getelementptr inbounds i16* %srcptr.41591, i64 6
+  %18 = load i16* %incdec.ptr355, align 2, !tbaa !3
+  %conv370 = zext i16 %18 to i32
+  %sub371 = sub nsw i32 %cond.i5.i1496, %conv370
+  %idxprom372 = sext i32 %sub371 to i64
+  %arrayidx373 = getelementptr inbounds i32* %cond, i64 %idxprom372
+  %19 = load i32* %arrayidx373, align 4, !tbaa !0
+  %add374 = add nsw i32 %add360, %19
+  %incdec.ptr376 = getelementptr inbounds i16* %refptr.11590, i64 7
+  %20 = load i16* %incdec.ptr362, align 2, !tbaa !3
+  %conv377 = zext i16 %20 to i32
+  %mul378 = mul nsw i32 %conv377, %2
+  %add379 = add nsw i32 %mul378, %3
+  %shr380 = ashr i32 %add379, %4
+  %add381 = add nsw i32 %shr380, %5
+  %cmp.i.i1489 = icmp sgt i32 %add381, 0
+  %cond.i.i1490 = select i1 %cmp.i.i1489, i32 %add381, i32 0
+  %cmp.i4.i1491 = icmp slt i32 %cond.i.i1490, %1
+  %cond.i5.i1492 = select i1 %cmp.i4.i1491, i32 %cond.i.i1490, i32 %1
+  %incdec.ptr383 = getelementptr inbounds i16* %srcptr.41591, i64 7
+  %21 = load i16* %incdec.ptr369, align 2, !tbaa !3
+  %conv384 = zext i16 %21 to i32
+  %sub385 = sub nsw i32 %cond.i5.i1492, %conv384
+  %idxprom386 = sext i32 %sub385 to i64
+  %arrayidx387 = getelementptr inbounds i32* %cond, i64 %idxprom386
+  %22 = load i32* %arrayidx387, align 4, !tbaa !0
+  %add388 = add nsw i32 %add374, %22
+  %23 = load i16* %incdec.ptr376, align 2, !tbaa !3
+  %conv391 = zext i16 %23 to i32
+  %mul392 = mul nsw i32 %conv391, %2
+  %add395 = add nsw i32 0, %5
+  %cmp.i.i1485 = icmp sgt i32 %add395, 0
+  %cond.i.i1486 = select i1 %cmp.i.i1485, i32 %add395, i32 0
+  %cmp.i4.i1487 = icmp slt i32 %cond.i.i1486, %1
+  %cond.i5.i1488 = select i1 %cmp.i4.i1487, i32 %cond.i.i1486, i32 %1
+  %incdec.ptr397 = getelementptr inbounds i16* %srcptr.41591, i64 8
+  %24 = load i16* %incdec.ptr383, align 2, !tbaa !3
+  %conv398 = zext i16 %24 to i32
+  %sub399 = sub nsw i32 %cond.i5.i1488, %conv398
+  %idxprom400 = sext i32 %sub399 to i64
+  %arrayidx401 = getelementptr inbounds i32* %cond, i64 %idxprom400
+  %25 = load i32* %arrayidx401, align 4, !tbaa !0
+  %add402 = add nsw i32 %add388, %25
+  %incdec.ptr404 = getelementptr inbounds i16* %refptr.11590, i64 9
+  %cmp.i4.i1483 = icmp slt i32 undef, %1
+  %cond.i5.i1484 = select i1 %cmp.i4.i1483, i32 undef, i32 %1
+  %26 = load i16* %incdec.ptr397, align 2, !tbaa !3
+  %conv412 = zext i16 %26 to i32
+  %sub413 = sub nsw i32 %cond.i5.i1484, %conv412
+  %idxprom414 = sext i32 %sub413 to i64
+  %arrayidx415 = getelementptr inbounds i32* %cond, i64 %idxprom414
+  %27 = load i32* %arrayidx415, align 4, !tbaa !0
+  %add416 = add nsw i32 %27, %LineSadBlk2.01585
+  %incdec.ptr418 = getelementptr inbounds i16* %refptr.11590, i64 10
+  %28 = load i16* %incdec.ptr404, align 2, !tbaa !3
+  %conv419 = zext i16 %28 to i32
+  %mul420 = mul nsw i32 %conv419, %2
+  %add421 = add nsw i32 %mul420, %3
+  %shr422 = ashr i32 %add421, %4
+  %add423 = add nsw i32 %shr422, %5
+  %cmp.i.i1477 = icmp sgt i32 %add423, 0
+  %cond.i.i1478 = select i1 %cmp.i.i1477, i32 %add423, i32 0
+  %cmp.i4.i1479 = icmp slt i32 %cond.i.i1478, %1
+  %cond.i5.i1480 = select i1 %cmp.i4.i1479, i32 %cond.i.i1478, i32 %1
+  %incdec.ptr425 = getelementptr inbounds i16* %srcptr.41591, i64 10
+  %sub427 = sub nsw i32 %cond.i5.i1480, 0
+  %idxprom428 = sext i32 %sub427 to i64
+  %arrayidx429 = getelementptr inbounds i32* %cond, i64 %idxprom428
+  %29 = load i32* %arrayidx429, align 4, !tbaa !0
+  %add430 = add nsw i32 %add416, %29
+  %incdec.ptr432 = getelementptr inbounds i16* %refptr.11590, i64 11
+  %30 = load i16* %incdec.ptr418, align 2, !tbaa !3
+  %conv433 = zext i16 %30 to i32
+  %mul434 = mul nsw i32 %conv433, %2
+  %add435 = add nsw i32 %mul434, %3
+  %shr436 = ashr i32 %add435, %4
+  %add437 = add nsw i32 %shr436, %5
+  %cmp.i.i1473 = icmp sgt i32 %add437, 0
+  %cond.i.i1474 = select i1 %cmp.i.i1473, i32 %add437, i32 0
+  %cmp.i4.i1475 = icmp slt i32 %cond.i.i1474, %1
+  %cond.i5.i1476 = select i1 %cmp.i4.i1475, i32 %cond.i.i1474, i32 %1
+  %31 = load i16* %incdec.ptr425, align 2, !tbaa !3
+  %conv440 = zext i16 %31 to i32
+  %sub441 = sub nsw i32 %cond.i5.i1476, %conv440
+  %idxprom442 = sext i32 %sub441 to i64
+  %arrayidx443 = getelementptr inbounds i32* %cond, i64 %idxprom442
+  %32 = load i32* %arrayidx443, align 4, !tbaa !0
+  %add444 = add nsw i32 %add430, %32
+  %incdec.ptr446 = getelementptr inbounds i16* %refptr.11590, i64 12
+  %33 = load i16* %incdec.ptr432, align 2, !tbaa !3
+  %conv447 = zext i16 %33 to i32
+  %mul448 = mul nsw i32 %conv447, %2
+  %add449 = add nsw i32 %mul448, %3
+  %shr450 = ashr i32 %add449, %4
+  %add451 = add nsw i32 %shr450, %5
+  %cmp.i.i1469 = icmp sgt i32 %add451, 0
+  %cond.i.i1470 = select i1 %cmp.i.i1469, i32 %add451, i32 0
+  %cmp.i4.i1471 = icmp slt i32 %cond.i.i1470, %1
+  %cond.i5.i1472 = select i1 %cmp.i4.i1471, i32 %cond.i.i1470, i32 %1
+  %incdec.ptr453 = getelementptr inbounds i16* %srcptr.41591, i64 12
+  %34 = load i16* undef, align 2, !tbaa !3
+  %conv454 = zext i16 %34 to i32
+  %sub455 = sub nsw i32 %cond.i5.i1472, %conv454
+  %idxprom456 = sext i32 %sub455 to i64
+  %arrayidx457 = getelementptr inbounds i32* %cond, i64 %idxprom456
+  %35 = load i32* %arrayidx457, align 4, !tbaa !0
+  %add458 = add nsw i32 %add444, %35
+  %incdec.ptr460 = getelementptr inbounds i16* %refptr.11590, i64 13
+  %36 = load i16* %incdec.ptr446, align 2, !tbaa !3
+  %conv461 = zext i16 %36 to i32
+  %mul462 = mul nsw i32 %conv461, %2
+  %add463 = add nsw i32 %mul462, %3
+  %shr464 = ashr i32 %add463, %4
+  %add465 = add nsw i32 %shr464, %5
+  %cmp.i.i1465 = icmp sgt i32 %add465, 0
+  %cond.i.i1466 = select i1 %cmp.i.i1465, i32 %add465, i32 0
+  %cmp.i4.i1467 = icmp slt i32 %cond.i.i1466, %1
+  %cond.i5.i1468 = select i1 %cmp.i4.i1467, i32 %cond.i.i1466, i32 %1
+  %incdec.ptr467 = getelementptr inbounds i16* %srcptr.41591, i64 13
+  %37 = load i16* %incdec.ptr453, align 2, !tbaa !3
+  %conv468 = zext i16 %37 to i32
+  %sub469 = sub nsw i32 %cond.i5.i1468, %conv468
+  %idxprom470 = sext i32 %sub469 to i64
+  %arrayidx471 = getelementptr inbounds i32* %cond, i64 %idxprom470
+  %38 = load i32* %arrayidx471, align 4, !tbaa !0
+  %add472 = add nsw i32 %38, %LineSadBlk3.01586
+  %incdec.ptr474 = getelementptr inbounds i16* %refptr.11590, i64 14
+  %add477 = add nsw i32 0, %3
+  %shr478 = ashr i32 %add477, %4
+  %add479 = add nsw i32 %shr478, %5
+  %cmp.i.i1461 = icmp sgt i32 %add479, 0
+  %cond.i.i1462 = select i1 %cmp.i.i1461, i32 %add479, i32 0
+  %cmp.i4.i1463 = icmp slt i32 %cond.i.i1462, %1
+  %cond.i5.i1464 = select i1 %cmp.i4.i1463, i32 %cond.i.i1462, i32 %1
+  %incdec.ptr481 = getelementptr inbounds i16* %srcptr.41591, i64 14
+  %39 = load i16* %incdec.ptr467, align 2, !tbaa !3
+  %conv482 = zext i16 %39 to i32
+  %sub483 = sub nsw i32 %cond.i5.i1464, %conv482
+  %idxprom484 = sext i32 %sub483 to i64
+  %arrayidx485 = getelementptr inbounds i32* %cond, i64 %idxprom484
+  %40 = load i32* %arrayidx485, align 4, !tbaa !0
+  %add486 = add nsw i32 %add472, %40
+  %incdec.ptr488 = getelementptr inbounds i16* %refptr.11590, i64 15
+  %41 = load i16* %incdec.ptr474, align 2, !tbaa !3
+  %conv489 = zext i16 %41 to i32
+  %mul490 = mul nsw i32 %conv489, %2
+  %add491 = add nsw i32 %mul490, %3
+  %shr492 = ashr i32 %add491, %4
+  %add493 = add nsw i32 %shr492, %5
+  %cmp.i.i1457 = icmp sgt i32 %add493, 0
+  %cond.i.i1458 = select i1 %cmp.i.i1457, i32 %add493, i32 0
+  %cmp.i4.i1459 = icmp slt i32 %cond.i.i1458, %1
+  %cond.i5.i1460 = select i1 %cmp.i4.i1459, i32 %cond.i.i1458, i32 %1
+  %incdec.ptr495 = getelementptr inbounds i16* %srcptr.41591, i64 15
+  %42 = load i16* %incdec.ptr481, align 2, !tbaa !3
+  %conv496 = zext i16 %42 to i32
+  %sub497 = sub nsw i32 %cond.i5.i1460, %conv496
+  %idxprom498 = sext i32 %sub497 to i64
+  %arrayidx499 = getelementptr inbounds i32* %cond, i64 %idxprom498
+  %43 = load i32* %arrayidx499, align 4, !tbaa !0
+  %add500 = add nsw i32 %add486, %43
+  %44 = load i16* %incdec.ptr488, align 2, !tbaa !3
+  %conv503 = zext i16 %44 to i32
+  %mul504 = mul nsw i32 %conv503, %2
+  %add505 = add nsw i32 %mul504, %3
+  %shr506 = ashr i32 %add505, %4
+  %add507 = add nsw i32 %shr506, %5
+  %cmp.i.i1453 = icmp sgt i32 %add507, 0
+  %cond.i.i1454 = select i1 %cmp.i.i1453, i32 %add507, i32 0
+  %cmp.i4.i1455 = icmp slt i32 %cond.i.i1454, %1
+  %cond.i5.i1456 = select i1 %cmp.i4.i1455, i32 %cond.i.i1454, i32 %1
+  %45 = load i16* %incdec.ptr495, align 2, !tbaa !3
+  %conv510 = zext i16 %45 to i32
+  %sub511 = sub nsw i32 %cond.i5.i1456, %conv510
+  %idxprom512 = sext i32 %sub511 to i64
+  %arrayidx513 = getelementptr inbounds i32* %cond, i64 %idxprom512
+  %46 = load i32* %arrayidx513, align 4, !tbaa !0
+  %add514 = add nsw i32 %add500, %46
+  %add.ptr517 = getelementptr inbounds i16* %refptr.11590, i64 %incdec.ptr502.sum
+  %exitcond1692 = icmp eq i32 undef, 4
+  br i1 %exitcond1692, label %for.end520, label %for.body293
+
+for.end520:                                       ; preds = %for.body293
+  store i32 %add346, i32* undef, align 4, !tbaa !0
+  store i32 %add402, i32* undef, align 4, !tbaa !0
+  store i32 %add458, i32* undef, align 4, !tbaa !0
+  store i32 %add514, i32* null, align 4, !tbaa !0
+  br i1 undef, label %for.end543, label %for.cond290.preheader
+
+for.end543:                                       ; preds = %for.end520
+  br i1 undef, label %for.inc997, label %for.body549
+
+for.body549:                                      ; preds = %for.inc701, %for.end543
+  %call554 = call i16* null(i16**** null, i32 signext undef, i32 signext %shl263) #1
+  br label %for.cond559.preheader
+
+for.cond559.preheader:                            ; preds = %for.cond559.preheader, %for.body549
+  br i1 undef, label %for.inc701, label %for.cond559.preheader
+
+for.inc701:                                       ; preds = %for.cond559.preheader
+  br i1 undef, label %for.inc997, label %for.body549
+
+for.cond713.preheader:                            ; preds = %for.end850, %for.body252
+  br label %for.body716
+
+for.body716:                                      ; preds = %for.body716, %for.cond713.preheader
+  br i1 undef, label %for.end850, label %for.body716
+
+for.end850:                                       ; preds = %for.body716
+  br i1 undef, label %for.end873, label %for.cond713.preheader
+
+for.end873:                                       ; preds = %for.end850
+  br i1 undef, label %for.inc997, label %for.body879
+
+for.body879:                                      ; preds = %for.inc992, %for.end873
+  br label %for.cond889.preheader
+
+for.cond889.preheader:                            ; preds = %for.end964, %for.body879
+  br i1 undef, label %for.cond894.preheader.lr.ph, label %for.end964
+
+for.cond894.preheader.lr.ph:                      ; preds = %for.cond889.preheader
+  br label %for.body898.lr.ph.us
+
+for.end957.us:                                    ; preds = %for.body946.us
+  br i1 undef, label %for.body898.lr.ph.us, label %for.end964
+
+for.body946.us:                                   ; preds = %for.body930.us, %for.body946.us
+  br i1 false, label %for.body946.us, label %for.end957.us
+
+for.body930.us:                                   ; preds = %for.body914.us, %for.body930.us
+  br i1 undef, label %for.body930.us, label %for.body946.us
+
+for.body914.us:                                   ; preds = %for.body898.us, %for.body914.us
+  br i1 undef, label %for.body914.us, label %for.body930.us
+
+for.body898.us:                                   ; preds = %for.body898.lr.ph.us, %for.body898.us
+  br i1 undef, label %for.body898.us, label %for.body914.us
+
+for.body898.lr.ph.us:                             ; preds = %for.end957.us, %for.cond894.preheader.lr.ph
+  br label %for.body898.us
+
+for.end964:                                       ; preds = %for.end957.us, %for.cond889.preheader
+  %inc990 = add nsw i32 undef, 1
+  br i1 false, label %for.inc992, label %for.cond889.preheader
+
+for.inc992:                                       ; preds = %for.end964
+  br i1 false, label %for.inc997, label %for.body879
+
+for.inc997:                                       ; preds = %for.inc992, %for.end873, %for.inc701, %for.end543
+  %cmp250 = icmp slt i32 undef, %mul10
+  br i1 %cmp250, label %for.body252, label %for.end999
+
+for.end999:                                       ; preds = %for.inc997
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/PowerPC/crsave.ll b/test/CodeGen/PowerPC/crsave.ll
index 3e98dbd254d9..d698ab031df0 100644
--- a/test/CodeGen/PowerPC/crsave.ll
+++ b/test/CodeGen/PowerPC/crsave.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC32
-; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64
 
 declare void @foo()
 
@@ -19,9 +19,11 @@ entry:
 ; PPC32-NEXT: mtcrf 32, 12
 
 ; PPC64: mfcr 12
-; PPC64-NEXT: stw 12, 8(1)
+; PPC64: stw 12, 8(1)
+; PPC64: stdu 1, -[[AMT:[0-9]+]](1)
+; PPC64: addi 1, 1, [[AMT]]
 ; PPC64: lwz 12, 8(1)
-; PPC64-NEXT: mtcrf 32, 12
+; PPC64: mtcrf 32, 12
 
 define i32 @test_cr234() nounwind {
 entry:
@@ -41,9 +43,11 @@ entry:
 ; PPC32-NEXT: mtcrf 8, 12
 
 ; PPC64: mfcr 12
-; PPC64-NEXT: stw 12, 8(1)
+; PPC64: stw 12, 8(1)
+; PPC64: stdu 1, -[[AMT:[0-9]+]](1)
+; PPC64: addi 1, 1, [[AMT]]
 ; PPC64: lwz 12, 8(1)
-; PPC64-NEXT: mtcrf 32, 12
-; PPC64-NEXT: mtcrf 16, 12
-; PPC64-NEXT: mtcrf 8, 12
+; PPC64: mtcrf 32, 12
+; PPC64: mtcrf 16, 12
+; PPC64: mtcrf 8, 12
 
diff --git a/test/CodeGen/PowerPC/ctr-cleanup.ll b/test/CodeGen/PowerPC/ctr-cleanup.ll
new file mode 100644
index 000000000000..04e4ffb0d48d
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctr-cleanup.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @main() #0 {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 5
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main
+; CHECK: li {{[0-9]+}}, 4
+; CHECK-NOT: li {{[0-9]+}}, 4
+; CHECK: bdnz
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index 1d365d47a877..3757fa3e2f29 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -1,10 +1,12 @@
 ; Make sure this testcase does not use ctpop
-; RUN: llc < %s -march=ppc32 | grep -i cntlzw
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
 
 declare i32 @llvm.cttz.i32(i32, i1)
 
 define i32 @bar(i32 %x) {
 entry:
+; CHECK: @bar
+; CHECK: cntlzw
         %tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true )              ; <i32> [#uses=1]
         ret i32 %tmp.1
 }
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
index 220e7ed49486..21e36618c5c1 100644
--- a/test/CodeGen/PowerPC/dbg.ll
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -16,10 +16,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !6, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9, metadata !9, metadata !10}
diff --git a/test/CodeGen/PowerPC/early-ret.ll b/test/CodeGen/PowerPC/early-ret.ll
new file mode 100644
index 000000000000..ac4fe05c9131
--- /dev/null
+++ b/test/CodeGen/PowerPC/early-ret.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @foo(i32* %P) #0 {
+entry:
+  %tobool = icmp eq i32* %P, null
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* %P, align 4, !tbaa !0
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+
+; CHECK: @foo
+; CHECK: beqlr
+; CHECK: blr
+}
+
+define void @bar(i32* %P, i32* %Q) #0 {
+entry:
+  %tobool = icmp eq i32* %P, null
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* %P, align 4, !tbaa !0
+  %tobool1 = icmp eq i32* %Q, null
+  br i1 %tobool1, label %if.end3, label %if.then2
+
+if.then2:                                         ; preds = %if.then
+  store i32 1, i32* %Q, align 4, !tbaa !0
+  br label %if.end3
+
+if.else:                                          ; preds = %entry
+  store i32 0, i32* %Q, align 4, !tbaa !0
+  br label %if.end3
+
+if.end3:                                          ; preds = %if.then, %if.then2, %if.else
+  ret void
+
+; CHECK: @bar
+; CHECK: beqlr
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/early-ret2.ll b/test/CodeGen/PowerPC/early-ret2.ll
new file mode 100644
index 000000000000..a274e2c2658f
--- /dev/null
+++ b/test/CodeGen/PowerPC/early-ret2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @_Z8example3iPiS_() #0 {
+entry:
+  br i1 undef, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:                                 ; preds = %entry
+  br i1 undef, label %while.end, label %while.body
+
+while.body:                                       ; preds = %while.body, %while.body.lr.ph
+  br i1 false, label %while.end, label %while.body, !llvm.vectorizer.already_vectorized !0
+
+while.end:                                        ; preds = %while.body, %while.body.lr.ph, %entry
+  ret void
+
+; CHECK: @_Z8example3iPiS_
+; CHECK: bnelr
+}
+
+attributes #0 = { noinline nounwind }
+
+!0 = metadata !{}
+
diff --git a/test/CodeGen/PowerPC/float-to-int.ll b/test/CodeGen/PowerPC/float-to-int.ll
new file mode 100644
index 000000000000..39cd4f929f8d
--- /dev/null
+++ b/test/CodeGen/PowerPC/float-to-int.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @foo(float %a) nounwind {
+  %x = fptosi float %a to i64
+  ret i64 %x
+
+; CHECK: @foo
+; CHECK: fctidz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo2(double %a) nounwind {
+  %x = fptosi double %a to i64
+  ret i64 %x
+
+; CHECK: @foo2
+; CHECK: fctidz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo3(float %a) nounwind {
+  %x = fptoui float %a to i64
+  ret i64 %x
+
+; CHECK: @foo3
+; CHECK: fctiduz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo4(double %a) nounwind {
+  %x = fptoui double %a to i64
+  ret i64 %x
+
+; CHECK: @foo4
+; CHECK: fctiduz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i32 @goo(float %a) nounwind {
+  %x = fptosi float %a to i32
+  ret i32 %x
+
+; CHECK: @goo
+; CHECK: fctiwz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo2(double %a) nounwind {
+  %x = fptosi double %a to i32
+  ret i32 %x
+
+; CHECK: @goo2
+; CHECK: fctiwz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo3(float %a) nounwind {
+  %x = fptoui float %a to i32
+  ret i32 %x
+
+; CHECK: @goo3
+; CHECK: fctiwuz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo4(double %a) nounwind {
+  %x = fptoui double %a to i32
+  ret i32 %x
+
+; CHECK: @goo4
+; CHECK: fctiwuz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index 27496f7937e6..a173c9154041 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,22 +1,30 @@
-; RUN: llc < %s -march=ppc32 -fp-contract=fast | \
-; RUN:   egrep "fn?madd|fn?msub" | count 8
+; RUN: llc < %s -march=ppc32 -fp-contract=fast | FileCheck %s
 
 define double @test_FMADD1(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fadd double %D, %C		; <double> [#uses=1]
 	ret double %E
+; CHECK: test_FMADD1:
+; CHECK: fmadd
+; CHECK-NEXT: blr
 }
 
 define double @test_FMADD2(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fadd double %D, %C		; <double> [#uses=1]
 	ret double %E
+; CHECK: test_FMADD2:
+; CHECK: fmadd
+; CHECK-NEXT: blr
 }
 
 define double @test_FMSUB(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fsub double %D, %C		; <double> [#uses=1]
 	ret double %E
+; CHECK: test_FMSUB:
+; CHECK: fmsub
+; CHECK-NEXT: blr
 }
 
 define double @test_FNMADD1(double %A, double %B, double %C) {
@@ -24,6 +32,9 @@ define double @test_FNMADD1(double %A, double %B, double %C) {
 	%E = fadd double %D, %C		; <double> [#uses=1]
 	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
+; CHECK: test_FNMADD1:
+; CHECK: fnmadd
+; CHECK-NEXT: blr
 }
 
 define double @test_FNMADD2(double %A, double %B, double %C) {
@@ -31,12 +42,18 @@ define double @test_FNMADD2(double %A, double %B, double %C) {
 	%E = fadd double %C, %D		; <double> [#uses=1]
 	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
+; CHECK: test_FNMADD2:
+; CHECK: fnmadd
+; CHECK-NEXT: blr
 }
 
 define double @test_FNMSUB1(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fsub double %C, %D		; <double> [#uses=1]
 	ret double %E
+; CHECK: test_FNMSUB1:
+; CHECK: fnmsub
+; CHECK-NEXT: blr
 }
 
 define double @test_FNMSUB2(double %A, double %B, double %C) {
@@ -44,6 +61,9 @@ define double @test_FNMSUB2(double %A, double %B, double %C) {
 	%E = fsub double %D, %C		; <double> [#uses=1]
 	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
+; CHECK: test_FNMSUB2:
+; CHECK: fnmsub
+; CHECK-NEXT: blr
 }
 
 define float @test_FNMSUBS(float %A, float %B, float %C) {
@@ -51,4 +71,7 @@ define float @test_FNMSUBS(float %A, float %B, float %C) {
 	%E = fsub float %D, %C		; <float> [#uses=1]
 	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
 	ret float %F
+; CHECK: test_FNMSUBS:
+; CHECK: fnmsubs
+; CHECK-NEXT: blr
 }
diff --git a/test/CodeGen/PowerPC/fold-zero.ll b/test/CodeGen/PowerPC/fold-zero.ll
new file mode 100644
index 000000000000..c7ec6fade53e
--- /dev/null
+++ b/test/CodeGen/PowerPC/fold-zero.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test1(i1 %a, i32 %c) nounwind  {
+  %x = select i1 %a, i32 %c, i32 0
+  ret i32 %x
+
+; CHECK: @test1
+; CHECK-NOT: li {{[0-9]+}}, 0
+; CHECK: isel 3, 0,
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/frameaddr.ll b/test/CodeGen/PowerPC/frameaddr.ll
new file mode 100644
index 000000000000..eabd4a68aa83
--- /dev/null
+++ b/test/CodeGen/PowerPC/frameaddr.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare void @llvm.eh.sjlj.longjmp(i8*) #1
+
+define i8* @main() #0 {
+entry:
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
+
+; CHECK: @main
+; CHECK: mr 3, 1
+}
+
+define i8* @foo() #3 { ; naked
+entry:
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
+
+; CHECK: @foo
+; CHECK: mr 3, 1
+}
+
+define i8* @bar() #0 {
+entry:
+  %x = alloca [100000 x i8]                       ; <[100000 x i8]*> [#uses=1]
+  %x1 = bitcast [100000 x i8]* %x to i8*          ; <i8*> [#uses=1]
+  call void @use(i8* %x1) nounwind
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
+
+; Note that if we start eliminating non-leaf frame pointers by default, this
+; will need to be updated.
+; CHECK: @bar
+; CHECK: mr 3, 31
+}
+
+declare void @use(i8*)
+
+declare i8* @llvm.frameaddress(i32) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind naked "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/test/CodeGen/PowerPC/fsel.ll b/test/CodeGen/PowerPC/fsel.ll
new file mode 100644
index 000000000000..8cd43e616bf6
--- /dev/null
+++ b/test/CodeGen/PowerPC/fsel.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @zerocmp1(double %a, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp ult double %a, 0.000000e+00
+  %z.y = select i1 %cmp, double %z, double %y
+  ret double %z.y
+
+; CHECK: @zerocmp1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @zerocmp1
+; CHECK-FM: fsel 1, 1, 2, 3
+; CHECK-FM: blr
+}
+
+define double @zerocmp2(double %a, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp ogt double %a, 0.000000e+00
+  %y.z = select i1 %cmp, double %y, double %z
+  ret double %y.z
+
+; CHECK: @zerocmp2
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @zerocmp2
+; CHECK-FM: fneg [[REG:[0-9]+]], 1
+; CHECK-FM: fsel 1, [[REG]], 3, 2
+; CHECK-FM: blr
+}
+
+define double @zerocmp3(double %a, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp oeq double %a, 0.000000e+00
+  %y.z = select i1 %cmp, double %y, double %z
+  ret double %y.z
+
+; CHECK: @zerocmp3
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @zerocmp3
+; CHECK-FM: fsel [[REG:[0-9]+]], 1, 2, 3
+; CHECK-FM: fneg [[REG2:[0-9]+]], 1
+; CHECK-FM: fsel 1, [[REG2]], [[REG]], 3
+; CHECK-FM: blr
+}
+
+define double @min1(double %a, double %b) #0 {
+entry:
+  %cmp = fcmp ole double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+
+; CHECK: @min1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @min1
+; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1
+; CHECK-FM: fsel 1, [[REG]], 1, 2
+; CHECK-FM: blr
+}
+
+define double @max1(double %a, double %b) #0 {
+entry:
+  %cmp = fcmp oge double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+
+; CHECK: @max1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @max1
+; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2
+; CHECK-FM: fsel 1, [[REG]], 1, 2
+; CHECK-FM: blr
+}
+
+define double @cmp1(double %a, double %b, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp ult double %a, %b
+  %z.y = select i1 %cmp, double %z, double %y
+  ret double %z.y
+
+; CHECK: @cmp1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @cmp1
+; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2
+; CHECK-FM: fsel 1, [[REG]], 3, 4
+; CHECK-FM: blr
+}
+
+define double @cmp2(double %a, double %b, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp ogt double %a, %b
+  %y.z = select i1 %cmp, double %y, double %z
+  ret double %y.z
+
+; CHECK: @cmp2
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @cmp2
+; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1
+; CHECK-FM: fsel 1, [[REG]], 4, 3
+; CHECK-FM: blr
+}
+
+define double @cmp3(double %a, double %b, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp oeq double %a, %b
+  %y.z = select i1 %cmp, double %y, double %z
+  ret double %y.z
+
+; CHECK: @cmp3
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @cmp3
+; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2
+; CHECK-FM: fsel [[REG2:[0-9]+]], [[REG]], 3, 4
+; CHECK-FM: fneg [[REG3:[0-9]+]], [[REG]]
+; CHECK-FM: fsel 1, [[REG3]], [[REG2]], 4
+; CHECK-FM: blr
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/i32-to-float.ll b/test/CodeGen/PowerPC/i32-to-float.ll
new file mode 100644
index 000000000000..2707d0352de1
--- /dev/null
+++ b/test/CodeGen/PowerPC/i32-to-float.ll
@@ -0,0 +1,82 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr6 | FileCheck -check-prefix=CHECK-PWR6 %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @foo(i32 %a) nounwind {
+entry:
+  %x = sitofp i32 %a to float
+  ret float %x
+
+; CHECK: @foo
+; CHECK: extsw [[REG:[0-9]+]], 3
+; CHECK: std [[REG]],
+; CHECK: lfd [[REG2:[0-9]+]],
+; CHECK: fcfid [[REG3:[0-9]+]], [[REG2]]
+; CHECK: frsp 1, [[REG3]]
+; CHECK: blr
+
+; CHECK-PWR6: @foo
+; CHECK-PWR6: stw 3,
+; CHECK-PWR6: lfiwax [[REG:[0-9]+]],
+; CHECK-PWR6: fcfid [[REG2:[0-9]+]], [[REG]]
+; CHECK-PWR6: frsp 1, [[REG2]]
+; CHECK-PWR6: blr
+
+; CHECK-A2: @foo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfids 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define double @goo(i32 %a) nounwind {
+entry:
+  %x = sitofp i32 %a to double
+  ret double %x
+
+; CHECK: @goo
+; CHECK: extsw [[REG:[0-9]+]], 3
+; CHECK: std [[REG]],
+; CHECK: lfd [[REG2:[0-9]+]],
+; CHECK: fcfid 1, [[REG2]]
+; CHECK: blr
+
+; CHECK-PWR6: @goo
+; CHECK-PWR6: stw 3,
+; CHECK-PWR6: lfiwax [[REG:[0-9]+]],
+; CHECK-PWR6: fcfid 1, [[REG]]
+; CHECK-PWR6: blr
+
+; CHECK-A2: @goo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfid 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define float @foou(i32 %a) nounwind {
+entry:
+  %x = uitofp i32 %a to float
+  ret float %x
+
+; CHECK-A2: @foou
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwzx [[REG:[0-9]+]],
+; CHECK-A2: fcfidus 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define double @goou(i32 %a) nounwind {
+entry:
+  %x = uitofp i32 %a to double
+  ret double %x
+
+; CHECK-A2: @goou
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwzx [[REG:[0-9]+]],
+; CHECK-A2: fcfidu 1, [[REG]]
+; CHECK-A2: blr
+}
+
diff --git a/test/CodeGen/PowerPC/i64-to-float.ll b/test/CodeGen/PowerPC/i64-to-float.ll
new file mode 100644
index 000000000000..b81d109e7f45
--- /dev/null
+++ b/test/CodeGen/PowerPC/i64-to-float.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @foo(i64 %a) nounwind {
+entry:
+  %x = sitofp i64 %a to float
+  ret float %x
+
+; CHECK: @foo
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfids 1, [[REG]]
+; CHECK: blr
+}
+
+define double @goo(i64 %a) nounwind {
+entry:
+  %x = sitofp i64 %a to double
+  ret double %x
+
+; CHECK: @goo
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfid 1, [[REG]]
+; CHECK: blr
+}
+
+define float @foou(i64 %a) nounwind {
+entry:
+  %x = uitofp i64 %a to float
+  ret float %x
+
+; CHECK: @foou
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfidus 1, [[REG]]
+; CHECK: blr
+}
+
+define double @goou(i64 %a) nounwind {
+entry:
+  %x = uitofp i64 %a to double
+  ret double %x
+
+; CHECK: @goou
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfidu 1, [[REG]]
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll
index 5ae1be8953cc..d2a3239ab865 100644
--- a/test/CodeGen/PowerPC/i64_fp_round.ll
+++ b/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -22,6 +22,6 @@ entry:
 ; Also check that with -enable-unsafe-fp-math we do not get that extra
 ; code sequence.  Simply verify that there is no "isel" present.
 
-; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
+; RUN: llc -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
 ; CHECK-UNSAFE-NOT: isel
 
diff --git a/test/CodeGen/PowerPC/ifcvt.ll b/test/CodeGen/PowerPC/ifcvt.ll
new file mode 100644
index 000000000000..9c966c95b72d
--- /dev/null
+++ b/test/CodeGen/PowerPC/ifcvt.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -verify-machineinstrs | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+  %sext82 = shl i32 %d, 16
+  %conv29 = ashr exact i32 %sext82, 16
+  %cmp = icmp slt i32 %sext82, 0
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %sw.epilog
+  %and33 = and i32 %conv29, 32767
+  %sub34 = sub nsw i32 %a, %and33
+  br label %cond.end
+
+cond.false:                                       ; preds = %sw.epilog
+  %add37 = add nsw i32 %conv29, %a
+  br label %cond.end
+
+; CHECK: @test
+; CHECK: add [[REG:[0-9]+]], 
+; CHECK: subf [[REG2:[0-9]+]],
+; CHECK: isel {{[0-9]+}}, [[REG]], [[REG2]],
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %sub34, %cond.true ], [ %add37, %cond.false ]
+  %sext83 = shl i32 %cond, 16
+  %conv39 = ashr exact i32 %sext83, 16
+  %add41 = sub i32 %b, %a
+  %sub43 = add i32 %add41, %conv39
+  ret i32 %sub43
+}
+
diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll
index 62aa7cf929f8..a10c5ddb36fb 100644
--- a/test/CodeGen/PowerPC/jaggedstructs.ll
+++ b/test/CodeGen/PowerPC/jaggedstructs.ll
@@ -23,22 +23,22 @@ entry:
 ; CHECK: std 4, 200(1)
 ; CHECK: std 3, 192(1)
 ; CHECK: lbz {{[0-9]+}}, 199(1)
-; CHECK: stb {{[0-9]+}}, 55(1)
 ; CHECK: lhz {{[0-9]+}}, 197(1)
+; CHECK: stb {{[0-9]+}}, 55(1)
 ; CHECK: sth {{[0-9]+}}, 53(1)
 ; CHECK: lbz {{[0-9]+}}, 207(1)
-; CHECK: stb {{[0-9]+}}, 63(1)
 ; CHECK: lwz {{[0-9]+}}, 203(1)
+; CHECK: stb {{[0-9]+}}, 63(1)
 ; CHECK: stw {{[0-9]+}}, 59(1)
 ; CHECK: lhz {{[0-9]+}}, 214(1)
-; CHECK: sth {{[0-9]+}}, 70(1)
 ; CHECK: lwz {{[0-9]+}}, 210(1)
+; CHECK: sth {{[0-9]+}}, 70(1)
 ; CHECK: stw {{[0-9]+}}, 66(1)
 ; CHECK: lbz {{[0-9]+}}, 223(1)
-; CHECK: stb {{[0-9]+}}, 79(1)
 ; CHECK: lhz {{[0-9]+}}, 221(1)
-; CHECK: sth {{[0-9]+}}, 77(1)
 ; CHECK: lwz {{[0-9]+}}, 217(1)
+; CHECK: stb {{[0-9]+}}, 79(1)
+; CHECK: sth {{[0-9]+}}, 77(1)
 ; CHECK: stw {{[0-9]+}}, 73(1)
 ; CHECK: ld 6, 72(1)
 ; CHECK: ld 5, 64(1)
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
index 4019eca0bb88..aaa31d93d5f2 100644
--- a/test/CodeGen/PowerPC/lit.local.cfg
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
 
 targets = set(config.root.targets_to_build.split())
 if not 'PowerPC' in targets:
diff --git a/test/CodeGen/PowerPC/lsa.ll b/test/CodeGen/PowerPC/lsa.ll
new file mode 100644
index 000000000000..3daeccbf40f5
--- /dev/null
+++ b/test/CodeGen/PowerPC/lsa.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define signext i32 @foo() #0 {
+entry:
+  %v = alloca [8200 x i32], align 4
+  %w = alloca [8200 x i32], align 4
+  %q = alloca [8200 x i32], align 4
+  %0 = bitcast [8200 x i32]* %v to i8*
+  call void @llvm.lifetime.start(i64 32800, i8* %0) #0
+  %1 = bitcast [8200 x i32]* %w to i8*
+  call void @llvm.lifetime.start(i64 32800, i8* %1) #0
+  %2 = bitcast [8200 x i32]* %q to i8*
+  call void @llvm.lifetime.start(i64 32800, i8* %2) #0
+  %arraydecay = getelementptr inbounds [8200 x i32]* %q, i64 0, i64 0
+  %arraydecay1 = getelementptr inbounds [8200 x i32]* %v, i64 0, i64 0
+  %arraydecay2 = getelementptr inbounds [8200 x i32]* %w, i64 0, i64 0
+  call void @bar(i32* %arraydecay, i32* %arraydecay1, i32* %arraydecay2) #0
+  %3 = load i32* %arraydecay2, align 4, !tbaa !0
+  %arrayidx3 = getelementptr inbounds [8200 x i32]* %w, i64 0, i64 1
+  %4 = load i32* %arrayidx3, align 4, !tbaa !0
+
+; CHECK: @foo
+; CHECK-NOT: lwzx
+; CHECK: lwz {{[0-9]+}}, 4([[REG:[0-9]+]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG]])
+; CHECK: blr
+
+  %add = add nsw i32 %4, %3
+  call void @llvm.lifetime.end(i64 32800, i8* %2) #0
+  call void @llvm.lifetime.end(i64 32800, i8* %1) #0
+  call void @llvm.lifetime.end(i64 32800, i8* %0) #0
+  ret i32 %add
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+declare void @bar(i32*, i32*, i32*)
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+attributes #0 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+
diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll
index 2dd1718ba75a..bc60b3baf2bb 100644
--- a/test/CodeGen/PowerPC/mcm-obj-2.ll
+++ b/test/CodeGen/PowerPC/mcm-obj-2.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O1 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: llvm-readobj -r | FileCheck %s
 
 ; FIXME: When asm-parse is available, could make this an assembly test.
 
@@ -19,18 +19,11 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing function-scoped variable si.
 ;
-; CHECK:       Relocation 0
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 1
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM2]]
-; CHECK-NEXT:  'r_type', 0x00000030
-; CHECK:       Relocation 2
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM2]]
-; CHECK-NEXT:  'r_type', 0x00000030
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
 
 @gi = global i32 5, align 4
 
@@ -45,18 +38,9 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing file-scope variable gi.
 ;
-; CHECK:       Relocation 3
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 4
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM3]]
-; CHECK-NEXT:  'r_type', 0x00000030
-; CHECK:       Relocation 5
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM3]]
-; CHECK-NEXT:  'r_type', 0x00000030
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM3:[^ ]+]]
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM3]]
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM3]]
 
 define double @test_double_const() nounwind {
 entry:
@@ -66,12 +50,5 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing a constant.
 ;
-; CHECK:       Relocation 6
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 7
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM4]]
-; CHECK-NEXT:  'r_type', 0x00000030
-
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM4]]
diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll
index 117c3b334346..720c5fb6dd65 100644
--- a/test/CodeGen/PowerPC/mcm-obj.ll
+++ b/test/CodeGen/PowerPC/mcm-obj.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -O0 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=MEDIUM %s
+; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM %s
 ; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=LARGE %s
+; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s
 
 ; FIXME: When asm-parse is available, could make this an assembly test.
 
@@ -21,25 +21,15 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing external variable ei.
 ;
-; MEDIUM:       '.rela.text'
-; MEDIUM:       Relocation 0
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM1:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 1
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM1]]
-; MEDIUM-NEXT:  'r_type', 0x00000040
+; MEDIUM:      Relocations [
+; MEDIUM:        Section (1) .text {
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
 ;
-; LARGE:       '.rela.text'
-; LARGE:       Relocation 0
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM1:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 1
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM1]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE:       Relocations [
+; LARGE:         Section (1) .text {
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
 
 @test_fn_static.si = internal global i32 0, align 4
 
@@ -54,26 +44,14 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing function-scoped variable si.
 ;
-; MEDIUM:       Relocation 2
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 3
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM2]]
-; MEDIUM-NEXT:  'r_type', 0x00000030
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
 ;
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing function-scoped variable si.
 ;
-; LARGE:       Relocation 2
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 3
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM2]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM2]]
 
 @gi = global i32 5, align 4
 
@@ -88,26 +66,14 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing file-scope variable gi.
 ;
-; MEDIUM:       Relocation 4
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 5
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM3]]
-; MEDIUM-NEXT:  'r_type', 0x00000030
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM3:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM3]]
 ;
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing file-scope variable gi.
 ;
-; LARGE:       Relocation 4
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 5
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM3]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM3:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM3]]
 
 define double @test_double_const() nounwind {
 entry:
@@ -117,26 +83,14 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing a constant.
 ;
-; MEDIUM:       Relocation 6
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 7
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM4]]
-; MEDIUM-NEXT:  'r_type', 0x00000030
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM4]]
 ;
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing a constant.
 ;
-; LARGE:       Relocation 6
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 7
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM4]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM4]]
 
 define signext i32 @test_jump_table(i32 signext %i) nounwind {
 entry:
@@ -185,23 +139,11 @@ sw.epilog:                                        ; preds = %sw.bb3, %sw.default
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing a jump table address.
 ;
-; MEDIUM:       Relocation 8
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM5:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 9
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM5]]
-; MEDIUM-NEXT:  'r_type', 0x00000040
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
 ;
-; LARGE:       Relocation 8
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM5:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 9
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM5]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
 
 @ti = common global i32 0, align 4
 
@@ -216,23 +158,11 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing tentatively declared variable ti.
 ;
-; MEDIUM:       Relocation 10
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM6:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 11
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM6]]
-; MEDIUM-NEXT:  'r_type', 0x00000040
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
 ;
-; LARGE:       Relocation 10
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM6:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 11
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM6]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
 
 define i8* @test_fnaddr() nounwind {
 entry:
@@ -248,21 +178,8 @@ declare signext i32 @foo(i32 signext)
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing function address foo.
 ;
-; MEDIUM:       Relocation 12
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM7:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 13
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM7]]
-; MEDIUM-NEXT:  'r_type', 0x00000040
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
 ;
-; LARGE:       Relocation 12
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM7:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 13
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM7]]
-; LARGE-NEXT:  'r_type', 0x00000040
-
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
diff --git a/test/CodeGen/PowerPC/negctr.ll b/test/CodeGen/PowerPC/negctr.ll
new file mode 100644
index 000000000000..2f6995c65dd8
--- /dev/null
+++ b/test/CodeGen/PowerPC/negctr.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -mcpu=a2 -disable-lsr | FileCheck -check-prefix=NOLSR %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @main() #0 {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 0
+  br i1 %exitcond, label %for.end, label %for.body
+
+; FIXME: We currently can't form the 32-bit unsigned trip count necessary here!
+; CHECK: @main
+; CHECK-NOT: bdnz
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+define void @main1() #0 {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 0
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main1
+; CHECK: li [[REG:[0-9]+]], -1
+; CHECK: mtctr [[REG]]
+; CHECK: bdnz
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+define void @main2() #0 {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, -100000
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main2
+; CHECK: lis [[REG:[0-9]+]], -2
+; CHECK: ori [[REG2:[0-9]+]], [[REG]], 31071
+; CHECK: mtctr [[REG2]]
+; CHECK: bdnz
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+define void @main3() #0 {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 127984, %entry ]
+  %indvars.iv.next = add i64 %indvars.iv, -16
+  %exitcond = icmp eq i64 %indvars.iv.next, -16
+  br i1 %exitcond, label %for.end, label %for.body
+
+; NOLSR: @main3
+; NOLSR: li [[REG:[0-9]+]], 8000
+; NOLSR: mtctr [[REG]]
+; NOLSR: bdnz
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/optcmp.ll b/test/CodeGen/PowerPC/optcmp.ll
new file mode 100644
index 000000000000..1fce464dd335
--- /dev/null
+++ b/test/CodeGen/PowerPC/optcmp.ll
@@ -0,0 +1,143 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -disable-ppc-cmp-opt=0 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define signext i32 @foo(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i32 %a, %b
+  store i32 %sub, i32* %c, align 4, !tbaa !0
+  %cmp = icmp sgt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+
+; CHECK: @foo
+; CHECK-NOT: subf.
+}
+
+define signext i32 @foo2(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 {
+entry:
+  %shl = shl i32 %a, %b
+  store i32 %shl, i32* %c, align 4, !tbaa !0
+  %cmp = icmp sgt i32 %shl, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+
+; CHECK: @foo2
+; CHECK-NOT: slw.
+}
+
+define i64 @fool(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %a, %b
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp sgt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @fool
+; CHECK: subf. [[REG:[0-9]+]], 4, 3
+; CHECK: isel 3, 3, 4, 1
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foolb(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %a, %b
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp sle i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @foolb
+; CHECK: subf. [[REG:[0-9]+]], 4, 3
+; CHECK: isel 3, 4, 3, 1
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foolc(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %b, %a
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp sgt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @foolc
+; CHECK: subf. [[REG:[0-9]+]], 3, 4
+; CHECK: isel 3, 3, 4, 0
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foold(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %b, %a
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp eq i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @foold
+; CHECK: subf. [[REG:[0-9]+]], 3, 4
+; CHECK: isel 3, 3, 4, 2
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foold2(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %a, %b
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp eq i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @foold2
+; CHECK: subf. [[REG:[0-9]+]], 4, 3
+; CHECK: isel 3, 3, 4, 2
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foo2l(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %shl = shl i64 %a, %b
+  store i64 %shl, i64* %c, align 8, !tbaa !3
+  %cmp = icmp sgt i64 %shl, 0
+  %conv1 = zext i1 %cmp to i64
+  ret i64 %conv1
+
+; CHECK: @foo2l
+; CHECK: sld. 4, 3, 4
+; CHECK: std 4, 0(5)
+}
+
+define double @food(double %a, double %b, double* nocapture %c) #0 {
+entry:
+  %sub = fsub double %a, %b
+  store double %sub, double* %c, align 8, !tbaa !3
+  %cmp = fcmp ogt double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+
+; CHECK: @food
+; CHECK: fsub. 0, 1, 2
+; CHECK: stfd 0, 0(5)
+}
+
+define float @foof(float %a, float %b, float* nocapture %c) #0 {
+entry:
+  %sub = fsub float %a, %b
+  store float %sub, float* %c, align 4, !tbaa !3
+  %cmp = fcmp ogt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  ret float %cond
+
+; CHECK: @foof
+; CHECK: fsubs. 0, 1, 2
+; CHECK: stfs 0, 0(5)
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"long", metadata !1}
+!4 = metadata !{metadata !"any pointer", metadata !1}
+
diff --git a/test/CodeGen/PowerPC/popcnt.ll b/test/CodeGen/PowerPC/popcnt.ll
new file mode 100644
index 000000000000..b304d72aede2
--- /dev/null
+++ b/test/CodeGen/PowerPC/popcnt.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=ppc64 -mattr=+popcntd < %s | FileCheck %s
+
+define i8 @cnt8(i8 %x) nounwind readnone {
+  %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+  ret i8 %cnt
+; CHECK: @cnt8
+; CHECK: rlwinm
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+  %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+  ret i16 %cnt
+; CHECK: @cnt16
+; CHECK: rlwinm
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+  ret i32 %cnt
+; CHECK: @cnt32
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+  ret i64 %cnt
+; CHECK: @cnt64
+; CHECK: popcntd
+; CHECK: blr
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/PowerPC/pr15359.ll b/test/CodeGen/PowerPC/pr15359.ll
index 12fa3e5ffbdd..df02dfcb5c1e 100644
--- a/test/CodeGen/PowerPC/pr15359.ll
+++ b/test/CodeGen/PowerPC/pr15359.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O0 -mcpu=pwr7 -filetype=obj %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: llvm-readobj -t | FileCheck %s
 
 target datalayout = "E-p:64:64:64-S0-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
@@ -14,7 +14,9 @@ entry:
 
 ; Verify that nextIdx has symbol type TLS.
 ;
-; CHECK:    '.symtab'
-; CHECK:    'nextIdx'
-; CHECK:    'st_type', 0x6
-
+; CHECK:      Symbol {
+; CHECK:        Name: nextIdx
+; CHECK-NEXT:   Value:
+; CHECK-NEXT:   Size:
+; CHECK-NEXT:   Binding:
+; CHECK-NEXT:   Type: TLS
diff --git a/test/CodeGen/PowerPC/pr15630.ll b/test/CodeGen/PowerPC/pr15630.ll
new file mode 100644
index 000000000000..c5ba8a4d4f04
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15630.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define weak_odr void @_D4core6atomic49__T11atomicStoreVE4core6atomic11MemoryOrder3ThThZ11atomicStoreFNaNbKOhhZv(i8* %val_arg, i8 zeroext %newval_arg) {
+entry:
+  %newval = alloca i8
+  %ordering = alloca i32, align 4
+  store i8 %newval_arg, i8* %newval
+  %tmp = load i8* %newval
+  store atomic volatile i8 %tmp, i8* %val_arg seq_cst, align 1
+  ret void
+}
+
+; CHECK: stwcx.
diff --git a/test/CodeGen/PowerPC/pr15632.ll b/test/CodeGen/PowerPC/pr15632.ll
new file mode 100644
index 000000000000..3ea83468b6d7
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15632.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare void @other(ppc_fp128 %tmp70)
+
+define void @bug() {
+entry:
+  %tmp70 = frem ppc_fp128 0xM00000000000000000000000000000000, undef
+  call void @other(ppc_fp128 %tmp70)
+  unreachable
+}
+
+; CHECK: bl fmodl
diff --git a/test/CodeGen/PowerPC/r31.ll b/test/CodeGen/PowerPC/r31.ll
new file mode 100644
index 000000000000..7ce12f600b41
--- /dev/null
+++ b/test/CodeGen/PowerPC/r31.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define i64 @foo(i64 %a) nounwind {
+entry:
+  call void asm sideeffect "", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30}"() nounwind
+  br label %return
+
+; CHECK: @foo
+; CHECK: mr 31, 3
+
+return:                                           ; preds = %entry
+  ret i64 %a
+}
+
diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll
new file mode 100644
index 000000000000..89705faa46e9
--- /dev/null
+++ b/test/CodeGen/PowerPC/recipest.ll
@@ -0,0 +1,226 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck -check-prefix=CHECK-SAFE %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare double @llvm.sqrt.f64(double)
+declare float @llvm.sqrt.f32(float)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+
+define double @foo(double %a, double %b) nounwind {
+entry:
+  %x = call double @llvm.sqrt.f64(double %b)
+  %r = fdiv double %a, %x
+  ret double %r
+
+; CHECK: @foo
+; CHECK: frsqrte
+; CHECK: fnmsub
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: blr
+
+; CHECK-SAFE: @foo
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define double @foof(double %a, float %b) nounwind {
+entry:
+  %x = call float @llvm.sqrt.f32(float %b)
+  %y = fpext float %x to double
+  %r = fdiv double %a, %y
+  ret double %r
+
+; CHECK: @foof
+; CHECK: frsqrtes
+; CHECK: fnmsubs
+; CHECK: fmuls
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: fmul
+; CHECK: blr
+
+; CHECK-SAFE: @foof
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define float @food(float %a, double %b) nounwind {
+entry:
+  %x = call double @llvm.sqrt.f64(double %b)
+  %y = fptrunc double %x to float
+  %r = fdiv float %a, %y
+  ret float %r
+
+; CHECK: @foo
+; CHECK: frsqrte
+; CHECK: fnmsub
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: frsp
+; CHECK: fmuls
+; CHECK: blr
+
+; CHECK-SAFE: @foo
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define float @goo(float %a, float %b) nounwind {
+entry:
+  %x = call float @llvm.sqrt.f32(float %b)
+  %r = fdiv float %a, %x
+  ret float %r
+
+; CHECK: @goo
+; CHECK: frsqrtes
+; CHECK: fnmsubs
+; CHECK: fmuls
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: fmuls
+; CHECK: blr
+
+; CHECK-SAFE: @goo
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %r = fdiv <4 x float> %a, %x
+  ret <4 x float> %r
+
+; CHECK: @hoo
+; CHECK: vrsqrtefp
+
+; CHECK-SAFE: @hoo
+; CHECK-SAFE-NOT: vrsqrtefp
+; CHECK-SAFE: blr
+}
+
+define double @foo2(double %a, double %b) nounwind {
+entry:
+  %r = fdiv double %a, %b
+  ret double %r
+
+; CHECK: @foo2
+; CHECK: fre
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: blr
+
+; CHECK-SAFE: @foo2
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define float @goo2(float %a, float %b) nounwind {
+entry:
+  %r = fdiv float %a, %b
+  ret float %r
+
+; CHECK: @goo2
+; CHECK: fres
+; CHECK: fnmsubs
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: blr
+
+; CHECK-SAFE: @goo2
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %r = fdiv <4 x float> %a, %b
+  ret <4 x float> %r
+
+; CHECK: @hoo2
+; CHECK: vrefp
+
+; CHECK-SAFE: @hoo2
+; CHECK-SAFE-NOT: vrefp
+; CHECK-SAFE: blr
+}
+
+define double @foo3(double %a) nounwind {
+entry:
+  %r = call double @llvm.sqrt.f64(double %a)
+  ret double %r
+
+; CHECK: @foo3
+; CHECK: frsqrte
+; CHECK: fnmsub
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fre
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: blr
+
+; CHECK-SAFE: @foo3
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: blr
+}
+
+define float @goo3(float %a) nounwind {
+entry:
+  %r = call float @llvm.sqrt.f32(float %a)
+  ret float %r
+
+; CHECK: @goo3
+; CHECK: frsqrtes
+; CHECK: fnmsubs
+; CHECK: fmuls
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: fres
+; CHECK: fnmsubs
+; CHECK: fmadds
+; CHECK: blr
+
+; CHECK-SAFE: @goo3
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo3(<4 x float> %a) nounwind {
+entry:
+  %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+  ret <4 x float> %r
+
+; CHECK: @hoo3
+; CHECK: vrsqrtefp
+; CHECK: vrefp
+
+; CHECK-SAFE: @hoo3
+; CHECK-SAFE-NOT: vrsqrtefp
+; CHECK-SAFE: blr
+}
+
diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll
new file mode 100644
index 000000000000..2b5e1c9a289b
--- /dev/null
+++ b/test/CodeGen/PowerPC/rounding-ops.ll
@@ -0,0 +1,147 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @test1(float %x) nounwind  {
+  %call = tail call float @floorf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK: test1:
+; CHECK: frim 1, 1
+
+; CHECK-FM: test1:
+; CHECK-FM: frim 1, 1
+}
+
+declare float @floorf(float) nounwind readnone
+
+define double @test2(double %x) nounwind  {
+  %call = tail call double @floor(double %x) nounwind readnone
+  ret double %call
+
+; CHECK: test2:
+; CHECK: frim 1, 1
+
+; CHECK-FM: test2:
+; CHECK-FM: frim 1, 1
+}
+
+declare double @floor(double) nounwind readnone
+
+define float @test3(float %x) nounwind  {
+  %call = tail call float @nearbyintf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK: test3:
+; CHECK-NOT: frin
+
+; CHECK-FM: test3:
+; CHECK-FM: frin 1, 1
+}
+
+declare float @nearbyintf(float) nounwind readnone
+
+define double @test4(double %x) nounwind  {
+  %call = tail call double @nearbyint(double %x) nounwind readnone
+  ret double %call
+
+; CHECK: test4:
+; CHECK-NOT: frin
+
+; CHECK-FM: test4:
+; CHECK-FM: frin 1, 1
+}
+
+declare double @nearbyint(double) nounwind readnone
+
+define float @test5(float %x) nounwind  {
+  %call = tail call float @ceilf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK: test5:
+; CHECK: frip 1, 1
+
+; CHECK-FM: test5:
+; CHECK-FM: frip 1, 1
+}
+
+declare float @ceilf(float) nounwind readnone
+
+define double @test6(double %x) nounwind  {
+  %call = tail call double @ceil(double %x) nounwind readnone
+  ret double %call
+
+; CHECK: test6:
+; CHECK: frip 1, 1
+
+; CHECK-FM: test6:
+; CHECK-FM: frip 1, 1
+}
+
+declare double @ceil(double) nounwind readnone
+
+define float @test9(float %x) nounwind  {
+  %call = tail call float @truncf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK: test9:
+; CHECK: friz 1, 1
+
+; CHECK-FM: test9:
+; CHECK-FM: friz 1, 1
+}
+
+declare float @truncf(float) nounwind readnone
+
+define double @test10(double %x) nounwind  {
+  %call = tail call double @trunc(double %x) nounwind readnone
+  ret double %call
+
+; CHECK: test10:
+; CHECK: friz 1, 1
+
+; CHECK-FM: test10:
+; CHECK-FM: friz 1, 1
+}
+
+declare double @trunc(double) nounwind readnone
+
+define void @test11(float %x, float* %y) nounwind  {
+  %call = tail call float @rintf(float %x) nounwind readnone
+  store float %call, float* %y
+  ret void
+
+; CHECK: test11:
+; CHECK-NOT: frin
+
+; CHECK-FM: test11:
+; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
+; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
+; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
+; CHECK-FM: mtfsb1 6
+; CHECK-FM: .LBB[[BB]]_2:
+; CHECK-FM: blr
+}
+
+declare float @rintf(float) nounwind readnone
+
+define void @test12(double %x, double* %y) nounwind  {
+  %call = tail call double @rint(double %x) nounwind readnone
+  store double %call, double* %y
+  ret void
+
+; CHECK: test12:
+; CHECK-NOT: frin
+
+; CHECK-FM: test12:
+; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
+; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
+; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
+; CHECK-FM: mtfsb1 6
+; CHECK-FM: .LBB[[BB]]_2:
+; CHECK-FM: blr
+}
+
+declare double @rint(double) nounwind readnone
+
diff --git a/test/CodeGen/PowerPC/sjlj.ll b/test/CodeGen/PowerPC/sjlj.ll
new file mode 100644
index 000000000000..7ea35dafc3fa
--- /dev/null
+++ b/test/CodeGen/PowerPC/sjlj.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-NOAV %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.__jmp_buf_tag = type { [64 x i64], i32, %struct.__sigset_t, [8 x i8] }
+%struct.__sigset_t = type { [16 x i64] }
+
+@env_sigill = internal global [1 x %struct.__jmp_buf_tag] zeroinitializer, align 16
+
+define void @foo() #0 {
+entry:
+  call void @llvm.eh.sjlj.longjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*))
+  unreachable
+
+; CHECK: @foo
+; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
+; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l
+; CHECK: ld 31, 0([[REG]])
+; CHECK: ld [[REG2:[0-9]+]], 8([[REG]])
+; CHECK: ld 1, 16([[REG]])
+; CHECK: mtctr [[REG2]]
+; CHECK: ld 2, 24([[REG]])
+; CHECK: bctr
+
+return:                                           ; No predecessors!
+  ret void
+}
+
+declare void @llvm.eh.sjlj.longjmp(i8*) #1
+
+define signext i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  store i8* %0, i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**)
+  %1 = call i8* @llvm.stacksave()
+  store i8* %1, i8** getelementptr (i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**), i32 2)
+  %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*))
+  %tobool = icmp ne i32 %2, 0
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+if.else:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.else
+  store i32 0, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %3 = load i32* %retval
+  ret i32 %3
+
+; FIXME: We should be saving VRSAVE on Darwin, but we're not!
+
+; CHECK: @main
+; CHECK: std
+; Make sure that we're not saving VRSAVE on non-Darwin:
+; CHECK-NOT: mfspr
+; CHECK: stfd
+; CHECK: stvx
+
+; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
+; CHECK: std 31, env_sigill@toc@l([[REG]])
+; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l
+; CHECK: std [[REG]], [[OFF:[0-9]+]](31)                  # 8-byte Folded Spill
+; CHECK: std 1, 16([[REG]])
+; CHECK: std 2, 24([[REG]])
+; CHECK: bcl 20, 31, .LBB1_1
+; CHECK: li 3, 1
+; CHECK: #EH_SjLj_Setup	.LBB1_1
+; CHECK: b .LBB1_2
+
+; CHECK: .LBB1_1:
+; CHECK: mflr [[REGL:[0-9]+]]
+; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31)                   # 8-byte Folded Reload
+; CHECK: std [[REGL]], 8([[REG2]])
+; CHECK: li 3, 0
+
+; CHECK: .LBB1_2:
+
+; CHECK: lfd
+; CHECK: lvx
+; CHECK: ld
+; CHECK: blr
+
+; CHECK-NOAV: @main
+; CHECK-NOAV-NOT: stvx
+; CHECK-NOAV: bcl
+; CHECK-NOAV: mflr
+; CHECK-NOAV: bl foo
+; CHECK-NOAV-NOT: lvx
+; CHECK-NOAV: blr
+}
+
+declare i8* @llvm.frameaddress(i32) #2
+
+declare i8* @llvm.stacksave() #3
+
+declare i32 @llvm.eh.sjlj.setjmp(i8*) #3
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/stfiwx-2.ll b/test/CodeGen/PowerPC/stfiwx-2.ll
index c49b25cc2303..7786fc17eacb 100644
--- a/test/CodeGen/PowerPC/stfiwx-2.ll
+++ b/test/CodeGen/PowerPC/stfiwx-2.ll
@@ -1,11 +1,14 @@
-; This cannot be a stfiwx
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep stb
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep stfiwx
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=g5 | FileCheck %s
 
 define void @test(float %F, i8* %P) {
 	%I = fptosi float %F to i32
 	%X = trunc i32 %I to i8
 	store i8 %X, i8* %P
 	ret void
+; CHECK: fctiwz 0, 1
+; CHECK: stfiwx 0, 0, 4
+; CHECK: lwz 4, 12(1)
+; CHECK: stb 4, 0(3)
+; CHECK: blr
 }
 
diff --git a/test/CodeGen/PowerPC/store-update.ll b/test/CodeGen/PowerPC/store-update.ll
new file mode 100644
index 000000000000..538ed24fbc46
--- /dev/null
+++ b/test/CodeGen/PowerPC/store-update.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i8* @stbu(i8* %base, i8 zeroext %val) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i8* %base, i64 16
+  store i8 %val, i8* %arrayidx, align 1
+  ret i8* %arrayidx
+}
+; CHECK: @stbu
+; CHECK: %entry
+; CHECK-NEXT: stbu
+; CHECK-NEXT: blr
+
+define i8* @stbux(i8* %base, i8 zeroext %val, i64 %offset) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i8* %base, i64 %offset
+  store i8 %val, i8* %arrayidx, align 1
+  ret i8* %arrayidx
+}
+; CHECK: @stbux
+; CHECK: %entry
+; CHECK-NEXT: stbux
+; CHECK-NEXT: blr
+
+define i16* @sthu(i16* %base, i16 zeroext %val) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i16* %base, i64 16
+  store i16 %val, i16* %arrayidx, align 2
+  ret i16* %arrayidx
+}
+; CHECK: @sthu
+; CHECK: %entry
+; CHECK-NEXT: sthu
+; CHECK-NEXT: blr
+
+define i16* @sthux(i16* %base, i16 zeroext %val, i64 %offset) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i16* %base, i64 %offset
+  store i16 %val, i16* %arrayidx, align 2
+  ret i16* %arrayidx
+}
+; CHECK: @sthux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: sthux
+; CHECK-NEXT: blr
+
+define i32* @stwu(i32* %base, i32 zeroext %val) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i32* %base, i64 16
+  store i32 %val, i32* %arrayidx, align 4
+  ret i32* %arrayidx
+}
+; CHECK: @stwu
+; CHECK: %entry
+; CHECK-NEXT: stwu
+; CHECK-NEXT: blr
+
+define i32* @stwux(i32* %base, i32 zeroext %val, i64 %offset) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i32* %base, i64 %offset
+  store i32 %val, i32* %arrayidx, align 4
+  ret i32* %arrayidx
+}
+; CHECK: @stwux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stwux
+; CHECK-NEXT: blr
+
+define i8* @stbu8(i8* %base, i64 %val) nounwind {
+entry:
+  %conv = trunc i64 %val to i8
+  %arrayidx = getelementptr inbounds i8* %base, i64 16
+  store i8 %conv, i8* %arrayidx, align 1
+  ret i8* %arrayidx
+}
+; CHECK: @stbu
+; CHECK: %entry
+; CHECK-NEXT: stbu
+; CHECK-NEXT: blr
+
+define i8* @stbux8(i8* %base, i64 %val, i64 %offset) nounwind {
+entry:
+  %conv = trunc i64 %val to i8
+  %arrayidx = getelementptr inbounds i8* %base, i64 %offset
+  store i8 %conv, i8* %arrayidx, align 1
+  ret i8* %arrayidx
+}
+; CHECK: @stbux
+; CHECK: %entry
+; CHECK-NEXT: stbux
+; CHECK-NEXT: blr
+
+define i16* @sthu8(i16* %base, i64 %val) nounwind {
+entry:
+  %conv = trunc i64 %val to i16
+  %arrayidx = getelementptr inbounds i16* %base, i64 16
+  store i16 %conv, i16* %arrayidx, align 2
+  ret i16* %arrayidx
+}
+; CHECK: @sthu
+; CHECK: %entry
+; CHECK-NEXT: sthu
+; CHECK-NEXT: blr
+
+define i16* @sthux8(i16* %base, i64 %val, i64 %offset) nounwind {
+entry:
+  %conv = trunc i64 %val to i16
+  %arrayidx = getelementptr inbounds i16* %base, i64 %offset
+  store i16 %conv, i16* %arrayidx, align 2
+  ret i16* %arrayidx
+}
+; CHECK: @sthux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: sthux
+; CHECK-NEXT: blr
+
+define i32* @stwu8(i32* %base, i64 %val) nounwind {
+entry:
+  %conv = trunc i64 %val to i32
+  %arrayidx = getelementptr inbounds i32* %base, i64 16
+  store i32 %conv, i32* %arrayidx, align 4
+  ret i32* %arrayidx
+}
+; CHECK: @stwu
+; CHECK: %entry
+; CHECK-NEXT: stwu
+; CHECK-NEXT: blr
+
+define i32* @stwux8(i32* %base, i64 %val, i64 %offset) nounwind {
+entry:
+  %conv = trunc i64 %val to i32
+  %arrayidx = getelementptr inbounds i32* %base, i64 %offset
+  store i32 %conv, i32* %arrayidx, align 4
+  ret i32* %arrayidx
+}
+; CHECK: @stwux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stwux
+; CHECK-NEXT: blr
+
+define i64* @stdu(i64* %base, i64 %val) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i64* %base, i64 16
+  store i64 %val, i64* %arrayidx, align 8
+  ret i64* %arrayidx
+}
+; CHECK: @stdu
+; CHECK: %entry
+; CHECK-NEXT: stdu
+; CHECK-NEXT: blr
+
+define i64* @stdux(i64* %base, i64 %val, i64 %offset) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i64* %base, i64 %offset
+  store i64 %val, i64* %arrayidx, align 8
+  ret i64* %arrayidx
+}
+; CHECK: @stdux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stdux
+; CHECK-NEXT: blr
+
diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll
index a0a15d3e5e6c..2a17e740ea01 100644
--- a/test/CodeGen/PowerPC/structsinmem.ll
+++ b/test/CodeGen/PowerPC/structsinmem.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
@@ -114,8 +114,8 @@ entry:
   ret i32 %add13
 
 ; CHECK: lha {{[0-9]+}}, 126(1)
-; CHECK: lbz {{[0-9]+}}, 119(1)
 ; CHECK: lha {{[0-9]+}}, 132(1)
+; CHECK: lbz {{[0-9]+}}, 119(1)
 ; CHECK: lwz {{[0-9]+}}, 140(1)
 ; CHECK: lwz {{[0-9]+}}, 144(1)
 ; CHECK: lwz {{[0-9]+}}, 152(1)
@@ -205,19 +205,11 @@ entry:
   %add13 = add nsw i32 %add11, %6
   ret i32 %add13
 
-; CHECK: lbz {{[0-9]+}}, 149(1)
-; CHECK: lbz {{[0-9]+}}, 150(1)
-; CHECK: lbz {{[0-9]+}}, 147(1)
-; CHECK: lbz {{[0-9]+}}, 148(1)
-; CHECK: lbz {{[0-9]+}}, 133(1)
-; CHECK: lbz {{[0-9]+}}, 134(1)
 ; CHECK: lha {{[0-9]+}}, 126(1)
+; CHECK: lha {{[0-9]+}}, 133(1)
 ; CHECK: lbz {{[0-9]+}}, 119(1)
 ; CHECK: lwz {{[0-9]+}}, 140(1)
-; CHECK: lhz {{[0-9]+}}, 154(1)
-; CHECK: lhz {{[0-9]+}}, 156(1)
-; CHECK: lbz {{[0-9]+}}, 163(1)
-; CHECK: lbz {{[0-9]+}}, 164(1)
-; CHECK: lbz {{[0-9]+}}, 161(1)
-; CHECK: lbz {{[0-9]+}}, 162(1)
+; CHECK: lwz {{[0-9]+}}, 147(1)
+; CHECK: lwz {{[0-9]+}}, 154(1)
+; CHECK: lwz {{[0-9]+}}, 161(1)
 }
diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll
index b88dde249cc5..54de6060d0f0 100644
--- a/test/CodeGen/PowerPC/structsinregs.ll
+++ b/test/CodeGen/PowerPC/structsinregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
@@ -105,8 +105,8 @@ entry:
 ; CHECK: sth 4, 62(1)
 ; CHECK: stb 3, 55(1)
 ; CHECK: lha {{[0-9]+}}, 62(1)
-; CHECK: lbz {{[0-9]+}}, 55(1)
 ; CHECK: lha {{[0-9]+}}, 68(1)
+; CHECK: lbz {{[0-9]+}}, 55(1)
 ; CHECK: lwz {{[0-9]+}}, 76(1)
 ; CHECK: lwz {{[0-9]+}}, 80(1)
 ; CHECK: lwz {{[0-9]+}}, 88(1)
@@ -191,19 +191,11 @@ entry:
 ; CHECK: std 5, 64(1)
 ; CHECK: sth 4, 62(1)
 ; CHECK: stb 3, 55(1)
-; CHECK: lbz {{[0-9]+}}, 85(1)
-; CHECK: lbz {{[0-9]+}}, 86(1)
-; CHECK: lbz {{[0-9]+}}, 83(1)
-; CHECK: lbz {{[0-9]+}}, 84(1)
-; CHECK: lbz {{[0-9]+}}, 69(1)
-; CHECK: lbz {{[0-9]+}}, 70(1)
 ; CHECK: lha {{[0-9]+}}, 62(1)
+; CHECK: lha {{[0-9]+}}, 69(1)
 ; CHECK: lbz {{[0-9]+}}, 55(1)
 ; CHECK: lwz {{[0-9]+}}, 76(1)
-; CHECK: lhz {{[0-9]+}}, 90(1)
-; CHECK: lhz {{[0-9]+}}, 92(1)
-; CHECK: lbz {{[0-9]+}}, 99(1)
-; CHECK: lbz {{[0-9]+}}, 100(1)
-; CHECK: lbz {{[0-9]+}}, 97(1)
-; CHECK: lbz {{[0-9]+}}, 98(1)
+; CHECK: lwz {{[0-9]+}}, 83(1)
+; CHECK: lwz {{[0-9]+}}, 90(1)
+; CHECK: lwz {{[0-9]+}}, 97(1)
 }
diff --git a/test/CodeGen/PowerPC/tls-gd-obj.ll b/test/CodeGen/PowerPC/tls-gd-obj.ll
index 00b537d5325b..ffc0db0d14cb 100644
--- a/test/CodeGen/PowerPC/tls-gd-obj.ll
+++ b/test/CodeGen/PowerPC/tls-gd-obj.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: llvm-readobj -r | FileCheck %s
 
 ; Test correct relocation generation for thread-local storage using
 ; the general dynamic model and integrated assembly.
@@ -21,21 +21,11 @@ entry:
 ; and R_PPC64_TLSGD for accessing external variable a, and R_PPC64_REL24
 ; for the call to __tls_get_addr.
 ;
-; CHECK:       '.rela.text'
-; CHECK:       Relocation 0
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9a-f]+]]
-; CHECK-NEXT:  'r_type', 0x00000052
-; CHECK:       Relocation 1
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x00000050
-; CHECK:       Relocation 2
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x0000006b
-; CHECK:       Relocation 3
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x{{[0-9a-f]+}}
-; CHECK-NEXT:  'r_type', 0x0000000a
-
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_HA a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_LO a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLSGD          a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_REL24          __tls_get_addr
+; CHECK:   }
+; CHECK: ]
diff --git a/test/CodeGen/PowerPC/tls-gd.ll b/test/CodeGen/PowerPC/tls-gd.ll
index fb8dfaf04a92..5f0ef9a050da 100644
--- a/test/CodeGen/PowerPC/tls-gd.ll
+++ b/test/CodeGen/PowerPC/tls-gd.ll
@@ -18,6 +18,6 @@ entry:
 
 ; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsgd@ha
 ; CHECK-NEXT: addi 3, [[REG]], a@got@tlsgd@l
-; CHECK-NEXT: bl __tls_get_addr(a@tlsgd)
+; CHECK:      bl __tls_get_addr(a@tlsgd)
 ; CHECK-NEXT: nop
 
diff --git a/test/CodeGen/PowerPC/tls-ie-obj.ll b/test/CodeGen/PowerPC/tls-ie-obj.ll
index 3600cc52ba54..0f7a35295234 100644
--- a/test/CodeGen/PowerPC/tls-ie-obj.ll
+++ b/test/CodeGen/PowerPC/tls-ie-obj.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mcpu=pwr7 -O0 -filetype=obj %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: llvm-readobj -r | FileCheck %s
 
 ; Test correct relocation generation for thread-local storage
 ; using the initial-exec model and integrated assembly.
@@ -20,17 +20,10 @@ entry:
 ; Verify generation of R_PPC64_GOT_TPREL16_DS and R_PPC64_TLS for
 ; accessing external variable a.
 ;
-; CHECK:       '.rela.text'
-; CHECK:       Relocation 0
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9a-f]+]]
-; CHECK-NEXT:  'r_type', 0x0000005a
-; CHECK:       Relocation 1
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x00000058
-; CHECK:       Relocation 2
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x00000043
-
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_HA    a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_LO_DS a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLS               a
+; CHECK:   }
+; CHECK: ]
diff --git a/test/CodeGen/PowerPC/tls-ld-2.ll b/test/CodeGen/PowerPC/tls-ld-2.ll
index 4954afeb24f8..4399b330ea47 100644
--- a/test/CodeGen/PowerPC/tls-ld-2.ll
+++ b/test/CodeGen/PowerPC/tls-ld-2.ll
@@ -18,7 +18,7 @@ entry:
 
 ; CHECK:      addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
 ; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l
-; CHECK-NEXT: bl __tls_get_addr(a@tlsld)
+; CHECK:      bl __tls_get_addr(a@tlsld)
 ; CHECK-NEXT: nop
-; CHECK-NEXT: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; CHECK:      addis [[REG2:[0-9]+]], 3, a@dtprel@ha
 ; CHECK-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]])
diff --git a/test/CodeGen/PowerPC/tls-ld-obj.ll b/test/CodeGen/PowerPC/tls-ld-obj.ll
index c521ae405f46..29ee87684552 100644
--- a/test/CodeGen/PowerPC/tls-ld-obj.ll
+++ b/test/CodeGen/PowerPC/tls-ld-obj.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: llvm-readobj -r | FileCheck %s
 
 ; Test correct relocation generation for thread-local storage using
 ; the local dynamic model.
@@ -22,29 +22,13 @@ entry:
 ; accessing external variable a, and R_PPC64_REL24 for the call to
 ; __tls_get_addr.
 ;
-; CHECK:       '.rela.text'
-; CHECK:       Relocation 0
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9a-f]+]]
-; CHECK-NEXT:  'r_type', 0x00000056
-; CHECK:       Relocation 1
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x00000054
-; CHECK:       Relocation 2
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x0000006c
-; CHECK:       Relocation 3
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x{{[0-9a-f]+}}
-; CHECK-NEXT:  'r_type', 0x0000000a
-; CHECK:       Relocation 4
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x0000004d
-; CHECK:       Relocation 5
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x0000004b
-
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_HA a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_LO a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLSLD          a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_REL24          __tls_get_addr
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_HA    a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_LO    a
+; CHECK:   }
+; CHECK: ]
diff --git a/test/CodeGen/PowerPC/tls-ld.ll b/test/CodeGen/PowerPC/tls-ld.ll
index 1ebc6129e2aa..db02a56f6a22 100644
--- a/test/CodeGen/PowerPC/tls-ld.ll
+++ b/test/CodeGen/PowerPC/tls-ld.ll
@@ -18,7 +18,7 @@ entry:
 
 ; CHECK:      addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
 ; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l
-; CHECK-NEXT: bl __tls_get_addr(a@tlsld)
+; CHECK:      bl __tls_get_addr(a@tlsld)
 ; CHECK-NEXT: nop
-; CHECK-NEXT: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; CHECK:      addis [[REG2:[0-9]+]], 3, a@dtprel@ha
 ; CHECK-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l
diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll
index 151b4b7ddab2..2daa60ab37f2 100644
--- a/test/CodeGen/PowerPC/tls.ll
+++ b/test/CodeGen/PowerPC/tls.ll
@@ -12,7 +12,7 @@ entry:
 ;OPT0:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
 ;OPT0-NEXT:     li [[REG2:[0-9]+]], 42
 ;OPT0-NEXT:     addi [[REG1]], [[REG1]], a@tprel@l
-;OPT0-NEXT:     stw [[REG2]], 0([[REG1]])
+;OPT0:          stw [[REG2]], 0([[REG1]])
 ;OPT1:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
 ;OPT1-NEXT:     li [[REG2:[0-9]+]], 42
 ;OPT1-NEXT:     stw [[REG2]], a@tprel@l([[REG1]])
diff --git a/test/CodeGen/PowerPC/unal4-std.ll b/test/CodeGen/PowerPC/unal4-std.ll
new file mode 100644
index 000000000000..169bd787c0c1
--- /dev/null
+++ b/test/CodeGen/PowerPC/unal4-std.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define fastcc void @copy_to_conceal() #0 {
+entry:
+  br i1 undef, label %if.then, label %if.end210
+
+if.then:                                          ; preds = %entry
+  br label %vector.body.i
+
+vector.body.i:                                    ; preds = %vector.body.i, %if.then
+  %index.i = phi i64 [ 0, %vector.body.i ], [ 0, %if.then ]
+  store <8 x i16> zeroinitializer, <8 x i16>* undef, align 2
+  br label %vector.body.i
+
+if.end210:                                        ; preds = %entry
+  ret void
+
+; This will generate two align-1 i64 stores. Make sure that they are
+; indexed stores and not in r+i form (which require the offset to be
+; a multiple of 4).
+; CHECK: @copy_to_conceal
+; CHECK: stdx {{[0-9]+}}, 0,
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/vec_rounding.ll b/test/CodeGen/PowerPC/vec_rounding.ll
index f41faa0339a2..7c55638620a9 100644
--- a/test/CodeGen/PowerPC/vec_rounding.ll
+++ b/test/CodeGen/PowerPC/vec_rounding.ll
@@ -13,8 +13,8 @@ define <2 x double> @floor_v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
 ; CHECK: floor_v2f64:
-; CHECK: bl floor
-; CHECK: bl floor
+; CHECK: frim
+; CHECK: frim
 
 declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
 define <4 x double> @floor_v4f64(<4 x double> %p)
@@ -23,10 +23,10 @@ define <4 x double> @floor_v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
 ; CHECK: floor_v4f64:
-; CHECK: bl floor
-; CHECK: bl floor
-; CHECK: bl floor
-; CHECK: bl floor
+; CHECK: frim
+; CHECK: frim
+; CHECK: frim
+; CHECK: frim
 
 declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
 define <2 x double> @ceil_v2f64(<2 x double> %p)
@@ -35,8 +35,8 @@ define <2 x double> @ceil_v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
 ; CHECK: ceil_v2f64:
-; CHECK: bl ceil
-; CHECK: bl ceil
+; CHECK: frip
+; CHECK: frip
 
 declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
 define <4 x double> @ceil_v4f64(<4 x double> %p)
@@ -45,10 +45,10 @@ define <4 x double> @ceil_v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
 ; CHECK: ceil_v4f64:
-; CHECK: bl ceil
-; CHECK: bl ceil
-; CHECK: bl ceil
-; CHECK: bl ceil
+; CHECK: frip
+; CHECK: frip
+; CHECK: frip
+; CHECK: frip
 
 declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
 define <2 x double> @trunc_v2f64(<2 x double> %p)
@@ -57,8 +57,8 @@ define <2 x double> @trunc_v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
 ; CHECK: trunc_v2f64:
-; CHECK: bl trunc
-; CHECK: bl trunc
+; CHECK: friz
+; CHECK: friz
 
 declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
 define <4 x double> @trunc_v4f64(<4 x double> %p)
@@ -67,10 +67,10 @@ define <4 x double> @trunc_v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
 ; CHECK: trunc_v4f64:
-; CHECK: bl trunc
-; CHECK: bl trunc
-; CHECK: bl trunc
-; CHECK: bl trunc
+; CHECK: friz
+; CHECK: friz
+; CHECK: friz
+; CHECK: friz
 
 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
 define <2 x double> @nearbyint_v2f64(<2 x double> %p)
diff --git a/test/CodeGen/PowerPC/vrsave-spill.ll b/test/CodeGen/PowerPC/vrsave-spill.ll
new file mode 100644
index 000000000000..c73206d8fc86
--- /dev/null
+++ b/test/CodeGen/PowerPC/vrsave-spill.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin -mcpu=g5 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-apple-darwin"
+
+define <4 x float> @foo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %c = fadd <4 x float> %a, %b
+  %d = fmul <4 x float> %c, %a
+  call void asm sideeffect "", "~{VRsave}"() nounwind
+  br label %return
+
+; CHECK: @foo
+; CHECK: mfspr r{{[0-9]+}}, 256
+; CHECK: mtspr 256, r{{[0-9]+}}
+
+return:                                           ; preds = %entry
+  ret <4 x float> %d
+}
+
diff --git a/test/CodeGen/R600/README b/test/CodeGen/R600/README
new file mode 100644
index 000000000000..96998bba28f2
--- /dev/null
+++ b/test/CodeGen/R600/README
@@ -0,0 +1,21 @@
++==============================================================================+
+| How to organize the lit tests                                                |
++==============================================================================+
+
+- If you write a test for matching a single DAG opcode or intrinsic, it should
+  go in a file called {opcode_name,intrinsic_name}.ll (e.g. fadd.ll)
+
+- If you write a test that matches several DAG opcodes and checks for a single
+  ISA instruction, then that test should go in a file called {ISA_name}.ll (e.g.
+  bfi_int.ll
+
+- For all other tests, use your best judgement for organizing tests and naming
+  the files.
+
++==============================================================================+
+| Naming conventions                                                           |
++==============================================================================+
+
+- Use dash '-' and not underscore '_' to separate words in file names, unless
+  the file is named after a DAG opcode or ISA instruction that has an
+  underscore '_' in its name.
diff --git a/test/CodeGen/R600/add.v4i32.ll b/test/CodeGen/R600/add.ll
similarity index 100%
rename from test/CodeGen/R600/add.v4i32.ll
rename to test/CodeGen/R600/add.ll
diff --git a/test/CodeGen/R600/alu-split.ll b/test/CodeGen/R600/alu-split.ll
new file mode 100644
index 000000000000..afefcd9f78b0
--- /dev/null
+++ b/test/CodeGen/R600/alu-split.ll
@@ -0,0 +1,850 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: ALU
+;CHECK: ALU
+;CHECK: ALU
+;CHECK-NOT: ALU
+
+define void @main() #0 {
+main_body:
+  %0 = call float @llvm.R600.load.input(i32 4)
+  %1 = call float @llvm.R600.load.input(i32 5)
+  %2 = call float @llvm.R600.load.input(i32 6)
+  %3 = call float @llvm.R600.load.input(i32 7)
+  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %5 = extractelement <4 x float> %4, i32 0
+  %6 = fcmp une float 0x4016F2B020000000, %5
+  %7 = select i1 %6, float 1.000000e+00, float 0.000000e+00
+  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %9 = extractelement <4 x float> %8, i32 1
+  %10 = fcmp une float 0x401FDCC640000000, %9
+  %11 = select i1 %10, float 1.000000e+00, float 0.000000e+00
+  %12 = fsub float -0.000000e+00, %7
+  %13 = fptosi float %12 to i32
+  %14 = fsub float -0.000000e+00, %11
+  %15 = fptosi float %14 to i32
+  %16 = bitcast i32 %13 to float
+  %17 = bitcast i32 %15 to float
+  %18 = bitcast float %16 to i32
+  %19 = bitcast float %17 to i32
+  %20 = or i32 %18, %19
+  %21 = bitcast i32 %20 to float
+  %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17)
+  %23 = extractelement <4 x float> %22, i32 0
+  %24 = fcmp une float 0xC00574BC60000000, %23
+  %25 = select i1 %24, float 1.000000e+00, float 0.000000e+00
+  %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17)
+  %27 = extractelement <4 x float> %26, i32 1
+  %28 = fcmp une float 0x40210068E0000000, %27
+  %29 = select i1 %28, float 1.000000e+00, float 0.000000e+00
+  %30 = fsub float -0.000000e+00, %25
+  %31 = fptosi float %30 to i32
+  %32 = fsub float -0.000000e+00, %29
+  %33 = fptosi float %32 to i32
+  %34 = bitcast i32 %31 to float
+  %35 = bitcast i32 %33 to float
+  %36 = bitcast float %34 to i32
+  %37 = bitcast float %35 to i32
+  %38 = or i32 %36, %37
+  %39 = bitcast i32 %38 to float
+  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18)
+  %41 = extractelement <4 x float> %40, i32 0
+  %42 = fcmp une float 0xBFC9A6B500000000, %41
+  %43 = select i1 %42, float 1.000000e+00, float 0.000000e+00
+  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18)
+  %45 = extractelement <4 x float> %44, i32 1
+  %46 = fcmp une float 0xC0119BDA60000000, %45
+  %47 = select i1 %46, float 1.000000e+00, float 0.000000e+00
+  %48 = fsub float -0.000000e+00, %43
+  %49 = fptosi float %48 to i32
+  %50 = fsub float -0.000000e+00, %47
+  %51 = fptosi float %50 to i32
+  %52 = bitcast i32 %49 to float
+  %53 = bitcast i32 %51 to float
+  %54 = bitcast float %52 to i32
+  %55 = bitcast float %53 to i32
+  %56 = or i32 %54, %55
+  %57 = bitcast i32 %56 to float
+  %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19)
+  %59 = extractelement <4 x float> %58, i32 0
+  %60 = fcmp une float 0xC02085D640000000, %59
+  %61 = select i1 %60, float 1.000000e+00, float 0.000000e+00
+  %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19)
+  %63 = extractelement <4 x float> %62, i32 1
+  %64 = fcmp une float 0xBFD7C1BDA0000000, %63
+  %65 = select i1 %64, float 1.000000e+00, float 0.000000e+00
+  %66 = fsub float -0.000000e+00, %61
+  %67 = fptosi float %66 to i32
+  %68 = fsub float -0.000000e+00, %65
+  %69 = fptosi float %68 to i32
+  %70 = bitcast i32 %67 to float
+  %71 = bitcast i32 %69 to float
+  %72 = bitcast float %70 to i32
+  %73 = bitcast float %71 to i32
+  %74 = or i32 %72, %73
+  %75 = bitcast i32 %74 to float
+  %76 = insertelement <4 x float> undef, float %21, i32 0
+  %77 = insertelement <4 x float> %76, float %39, i32 1
+  %78 = insertelement <4 x float> %77, float %57, i32 2
+  %79 = insertelement <4 x float> %78, float %75, i32 3
+  %80 = insertelement <4 x float> undef, float %21, i32 0
+  %81 = insertelement <4 x float> %80, float %39, i32 1
+  %82 = insertelement <4 x float> %81, float %57, i32 2
+  %83 = insertelement <4 x float> %82, float %75, i32 3
+  %84 = call float @llvm.AMDGPU.dp4(<4 x float> %79, <4 x float> %83)
+  %85 = bitcast float %84 to i32
+  %86 = icmp ne i32 %85, 0
+  %87 = sext i1 %86 to i32
+  %88 = bitcast i32 %87 to float
+  %89 = bitcast float %88 to i32
+  %90 = xor i32 %89, -1
+  %91 = bitcast i32 %90 to float
+  %92 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20)
+  %93 = extractelement <4 x float> %92, i32 0
+  %94 = fcmp une float 0x401FDCC640000000, %93
+  %95 = select i1 %94, float 1.000000e+00, float 0.000000e+00
+  %96 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20)
+  %97 = extractelement <4 x float> %96, i32 1
+  %98 = fcmp une float 0xC00574BC60000000, %97
+  %99 = select i1 %98, float 1.000000e+00, float 0.000000e+00
+  %100 = fsub float -0.000000e+00, %95
+  %101 = fptosi float %100 to i32
+  %102 = fsub float -0.000000e+00, %99
+  %103 = fptosi float %102 to i32
+  %104 = bitcast i32 %101 to float
+  %105 = bitcast i32 %103 to float
+  %106 = bitcast float %104 to i32
+  %107 = bitcast float %105 to i32
+  %108 = or i32 %106, %107
+  %109 = bitcast i32 %108 to float
+  %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21)
+  %111 = extractelement <4 x float> %110, i32 0
+  %112 = fcmp une float 0x40210068E0000000, %111
+  %113 = select i1 %112, float 1.000000e+00, float 0.000000e+00
+  %114 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21)
+  %115 = extractelement <4 x float> %114, i32 1
+  %116 = fcmp une float 0xBFC9A6B500000000, %115
+  %117 = select i1 %116, float 1.000000e+00, float 0.000000e+00
+  %118 = fsub float -0.000000e+00, %113
+  %119 = fptosi float %118 to i32
+  %120 = fsub float -0.000000e+00, %117
+  %121 = fptosi float %120 to i32
+  %122 = bitcast i32 %119 to float
+  %123 = bitcast i32 %121 to float
+  %124 = bitcast float %122 to i32
+  %125 = bitcast float %123 to i32
+  %126 = or i32 %124, %125
+  %127 = bitcast i32 %126 to float
+  %128 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22)
+  %129 = extractelement <4 x float> %128, i32 0
+  %130 = fcmp une float 0xC0119BDA60000000, %129
+  %131 = select i1 %130, float 1.000000e+00, float 0.000000e+00
+  %132 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22)
+  %133 = extractelement <4 x float> %132, i32 1
+  %134 = fcmp une float 0xC02085D640000000, %133
+  %135 = select i1 %134, float 1.000000e+00, float 0.000000e+00
+  %136 = fsub float -0.000000e+00, %131
+  %137 = fptosi float %136 to i32
+  %138 = fsub float -0.000000e+00, %135
+  %139 = fptosi float %138 to i32
+  %140 = bitcast i32 %137 to float
+  %141 = bitcast i32 %139 to float
+  %142 = bitcast float %140 to i32
+  %143 = bitcast float %141 to i32
+  %144 = or i32 %142, %143
+  %145 = bitcast i32 %144 to float
+  %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+  %147 = extractelement <4 x float> %146, i32 0
+  %148 = fcmp une float 0xBFD7C1BDA0000000, %147
+  %149 = select i1 %148, float 1.000000e+00, float 0.000000e+00
+  %150 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+  %151 = extractelement <4 x float> %150, i32 1
+  %152 = fcmp une float 0x401E1D7DC0000000, %151
+  %153 = select i1 %152, float 1.000000e+00, float 0.000000e+00
+  %154 = fsub float -0.000000e+00, %149
+  %155 = fptosi float %154 to i32
+  %156 = fsub float -0.000000e+00, %153
+  %157 = fptosi float %156 to i32
+  %158 = bitcast i32 %155 to float
+  %159 = bitcast i32 %157 to float
+  %160 = bitcast float %158 to i32
+  %161 = bitcast float %159 to i32
+  %162 = or i32 %160, %161
+  %163 = bitcast i32 %162 to float
+  %164 = insertelement <4 x float> undef, float %109, i32 0
+  %165 = insertelement <4 x float> %164, float %127, i32 1
+  %166 = insertelement <4 x float> %165, float %145, i32 2
+  %167 = insertelement <4 x float> %166, float %163, i32 3
+  %168 = insertelement <4 x float> undef, float %109, i32 0
+  %169 = insertelement <4 x float> %168, float %127, i32 1
+  %170 = insertelement <4 x float> %169, float %145, i32 2
+  %171 = insertelement <4 x float> %170, float %163, i32 3
+  %172 = call float @llvm.AMDGPU.dp4(<4 x float> %167, <4 x float> %171)
+  %173 = bitcast float %172 to i32
+  %174 = icmp ne i32 %173, 0
+  %175 = sext i1 %174 to i32
+  %176 = bitcast i32 %175 to float
+  %177 = bitcast float %176 to i32
+  %178 = xor i32 %177, -1
+  %179 = bitcast i32 %178 to float
+  %180 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %181 = extractelement <4 x float> %180, i32 0
+  %182 = fcmp une float 0x401FDCC640000000, %181
+  %183 = select i1 %182, float 1.000000e+00, float 0.000000e+00
+  %184 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %185 = extractelement <4 x float> %184, i32 1
+  %186 = fcmp une float 0xC00574BC60000000, %185
+  %187 = select i1 %186, float 1.000000e+00, float 0.000000e+00
+  %188 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %189 = extractelement <4 x float> %188, i32 2
+  %190 = fcmp une float 0x40210068E0000000, %189
+  %191 = select i1 %190, float 1.000000e+00, float 0.000000e+00
+  %192 = fsub float -0.000000e+00, %183
+  %193 = fptosi float %192 to i32
+  %194 = fsub float -0.000000e+00, %187
+  %195 = fptosi float %194 to i32
+  %196 = fsub float -0.000000e+00, %191
+  %197 = fptosi float %196 to i32
+  %198 = bitcast i32 %193 to float
+  %199 = bitcast i32 %195 to float
+  %200 = bitcast i32 %197 to float
+  %201 = bitcast float %199 to i32
+  %202 = bitcast float %200 to i32
+  %203 = or i32 %201, %202
+  %204 = bitcast i32 %203 to float
+  %205 = bitcast float %198 to i32
+  %206 = bitcast float %204 to i32
+  %207 = or i32 %205, %206
+  %208 = bitcast i32 %207 to float
+  %209 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %210 = extractelement <4 x float> %209, i32 0
+  %211 = fcmp une float 0xBFC9A6B500000000, %210
+  %212 = select i1 %211, float 1.000000e+00, float 0.000000e+00
+  %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %214 = extractelement <4 x float> %213, i32 1
+  %215 = fcmp une float 0xC0119BDA60000000, %214
+  %216 = select i1 %215, float 1.000000e+00, float 0.000000e+00
+  %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %218 = extractelement <4 x float> %217, i32 2
+  %219 = fcmp une float 0xC02085D640000000, %218
+  %220 = select i1 %219, float 1.000000e+00, float 0.000000e+00
+  %221 = fsub float -0.000000e+00, %212
+  %222 = fptosi float %221 to i32
+  %223 = fsub float -0.000000e+00, %216
+  %224 = fptosi float %223 to i32
+  %225 = fsub float -0.000000e+00, %220
+  %226 = fptosi float %225 to i32
+  %227 = bitcast i32 %222 to float
+  %228 = bitcast i32 %224 to float
+  %229 = bitcast i32 %226 to float
+  %230 = bitcast float %228 to i32
+  %231 = bitcast float %229 to i32
+  %232 = or i32 %230, %231
+  %233 = bitcast i32 %232 to float
+  %234 = bitcast float %227 to i32
+  %235 = bitcast float %233 to i32
+  %236 = or i32 %234, %235
+  %237 = bitcast i32 %236 to float
+  %238 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %239 = extractelement <4 x float> %238, i32 0
+  %240 = fcmp une float 0xBFD7C1BDA0000000, %239
+  %241 = select i1 %240, float 1.000000e+00, float 0.000000e+00
+  %242 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %243 = extractelement <4 x float> %242, i32 1
+  %244 = fcmp une float 0x401E1D7DC0000000, %243
+  %245 = select i1 %244, float 1.000000e+00, float 0.000000e+00
+  %246 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %247 = extractelement <4 x float> %246, i32 2
+  %248 = fcmp une float 0xC019893740000000, %247
+  %249 = select i1 %248, float 1.000000e+00, float 0.000000e+00
+  %250 = fsub float -0.000000e+00, %241
+  %251 = fptosi float %250 to i32
+  %252 = fsub float -0.000000e+00, %245
+  %253 = fptosi float %252 to i32
+  %254 = fsub float -0.000000e+00, %249
+  %255 = fptosi float %254 to i32
+  %256 = bitcast i32 %251 to float
+  %257 = bitcast i32 %253 to float
+  %258 = bitcast i32 %255 to float
+  %259 = bitcast float %257 to i32
+  %260 = bitcast float %258 to i32
+  %261 = or i32 %259, %260
+  %262 = bitcast i32 %261 to float
+  %263 = bitcast float %256 to i32
+  %264 = bitcast float %262 to i32
+  %265 = or i32 %263, %264
+  %266 = bitcast i32 %265 to float
+  %267 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %268 = extractelement <4 x float> %267, i32 0
+  %269 = fcmp une float 0x40220F0D80000000, %268
+  %270 = select i1 %269, float 1.000000e+00, float 0.000000e+00
+  %271 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %272 = extractelement <4 x float> %271, i32 1
+  %273 = fcmp une float 0xC018E2EB20000000, %272
+  %274 = select i1 %273, float 1.000000e+00, float 0.000000e+00
+  %275 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %276 = extractelement <4 x float> %275, i32 2
+  %277 = fcmp une float 0xBFEA8DB8C0000000, %276
+  %278 = select i1 %277, float 1.000000e+00, float 0.000000e+00
+  %279 = fsub float -0.000000e+00, %270
+  %280 = fptosi float %279 to i32
+  %281 = fsub float -0.000000e+00, %274
+  %282 = fptosi float %281 to i32
+  %283 = fsub float -0.000000e+00, %278
+  %284 = fptosi float %283 to i32
+  %285 = bitcast i32 %280 to float
+  %286 = bitcast i32 %282 to float
+  %287 = bitcast i32 %284 to float
+  %288 = bitcast float %286 to i32
+  %289 = bitcast float %287 to i32
+  %290 = or i32 %288, %289
+  %291 = bitcast i32 %290 to float
+  %292 = bitcast float %285 to i32
+  %293 = bitcast float %291 to i32
+  %294 = or i32 %292, %293
+  %295 = bitcast i32 %294 to float
+  %296 = insertelement <4 x float> undef, float %208, i32 0
+  %297 = insertelement <4 x float> %296, float %237, i32 1
+  %298 = insertelement <4 x float> %297, float %266, i32 2
+  %299 = insertelement <4 x float> %298, float %295, i32 3
+  %300 = insertelement <4 x float> undef, float %208, i32 0
+  %301 = insertelement <4 x float> %300, float %237, i32 1
+  %302 = insertelement <4 x float> %301, float %266, i32 2
+  %303 = insertelement <4 x float> %302, float %295, i32 3
+  %304 = call float @llvm.AMDGPU.dp4(<4 x float> %299, <4 x float> %303)
+  %305 = bitcast float %304 to i32
+  %306 = icmp ne i32 %305, 0
+  %307 = sext i1 %306 to i32
+  %308 = bitcast i32 %307 to float
+  %309 = bitcast float %308 to i32
+  %310 = xor i32 %309, -1
+  %311 = bitcast i32 %310 to float
+  %312 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+  %313 = extractelement <4 x float> %312, i32 0
+  %314 = fcmp une float 0xC00574BC60000000, %313
+  %315 = select i1 %314, float 1.000000e+00, float 0.000000e+00
+  %316 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+  %317 = extractelement <4 x float> %316, i32 1
+  %318 = fcmp une float 0x40210068E0000000, %317
+  %319 = select i1 %318, float 1.000000e+00, float 0.000000e+00
+  %320 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+  %321 = extractelement <4 x float> %320, i32 2
+  %322 = fcmp une float 0xBFC9A6B500000000, %321
+  %323 = select i1 %322, float 1.000000e+00, float 0.000000e+00
+  %324 = fsub float -0.000000e+00, %315
+  %325 = fptosi float %324 to i32
+  %326 = fsub float -0.000000e+00, %319
+  %327 = fptosi float %326 to i32
+  %328 = fsub float -0.000000e+00, %323
+  %329 = fptosi float %328 to i32
+  %330 = bitcast i32 %325 to float
+  %331 = bitcast i32 %327 to float
+  %332 = bitcast i32 %329 to float
+  %333 = bitcast float %331 to i32
+  %334 = bitcast float %332 to i32
+  %335 = or i32 %333, %334
+  %336 = bitcast i32 %335 to float
+  %337 = bitcast float %330 to i32
+  %338 = bitcast float %336 to i32
+  %339 = or i32 %337, %338
+  %340 = bitcast i32 %339 to float
+  %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+  %342 = extractelement <4 x float> %341, i32 0
+  %343 = fcmp une float 0xC0119BDA60000000, %342
+  %344 = select i1 %343, float 1.000000e+00, float 0.000000e+00
+  %345 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+  %346 = extractelement <4 x float> %345, i32 1
+  %347 = fcmp une float 0xC02085D640000000, %346
+  %348 = select i1 %347, float 1.000000e+00, float 0.000000e+00
+  %349 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+  %350 = extractelement <4 x float> %349, i32 2
+  %351 = fcmp une float 0xBFD7C1BDA0000000, %350
+  %352 = select i1 %351, float 1.000000e+00, float 0.000000e+00
+  %353 = fsub float -0.000000e+00, %344
+  %354 = fptosi float %353 to i32
+  %355 = fsub float -0.000000e+00, %348
+  %356 = fptosi float %355 to i32
+  %357 = fsub float -0.000000e+00, %352
+  %358 = fptosi float %357 to i32
+  %359 = bitcast i32 %354 to float
+  %360 = bitcast i32 %356 to float
+  %361 = bitcast i32 %358 to float
+  %362 = bitcast float %360 to i32
+  %363 = bitcast float %361 to i32
+  %364 = or i32 %362, %363
+  %365 = bitcast i32 %364 to float
+  %366 = bitcast float %359 to i32
+  %367 = bitcast float %365 to i32
+  %368 = or i32 %366, %367
+  %369 = bitcast i32 %368 to float
+  %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %371 = extractelement <4 x float> %370, i32 0
+  %372 = fcmp une float 0x401E1D7DC0000000, %371
+  %373 = select i1 %372, float 1.000000e+00, float 0.000000e+00
+  %374 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %375 = extractelement <4 x float> %374, i32 1
+  %376 = fcmp une float 0xC019893740000000, %375
+  %377 = select i1 %376, float 1.000000e+00, float 0.000000e+00
+  %378 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %379 = extractelement <4 x float> %378, i32 2
+  %380 = fcmp une float 0x40220F0D80000000, %379
+  %381 = select i1 %380, float 1.000000e+00, float 0.000000e+00
+  %382 = fsub float -0.000000e+00, %373
+  %383 = fptosi float %382 to i32
+  %384 = fsub float -0.000000e+00, %377
+  %385 = fptosi float %384 to i32
+  %386 = fsub float -0.000000e+00, %381
+  %387 = fptosi float %386 to i32
+  %388 = bitcast i32 %383 to float
+  %389 = bitcast i32 %385 to float
+  %390 = bitcast i32 %387 to float
+  %391 = bitcast float %389 to i32
+  %392 = bitcast float %390 to i32
+  %393 = or i32 %391, %392
+  %394 = bitcast i32 %393 to float
+  %395 = bitcast float %388 to i32
+  %396 = bitcast float %394 to i32
+  %397 = or i32 %395, %396
+  %398 = bitcast i32 %397 to float
+  %399 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+  %400 = extractelement <4 x float> %399, i32 0
+  %401 = fcmp une float 0xC018E2EB20000000, %400
+  %402 = select i1 %401, float 1.000000e+00, float 0.000000e+00
+  %403 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+  %404 = extractelement <4 x float> %403, i32 1
+  %405 = fcmp une float 0xBFEA8DB8C0000000, %404
+  %406 = select i1 %405, float 1.000000e+00, float 0.000000e+00
+  %407 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+  %408 = extractelement <4 x float> %407, i32 2
+  %409 = fcmp une float 0x4015236E20000000, %408
+  %410 = select i1 %409, float 1.000000e+00, float 0.000000e+00
+  %411 = fsub float -0.000000e+00, %402
+  %412 = fptosi float %411 to i32
+  %413 = fsub float -0.000000e+00, %406
+  %414 = fptosi float %413 to i32
+  %415 = fsub float -0.000000e+00, %410
+  %416 = fptosi float %415 to i32
+  %417 = bitcast i32 %412 to float
+  %418 = bitcast i32 %414 to float
+  %419 = bitcast i32 %416 to float
+  %420 = bitcast float %418 to i32
+  %421 = bitcast float %419 to i32
+  %422 = or i32 %420, %421
+  %423 = bitcast i32 %422 to float
+  %424 = bitcast float %417 to i32
+  %425 = bitcast float %423 to i32
+  %426 = or i32 %424, %425
+  %427 = bitcast i32 %426 to float
+  %428 = insertelement <4 x float> undef, float %340, i32 0
+  %429 = insertelement <4 x float> %428, float %369, i32 1
+  %430 = insertelement <4 x float> %429, float %398, i32 2
+  %431 = insertelement <4 x float> %430, float %427, i32 3
+  %432 = insertelement <4 x float> undef, float %340, i32 0
+  %433 = insertelement <4 x float> %432, float %369, i32 1
+  %434 = insertelement <4 x float> %433, float %398, i32 2
+  %435 = insertelement <4 x float> %434, float %427, i32 3
+  %436 = call float @llvm.AMDGPU.dp4(<4 x float> %431, <4 x float> %435)
+  %437 = bitcast float %436 to i32
+  %438 = icmp ne i32 %437, 0
+  %439 = sext i1 %438 to i32
+  %440 = bitcast i32 %439 to float
+  %441 = bitcast float %440 to i32
+  %442 = xor i32 %441, -1
+  %443 = bitcast i32 %442 to float
+  %444 = load <4 x float> addrspace(8)* null
+  %445 = extractelement <4 x float> %444, i32 0
+  %446 = fcmp une float 0xC00574BC60000000, %445
+  %447 = select i1 %446, float 1.000000e+00, float 0.000000e+00
+  %448 = load <4 x float> addrspace(8)* null
+  %449 = extractelement <4 x float> %448, i32 1
+  %450 = fcmp une float 0x40210068E0000000, %449
+  %451 = select i1 %450, float 1.000000e+00, float 0.000000e+00
+  %452 = load <4 x float> addrspace(8)* null
+  %453 = extractelement <4 x float> %452, i32 2
+  %454 = fcmp une float 0xBFC9A6B500000000, %453
+  %455 = select i1 %454, float 1.000000e+00, float 0.000000e+00
+  %456 = load <4 x float> addrspace(8)* null
+  %457 = extractelement <4 x float> %456, i32 3
+  %458 = fcmp une float 0xC0119BDA60000000, %457
+  %459 = select i1 %458, float 1.000000e+00, float 0.000000e+00
+  %460 = fsub float -0.000000e+00, %447
+  %461 = fptosi float %460 to i32
+  %462 = fsub float -0.000000e+00, %451
+  %463 = fptosi float %462 to i32
+  %464 = fsub float -0.000000e+00, %455
+  %465 = fptosi float %464 to i32
+  %466 = fsub float -0.000000e+00, %459
+  %467 = fptosi float %466 to i32
+  %468 = bitcast i32 %461 to float
+  %469 = bitcast i32 %463 to float
+  %470 = bitcast i32 %465 to float
+  %471 = bitcast i32 %467 to float
+  %472 = bitcast float %468 to i32
+  %473 = bitcast float %469 to i32
+  %474 = or i32 %472, %473
+  %475 = bitcast i32 %474 to float
+  %476 = bitcast float %470 to i32
+  %477 = bitcast float %471 to i32
+  %478 = or i32 %476, %477
+  %479 = bitcast i32 %478 to float
+  %480 = bitcast float %475 to i32
+  %481 = bitcast float %479 to i32
+  %482 = or i32 %480, %481
+  %483 = bitcast i32 %482 to float
+  %484 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %485 = extractelement <4 x float> %484, i32 0
+  %486 = fcmp une float 0xC02085D640000000, %485
+  %487 = select i1 %486, float 1.000000e+00, float 0.000000e+00
+  %488 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %489 = extractelement <4 x float> %488, i32 1
+  %490 = fcmp une float 0xBFD7C1BDA0000000, %489
+  %491 = select i1 %490, float 1.000000e+00, float 0.000000e+00
+  %492 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %493 = extractelement <4 x float> %492, i32 2
+  %494 = fcmp une float 0x401E1D7DC0000000, %493
+  %495 = select i1 %494, float 1.000000e+00, float 0.000000e+00
+  %496 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %497 = extractelement <4 x float> %496, i32 3
+  %498 = fcmp une float 0xC019893740000000, %497
+  %499 = select i1 %498, float 1.000000e+00, float 0.000000e+00
+  %500 = fsub float -0.000000e+00, %487
+  %501 = fptosi float %500 to i32
+  %502 = fsub float -0.000000e+00, %491
+  %503 = fptosi float %502 to i32
+  %504 = fsub float -0.000000e+00, %495
+  %505 = fptosi float %504 to i32
+  %506 = fsub float -0.000000e+00, %499
+  %507 = fptosi float %506 to i32
+  %508 = bitcast i32 %501 to float
+  %509 = bitcast i32 %503 to float
+  %510 = bitcast i32 %505 to float
+  %511 = bitcast i32 %507 to float
+  %512 = bitcast float %508 to i32
+  %513 = bitcast float %509 to i32
+  %514 = or i32 %512, %513
+  %515 = bitcast i32 %514 to float
+  %516 = bitcast float %510 to i32
+  %517 = bitcast float %511 to i32
+  %518 = or i32 %516, %517
+  %519 = bitcast i32 %518 to float
+  %520 = bitcast float %515 to i32
+  %521 = bitcast float %519 to i32
+  %522 = or i32 %520, %521
+  %523 = bitcast i32 %522 to float
+  %524 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %525 = extractelement <4 x float> %524, i32 0
+  %526 = fcmp une float 0x40220F0D80000000, %525
+  %527 = select i1 %526, float 1.000000e+00, float 0.000000e+00
+  %528 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %529 = extractelement <4 x float> %528, i32 1
+  %530 = fcmp une float 0xC018E2EB20000000, %529
+  %531 = select i1 %530, float 1.000000e+00, float 0.000000e+00
+  %532 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %533 = extractelement <4 x float> %532, i32 2
+  %534 = fcmp une float 0xBFEA8DB8C0000000, %533
+  %535 = select i1 %534, float 1.000000e+00, float 0.000000e+00
+  %536 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %537 = extractelement <4 x float> %536, i32 3
+  %538 = fcmp une float 0x4015236E20000000, %537
+  %539 = select i1 %538, float 1.000000e+00, float 0.000000e+00
+  %540 = fsub float -0.000000e+00, %527
+  %541 = fptosi float %540 to i32
+  %542 = fsub float -0.000000e+00, %531
+  %543 = fptosi float %542 to i32
+  %544 = fsub float -0.000000e+00, %535
+  %545 = fptosi float %544 to i32
+  %546 = fsub float -0.000000e+00, %539
+  %547 = fptosi float %546 to i32
+  %548 = bitcast i32 %541 to float
+  %549 = bitcast i32 %543 to float
+  %550 = bitcast i32 %545 to float
+  %551 = bitcast i32 %547 to float
+  %552 = bitcast float %548 to i32
+  %553 = bitcast float %549 to i32
+  %554 = or i32 %552, %553
+  %555 = bitcast i32 %554 to float
+  %556 = bitcast float %550 to i32
+  %557 = bitcast float %551 to i32
+  %558 = or i32 %556, %557
+  %559 = bitcast i32 %558 to float
+  %560 = bitcast float %555 to i32
+  %561 = bitcast float %559 to i32
+  %562 = or i32 %560, %561
+  %563 = bitcast i32 %562 to float
+  %564 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %565 = extractelement <4 x float> %564, i32 0
+  %566 = fcmp une float 0x4016ED5D00000000, %565
+  %567 = select i1 %566, float 1.000000e+00, float 0.000000e+00
+  %568 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %569 = extractelement <4 x float> %568, i32 1
+  %570 = fcmp une float 0x402332FEC0000000, %569
+  %571 = select i1 %570, float 1.000000e+00, float 0.000000e+00
+  %572 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %573 = extractelement <4 x float> %572, i32 2
+  %574 = fcmp une float 0xC01484B5E0000000, %573
+  %575 = select i1 %574, float 1.000000e+00, float 0.000000e+00
+  %576 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %577 = extractelement <4 x float> %576, i32 3
+  %578 = fcmp une float 0x400179A6C0000000, %577
+  %579 = select i1 %578, float 1.000000e+00, float 0.000000e+00
+  %580 = fsub float -0.000000e+00, %567
+  %581 = fptosi float %580 to i32
+  %582 = fsub float -0.000000e+00, %571
+  %583 = fptosi float %582 to i32
+  %584 = fsub float -0.000000e+00, %575
+  %585 = fptosi float %584 to i32
+  %586 = fsub float -0.000000e+00, %579
+  %587 = fptosi float %586 to i32
+  %588 = bitcast i32 %581 to float
+  %589 = bitcast i32 %583 to float
+  %590 = bitcast i32 %585 to float
+  %591 = bitcast i32 %587 to float
+  %592 = bitcast float %588 to i32
+  %593 = bitcast float %589 to i32
+  %594 = or i32 %592, %593
+  %595 = bitcast i32 %594 to float
+  %596 = bitcast float %590 to i32
+  %597 = bitcast float %591 to i32
+  %598 = or i32 %596, %597
+  %599 = bitcast i32 %598 to float
+  %600 = bitcast float %595 to i32
+  %601 = bitcast float %599 to i32
+  %602 = or i32 %600, %601
+  %603 = bitcast i32 %602 to float
+  %604 = insertelement <4 x float> undef, float %483, i32 0
+  %605 = insertelement <4 x float> %604, float %523, i32 1
+  %606 = insertelement <4 x float> %605, float %563, i32 2
+  %607 = insertelement <4 x float> %606, float %603, i32 3
+  %608 = insertelement <4 x float> undef, float %483, i32 0
+  %609 = insertelement <4 x float> %608, float %523, i32 1
+  %610 = insertelement <4 x float> %609, float %563, i32 2
+  %611 = insertelement <4 x float> %610, float %603, i32 3
+  %612 = call float @llvm.AMDGPU.dp4(<4 x float> %607, <4 x float> %611)
+  %613 = bitcast float %612 to i32
+  %614 = icmp ne i32 %613, 0
+  %615 = sext i1 %614 to i32
+  %616 = bitcast i32 %615 to float
+  %617 = bitcast float %616 to i32
+  %618 = xor i32 %617, -1
+  %619 = bitcast i32 %618 to float
+  %620 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %621 = extractelement <4 x float> %620, i32 0
+  %622 = fcmp une float 0x40210068E0000000, %621
+  %623 = select i1 %622, float 1.000000e+00, float 0.000000e+00
+  %624 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %625 = extractelement <4 x float> %624, i32 1
+  %626 = fcmp une float 0xBFC9A6B500000000, %625
+  %627 = select i1 %626, float 1.000000e+00, float 0.000000e+00
+  %628 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %629 = extractelement <4 x float> %628, i32 2
+  %630 = fcmp une float 0xC0119BDA60000000, %629
+  %631 = select i1 %630, float 1.000000e+00, float 0.000000e+00
+  %632 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %633 = extractelement <4 x float> %632, i32 3
+  %634 = fcmp une float 0xC02085D640000000, %633
+  %635 = select i1 %634, float 1.000000e+00, float 0.000000e+00
+  %636 = fsub float -0.000000e+00, %623
+  %637 = fptosi float %636 to i32
+  %638 = fsub float -0.000000e+00, %627
+  %639 = fptosi float %638 to i32
+  %640 = fsub float -0.000000e+00, %631
+  %641 = fptosi float %640 to i32
+  %642 = fsub float -0.000000e+00, %635
+  %643 = fptosi float %642 to i32
+  %644 = bitcast i32 %637 to float
+  %645 = bitcast i32 %639 to float
+  %646 = bitcast i32 %641 to float
+  %647 = bitcast i32 %643 to float
+  %648 = bitcast float %644 to i32
+  %649 = bitcast float %645 to i32
+  %650 = or i32 %648, %649
+  %651 = bitcast i32 %650 to float
+  %652 = bitcast float %646 to i32
+  %653 = bitcast float %647 to i32
+  %654 = or i32 %652, %653
+  %655 = bitcast i32 %654 to float
+  %656 = bitcast float %651 to i32
+  %657 = bitcast float %655 to i32
+  %658 = or i32 %656, %657
+  %659 = bitcast i32 %658 to float
+  %660 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %661 = extractelement <4 x float> %660, i32 0
+  %662 = fcmp une float 0xBFD7C1BDA0000000, %661
+  %663 = select i1 %662, float 1.000000e+00, float 0.000000e+00
+  %664 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %665 = extractelement <4 x float> %664, i32 1
+  %666 = fcmp une float 0x401E1D7DC0000000, %665
+  %667 = select i1 %666, float 1.000000e+00, float 0.000000e+00
+  %668 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %669 = extractelement <4 x float> %668, i32 2
+  %670 = fcmp une float 0xC019893740000000, %669
+  %671 = select i1 %670, float 1.000000e+00, float 0.000000e+00
+  %672 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %673 = extractelement <4 x float> %672, i32 3
+  %674 = fcmp une float 0x40220F0D80000000, %673
+  %675 = select i1 %674, float 1.000000e+00, float 0.000000e+00
+  %676 = fsub float -0.000000e+00, %663
+  %677 = fptosi float %676 to i32
+  %678 = fsub float -0.000000e+00, %667
+  %679 = fptosi float %678 to i32
+  %680 = fsub float -0.000000e+00, %671
+  %681 = fptosi float %680 to i32
+  %682 = fsub float -0.000000e+00, %675
+  %683 = fptosi float %682 to i32
+  %684 = bitcast i32 %677 to float
+  %685 = bitcast i32 %679 to float
+  %686 = bitcast i32 %681 to float
+  %687 = bitcast i32 %683 to float
+  %688 = bitcast float %684 to i32
+  %689 = bitcast float %685 to i32
+  %690 = or i32 %688, %689
+  %691 = bitcast i32 %690 to float
+  %692 = bitcast float %686 to i32
+  %693 = bitcast float %687 to i32
+  %694 = or i32 %692, %693
+  %695 = bitcast i32 %694 to float
+  %696 = bitcast float %691 to i32
+  %697 = bitcast float %695 to i32
+  %698 = or i32 %696, %697
+  %699 = bitcast i32 %698 to float
+  %700 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %701 = extractelement <4 x float> %700, i32 0
+  %702 = fcmp une float 0xC018E2EB20000000, %701
+  %703 = select i1 %702, float 1.000000e+00, float 0.000000e+00
+  %704 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %705 = extractelement <4 x float> %704, i32 1
+  %706 = fcmp une float 0xBFEA8DB8C0000000, %705
+  %707 = select i1 %706, float 1.000000e+00, float 0.000000e+00
+  %708 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %709 = extractelement <4 x float> %708, i32 2
+  %710 = fcmp une float 0x4015236E20000000, %709
+  %711 = select i1 %710, float 1.000000e+00, float 0.000000e+00
+  %712 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %713 = extractelement <4 x float> %712, i32 3
+  %714 = fcmp une float 0x4016ED5D00000000, %713
+  %715 = select i1 %714, float 1.000000e+00, float 0.000000e+00
+  %716 = fsub float -0.000000e+00, %703
+  %717 = fptosi float %716 to i32
+  %718 = fsub float -0.000000e+00, %707
+  %719 = fptosi float %718 to i32
+  %720 = fsub float -0.000000e+00, %711
+  %721 = fptosi float %720 to i32
+  %722 = fsub float -0.000000e+00, %715
+  %723 = fptosi float %722 to i32
+  %724 = bitcast i32 %717 to float
+  %725 = bitcast i32 %719 to float
+  %726 = bitcast i32 %721 to float
+  %727 = bitcast i32 %723 to float
+  %728 = bitcast float %724 to i32
+  %729 = bitcast float %725 to i32
+  %730 = or i32 %728, %729
+  %731 = bitcast i32 %730 to float
+  %732 = bitcast float %726 to i32
+  %733 = bitcast float %727 to i32
+  %734 = or i32 %732, %733
+  %735 = bitcast i32 %734 to float
+  %736 = bitcast float %731 to i32
+  %737 = bitcast float %735 to i32
+  %738 = or i32 %736, %737
+  %739 = bitcast i32 %738 to float
+  %740 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %741 = extractelement <4 x float> %740, i32 0
+  %742 = fcmp une float 0x402332FEC0000000, %741
+  %743 = select i1 %742, float 1.000000e+00, float 0.000000e+00
+  %744 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %745 = extractelement <4 x float> %744, i32 1
+  %746 = fcmp une float 0xC01484B5E0000000, %745
+  %747 = select i1 %746, float 1.000000e+00, float 0.000000e+00
+  %748 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %749 = extractelement <4 x float> %748, i32 2
+  %750 = fcmp une float 0x400179A6C0000000, %749
+  %751 = select i1 %750, float 1.000000e+00, float 0.000000e+00
+  %752 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %753 = extractelement <4 x float> %752, i32 3
+  %754 = fcmp une float 0xBFEE752540000000, %753
+  %755 = select i1 %754, float 1.000000e+00, float 0.000000e+00
+  %756 = fsub float -0.000000e+00, %743
+  %757 = fptosi float %756 to i32
+  %758 = fsub float -0.000000e+00, %747
+  %759 = fptosi float %758 to i32
+  %760 = fsub float -0.000000e+00, %751
+  %761 = fptosi float %760 to i32
+  %762 = fsub float -0.000000e+00, %755
+  %763 = fptosi float %762 to i32
+  %764 = bitcast i32 %757 to float
+  %765 = bitcast i32 %759 to float
+  %766 = bitcast i32 %761 to float
+  %767 = bitcast i32 %763 to float
+  %768 = bitcast float %764 to i32
+  %769 = bitcast float %765 to i32
+  %770 = or i32 %768, %769
+  %771 = bitcast i32 %770 to float
+  %772 = bitcast float %766 to i32
+  %773 = bitcast float %767 to i32
+  %774 = or i32 %772, %773
+  %775 = bitcast i32 %774 to float
+  %776 = bitcast float %771 to i32
+  %777 = bitcast float %775 to i32
+  %778 = or i32 %776, %777
+  %779 = bitcast i32 %778 to float
+  %780 = insertelement <4 x float> undef, float %659, i32 0
+  %781 = insertelement <4 x float> %780, float %699, i32 1
+  %782 = insertelement <4 x float> %781, float %739, i32 2
+  %783 = insertelement <4 x float> %782, float %779, i32 3
+  %784 = insertelement <4 x float> undef, float %659, i32 0
+  %785 = insertelement <4 x float> %784, float %699, i32 1
+  %786 = insertelement <4 x float> %785, float %739, i32 2
+  %787 = insertelement <4 x float> %786, float %779, i32 3
+  %788 = call float @llvm.AMDGPU.dp4(<4 x float> %783, <4 x float> %787)
+  %789 = bitcast float %788 to i32
+  %790 = icmp ne i32 %789, 0
+  %791 = sext i1 %790 to i32
+  %792 = bitcast i32 %791 to float
+  %793 = bitcast float %792 to i32
+  %794 = xor i32 %793, -1
+  %795 = bitcast i32 %794 to float
+  %796 = bitcast float %91 to i32
+  %797 = bitcast float %179 to i32
+  %798 = and i32 %796, %797
+  %799 = bitcast i32 %798 to float
+  %800 = bitcast float %311 to i32
+  %801 = bitcast float %443 to i32
+  %802 = and i32 %800, %801
+  %803 = bitcast i32 %802 to float
+  %804 = bitcast float %799 to i32
+  %805 = bitcast float %803 to i32
+  %806 = and i32 %804, %805
+  %807 = bitcast i32 %806 to float
+  %808 = bitcast float %619 to i32
+  %809 = bitcast float %795 to i32
+  %810 = and i32 %808, %809
+  %811 = bitcast i32 %810 to float
+  %812 = bitcast float %807 to i32
+  %813 = bitcast float %811 to i32
+  %814 = and i32 %812, %813
+  %815 = bitcast i32 %814 to float
+  %816 = bitcast float %815 to i32
+  %817 = icmp ne i32 %816, 0
+  %. = select i1 %817, float 1.000000e+00, float 0.000000e+00
+  %.32 = select i1 %817, float 0.000000e+00, float 1.000000e+00
+  %818 = insertelement <4 x float> undef, float %0, i32 0
+  %819 = insertelement <4 x float> %818, float %1, i32 1
+  %820 = insertelement <4 x float> %819, float %2, i32 2
+  %821 = insertelement <4 x float> %820, float %3, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %821, i32 60, i32 1)
+  %822 = insertelement <4 x float> undef, float %.32, i32 0
+  %823 = insertelement <4 x float> %822, float %., i32 1
+  %824 = insertelement <4 x float> %823, float 0.000000e+00, i32 2
+  %825 = insertelement <4 x float> %824, float 1.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %825, i32 0, i32 2)
+  ret void
+}
+
+declare float @llvm.R600.load.input(i32) #1
+
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { readnone }
diff --git a/test/CodeGen/R600/and.v4i32.ll b/test/CodeGen/R600/and.ll
similarity index 100%
rename from test/CodeGen/R600/and.v4i32.ll
rename to test/CodeGen/R600/and.ll
diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll
new file mode 100644
index 000000000000..c9015a6e78ae
--- /dev/null
+++ b/test/CodeGen/R600/bfi_int.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s
+
+; BFI_INT Definition pattern from ISA docs
+; (y & x) | (z & ~x)
+;
+; R600-CHECK: @bfi_def
+; R600-CHECK: BFI_INT
+; SI-CHECK:   @bfi_def
+; SI-CHECK:   V_BFI_B32
+define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+entry:
+  %0 = xor i32 %x, -1
+  %1 = and i32 %z, %0
+  %2 = and i32 %y, %x
+  %3 = or i32 %1, %2
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; SHA-256 Ch function
+; z ^ (x & (y ^ z))
+; R600-CHECK: @bfi_sha256_ch
+; R600-CHECK: BFI_INT
+; SI-CHECK:   @bfi_sha256_ch
+; SI-CHECK:   V_BFI_B32
+define void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+entry:
+  %0 = xor i32 %y, %z
+  %1 = and i32 %x, %0
+  %2 = xor i32 %z, %1
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll
index a58674269a5f..09baee7a1dcd 100644
--- a/test/CodeGen/R600/disconnected-predset-break-bug.ll
+++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll
@@ -5,9 +5,10 @@
 ; and the PREDICATE_BREAK in this loop.
 
 ; CHECK: @loop_ge
-; CHECK: WHILE
+; CHECK: LOOP_START_DX10
 ; CHECK: PRED_SET
-; CHECK-NEXT: PREDICATED_BREAK
+; CHECK-NEXT: JUMP
+; CHECK-NEXT: LOOP_BREAK
 define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind {
 entry:
   %cmp5 = icmp sgt i32 %iterations, 0
diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll
new file mode 100644
index 000000000000..555ee3d5c3d9
--- /dev/null
+++ b/test/CodeGen/R600/elf.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=SI -o - | FileCheck --check-prefix=CONFIG-CHECK %s
+
+; ELF-CHECK: Format: ELF32
+; ELF-CHECK: Name: .AMDGPU.config
+
+; CONFIG-CHECK: .section .AMDGPU.config
+; CONFIG-CHECK-NEXT: .long   45096
+; CONFIG-CHECK-NEXT: .long   0
+define void @test(i32 %p) {
+   %i = add i32 %p, 2
+   %r = bitcast i32 %i to float
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+   ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll
index d7d1b6572c41..81a4fa5f9294 100644
--- a/test/CodeGen/R600/fadd.ll
+++ b/test/CodeGen/R600/fadd.ll
@@ -1,8 +1,9 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
+; CHECK: @fadd_f32
 ; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test() {
+define void @fadd_f32() {
    %r0 = call float @llvm.R600.load.input(i32 0)
    %r1 = call float @llvm.R600.load.input(i32 1)
    %r2 = fadd float %r0, %r1
@@ -14,3 +15,17 @@ declare float @llvm.R600.load.input(i32) readnone
 
 declare void @llvm.AMDGPU.store.output(float, i32)
 
+; CHECK: @fadd_v4f32
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+  %a = load <4 x float> addrspace(1) * %in
+  %b = load <4 x float> addrspace(1) * %b_ptr
+  %result = fadd <4 x float> %a, %b
+  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fadd.v4f32.ll b/test/CodeGen/R600/fadd.v4f32.ll
deleted file mode 100644
index 85dbfd52cbb3..000000000000
--- a/test/CodeGen/R600/fadd.v4f32.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1) * %in
-  %b = load <4 x float> addrspace(1) * %b_ptr
-  %result = fadd <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.ll
similarity index 100%
rename from test/CodeGen/R600/fdiv.v4f32.ll
rename to test/CodeGen/R600/fdiv.ll
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll
index eb1d523c0bb4..7fd22d823d78 100644
--- a/test/CodeGen/R600/fmul.ll
+++ b/test/CodeGen/R600/fmul.ll
@@ -1,8 +1,9 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
+; CHECK: @fmul_f32
 ; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test() {
+define void @fmul_f32() {
    %r0 = call float @llvm.R600.load.input(i32 0)
    %r1 = call float @llvm.R600.load.input(i32 1)
    %r2 = fmul float %r0, %r1
@@ -14,3 +15,17 @@ declare float @llvm.R600.load.input(i32) readnone
 
 declare void @llvm.AMDGPU.store.output(float, i32)
 
+; CHECK: @fmul_v4f32
+; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+  %a = load <4 x float> addrspace(1) * %in
+  %b = load <4 x float> addrspace(1) * %b_ptr
+  %result = fmul <4 x float> %a, %b
+  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll
new file mode 100644
index 000000000000..9c21ad24e530
--- /dev/null
+++ b/test/CodeGen/R600/fp_to_sint.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @fp_to_sint_v4i32
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %value = load <4 x float> addrspace(1) * %in
+  %result = fptosi <4 x float> %value to <4 x i32>
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll
new file mode 100644
index 000000000000..d91098f71683
--- /dev/null
+++ b/test/CodeGen/R600/fp_to_uint.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @fp_to_uint_v4i32
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %value = load <4 x float> addrspace(1) * %in
+  %result = fptoui <4 x float> %value to <4 x i32>
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll
index 591aa52676a4..812388b66fd7 100644
--- a/test/CodeGen/R600/fsub.ll
+++ b/test/CodeGen/R600/fsub.ll
@@ -1,8 +1,9 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
+; CHECK: @fsub_f32
 ; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
 
-define void @test() {
+define void @fsub_f32() {
    %r0 = call float @llvm.R600.load.input(i32 0)
    %r1 = call float @llvm.R600.load.input(i32 1)
    %r2 = fsub float %r0, %r1
@@ -14,3 +15,17 @@ declare float @llvm.R600.load.input(i32) readnone
 
 declare void @llvm.AMDGPU.store.output(float, i32)
 
+; CHECK: @fsub_v4f32
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+  %a = load <4 x float> addrspace(1) * %in
+  %b = load <4 x float> addrspace(1) * %b_ptr
+  %result = fsub <4 x float> %a, %b
+  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fsub.v4f32.ll b/test/CodeGen/R600/fsub.v4f32.ll
deleted file mode 100644
index 612a57e4b609..000000000000
--- a/test/CodeGen/R600/fsub.v4f32.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1) * %in
-  %b = load <4 x float> addrspace(1) * %b_ptr
-  %result = fsub <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/i8_to_double_to_float.ll b/test/CodeGen/R600/i8-to-double-to-float.ll
similarity index 100%
rename from test/CodeGen/R600/i8_to_double_to_float.ll
rename to test/CodeGen/R600/i8-to-double-to-float.ll
diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll
new file mode 100644
index 000000000000..979efb00e7bd
--- /dev/null
+++ b/test/CodeGen/R600/imm.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+; Use a 64-bit value with lo bits that can be represented as an inline constant
+; CHECK: @i64_imm_inline_lo
+; CHECK: S_MOV_B32 [[LO:SGPR[0-9]+]], 5
+; CHECK: V_MOV_B32_e32 [[LO_VGPR:VGPR[0-9]+]], [[LO]]
+; CHECK: BUFFER_STORE_DWORDX2 [[LO_VGPR]]_
+define void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
+entry:
+  store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
+  ret void
+}
+
+; Use a 64-bit value with hi bits that can be represented as an inline constant
+; CHECK: @i64_imm_inline_hi
+; CHECK: S_MOV_B32 [[HI:SGPR[0-9]+]], 5
+; CHECK: V_MOV_B32_e32 [[HI_VGPR:VGPR[0-9]+]], [[HI]]
+; CHECK: BUFFER_STORE_DWORDX2 {{VGPR[0-9]+}}_[[HI_VGPR]]
+define void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
+entry:
+  store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
+  ret void
+}
diff --git a/test/CodeGen/R600/jump-address.ll b/test/CodeGen/R600/jump-address.ll
new file mode 100644
index 000000000000..ae9c8bba4fd6
--- /dev/null
+++ b/test/CodeGen/R600/jump-address.ll
@@ -0,0 +1,52 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: JUMP @3
+; CHECK: EXPORT
+; CHECK-NOT: EXPORT
+
+define void @main() #0 {
+main_body:
+  %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %1 = extractelement <4 x float> %0, i32 0
+  %2 = bitcast float %1 to i32
+  %3 = icmp eq i32 %2, 0
+  %4 = sext i1 %3 to i32
+  %5 = bitcast i32 %4 to float
+  %6 = bitcast float %5 to i32
+  %7 = icmp ne i32 %6, 0
+  br i1 %7, label %ENDIF, label %ELSE
+
+ELSE:                                             ; preds = %main_body
+  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %9 = extractelement <4 x float> %8, i32 0
+  %10 = bitcast float %9 to i32
+  %11 = icmp eq i32 %10, 1
+  %12 = sext i1 %11 to i32
+  %13 = bitcast i32 %12 to float
+  %14 = bitcast float %13 to i32
+  %15 = icmp ne i32 %14, 0
+  br i1 %15, label %IF13, label %ENDIF
+
+ENDIF:                                            ; preds = %IF13, %ELSE, %main_body
+  %temp.0 = phi float [ 0xFFF8000000000000, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+  %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ]
+  %temp2.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+  %temp3.0 = phi float [ 5.000000e-01, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+  %16 = insertelement <4 x float> undef, float %temp.0, i32 0
+  %17 = insertelement <4 x float> %16, float %temp1.0, i32 1
+  %18 = insertelement <4 x float> %17, float %temp2.0, i32 2
+  %19 = insertelement <4 x float> %18, float %temp3.0, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
+  ret void
+
+IF13:                                             ; preds = %ELSE
+  %20 = load <4 x float> addrspace(8)* null
+  %21 = extractelement <4 x float> %20, i32 0
+  %22 = fsub float -0.000000e+00, %21
+  %23 = fadd float 0xFFF8000000000000, %22
+  br label %ENDIF
+}
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
index e8e2bf5f0f68..3d70e4bd54aa 100644
--- a/test/CodeGen/R600/kcache-fold.ll
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -1,7 +1,7 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ; CHECK: @main1
-; CHECK: MOV T{{[0-9]+\.[XYZW], CBuf0\[[0-9]+\]\.[XYZW]}}
+; CHECK: MOV T{{[0-9]+\.[XYZW], KC0}}
 define void @main1() {
 main_body:
   %0 = load <4 x float> addrspace(8)* null
diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
index a8f604ac6d30..e45722c3fa67 100644
--- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
+++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
 
 ;CHECK: S_MOV_B32
 ;CHECK-NEXT: V_INTERP_MOV_F32
@@ -14,7 +14,7 @@ main_body:
 
 declare void @llvm.AMDGPU.shader.type(i32)
 
-declare float @llvm.SI.fs.constant(i32, i32, i32) readonly
+declare float @llvm.SI.fs.constant(i32, i32, i32) readnone
 
 declare i32 @llvm.SI.packf16(float, float) readnone
 
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll
index d397f3b67889..de06354a5646 100644
--- a/test/CodeGen/R600/llvm.SI.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -1,71 +1,140 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
 
-;CHECK: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1904
-;CHECK-NEXT: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
 
-define void @test() {
-   %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+   %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
+   %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
+   %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
+   %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
+   %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
+   %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
+   %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
+   %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
+   %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
+   %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
+   %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
+   %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
+   %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
+   %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
+   %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
+   %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
+   %res1 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v1,
       <8 x i32> undef, <4 x i32> undef, i32 1)
-   %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res2 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v2,
       <8 x i32> undef, <4 x i32> undef, i32 2)
-   %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res3 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v3,
       <8 x i32> undef, <4 x i32> undef, i32 3)
-   %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res4 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v4,
       <8 x i32> undef, <4 x i32> undef, i32 4)
-   %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res5 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v5,
       <8 x i32> undef, <4 x i32> undef, i32 5)
-   %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res6 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v6,
       <8 x i32> undef, <4 x i32> undef, i32 6)
-   %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res7 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v7,
       <8 x i32> undef, <4 x i32> undef, i32 7)
-   %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res8 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v8,
       <8 x i32> undef, <4 x i32> undef, i32 8)
-   %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res9 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v9,
       <8 x i32> undef, <4 x i32> undef, i32 9)
-   %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res10 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v10,
       <8 x i32> undef, <4 x i32> undef, i32 10)
-   %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res11 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v11,
       <8 x i32> undef, <4 x i32> undef, i32 11)
-   %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res12 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v12,
       <8 x i32> undef, <4 x i32> undef, i32 12)
-   %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res13 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v13,
       <8 x i32> undef, <4 x i32> undef, i32 13)
-   %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res14 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v14,
       <8 x i32> undef, <4 x i32> undef, i32 14)
-   %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res15 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v15,
       <8 x i32> undef, <4 x i32> undef, i32 15)
-   %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+   %res16 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v16,
       <8 x i32> undef, <4 x i32> undef, i32 16)
+   %e1 = extractelement <4 x float> %res1, i32 0
+   %e2 = extractelement <4 x float> %res2, i32 1
+   %e3 = extractelement <4 x float> %res3, i32 2
+   %e4 = extractelement <4 x float> %res4, i32 3
+   %t0 = extractelement <4 x float> %res5, i32 0
+   %t1 = extractelement <4 x float> %res5, i32 1
+   %e5 = fadd float %t0, %t1
+   %t2 = extractelement <4 x float> %res6, i32 0
+   %t3 = extractelement <4 x float> %res6, i32 2
+   %e6 = fadd float %t2, %t3
+   %t4 = extractelement <4 x float> %res7, i32 0
+   %t5 = extractelement <4 x float> %res7, i32 3
+   %e7 = fadd float %t4, %t5
+   %t6 = extractelement <4 x float> %res8, i32 1
+   %t7 = extractelement <4 x float> %res8, i32 2
+   %e8 = fadd float %t6, %t7
+   %t8 = extractelement <4 x float> %res9, i32 1
+   %t9 = extractelement <4 x float> %res9, i32 3
+   %e9 = fadd float %t8, %t9
+   %t10 = extractelement <4 x float> %res10, i32 2
+   %t11 = extractelement <4 x float> %res10, i32 3
+   %e10 = fadd float %t10, %t11
+   %t12 = extractelement <4 x float> %res11, i32 0
+   %t13 = extractelement <4 x float> %res11, i32 1
+   %t14 = extractelement <4 x float> %res11, i32 2
+   %t15 = fadd float %t12, %t13
+   %e11 = fadd float %t14, %t15
+   %t16 = extractelement <4 x float> %res12, i32 0
+   %t17 = extractelement <4 x float> %res12, i32 1
+   %t18 = extractelement <4 x float> %res12, i32 3
+   %t19 = fadd float %t16, %t17
+   %e12 = fadd float %t18, %t19
+   %t20 = extractelement <4 x float> %res13, i32 0
+   %t21 = extractelement <4 x float> %res13, i32 2
+   %t22 = extractelement <4 x float> %res13, i32 3
+   %t23 = fadd float %t20, %t21
+   %e13 = fadd float %t22, %t23
+   %t24 = extractelement <4 x float> %res14, i32 1
+   %t25 = extractelement <4 x float> %res14, i32 2
+   %t26 = extractelement <4 x float> %res14, i32 3
+   %t27 = fadd float %t24, %t25
+   %e14 = fadd float %t26, %t27
+   %t28 = extractelement <4 x float> %res15, i32 0
+   %t29 = extractelement <4 x float> %res15, i32 1
+   %t30 = extractelement <4 x float> %res15, i32 2
+   %t31 = extractelement <4 x float> %res15, i32 3
+   %t32 = fadd float %t28, %t29
+   %t33 = fadd float %t30, %t31
+   %e15 = fadd float %t32, %t33
+   %e16 = extractelement <4 x float> %res16, i32 3
+   %s1 = fadd float %e1, %e2
+   %s2 = fadd float %s1, %e3
+   %s3 = fadd float %s2, %e4
+   %s4 = fadd float %s3, %e5
+   %s5 = fadd float %s4, %e6
+   %s6 = fadd float %s5, %e7
+   %s7 = fadd float %s6, %e8
+   %s8 = fadd float %s7, %e9
+   %s9 = fadd float %s8, %e10
+   %s10 = fadd float %s9, %e11
+   %s11 = fadd float %s10, %e12
+   %s12 = fadd float %s11, %e13
+   %s13 = fadd float %s12, %e14
+   %s14 = fadd float %s13, %e15
+   %s15 = fadd float %s14, %e16
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
    ret void
 }
 
-declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32)
+declare <4 x float> @llvm.SI.sample.(<4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll
index 0ae91725793f..b4ce9f429f16 100644
--- a/test/CodeGen/R600/llvm.pow.ll
+++ b/test/CodeGen/R600/llvm.pow.ll
@@ -1,7 +1,7 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK-NEXT: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test() {
diff --git a/test/CodeGen/R600/load.constant_addrspace.f32.ll b/test/CodeGen/R600/load.constant_addrspace.f32.ll
deleted file mode 100644
index 93627283bb94..000000000000
--- a/test/CodeGen/R600/load.constant_addrspace.f32.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}}
-
-define void @test(float addrspace(1)* %out, float addrspace(2)* %in) {
-  %1 = load float addrspace(2)* %in
-  store float %1, float addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/load.i8.ll b/test/CodeGen/R600/load.i8.ll
deleted file mode 100644
index b070dcd52049..000000000000
--- a/test/CodeGen/R600/load.i8.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-
-define void @test(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
-  %1 = load i8 addrspace(1)* %in
-  %2 = zext i8 %1 to i32
-  store i32 %2, i32 addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
new file mode 100644
index 000000000000..b03245ae87b3
--- /dev/null
+++ b/test/CodeGen/R600/load.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Load an i8 value from the global address space.
+; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
+  %1 = load i8 addrspace(1)* %in
+  %2 = zext i8 %1 to i32
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load a f32 value from the constant address space.
+; CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
+  %1 = load float addrspace(2)* %in
+  store float %1, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll
new file mode 100644
index 000000000000..dc9295e8e774
--- /dev/null
+++ b/test/CodeGen/R600/loop-address.ll
@@ -0,0 +1,44 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: TEX
+;CHECK: ALU_PUSH
+;CHECK: JUMP @4
+;CHECK: ELSE @16
+;CHECK: TEX
+;CHECK: LOOP_START_DX10 @15
+;CHECK: LOOP_BREAK @14
+;CHECK: POP @16
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+target triple = "r600--"
+
+define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) #0 {
+entry:
+  %cmp5 = icmp sgt i32 %iterations, 0
+  br i1 %cmp5, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ]
+  %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %i.07 = add nsw i32 %i.07.in, -1
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06
+  store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4, !tbaa !4
+  %add = add nsw i32 %ai.06, 1
+  %exitcond = icmp eq i32 %add, %iterations
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+attributes #0 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
+
+!opencl.kernels = !{!0, !1, !2, !3}
+
+!0 = metadata !{void (i32 addrspace(1)*, i32)* @loop_ge}
+!1 = metadata !{null}
+!2 = metadata !{null}
+!3 = metadata !{null}
+!4 = metadata !{metadata !"int", metadata !5}
+!5 = metadata !{metadata !"omnipotent char", metadata !6}
+!6 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll
new file mode 100644
index 000000000000..fb698da62719
--- /dev/null
+++ b/test/CodeGen/R600/lshl.ll
@@ -0,0 +1,14 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0
+
+define void @test(i32 %p) {
+   %i = mul i32 %p, 2
+   %r = bitcast i32 %i to float
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+   ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll
new file mode 100644
index 000000000000..e0ed3ac07866
--- /dev/null
+++ b/test/CodeGen/R600/lshr.ll
@@ -0,0 +1,14 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
+
+define void @test(i32 %p) {
+   %i = udiv i32 %p, 2
+   %r = bitcast i32 %i to float
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+   ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll
new file mode 100644
index 000000000000..bc17a597873e
--- /dev/null
+++ b/test/CodeGen/R600/mulhu.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_MOV_B32_e32 VGPR1, -1431655765
+;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0
+;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
+
+define void @test(i32 %p) {
+   %i = udiv i32 %p, 3
+   %r = bitcast i32 %i to float
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+   ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll
index 18895a423e50..eb8b052b6f72 100644
--- a/test/CodeGen/R600/predicates.ll
+++ b/test/CodeGen/R600/predicates.ll
@@ -45,10 +45,12 @@ ENDIF:
 }
 
 ; CHECK: @nested_if
-; CHECK: IF_PREDICATE_SET
+; CHECK: ALU_PUSH_BEFORE
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
+; CHECK: JUMP
 ; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
 ; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
-; CHECK: ENDIF
+; CHECK: POP
 define void @nested_if(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = icmp sgt i32 %in, 0
@@ -70,11 +72,13 @@ ENDIF:
 }
 
 ; CHECK: @nested_if_else
-; CHECK: IF_PREDICATE_SET
+; CHECK: ALU_PUSH_BEFORE
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
+; CHECK: JUMP
 ; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
 ; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
 ; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
-; CHECK: ENDIF
+; CHECK: POP
 define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = icmp sgt i32 %in, 0
diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc-cnd.ll
similarity index 100%
rename from test/CodeGen/R600/selectcc_cnde.ll
rename to test/CodeGen/R600/selectcc-cnd.ll
diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc-cnde-int.ll
similarity index 100%
rename from test/CodeGen/R600/selectcc_cnde_int.ll
rename to test/CodeGen/R600/selectcc-cnde-int.ll
diff --git a/test/CodeGen/R600/setcc.v4i32.ll b/test/CodeGen/R600/setcc.ll
similarity index 100%
rename from test/CodeGen/R600/setcc.v4i32.ll
rename to test/CodeGen/R600/setcc.ll
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
new file mode 100644
index 000000000000..4622203ffdbc
--- /dev/null
+++ b/test/CodeGen/R600/seto.ll
@@ -0,0 +1,13 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
+
+define void @main(float %p) {
+main_body:
+  %c = fcmp oeq float %p, %p
+  %r = select i1 %c, float 1.000000e+00, float 0.000000e+00
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
+  ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
new file mode 100644
index 000000000000..0bf5801b1c33
--- /dev/null
+++ b/test/CodeGen/R600/setuo.ll
@@ -0,0 +1,13 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
+
+define void @main(float %p) {
+main_body:
+  %c = fcmp une float %p, %p
+  %r = select i1 %c, float 1.000000e+00, float 0.000000e+00
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
+  ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll
new file mode 100644
index 000000000000..6a56db352d60
--- /dev/null
+++ b/test/CodeGen/R600/sint_to_fp.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @sint_to_fp_v4i32
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %value = load <4 x i32> addrspace(1) * %in
+  %result = sitofp <4 x i32> %value to <4 x float>
+  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
new file mode 100644
index 000000000000..4d673f3ea326
--- /dev/null
+++ b/test/CodeGen/R600/store.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
+
+; floating-point store
+; EG-CHECK: @store_f32
+; EG-CHECK: RAT_WRITE_CACHELESS_32_eg T{{[0-9]+\.X, T[0-9]+\.X}}, 1
+; SI-CHECK: @store_f32
+; SI-CHECK: BUFFER_STORE_DWORD
+
+define void @store_f32(float addrspace(1)* %out, float %in) {
+  store float %in, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/store.r600.ll b/test/CodeGen/R600/store.r600.ll
new file mode 100644
index 000000000000..5ffb7f1809f8
--- /dev/null
+++ b/test/CodeGen/R600/store.r600.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+
+; XXX: Merge this test into store.ll once it is supported on SI
+
+; v4i32 store
+; EG-CHECK: @store_v4i32
+; EG-CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
+
+define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %1 = load <4 x i32> addrspace(1) * %in
+  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; v4f32 store
+; EG-CHECK: @store_v4f32
+; EG-CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
+define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %1 = load <4 x float> addrspace(1) * %in
+  store <4 x float> %1, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/store.v4f32.ll b/test/CodeGen/R600/store.v4f32.ll
deleted file mode 100644
index 8b0d24445971..000000000000
--- a/test/CodeGen/R600/store.v4f32.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
-
-define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %1 = load <4 x float> addrspace(1) * %in
-  store <4 x float> %1, <4 x float> addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/store.v4i32.ll b/test/CodeGen/R600/store.v4i32.ll
deleted file mode 100644
index a659815ddeba..000000000000
--- a/test/CodeGen/R600/store.v4i32.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
-
-define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %1 = load <4 x i32> addrspace(1) * %in
-  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/udiv.v4i32.ll b/test/CodeGen/R600/udiv.ll
similarity index 100%
rename from test/CodeGen/R600/udiv.v4i32.ll
rename to test/CodeGen/R600/udiv.ll
diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll
new file mode 100644
index 000000000000..ae8fc8ed6c9a
--- /dev/null
+++ b/test/CodeGen/R600/uint_to_fp.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @uint_to_fp_v4i32
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %value = load <4 x i32> addrspace(1) * %in
+  %result = uitofp <4 x i32> %value to <4 x float>
+  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll
new file mode 100644
index 000000000000..dad02dd76f0a
--- /dev/null
+++ b/test/CodeGen/R600/urecip.ll
@@ -0,0 +1,12 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_RCP_IFLAG_F32_e32
+
+define void @test(i32 %p, i32 %q) {
+   %i = udiv i32 %p, %q
+   %r = bitcast i32 %i to float
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+   ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/urem.v4i32.ll b/test/CodeGen/R600/urem.ll
similarity index 100%
rename from test/CodeGen/R600/urem.v4i32.ll
rename to test/CodeGen/R600/urem.ll
diff --git a/test/CodeGen/R600/vec4-expand.ll b/test/CodeGen/R600/vec4-expand.ll
deleted file mode 100644
index 8f62bc692908..000000000000
--- a/test/CodeGen/R600/vec4-expand.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-; CHECK: @fp_to_sint
-; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @fp_to_sint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %value = load <4 x float> addrspace(1) * %in
-  %result = fptosi <4 x float> %value to <4 x i32>
-  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; CHECK: @fp_to_uint
-; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @fp_to_uint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %value = load <4 x float> addrspace(1) * %in
-  %result = fptoui <4 x float> %value to <4 x i32>
-  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; CHECK: @sint_to_fp
-; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @sint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %value = load <4 x i32> addrspace(1) * %in
-  %result = sitofp <4 x i32> %value to <4 x float>
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
-  ret void
-}
-
-; CHECK: @uint_to_fp
-; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @uint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %value = load <4 x i32> addrspace(1) * %in
-  %result = uitofp <4 x i32> %value to <4 x float>
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/SPARC/64abi.ll b/test/CodeGen/SPARC/64abi.ll
new file mode 100644
index 000000000000..ec9713572141
--- /dev/null
+++ b/test/CodeGen/SPARC/64abi.ll
@@ -0,0 +1,378 @@
+; RUN: llc < %s -march=sparcv9 -disable-sparc-delay-filler | FileCheck %s
+
+; CHECK: intarg
+; The save/restore frame is not strictly necessary here, but we would need to
+; refer to %o registers instead.
+; CHECK: save %sp, -128, %sp
+; CHECK: stb %i0, [%i4]
+; CHECK: stb %i1, [%i4]
+; CHECK: sth %i2, [%i4]
+; CHECK: st  %i3, [%i4]
+; CHECK: stx %i4, [%i4]
+; CHECK: st  %i5, [%i4]
+; CHECK: ld [%fp+2227], [[R:%[gilo][0-7]]]
+; CHECK: st  [[R]], [%i4]
+; CHECK: ldx [%fp+2231], [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%i4]
+; CHECK: restore
+define void @intarg(i8  %a0,   ; %i0
+                    i8  %a1,   ; %i1
+                    i16 %a2,   ; %i2
+                    i32 %a3,   ; %i3
+                    i8* %a4,   ; %i4
+                    i32 %a5,   ; %i5
+                    i32 signext %a6,   ; [%fp+BIAS+176]
+                    i8* %a7) { ; [%fp+BIAS+184]
+  store i8 %a0, i8* %a4
+  store i8 %a1, i8* %a4
+  %p16 = bitcast i8* %a4 to i16*
+  store i16 %a2, i16* %p16
+  %p32 = bitcast i8* %a4 to i32*
+  store i32 %a3, i32* %p32
+  %pp = bitcast i8* %a4 to i8**
+  store i8* %a4, i8** %pp
+  store i32 %a5, i32* %p32
+  store i32 %a6, i32* %p32
+  store i8* %a7, i8** %pp
+  ret void
+}
+
+; CHECK: call_intarg
+; 16 saved + 8 args.
+; CHECK: save %sp, -192, %sp
+; Sign-extend and store the full 64 bits.
+; CHECK: sra %i0, 0, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%sp+2223]
+; Use %o0-%o5 for outgoing arguments
+; CHECK: or %g0, 5, %o5
+; CHECK: call intarg
+; CHECK-NOT: add %sp
+; CHECK: restore
+define void @call_intarg(i32 %i0, i8* %i1) {
+  call void @intarg(i8 0, i8 1, i16 2, i32 3, i8* undef, i32 5, i32 %i0, i8* %i1)
+  ret void
+}
+
+; CHECK: floatarg
+; CHECK: save %sp, -128, %sp
+; CHECK: fstod %f1,
+; CHECK: faddd %f2,
+; CHECK: faddd %f4,
+; CHECK: faddd %f6,
+; CHECK: ld [%fp+2307], [[F:%f[0-9]+]]
+; CHECK: fadds %f31, [[F]]
+define double @floatarg(float %a0,    ; %f1
+                        double %a1,   ; %d2
+                        double %a2,   ; %d4
+                        double %a3,   ; %d6
+                        float %a4,    ; %f9
+                        float %a5,    ; %f11
+                        float %a6,    ; %f13
+                        float %a7,    ; %f15
+                        float %a8,    ; %f17
+                        float %a9,    ; %f19
+                        float %a10,   ; %f21
+                        float %a11,   ; %f23
+                        float %a12,   ; %f25
+                        float %a13,   ; %f27
+                        float %a14,   ; %f29
+                        float %a15,   ; %f31
+                        float %a16,   ; [%fp+BIAS+256] (using 8 bytes)
+                        double %a17) { ; [%fp+BIAS+264] (using 8 bytes)
+  %d0 = fpext float %a0 to double
+  %s1 = fadd double %a1, %d0
+  %s2 = fadd double %a2, %s1
+  %s3 = fadd double %a3, %s2
+  %s16 = fadd float %a15, %a16
+  %d16 = fpext float %s16 to double
+  %s17 = fadd double %d16, %s3
+  ret double %s17
+}
+
+; CHECK: call_floatarg
+; CHECK: save %sp, -272, %sp
+; Store 4 bytes, right-aligned in slot.
+; CHECK: st %f1, [%sp+2307]
+; Store 8 bytes in full slot.
+; CHECK: std %f2, [%sp+2311]
+; CHECK: fmovd %f2, %f4
+; CHECK: call floatarg
+; CHECK-NOT: add %sp
+; CHECK: restore
+define void @call_floatarg(float %f1, double %d2, float %f5, double *%p) {
+  %r = call double @floatarg(float %f5, double %d2, double %d2, double %d2,
+                             float %f5, float %f5,  float %f5,  float %f5,
+                             float %f5, float %f5,  float %f5,  float %f5,
+                             float %f5, float %f5,  float %f5,  float %f5,
+                             float %f1, double %d2)
+  store double %r, double* %p
+  ret void
+}
+
+; CHECK: mixedarg
+; CHECK: fstod %f3
+; CHECK: faddd %f6
+; CHECK: faddd %f16
+; CHECK: ldx [%fp+2231]
+; CHECK: ldx [%fp+2247]
+define void @mixedarg(i8 %a0,      ; %i0
+                      float %a1,   ; %f3
+                      i16 %a2,     ; %i2
+                      double %a3,  ; %d6
+                      i13 %a4,     ; %i4
+                      float %a5,   ; %f11
+                      i64 %a6,     ; [%fp+BIAS+176]
+                      double *%a7, ; [%fp+BIAS+184]
+                      double %a8,  ; %d16
+                      i16* %a9) {  ; [%fp+BIAS+200]
+  %d1 = fpext float %a1 to double
+  %s3 = fadd double %a3, %d1
+  %s8 = fadd double %a8, %s3
+  store double %s8, double* %a7
+  store i16 %a2, i16* %a9
+  ret void
+}
+
+; CHECK: call_mixedarg
+; CHECK: stx %i2, [%sp+2247]
+; CHECK: stx %i0, [%sp+2223]
+; CHECK: fmovd %f2, %f6
+; CHECK: fmovd %f2, %f16
+; CHECK: call mixedarg
+; CHECK-NOT: add %sp
+; CHECK: restore
+define void @call_mixedarg(i64 %i0, double %f2, i16* %i2) {
+  call void @mixedarg(i8 undef,
+                      float undef,
+                      i16 undef,
+                      double %f2,
+                      i13 undef,
+                      float undef,
+                      i64 %i0,
+                      double* undef,
+                      double %f2,
+                      i16* %i2)
+  ret void
+}
+
+; The inreg attribute is used to indicate 32-bit sized struct elements that
+; share an 8-byte slot.
+; CHECK: inreg_fi
+; CHECK: fstoi %f1
+; CHECK: srlx %i0, 32, [[R:%[gilo][0-7]]]
+; CHECK: sub [[R]],
+define i32 @inreg_fi(i32 inreg %a0,     ; high bits of %i0
+                     float inreg %a1) { ; %f1
+  %b1 = fptosi float %a1 to i32
+  %rv = sub i32 %a0, %b1
+  ret i32 %rv
+}
+
+; CHECK: call_inreg_fi
+; Allocate space for 6 arguments, even when only 2 are used.
+; CHECK: save %sp, -176, %sp
+; CHECK: sllx %i1, 32, %o0
+; CHECK: fmovs %f5, %f1
+; CHECK: call inreg_fi
+define void @call_inreg_fi(i32* %p, i32 %i1, float %f5) {
+  %x = call i32 @inreg_fi(i32 %i1, float %f5)
+  ret void
+}
+
+; CHECK: inreg_ff
+; CHECK: fsubs %f0, %f1, %f1
+define float @inreg_ff(float inreg %a0,   ; %f0
+                       float inreg %a1) { ; %f1
+  %rv = fsub float %a0, %a1
+  ret float %rv
+}
+
+; CHECK: call_inreg_ff
+; CHECK: fmovs %f3, %f0
+; CHECK: fmovs %f5, %f1
+; CHECK: call inreg_ff
+define void @call_inreg_ff(i32* %p, float %f3, float %f5) {
+  %x = call float @inreg_ff(float %f3, float %f5)
+  ret void
+}
+
+; CHECK: inreg_if
+; CHECK: fstoi %f0
+; CHECK: sub %i0
+define i32 @inreg_if(float inreg %a0, ; %f0
+                     i32 inreg %a1) { ; low bits of %i0
+  %b0 = fptosi float %a0 to i32
+  %rv = sub i32 %a1, %b0
+  ret i32 %rv
+}
+
+; CHECK: call_inreg_if
+; CHECK: fmovs %f3, %f0
+; CHECK: or %g0, %i2, %o0
+; CHECK: call inreg_if
+define void @call_inreg_if(i32* %p, float %f3, i32 %i2) {
+  %x = call i32 @inreg_if(float %f3, i32 %i2)
+  ret void
+}
+
+; The frontend shouldn't do this. Just pass i64 instead.
+; CHECK: inreg_ii
+; CHECK: srlx %i0, 32, [[R:%[gilo][0-7]]]
+; CHECK: sub %i0, [[R]], %i0
+define i32 @inreg_ii(i32 inreg %a0,   ; high bits of %i0
+                     i32 inreg %a1) { ; low bits of %i0
+  %rv = sub i32 %a1, %a0
+  ret i32 %rv
+}
+
+; CHECK: call_inreg_ii
+; CHECK: srl %i2, 0, [[R2:%[gilo][0-7]]]
+; CHECK: sllx %i1, 32, [[R1:%[gilo][0-7]]]
+; CHECK: or [[R1]], [[R2]], %o0
+; CHECK: call inreg_ii
+define void @call_inreg_ii(i32* %p, i32 %i1, i32 %i2) {
+  %x = call i32 @inreg_ii(i32 %i1, i32 %i2)
+  ret void
+}
+
+; Structs up to 32 bytes in size can be returned in registers.
+; CHECK: ret_i64_pair
+; CHECK: ldx [%i2], %i0
+; CHECK: ldx [%i3], %i1
+define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) {
+  %r1 = load i64* %p
+  %rv1 = insertvalue { i64, i64 } undef, i64 %r1, 0
+  store i64 0, i64* %p
+  %r2 = load i64* %q
+  %rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1
+  ret { i64, i64 } %rv2
+}
+
+; CHECK: call_ret_i64_pair
+; CHECK: call ret_i64_pair
+; CHECK: stx %o0, [%i0]
+; CHECK: stx %o1, [%i0]
+define void @call_ret_i64_pair(i64* %i0) {
+  %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef,
+                                        i64* undef, i64* undef)
+  %e0 = extractvalue { i64, i64 } %rv, 0
+  store i64 %e0, i64* %i0
+  %e1 = extractvalue { i64, i64 } %rv, 1
+  store i64 %e1, i64* %i0
+  ret void
+}
+
+; This is not a C struct, each member uses 8 bytes.
+; CHECK: ret_i32_float_pair
+; CHECK: ld [%i2], %i0
+; CHECK: ld [%i3], %f3
+define { i32, float } @ret_i32_float_pair(i32 %a0, i32 %a1,
+                                          i32* %p, float* %q) {
+  %r1 = load i32* %p
+  %rv1 = insertvalue { i32, float } undef, i32 %r1, 0
+  store i32 0, i32* %p
+  %r2 = load float* %q
+  %rv2 = insertvalue { i32, float } %rv1, float %r2, 1
+  ret { i32, float } %rv2
+}
+
+; CHECK: call_ret_i32_float_pair
+; CHECK: call ret_i32_float_pair
+; CHECK: st %o0, [%i0]
+; CHECK: st %f3, [%i1]
+define void @call_ret_i32_float_pair(i32* %i0, float* %i1) {
+  %rv = call { i32, float } @ret_i32_float_pair(i32 undef, i32 undef,
+                                                i32* undef, float* undef)
+  %e0 = extractvalue { i32, float } %rv, 0
+  store i32 %e0, i32* %i0
+  %e1 = extractvalue { i32, float } %rv, 1
+  store float %e1, float* %i1
+  ret void
+}
+
+; This is a C struct, each member uses 4 bytes.
+; CHECK: ret_i32_float_packed
+; CHECK: ld [%i2], [[R:%[gilo][0-7]]]
+; CHECK: sllx [[R]], 32, %i0
+; CHECK: ld [%i3], %f1
+define inreg { i32, float } @ret_i32_float_packed(i32 %a0, i32 %a1,
+                                                  i32* %p, float* %q) {
+  %r1 = load i32* %p
+  %rv1 = insertvalue { i32, float } undef, i32 %r1, 0
+  store i32 0, i32* %p
+  %r2 = load float* %q
+  %rv2 = insertvalue { i32, float } %rv1, float %r2, 1
+  ret { i32, float } %rv2
+}
+
+; CHECK: call_ret_i32_float_packed
+; CHECK: call ret_i32_float_packed
+; CHECK: srlx %o0, 32, [[R:%[gilo][0-7]]]
+; CHECK: st [[R]], [%i0]
+; CHECK: st %f1, [%i1]
+define void @call_ret_i32_float_packed(i32* %i0, float* %i1) {
+  %rv = call { i32, float } @ret_i32_float_packed(i32 undef, i32 undef,
+                                                  i32* undef, float* undef)
+  %e0 = extractvalue { i32, float } %rv, 0
+  store i32 %e0, i32* %i0
+  %e1 = extractvalue { i32, float } %rv, 1
+  store float %e1, float* %i1
+  ret void
+}
+
+; The C frontend should use i64 to return { i32, i32 } structs, but verify that
+; we don't miscompile thi case where both struct elements are placed in %i0.
+; CHECK: ret_i32_packed
+; CHECK: ld [%i2], [[R1:%[gilo][0-7]]]
+; CHECK: ld [%i3], [[R2:%[gilo][0-7]]]
+; CHECK: sllx [[R2]], 32, [[R3:%[gilo][0-7]]]
+; CHECK: or [[R3]], [[R1]], %i0
+define inreg { i32, i32 } @ret_i32_packed(i32 %a0, i32 %a1,
+                                          i32* %p, i32* %q) {
+  %r1 = load i32* %p
+  %rv1 = insertvalue { i32, i32 } undef, i32 %r1, 1
+  store i32 0, i32* %p
+  %r2 = load i32* %q
+  %rv2 = insertvalue { i32, i32 } %rv1, i32 %r2, 0
+  ret { i32, i32 } %rv2
+}
+
+; CHECK: call_ret_i32_packed
+; CHECK: call ret_i32_packed
+; CHECK: srlx %o0, 32, [[R:%[gilo][0-7]]]
+; CHECK: st [[R]], [%i0]
+; CHECK: st %o0, [%i1]
+define void @call_ret_i32_packed(i32* %i0, i32* %i1) {
+  %rv = call { i32, i32 } @ret_i32_packed(i32 undef, i32 undef,
+                                          i32* undef, i32* undef)
+  %e0 = extractvalue { i32, i32 } %rv, 0
+  store i32 %e0, i32* %i0
+  %e1 = extractvalue { i32, i32 } %rv, 1
+  store i32 %e1, i32* %i1
+  ret void
+}
+
+; The return value must be sign-extended to 64 bits.
+; CHECK: ret_sext
+; CHECK: sra %i0, 0, %i0
+define signext i32 @ret_sext(i32 %a0) {
+  ret i32 %a0
+}
+
+; CHECK: ret_zext
+; CHECK: srl %i0, 0, %i0
+define zeroext i32 @ret_zext(i32 %a0) {
+  ret i32 %a0
+}
+
+; CHECK: ret_nosext
+; CHECK-NOT: sra
+define signext i32 @ret_nosext(i32 signext %a0) {
+  ret i32 %a0
+}
+
+; CHECK: ret_nozext
+; CHECK-NOT: srl
+define signext i32 @ret_nozext(i32 signext %a0) {
+  ret i32 %a0
+}
diff --git a/test/CodeGen/SPARC/64bit.ll b/test/CodeGen/SPARC/64bit.ll
new file mode 100644
index 000000000000..108663b2553f
--- /dev/null
+++ b/test/CodeGen/SPARC/64bit.ll
@@ -0,0 +1,177 @@
+; RUN: llc < %s -march=sparcv9 | FileCheck %s
+
+; CHECK: ret2:
+; CHECK: or %g0, %i1, %i0
+define i64 @ret2(i64 %a, i64 %b) {
+  ret i64 %b
+}
+
+; CHECK: shl_imm
+; CHECK: sllx %i0, 7, %i0
+define i64 @shl_imm(i64 %a) {
+  %x = shl i64 %a, 7
+  ret i64 %x
+}
+
+; CHECK: sra_reg
+; CHECK: srax %i0, %i1, %i0
+define i64 @sra_reg(i64 %a, i64 %b) {
+  %x = ashr i64 %a, %b
+  ret i64 %x
+}
+
+; Immediate materialization. Many of these patterns could actually be merged
+; into the restore instruction:
+;
+;     restore %g0, %g0, %o0
+;
+; CHECK: ret_imm0
+; CHECK: or %g0, %g0, %i0
+define i64 @ret_imm0() {
+  ret i64 0
+}
+
+; CHECK: ret_simm13
+; CHECK: or %g0, -4096, %i0
+define i64 @ret_simm13() {
+  ret i64 -4096
+}
+
+; CHECK: ret_sethi
+; CHECK: sethi 4, %i0
+; CHECK-NOT: or
+; CHECK: restore
+define i64 @ret_sethi() {
+  ret i64 4096
+}
+
+; CHECK: ret_sethi
+; CHECK: sethi 4, [[R:%[goli][0-7]]]
+; CHECK: or [[R]], 1, %i0
+define i64 @ret_sethi_or() {
+  ret i64 4097
+}
+
+; CHECK: ret_nimm33
+; CHECK: sethi 4, [[R:%[goli][0-7]]]
+; CHECK: xor [[R]], -4, %i0
+define i64 @ret_nimm33() {
+  ret i64 -4100
+}
+
+; CHECK: ret_bigimm
+; CHECK: sethi
+; CHECK: sethi
+define i64 @ret_bigimm() {
+  ret i64 6800754272627607872
+}
+
+; CHECK: reg_reg_alu
+; CHECK: add %i0, %i1, [[R0:%[goli][0-7]]]
+; CHECK: sub [[R0]], %i2, [[R1:%[goli][0-7]]]
+; CHECK: andn [[R1]], %i0, %i0
+define i64 @reg_reg_alu(i64 %x, i64 %y, i64 %z) {
+  %a = add i64 %x, %y
+  %b = sub i64 %a, %z
+  %c = xor i64 %x, -1
+  %d = and i64 %b, %c
+  ret i64 %d
+}
+
+; CHECK: reg_imm_alu
+; CHECK: add %i0, -5, [[R0:%[goli][0-7]]]
+; CHECK: xor [[R0]], 2, %i0
+define i64 @reg_imm_alu(i64 %x, i64 %y, i64 %z) {
+  %a = add i64 %x, -5
+  %b = xor i64 %a, 2
+  ret i64 %b
+}
+
+; CHECK: loads
+; CHECK: ldx [%i0]
+; CHECK: stx %
+; CHECK: ld [%i1]
+; CHECK: st %
+; CHECK: ldsw [%i2]
+; CHECK: stx %
+; CHECK: ldsh [%i3]
+; CHECK: sth %
+define i64 @loads(i64* %p, i32* %q, i32* %r, i16* %s) {
+  %a = load i64* %p
+  %ai = add i64 1, %a
+  store i64 %ai, i64* %p
+  %b = load i32* %q
+  %b2 = zext i32 %b to i64
+  %bi = trunc i64 %ai to i32
+  store i32 %bi, i32* %q
+  %c = load i32* %r
+  %c2 = sext i32 %c to i64
+  store i64 %ai, i64* %p
+  %d = load i16* %s
+  %d2 = sext i16 %d to i64
+  %di = trunc i64 %ai to i16
+  store i16 %di, i16* %s
+
+  %x1 = add i64 %a, %b2
+  %x2 = add i64 %c2, %d2
+  %x3 = add i64 %x1, %x2
+  ret i64 %x3
+}
+
+; CHECK: stores
+; CHECK: ldx [%i0+8], [[R:%[goli][0-7]]]
+; CHECK: stx [[R]], [%i0+16]
+; CHECK: st [[R]], [%i1+-8]
+; CHECK: sth [[R]], [%i2+40]
+; CHECK: stb [[R]], [%i3+-20]
+define void @stores(i64* %p, i32* %q, i16* %r, i8* %s) {
+  %p1 = getelementptr i64* %p, i64 1
+  %p2 = getelementptr i64* %p, i64 2
+  %pv = load i64* %p1
+  store i64 %pv, i64* %p2
+
+  %q2 = getelementptr i32* %q, i32 -2
+  %qv = trunc i64 %pv to i32
+  store i32 %qv, i32* %q2
+
+  %r2 = getelementptr i16* %r, i16 20
+  %rv = trunc i64 %pv to i16
+  store i16 %rv, i16* %r2
+
+  %s2 = getelementptr i8* %s, i8 -20
+  %sv = trunc i64 %pv to i8
+  store i8 %sv, i8* %s2
+
+  ret void
+}
+
+; CHECK: promote_shifts
+; CHECK: ldub [%i0], [[R:%[goli][0-7]]]
+; CHECK: sll [[R]], [[R]], %i0
+define i8 @promote_shifts(i8* %p) {
+  %L24 = load i8* %p
+  %L32 = load i8* %p
+  %B36 = shl i8 %L24, %L32
+  ret i8 %B36
+}
+
+; CHECK: multiply
+; CHECK: mulx %i0, %i1, %i0
+define i64 @multiply(i64 %a, i64 %b) {
+  %r = mul i64 %a, %b
+  ret i64 %r
+}
+
+; CHECK: signed_divide
+; CHECK: sdivx %i0, %i1, %i0
+define i64 @signed_divide(i64 %a, i64 %b) {
+  %r = sdiv i64 %a, %b
+  ret i64 %r
+}
+
+; CHECK: unsigned_divide
+; CHECK: udivx %i0, %i1, %i0
+define i64 @unsigned_divide(i64 %a, i64 %b) {
+  %r = udiv i64 %a, %b
+  ret i64 %r
+}
diff --git a/test/CodeGen/SPARC/64cond.ll b/test/CodeGen/SPARC/64cond.ll
new file mode 100644
index 000000000000..6e66a262a4f2
--- /dev/null
+++ b/test/CodeGen/SPARC/64cond.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=sparcv9 | FileCheck %s
+; Testing 64-bit conditionals.
+
+; CHECK: cmpri
+; CHECK: subcc %i1, 1
+; CHECK: bpe %xcc,
+define void @cmpri(i64* %p, i64 %x) {
+entry:
+  %tobool = icmp eq i64 %x, 1
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+  store i64 %x, i64* %p, align 8
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK: cmprr
+; CHECK: subcc %i1, %i2
+; CHECK: bpgu %xcc,
+define void @cmprr(i64* %p, i64 %x, i64 %y) {
+entry:
+  %tobool = icmp ugt i64 %x, %y
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+  store i64 %x, i64* %p, align 8
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK: selecti32_xcc
+; CHECK: subcc %i0, %i1
+; CHECK: movg %xcc, %i2, %i3
+; CHECK: or %g0, %i3, %i0
+define i32 @selecti32_xcc(i64 %x, i64 %y, i32 %a, i32 %b) {
+entry:
+  %tobool = icmp sgt i64 %x, %y
+  %rv = select i1 %tobool, i32 %a, i32 %b
+  ret i32 %rv
+}
+
+; CHECK: selecti64_xcc
+; CHECK: subcc %i0, %i1
+; CHECK: movg %xcc, %i2, %i3
+; CHECK: or %g0, %i3, %i0
+define i64 @selecti64_xcc(i64 %x, i64 %y, i64 %a, i64 %b) {
+entry:
+  %tobool = icmp sgt i64 %x, %y
+  %rv = select i1 %tobool, i64 %a, i64 %b
+  ret i64 %rv
+}
diff --git a/test/CodeGen/SPARC/DbgValueOtherTargets.test b/test/CodeGen/SPARC/DbgValueOtherTargets.test
index 749a7b534669..a669bf848d65 100644
--- a/test/CodeGen/SPARC/DbgValueOtherTargets.test
+++ b/test/CodeGen/SPARC/DbgValueOtherTargets.test
@@ -1 +1 @@
-; RUN: llc -O0 -march=sparc -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=sparc -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/SPARC/constpool.ll b/test/CodeGen/SPARC/constpool.ll
new file mode 100644
index 000000000000..d93a53b3ac04
--- /dev/null
+++ b/test/CodeGen/SPARC/constpool.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=sparc   -relocation-model=static -code-model=small  | FileCheck --check-prefix=abs32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=small  | FileCheck --check-prefix=abs32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=medium | FileCheck --check-prefix=abs44 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=large  | FileCheck --check-prefix=abs64 %s
+; RUN: llc < %s -march=sparc   -relocation-model=pic    -code-model=medium | FileCheck --check-prefix=v8pic32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=pic    -code-model=medium | FileCheck --check-prefix=v9pic32 %s
+
+define float @floatCP() {
+entry:
+  ret float 1.000000e+00
+}
+
+; abs32: floatCP
+; abs32: sethi %hi(.LCPI0_0), %[[R:[gilo][0-7]]]
+; abs32: ld [%[[R]]+%lo(.LCPI0_0)], %f
+; abs32: jmp %i7+8
+
+; abs44: floatCP
+; abs44: sethi %h44(.LCPI0_0), %[[R1:[gilo][0-7]]]
+; abs44: add %[[R1]], %m44(.LCPI0_0), %[[R2:[gilo][0-7]]]
+; abs44: sllx %[[R2]], 12, %[[R3:[gilo][0-7]]]
+; abs44: ld [%[[R3]]+%l44(.LCPI0_0)], %f1
+; abs44: jmp %i7+8
+
+; abs64: floatCP
+; abs64: sethi %hi(.LCPI0_0), %[[R1:[gilo][0-7]]]
+; abs64: add %[[R1]], %lo(.LCPI0_0), %[[R2:[gilo][0-7]]]
+; abs64: sethi %hh(.LCPI0_0), %[[R3:[gilo][0-7]]]
+; abs64: add %[[R3]], %hm(.LCPI0_0), %[[R4:[gilo][0-7]]]
+; abs64: sllx %[[R4]], 32, %[[R5:[gilo][0-7]]]
+; abs64: ld [%[[R5]]+%[[R2]]], %f1
+; abs64: jmp %i7+8
+
+; v8pic32: floatCP
+; v8pic32: _GLOBAL_OFFSET_TABLE_
+; v8pic32: sethi %hi(.LCPI0_0), %[[R1:[gilo][0-7]]]
+; v8pic32: add %[[R1]], %lo(.LCPI0_0), %[[Goffs:[gilo][0-7]]]
+; v8pic32: ld [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
+; v8pic32: ld [%[[Gaddr]]], %f0
+; v8pic32: jmp %i7+8
+
+; v9pic32: floatCP
+; v9pic32: _GLOBAL_OFFSET_TABLE_
+; v9pic32: sethi %hi(.LCPI0_0), %[[R1:[gilo][0-7]]]
+; v9pic32: add %[[R1]], %lo(.LCPI0_0), %[[Goffs:[gilo][0-7]]]
+; v9pic32: ldx [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
+; v9pic32: ld [%[[Gaddr]]], %f1
+; v9pic32: jmp %i7+8
diff --git a/test/CodeGen/SPARC/ctpop.ll b/test/CodeGen/SPARC/ctpop.ll
index e56f4947b52a..916a41496e2a 100644
--- a/test/CodeGen/SPARC/ctpop.ll
+++ b/test/CodeGen/SPARC/ctpop.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=sparc -mattr=-v9 | not grep popc
-; RUN: llc < %s -march=sparcv9 -mattr=v9 | grep popc
+; RUN: llc < %s -march=sparc -mattr=+v9 | grep popc
 
 declare i32 @llvm.ctpop.i32(i32)
 
diff --git a/test/CodeGen/SPARC/globals.ll b/test/CodeGen/SPARC/globals.ll
new file mode 100644
index 000000000000..8d8de58f7ccf
--- /dev/null
+++ b/test/CodeGen/SPARC/globals.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -march=sparc   -relocation-model=static -code-model=small  | FileCheck --check-prefix=abs32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=small  | FileCheck --check-prefix=abs32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=medium | FileCheck --check-prefix=abs44 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=large  | FileCheck --check-prefix=abs64 %s
+; RUN: llc < %s -march=sparc   -relocation-model=pic    -code-model=medium | FileCheck --check-prefix=v8pic32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=pic    -code-model=medium | FileCheck --check-prefix=v9pic32 %s
+
+@G = external global i8
+
+define zeroext i8 @loadG() {
+  %tmp = load i8* @G
+  ret i8 %tmp
+}
+
+; abs32: loadG
+; abs32: sethi %hi(G), %[[R:[gilo][0-7]]]
+; abs32: ldub [%[[R]]+%lo(G)], %i0
+; abs32: jmp %i7+8
+
+; abs44: loadG
+; abs44: sethi %h44(G), %[[R1:[gilo][0-7]]]
+; abs44: add %[[R1]], %m44(G), %[[R2:[gilo][0-7]]]
+; abs44: sllx %[[R2]], 12, %[[R3:[gilo][0-7]]]
+; abs44: ldub [%[[R3]]+%l44(G)], %i0
+; abs44: jmp %i7+8
+
+; abs64: loadG
+; abs64: sethi %hi(G), %[[R1:[gilo][0-7]]]
+; abs64: add %[[R1]], %lo(G), %[[R2:[gilo][0-7]]]
+; abs64: sethi %hh(G), %[[R3:[gilo][0-7]]]
+; abs64: add %[[R3]], %hm(G), %[[R4:[gilo][0-7]]]
+; abs64: sllx %[[R4]], 32, %[[R5:[gilo][0-7]]]
+; abs64: ldub [%[[R5]]+%[[R2]]], %i0
+; abs64: jmp %i7+8
+
+; v8pic32: loadG
+; v8pic32: _GLOBAL_OFFSET_TABLE_
+; v8pic32: sethi %hi(G), %[[R1:[gilo][0-7]]]
+; v8pic32: add %[[R1]], %lo(G), %[[Goffs:[gilo][0-7]]]
+; v8pic32: ld [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
+; v8pic32: ldub [%[[Gaddr]]], %i0
+; v8pic32: jmp %i7+8
+
+; v9pic32: loadG
+; v9pic32: _GLOBAL_OFFSET_TABLE_
+; v9pic32: sethi %hi(G), %[[R1:[gilo][0-7]]]
+; v9pic32: add %[[R1]], %lo(G), %[[Goffs:[gilo][0-7]]]
+; v9pic32: ldx [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
+; v9pic32: ldub [%[[Gaddr]]], %i0
+; v9pic32: jmp %i7+8
diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg
index 786fee9e6610..6f30a8797967 100644
--- a/test/CodeGen/SPARC/lit.local.cfg
+++ b/test/CodeGen/SPARC/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
 
 targets = set(config.root.targets_to_build.split())
 if not 'Sparc' in targets:
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index 498c78165ebb..d6b649569173 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -47,8 +47,8 @@ declare double @sqrt(double) nounwind readonly
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 46, i32 0, metadata !1, null}
-!1 = metadata !{i32 524299, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 524299, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
+!1 = metadata !{i32 524299, metadata !4, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 524299, metadata !4, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
 !3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", metadata !4, i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
 !4 = metadata !{i32 524329, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ]
 !5 = metadata !{i32 524305, i32 0, i32 4, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
@@ -140,8 +140,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !91 = metadata !{i32 524544, metadata !1, metadata !"vx", metadata !4, i32 46, metadata !13} ; [ DW_TAG_auto_variable ]
 !92 = metadata !{i32 48, i32 0, metadata !1, null}
 !93 = metadata !{i32 218, i32 0, metadata !94, metadata !96}
-!94 = metadata !{i32 524299, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
-!95 = metadata !{i32 524299, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
+!94 = metadata !{i32 524299, metadata !4, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
+!95 = metadata !{i32 524299, metadata !4, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
 !96 = metadata !{i32 51, i32 0, metadata !1, null}
 !97 = metadata !{i32 227, i32 0, metadata !94, metadata !96}
 !98 = metadata !{i32 52, i32 0, metadata !1, null}
diff --git a/test/CodeGen/Thumb/DbgValueOtherTargets.test b/test/CodeGen/Thumb/DbgValueOtherTargets.test
index aa31a7c75c93..afb18a43be47 100644
--- a/test/CodeGen/Thumb/DbgValueOtherTargets.test
+++ b/test/CodeGen/Thumb/DbgValueOtherTargets.test
@@ -1 +1 @@
-; RUN: llc -O0 -march=thumb -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=thumb -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg
index cb77b09ef4ad..4d75f581a1d2 100644
--- a/test/CodeGen/Thumb/lit.local.cfg
+++ b/test/CodeGen/Thumb/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
 
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
diff --git a/test/CodeGen/X86/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll
index 239522d68fd7..7dba332b1bec 100644
--- a/test/CodeGen/X86/2010-01-18-DbgValue.ll
+++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -31,20 +31,20 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!3}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !7}
-!6 = metadata !{i32 786468, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786451, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
+!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
 !8 = metadata !{metadata !9, metadata !14}
-!9 = metadata !{i32 786445, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786451, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
 !11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 786445, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!13 = metadata !{i32 786445, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
-!14 = metadata !{i32 786445, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
+!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
 !15 = metadata !{i32 11, i32 0, metadata !1, null}
 !16 = metadata !{i32 12, i32 0, metadata !17, null}
 !17 = metadata !{i32 786443, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 89f842d7f22a..8ab93fcb978f 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -201,21 +201,21 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.cu = !{!3}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !45} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !44, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !44, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !45, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9}
-!6 = metadata !{i32 786454, metadata !7, metadata !"SCtype", metadata !7, i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SCtype", i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
 !7 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ]
-!8 = metadata !{i32 786468, metadata !2, metadata !"complex float", metadata !2, i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786454, metadata !7, metadata !"SFtype", metadata !7, i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
-!10 = metadata !{i32 786468, metadata !2, metadata !"float", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786468, metadata !45, metadata !2, metadata !"complex float", i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SFtype", i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!10 = metadata !{i32 786468, metadata !45, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !11 = metadata !{i32 786689, metadata !1, metadata !"b", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !12 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !13 = metadata !{i32 786689, metadata !1, metadata !"d", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !14 = metadata !{i32 786688, metadata !15, metadata !"denom", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786443, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 786443, metadata !2, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ]
 !16 = metadata !{i32 786688, metadata !15, metadata !"ratio", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
 !17 = metadata !{i32 786688, metadata !15, metadata !"x", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
 !18 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 5596b1cba95b..6519ca063a7c 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -25,14 +25,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786484, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null} ; [ DW_TAG_variable ]
 !1 = metadata !{i32 786473, metadata !36} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !32, metadata !31, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !36, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !32, metadata !31, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !4 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ]
-!5 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !7 = metadata !{null, metadata !3}
 !8 = metadata !{i32 786689, metadata !9, metadata !"myvar", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar, null, null, metadata !34, i32 17} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar, null, null, metadata !34, i32 17} ; [ DW_TAG_subprogram ]
 !10 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !11 = metadata !{metadata !12, metadata !13}
 !12 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
@@ -42,7 +42,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !16 = metadata !{i32 786445, metadata !14, metadata !"c", metadata !1, i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ]
 !17 = metadata !{i32 786445, metadata !14, metadata !"d", metadata !1, i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ]
 !18 = metadata !{i32 786689, metadata !19, metadata !"argc", metadata !1, i32 22, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !35, i32 22} ; [ DW_TAG_subprogram ]
+!19 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !35, i32 22} ; [ DW_TAG_subprogram ]
 !20 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !21 = metadata !{metadata !3, metadata !3, metadata !22}
 !22 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
@@ -50,9 +50,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !24 = metadata !{i32 786468, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !25 = metadata !{i32 786689, metadata !19, metadata !"argv", metadata !1, i32 22, metadata !22, i32 0, null} ; [ DW_TAG_arg_variable ]
 !26 = metadata !{i32 786688, metadata !27, metadata !"e", metadata !1, i32 23, metadata !14, i32 0, null} ; [ DW_TAG_auto_variable ]
-!27 = metadata !{i32 786443, metadata !19, i32 22, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 786443, metadata !36, metadata !19, i32 22, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !28 = metadata !{i32 18, i32 0, metadata !29, null}
-!29 = metadata !{i32 786443, metadata !9, i32 17, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!29 = metadata !{i32 786443, metadata !36, metadata !9, i32 17, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
 !30 = metadata !{i32 19, i32 0, metadata !29, null}
 !31 = metadata !{metadata !0}
 !32 = metadata !{metadata !5, metadata !9, metadata !19}
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index 1a8277900c53..4ea3bf077841 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -25,20 +25,20 @@ entry:
 !llvm.dbg.cu = !{!3}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !6}
 !6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !2, i32 6, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786478, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @bar, null, null, metadata !16, i32 6} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 786478, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @bar, null, null, metadata !16, i32 6} ; [ DW_TAG_subprogram ]
 !9 = metadata !{i32 3, i32 0, metadata !10, null}
-!10 = metadata !{i32 786443, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
 !11 = metadata !{i32 1}
 !12 = metadata !{i32 3, i32 0, metadata !10, metadata !13}
 !13 = metadata !{i32 7, i32 0, metadata !14, null}
-!14 = metadata !{i32 786443, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 786443, metadata !2, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{metadata !0}
 !16 = metadata !{metadata !7}
 !17 = metadata !{metadata !1, metadata !8}
diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
index c9accfa29050..b764b0b34597 100644
--- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
+++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -22,14 +22,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", metadata !3, i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !3, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786451, metadata !3, metadata !"foo", metadata !3, i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_structure_type ]
 !3 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
 !4 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cp", metadata !"/tmp/", metadata !"4.2.1 LLVM build", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
 !5 = metadata !{metadata !6, metadata !1, metadata !8}
 !6 = metadata !{i32 786445, metadata !2, metadata !"y", metadata !3, i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ]
 !7 = metadata !{i32 786468, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786478, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", metadata !3, i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 786478, metadata !3, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
 !9 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !10 = metadata !{metadata !7, metadata !11, metadata !7}
 !11 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ]
@@ -39,7 +39,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !15 = metadata !{i32 786689, metadata !8, metadata !"this", metadata !3, i32 15, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
 !16 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !3, i32 15, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !17 = metadata !{i32 786689, metadata !18, metadata !"argc", metadata !3, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 786478, i32 0, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, null, i32 19} ; [ DW_TAG_subprogram ]
+!18 = metadata !{i32 786478, metadata !3, metadata !3, metadata !"main", metadata !"main", metadata !"main", i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, null, i32 19} ; [ DW_TAG_subprogram ]
 !19 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !20 = metadata !{metadata !7, metadata !7, metadata !21}
 !21 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
diff --git a/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll b/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
index b22a391ef358..b5679e665696 100644
--- a/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
+++ b/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
@@ -1,4 +1,5 @@
-; RUN: llc -fast-isel -march=x86 < %s | grep %fs:
+; RUN: llc -fast-isel -march=x86 < %s | FileCheck %s
+; CHECK: %fs:
 
 define i32 @test1(i32 addrspace(257)* %arg) nounwind {
        %tmp = load i32 addrspace(257)* %arg
diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll
index 84b3e395a9a6..aaa562a439d5 100644
--- a/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -77,27 +77,27 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.cu = !{!3}
 !46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
 !2 = metadata !{i32 786473, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !46, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !46, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
 !5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
 !6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !7 = metadata !{i32 786445, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
 !8 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 12} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 12} ; [ DW_TAG_subprogram ]
 !10 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !11 = metadata !{null, metadata !12, metadata !13}
 !12 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
 !13 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !15 = metadata !{null, metadata !12}
-!16 = metadata !{i32 786478, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 16} ; [ DW_TAG_subprogram ]
+!16 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 16} ; [ DW_TAG_subprogram ]
 !18 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !19 = metadata !{metadata !13, metadata !13, metadata !1}
-!20 = metadata !{i32 786478, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 23} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 23} ; [ DW_TAG_subprogram ]
 !21 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !22 = metadata !{metadata !13}
 !23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
@@ -105,7 +105,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, null} ; [ DW_TAG_arg_variable ]
 !26 = metadata !{i32 786448, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
 !27 = metadata !{i32 17, i32 0, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786443, metadata !2, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
 !29 = metadata !{i32 18, i32 0, metadata !28, null}
 !30 = metadata !{i32 20, i32 0, metadata !28, null}
 !31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, null} ; [ DW_TAG_arg_variable ]
@@ -113,11 +113,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !33 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
 !34 = metadata !{i32 11, i32 0, metadata !16, null}
 !35 = metadata !{i32 11, i32 0, metadata !36, null}
-!36 = metadata !{i32 786443, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
-!37 = metadata !{i32 786443, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!36 = metadata !{i32 786443, metadata !2, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
+!37 = metadata !{i32 786443, metadata !2, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, null} ; [ DW_TAG_auto_variable ]
-!39 = metadata !{i32 786443, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ]
-!40 = metadata !{i32 786443, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ]
+!39 = metadata !{i32 786443, metadata !2, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 786443, metadata !2, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
 !41 = metadata !{i32 24, i32 0, metadata !39, null}
 !42 = metadata !{i32 25, i32 0, metadata !39, null}
 !43 = metadata !{i32 26, i32 0, metadata !39, null}
diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
index 2a766d4544d1..de0d216e266f 100644
--- a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
+++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -14,13 +14,13 @@ entry:
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !7, metadata !"clang version 2.9 (trunk 114084)", i1 false, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !15, i32 12, metadata !"clang version 2.9 (trunk 114084)", i1 false, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, metadata !13, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, i32 0, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786478, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ]
 !7 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
 !8 = metadata !{i32 53, i32 13, metadata !9, null}
 !9 = metadata !{i32 786443, metadata !0, i32 53, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
index 6d99ffae0ccf..31a6822b34b8 100644
--- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -17,9 +17,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, null, null, metadata !15, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, null, null, metadata !15, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/2010-12-02-MC-Set.ll b/test/CodeGen/X86/2010-12-02-MC-Set.ll
index 33c59c4425c5..4d8d974f703e 100644
--- a/test/CodeGen/X86/2010-12-02-MC-Set.ll
+++ b/test/CodeGen/X86/2010-12-02-MC-Set.ll
@@ -9,9 +9,9 @@ entry:
 !llvm.dbg.cu = !{!2}
 !7 = metadata !{metadata !0}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !"e.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 120563)", i1 false, metadata !"", i32 0, null, null, metadata !7, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 120563)", i1 false, metadata !"", i32 0, null, null, metadata !7, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null}
 !5 = metadata !{i32 5, i32 1, metadata !6, null}
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index b6078a8a550b..2355528a81e8 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -71,29 +71,29 @@ declare i32 @puts(i8* nocapture) nounwind
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !2, metadata !"long int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main, null, null, metadata !30, i32 0} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786468, null, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main, null, null, metadata !30, i32 0} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !11 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !12 = metadata !{i32 786688, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!13 = metadata !{i32 786443, metadata !0, i32 5, i32 52, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 786443, metadata !1, metadata !0, i32 5, i32 52, i32 0} ; [ DW_TAG_lexical_block ]
 !14 = metadata !{i32 786688, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0, null} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786443, metadata !6, i32 25, i32 12, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!15 = metadata !{i32 786443, metadata !1, metadata !6, i32 25, i32 12, i32 2} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !17 = metadata !{i32 786688, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
 !18 = metadata !{i32 5, i32 41, metadata !0, null}
 !19 = metadata !{i32 5, i32 49, metadata !0, null}
 !20 = metadata !{i32 7, i32 5, metadata !13, null}
 !21 = metadata !{i32 8, i32 9, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !13, i32 7, i32 14, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !1, metadata !13, i32 7, i32 14, i32 1} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{i32 9, i32 9, metadata !22, null}
 !24 = metadata !{i32 26, i32 38, metadata !15, null}
 !25 = metadata !{i32 27, i32 38, metadata !15, null}
diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
index a5ec614a943b..54d2b403509d 100644
--- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -disable-code-place | FileCheck %s
+; RUN: llc < %s -march=x86 -disable-block-placement | FileCheck %s
 ;
 ; Test RegistersDefinedFromSameValue. We have multiple copies of the same vreg:
 ; while.body85.i:
diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
index e37a3887b54d..9525653f3fff 100644
--- a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -43,8 +43,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !3 = metadata !{null}
 !4 = metadata !{i32 786689, null, metadata !"hg", metadata !5, i32 67109589, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [hg] [line 725]
 !5 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
-!6 = metadata !{i32 786454, null, metadata !"hgstruct", metadata !5, i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 786451, null, metadata !"", metadata !5, i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [from ]
+!6 = metadata !{i32 786454, metadata !11, null, metadata !"hgstruct", i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [from ]
 !8 = metadata !{metadata !"short", metadata !9}
 !9 = metadata !{metadata !"omnipotent char", metadata !10}
 !10 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
index c35532b5b559..a0fbbb2ff9ef 100644
--- a/test/CodeGen/X86/2012-11-30-misched-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
@@ -79,8 +79,8 @@ declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
 !12 = metadata !{i32 786443, metadata !13, i32 249, i32 0, metadata !14, i32 23} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
 !13 = metadata !{i32 786443, metadata !3, i32 221, i32 0, metadata !14, i32 19} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
 !14 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!15 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
-!16 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
+!16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
 !17 = metadata !{metadata !18}
 !18 = metadata !{i32 786465, i64 0, i64 20}       ; [ DW_TAG_subrange_type ] [0, 19]
 !19 = metadata !{metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset"}
@@ -131,6 +131,6 @@ declare void @_Znwm()
 
 !30 = metadata !{i32 786449, i32 0, i32 4, metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++", metadata !"clang version 3.3 (trunk 169129) (llvm/trunk 169135)", i1 true, i1 true, metadata !"", i32 0, null, null, null, null} ; [ DW_TAG_compile_unit ] [SingleSource/Benchmarks/Shootout-C++/hash.cpp] [DW_LANG_C_plus_plus]
 !31 = metadata !{i32 786688, null, metadata !"X", null, i32 29, metadata !32, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [X] [line 29]
-!32 = metadata !{i32 786454, null, metadata !"HM", metadata !33, i32 28, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ]
+!32 = metadata !{i32 786454, metadata !34, null, metadata !"HM", i32 28, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ]
 !33 = metadata !{i32 786473, metadata !34} ; [ DW_TAG_file_type ]
 !34 = metadata !{metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++"}
diff --git a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
index 0e3e5fa74e34..df93c5647d95 100644
--- a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
@@ -39,6 +39,6 @@ invoke.cont44:                                    ; preds = %if.end
 !1 = metadata !{metadata !2}
 !2 = metadata !{null, null}
 !3 = metadata !{i32 786688, null, metadata !"callback", null, i32 214, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [callback] [line 214]
-!4 = metadata !{i32 786451, null, metadata !"btCompoundLeafCallback", metadata !5, i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [from ]
+!4 = metadata !{i32 786451, metadata !6, null, metadata !"btCompoundLeafCallback", i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [from ]
 !5 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ]
 !6 = metadata !{metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet"}
diff --git a/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll b/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll
index db7ec8ae26cd..1b417e54a2f7 100644
--- a/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll
+++ b/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll
@@ -39,3 +39,36 @@ define void @t() nounwind uwtable ssp {
 ; <label>:11                                      ; preds = %11, %4
   br label %11
 }
+
+; PR15608
+@global = external constant [2 x i8]
+
+define void @PR15608() {
+bb:
+  br label %bb3
+
+bb1:                                              ; No predecessors!
+  br i1 icmp ult (i64 xor (i64 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i64), i64 1), i64 1), label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb1
+  unreachable
+
+bb3:                                              ; preds = %bb1, %bb
+  br i1 xor (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1), i1 trunc (i192 lshr (i192 or (i192 and (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 -340282366920938463463374607431768211457), i192 shl (i192 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i192), i192 128)), i192 128) to i1)), label %bb7, label %bb4
+
+bb4:                                              ; preds = %bb6, %bb3
+  %tmp = phi i1 [ true, %bb6 ], [ trunc (i192 lshr (i192 or (i192 and (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 -340282366920938463463374607431768211457), i192 shl (i192 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i192), i192 128)), i192 128) to i1), %bb3 ]
+  br i1 false, label %bb8, label %bb5
+
+bb5:                                              ; preds = %bb4
+  br i1 %tmp, label %bb8, label %bb6
+
+bb6:                                              ; preds = %bb5
+  br i1 false, label %bb8, label %bb4
+
+bb7:                                              ; preds = %bb3
+  unreachable
+
+bb8:                                              ; preds = %bb6, %bb5, %bb4
+  unreachable
+}
diff --git a/test/CodeGen/X86/DbgValueOtherTargets.test b/test/CodeGen/X86/DbgValueOtherTargets.test
index c95e8c6abdef..7b4d431c93b1 100644
--- a/test/CodeGen/X86/DbgValueOtherTargets.test
+++ b/test/CodeGen/X86/DbgValueOtherTargets.test
@@ -1,2 +1,2 @@
-; RUN: llc -O0 -march=x86 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
-; RUN: llc -O0 -march=x86-64 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=x86 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=x86-64 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/X86/GC/erlang-gc.ll b/test/CodeGen/X86/GC/erlang-gc.ll
new file mode 100644
index 000000000000..c55b7f6dcf61
--- /dev/null
+++ b/test/CodeGen/X86/GC/erlang-gc.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK64
+; RUN: llc -mtriple=i686-linux-gnu < %s | FileCheck %s --check-prefix=CHECK32
+
+define i32 @main(i32 %x) nounwind gc "erlang" {
+  %puts = tail call i32 @foo(i32 %x)
+  ret i32 0
+
+; CHECK64:      .section  .note.gc,"",@progbits
+; CHECK64-NEXT: .align 8
+; CHECK64-NEXT: .short 1      # safe point count
+; CHECK64-NEXT: .long  .Ltmp0 # safe point address
+; CHECK64-NEXT: .short 1      # stack frame size (in words)
+; CHECK64-NEXT: .short 0      # stack arity
+; CHECK64-NEXT: .short 0      # live root count
+
+; CHECK32:      .section  .note.gc,"",@progbits
+; CHECK32-NEXT: .align 4
+; CHECK32-NEXT: .short 1      # safe point count
+; CHECK32-NEXT: .long  .Ltmp0 # safe point address
+; CHECK32-NEXT: .short 3      # stack frame size (in words)
+; CHECK32-NEXT: .short 0      # stack arity
+; CHECK32-NEXT: .short 0      # live root count
+}
+
+declare i32 @foo(i32)
diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll
index cb5a9bead29a..227ef3466e0a 100644
--- a/test/CodeGen/X86/MachineSink-DbgValue.ll
+++ b/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -27,8 +27,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
@@ -38,7 +38,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !8 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
 !9 = metadata !{i32 786468, metadata !0, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 786688, metadata !11, metadata !"a", metadata !2, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 786443, metadata !1, i32 2, i32 25, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 786443, metadata !20, metadata !1, i32 2, i32 25, i32 0} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{i32 2, i32 13, metadata !1, null}
 !13 = metadata !{i32 2, i32 22, metadata !1, null}
 !14 = metadata !{i32 3, i32 14, metadata !11, null}
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll
index fbe8879ad627..bb227a0185df 100644
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -337,3 +337,99 @@ block4:                                       ; preds = %4, %.lr.ph
   ret void
 }
 
+; Make sure that we merge the consecutive load/store sequence below and use a
+; word (16 bit) instead of a byte copy.
+; CHECK: MergeLoadStoreBaseIndexOffset
+; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
+; CHECK: movw    [[REG]], (%{{.*}})
+define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
+  br label %1
+
+; <label>:1
+  %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]
+  %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
+  %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]
+  %2 = getelementptr inbounds i64* %.0, i64 1
+  %3 = load i64* %.0, align 1
+  %4 = getelementptr inbounds i8* %c, i64 %3
+  %5 = load i8* %4, align 1
+  %6 = add i64 %3, 1
+  %7 = getelementptr inbounds i8* %c, i64 %6
+  %8 = load i8* %7, align 1
+  store i8 %5, i8* %.08, align 1
+  %9 = getelementptr inbounds i8* %.08, i64 1
+  store i8 %8, i8* %9, align 1
+  %10 = getelementptr inbounds i8* %.08, i64 2
+  %11 = add nsw i32 %.09, -1
+  %12 = icmp eq i32 %11, 0
+  br i1 %12, label %13, label %1
+
+; <label>:13
+  ret void
+}
+
+; Make sure that we merge the consecutive load/store sequence below and use a
+; word (16 bit) instead of a byte copy even if there are intermediate sign
+; extensions.
+; CHECK: MergeLoadStoreBaseIndexOffsetSext
+; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
+; CHECK: movw    [[REG]], (%{{.*}})
+define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
+  br label %1
+
+; <label>:1
+  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
+  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
+  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
+  %2 = getelementptr inbounds i8* %.0, i64 1
+  %3 = load i8* %.0, align 1
+  %4 = sext i8 %3 to i64
+  %5 = getelementptr inbounds i8* %c, i64 %4
+  %6 = load i8* %5, align 1
+  %7 = add i64 %4, 1
+  %8 = getelementptr inbounds i8* %c, i64 %7
+  %9 = load i8* %8, align 1
+  store i8 %6, i8* %.08, align 1
+  %10 = getelementptr inbounds i8* %.08, i64 1
+  store i8 %9, i8* %10, align 1
+  %11 = getelementptr inbounds i8* %.08, i64 2
+  %12 = add nsw i32 %.09, -1
+  %13 = icmp eq i32 %12, 0
+  br i1 %13, label %14, label %1
+
+; <label>:14
+  ret void
+}
+
+; However, we can only merge ignore sign extensions when they are on all memory
+; computations;
+; CHECK: loadStoreBaseIndexOffsetSextNoSex
+; CHECK-NOT: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
+; CHECK-NOT: movw    [[REG]], (%{{.*}})
+define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
+  br label %1
+
+; <label>:1
+  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
+  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
+  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
+  %2 = getelementptr inbounds i8* %.0, i64 1
+  %3 = load i8* %.0, align 1
+  %4 = sext i8 %3 to i64
+  %5 = getelementptr inbounds i8* %c, i64 %4
+  %6 = load i8* %5, align 1
+  %7 = add i8 %3, 1
+  %wrap.4 = sext i8 %7 to i64
+  %8 = getelementptr inbounds i8* %c, i64 %wrap.4
+  %9 = load i8* %8, align 1
+  store i8 %6, i8* %.08, align 1
+  %10 = getelementptr inbounds i8* %.08, i64 1
+  store i8 %9, i8* %10, align 1
+  %11 = getelementptr inbounds i8* %.08, i64 2
+  %12 = add nsw i32 %.09, -1
+  %13 = icmp eq i32 %12, 0
+  br i1 %13, label %14, label %1
+
+; <label>:14
+  ret void
+}
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 03d2e472cba6..5fe08ed305f1 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -119,8 +119,8 @@ entry:
 
 ; X64: test8:
 ; X64: addq
-; X64-NEXT: sbbq
-; X64-NEXT: testb
+; X64-NEXT: setb
+; X64: ret
 
 define i32 @test9(i32 %x, i32 %y) nounwind readnone {
   %cmp = icmp eq i32 %x, 10
diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
new file mode 100644
index 000000000000..6237b66a5ea6
--- /dev/null
+++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=atom  | \
+; RUN:     FileCheck --check-prefix=ATOM %s
+; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=core2 | \
+; RUN:     FileCheck --check-prefix=CORE2 %s
+; ATOM: calll *{{%[a-z]+}}
+; CORE2: calll *funcp
+;
+; original source code built with clang -S -emit-llvm -M32 test32.c:
+;
+;   int a, b, c, d, e, f, g, h, i, j;
+;   extern int (*funcp)(int, int, int, int, int, int, int, int);
+;   extern int sum;
+;   
+;   void func()
+;   {
+;     sum = 0;
+;     for( i = a; i < b; ++i )
+;     {
+;       sum += (*funcp)(i, b, c, d, e, f, g, h);
+;     }
+;   }
+;
+@sum = external global i32
+@a = common global i32 0, align 4
+@i = common global i32 0, align 4
+@b = common global i32 0, align 4
+@funcp = external global i32 (i32, i32, i32, i32, i32, i32, i32, i32)*
+@c = common global i32 0, align 4
+@d = common global i32 0, align 4
+@e = common global i32 0, align 4
+@f = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i32 0, align 4
+@j = common global i32 0, align 4
+
+define void @func() #0 {
+entry:
+  store i32 0, i32* @sum, align 4
+  %0 = load i32* @a, align 4
+  store i32 %0, i32* @i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %1 = load i32* @i, align 4
+  %2 = load i32* @b, align 4
+  %cmp = icmp slt i32 %1, %2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 4
+  %4 = load i32* @i, align 4
+  %5 = load i32* @b, align 4
+  %6 = load i32* @c, align 4
+  %7 = load i32* @d, align 4
+  %8 = load i32* @e, align 4
+  %9 = load i32* @f, align 4
+  %10 = load i32* @g, align 4
+  %11 = load i32* @h, align 4
+  %call = call i32 %3(i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11)
+  %12 = load i32* @sum, align 4
+  %add = add nsw i32 %12, %call
+  store i32 %add, i32* @sum, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %13 = load i32* @i, align 4
+  %inc = add nsw i32 %13, 1
+  store i32 %inc, i32* @i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
new file mode 100644
index 000000000000..a196d8175aa9
--- /dev/null
+++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=atom  | \
+; RUN:    FileCheck --check-prefix=ATOM %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core2 | \
+; RUN:    FileCheck --check-prefix=CORE2 %s
+; ATOM: callq *{{%[a-z]+[0-9]*}}
+; CORE2: callq *funcp
+;
+; Original source code built with clang -S -emit-llvm -m64 test64.c:
+;   int a, b, c, d, e, f, g, h, i, j, k, l, m, n;
+;   extern int (*funcp)(int, int, int, int, int, int,
+;                       int, int, int, int, int, int,
+;                       int, int);
+;   extern int sum;
+;   
+;   void func()
+;   {
+;     sum = 0;
+;     for( i = a; i < b; ++i )
+;     {
+;       sum += (*funcp)(a, i, i*2, i/b, c, d, e, f, g, h, j, k, l, n);
+;     }
+;   }
+;   
+
+@sum = external global i32
+@a = common global i32 0, align 4
+@i = common global i32 0, align 4
+@b = common global i32 0, align 4
+@funcp = external global i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*
+@c = common global i32 0, align 4
+@d = common global i32 0, align 4
+@e = common global i32 0, align 4
+@f = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i32 0, align 4
+@j = common global i32 0, align 4
+@k = common global i32 0, align 4
+@l = common global i32 0, align 4
+@n = common global i32 0, align 4
+@m = common global i32 0, align 4
+
+define void @func() #0 {
+entry:
+  store i32 0, i32* @sum, align 4
+  %0 = load i32* @a, align 4
+  store i32 %0, i32* @i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %1 = load i32* @i, align 4
+  %2 = load i32* @b, align 4
+  %cmp = icmp slt i32 %1, %2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 8
+  %4 = load i32* @a, align 4
+  %5 = load i32* @i, align 4
+  %6 = load i32* @i, align 4
+  %mul = mul nsw i32 %6, 2
+  %7 = load i32* @i, align 4
+  %8 = load i32* @b, align 4
+  %div = sdiv i32 %7, %8
+  %9 = load i32* @c, align 4
+  %10 = load i32* @d, align 4
+  %11 = load i32* @e, align 4
+  %12 = load i32* @f, align 4
+  %13 = load i32* @g, align 4
+  %14 = load i32* @h, align 4
+  %15 = load i32* @j, align 4
+  %16 = load i32* @k, align 4
+  %17 = load i32* @l, align 4
+  %18 = load i32* @n, align 4
+  %call = call i32 %3(i32 %4, i32 %5, i32 %mul, i32 %div, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16, i32 %17, i32 %18)
+  %19 = load i32* @sum, align 4
+  %add = add nsw i32 %19, %call
+  store i32 %add, i32* @sum, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %20 = load i32* @i, align 4
+  %inc = add nsw i32 %20, 1
+  store i32 %inc, i32* @i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
diff --git a/test/CodeGen/X86/atom-call-reg-indirect.ll b/test/CodeGen/X86/atom-call-reg-indirect.ll
new file mode 100644
index 000000000000..632781130d06
--- /dev/null
+++ b/test/CodeGen/X86/atom-call-reg-indirect.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck -check-prefix=ATOM32 %s
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck -check-prefix=ATOM-NOT32 %s
+; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux  | FileCheck -check-prefix=ATOM64 %s
+; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s
+
+
+; fn_ptr.ll
+%class.A = type { i32 (...)** }
+
+define i32 @test1() #0 {
+  ;ATOM: test1
+entry:
+  %call = tail call %class.A* @_Z3facv()
+  %0 = bitcast %class.A* %call to void (%class.A*)***
+  %vtable = load void (%class.A*)*** %0, align 8
+  %1 = load void (%class.A*)** %vtable, align 8
+  ;ATOM32: movl (%ecx), %ecx
+  ;ATOM32: calll *%ecx
+  ;ATOM-NOT32: calll *(%ecx)
+  ;ATOM64: movq (%rcx), %rcx
+  ;ATOM64: callq *%rcx
+  ;ATOM-NOT64: callq *(%rcx)
+  tail call void %1(%class.A* %call)
+  ret i32 0
+}
+
+declare %class.A* @_Z3facv() #1
+
+; virt_fn.ll
+@p = external global void (i32)**
+
+define i32 @test2() #0 {
+  ;ATOM: test2
+entry:
+  %0 = load void (i32)*** @p, align 8
+  %1 = load void (i32)** %0, align 8
+  ;ATOM32: movl (%eax), %eax
+  ;ATOM32: calll *%eax
+  ;ATOM-NOT: calll *(%eax)
+  ;ATOM64: movq (%rax), %rax
+  ;ATOM64: callq *%rax
+  ;ATOM-NOT64: callq *(%rax)
+  tail call void %1(i32 2)
+  ret i32 0
+}
diff --git a/test/CodeGen/X86/atomic32.ll b/test/CodeGen/X86/atomic32.ll
index 3ea96bf16997..3cb9ca1c76c7 100644
--- a/test/CodeGen/X86/atomic32.ll
+++ b/test/CodeGen/X86/atomic32.ll
@@ -2,8 +2,6 @@
 ; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32
 ; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -mattr=-cmov -verify-machineinstrs | FileCheck %s --check-prefix NOCMOV
 
-; XFAIL: cygwin,mingw32,win32
-
 @sc32 = external global i32
 
 define void @atomic_fetch_add32() nounwind {
diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll
index d362c31e6db8..aa000455753f 100644
--- a/test/CodeGen/X86/atomic64.ll
+++ b/test/CodeGen/X86/atomic64.ll
@@ -1,7 +1,5 @@
 ; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64
 
-; XFAIL: cygwin,mingw32,win32
-
 @sc64 = external global i64
 
 define void @atomic_fetch_add64() nounwind {
diff --git a/test/CodeGen/X86/avx-brcond.ll b/test/CodeGen/X86/avx-brcond.ll
new file mode 100644
index 000000000000..d52ae52e0b98
--- /dev/null
+++ b/test/CodeGen/X86/avx-brcond.ll
@@ -0,0 +1,150 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+declare i32 @llvm.x86.avx.ptestz.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
+declare i32 @llvm.x86.avx.ptestc.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
+
+define <4 x float> @test1(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test1:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test3(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test3:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test4(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test4:
+; CHECK: vptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test6(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test6:
+; CHECK: vptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test7(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test7:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp eq i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test8(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test8:
+; CHECK: vptest
+; CHECK-NEXT:	je
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+
diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll
index 62bdea2b4957..22fad7ce4b7d 100644
--- a/test/CodeGen/X86/avx-cvt.ll
+++ b/test/CodeGen/X86/avx-cvt.ll
@@ -18,6 +18,12 @@ define <4 x double> @sitofp01(<4 x i32> %a) {
   ret <4 x double> %b
 }
 
+; CHECK: vcvtdq2ps %ymm
+define <8 x float> @sitofp02(<8 x i16> %a) {
+  %b = sitofp <8 x i16> %a to <8 x float>
+  ret <8 x float> %b
+}
+
 ; CHECK: vcvttpd2dqy %ymm
 define <4 x i32> @fptosi01(<4 x double> %a) {
   %b = fptosi <4 x double> %a to <4 x i32>
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll
index 432852d47d30..a6775aba0989 100644
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -81,7 +81,7 @@ define void @storev32i8_01(<32 x i8> %a) nounwind {
 ; CHECK: _double_save
 ; CHECK-NOT: vinsertf128 $1
 ; CHECK-NOT: vinsertf128 $0
-; CHECK: vmovups %xmm
+; CHECK: vmovaps %xmm
 ; CHECK: vmovaps %xmm
 define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp {
 entry:
@@ -114,3 +114,38 @@ cif_mixed_test_any_check:                         ; preds = %cif_mask_mixed
   unreachable
 }
 
+; CHECK: add8i32
+; CHECK: vmovups
+; CHECK: vmovups
+; CHECK-NOT: vinsertf128
+; CHECK-NOT: vextractf128
+; CHECK: vmovups
+; CHECK: vmovups
+define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
+  %b = load <8 x i32>* %bp, align 1
+  %x = add <8 x i32> zeroinitializer, %b
+  store <8 x i32> %x, <8 x i32>* %ret, align 1
+  ret void
+}
+
+; CHECK: add4i64a64
+; CHECK: vmovaps ({{.*}}), %ymm{{.*}}
+; CHECK: vmovaps %ymm{{.*}}, ({{.*}})
+define void @add4i64a64(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
+  %b = load <4 x i64>* %bp, align 64
+  %x = add <4 x i64> zeroinitializer, %b
+  store <4 x i64> %x, <4 x i64>* %ret, align 64
+  ret void
+}
+
+; CHECK: add4i64a16
+; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}}
+; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}}
+; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
+; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
+define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
+  %b = load <4 x i64>* %bp, align 16
+  %x = add <4 x i64> zeroinitializer, %b
+  store <4 x i64> %x, <4 x i64>* %ret, align 16
+  ret void
+}
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
index 7ae0d36c0802..b9c700051005 100755
--- a/test/CodeGen/X86/avx-sext.ll
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -165,3 +165,24 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
   ret <4 x i64> %extmask
 }
 
+; AVX: sext_4i8_to_4i64
+; AVX: vpmovsxbd
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
+ %X = load <4 x i8>* %ptr
+ %Y = sext <4 x i8> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
+
+; AVX: sext_4i16_to_4i64
+; AVX: vpmovsxwd
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
+ %X = load <4 x i16>* %ptr
+ %Y = sext <4 x i16> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
diff --git a/test/CodeGen/X86/bool-simplify.ll b/test/CodeGen/X86/bool-simplify.ll
index 09eb5d1038f7..fa6f6e85e9b8 100644
--- a/test/CodeGen/X86/bool-simplify.ll
+++ b/test/CodeGen/X86/bool-simplify.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx,+rdrand | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx,+rdrand,+rdseed | FileCheck %s
 
 define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) {
   %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
@@ -39,7 +39,22 @@ define i32 @bax(<2 x i64> %c) {
 ; CHECK: ret
 }
 
-define i32 @rnd(i32 %arg) nounwind uwtable {
+define i16 @rnd16(i16 %arg) nounwind uwtable {
+  %1 = tail call { i16, i32 } @llvm.x86.rdrand.16() nounwind
+  %2 = extractvalue { i16, i32 } %1, 0
+  %3 = extractvalue { i16, i32 } %1, 1
+  %4 = icmp eq i32 %3, 0
+  %5 = select i1 %4, i16 0, i16 %arg
+  %6 = add i16 %5, %2
+  ret i16 %6
+; CHECK: rnd16
+; CHECK: rdrand
+; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i32 @rnd32(i32 %arg) nounwind uwtable {
   %1 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind
   %2 = extractvalue { i32, i32 } %1, 0
   %3 = extractvalue { i32, i32 } %1, 1
@@ -47,12 +62,77 @@ define i32 @rnd(i32 %arg) nounwind uwtable {
   %5 = select i1 %4, i32 0, i32 %arg
   %6 = add i32 %5, %2
   ret i32 %6
-; CHECK: rnd
+; CHECK: rnd32
 ; CHECK: rdrand
 ; CHECK: cmov
 ; CHECK-NOT: cmov
 ; CHECK: ret
 }
 
+define i64 @rnd64(i64 %arg) nounwind uwtable {
+  %1 = tail call { i64, i32 } @llvm.x86.rdrand.64() nounwind
+  %2 = extractvalue { i64, i32 } %1, 0
+  %3 = extractvalue { i64, i32 } %1, 1
+  %4 = icmp eq i32 %3, 0
+  %5 = select i1 %4, i64 0, i64 %arg
+  %6 = add i64 %5, %2
+  ret i64 %6
+; CHECK: rnd64
+; CHECK: rdrand
+; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i16 @seed16(i16 %arg) nounwind uwtable {
+  %1 = tail call { i16, i32 } @llvm.x86.rdseed.16() nounwind
+  %2 = extractvalue { i16, i32 } %1, 0
+  %3 = extractvalue { i16, i32 } %1, 1
+  %4 = icmp eq i32 %3, 0
+  %5 = select i1 %4, i16 0, i16 %arg
+  %6 = add i16 %5, %2
+  ret i16 %6
+; CHECK: seed16
+; CHECK: rdseed
+; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i32 @seed32(i32 %arg) nounwind uwtable {
+  %1 = tail call { i32, i32 } @llvm.x86.rdseed.32() nounwind
+  %2 = extractvalue { i32, i32 } %1, 0
+  %3 = extractvalue { i32, i32 } %1, 1
+  %4 = icmp eq i32 %3, 0
+  %5 = select i1 %4, i32 0, i32 %arg
+  %6 = add i32 %5, %2
+  ret i32 %6
+; CHECK: seed32
+; CHECK: rdseed
+; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i64 @seed64(i64 %arg) nounwind uwtable {
+  %1 = tail call { i64, i32 } @llvm.x86.rdseed.64() nounwind
+  %2 = extractvalue { i64, i32 } %1, 0
+  %3 = extractvalue { i64, i32 } %1, 1
+  %4 = icmp eq i32 %3, 0
+  %5 = select i1 %4, i64 0, i64 %arg
+  %6 = add i64 %5, %2
+  ret i64 %6
+; CHECK: seed64
+; CHECK: rdseed
+; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
+declare { i16, i32 } @llvm.x86.rdrand.16() nounwind
 declare { i32, i32 } @llvm.x86.rdrand.32() nounwind
+declare { i64, i32 } @llvm.x86.rdrand.64() nounwind
+declare { i16, i32 } @llvm.x86.rdseed.16() nounwind
+declare { i32, i32 } @llvm.x86.rdseed.32() nounwind
+declare { i64, i32 } @llvm.x86.rdseed.64() nounwind
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index 44670c802b41..bc4032b13cc0 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -108,3 +108,150 @@ bb2:                                              ; preds = %entry, %bb1
   ret float %.0
 }
 
+declare i32 @llvm.x86.sse41.ptestz(<4 x float> %p1, <4 x float> %p2) nounwind
+declare i32 @llvm.x86.sse41.ptestc(<4 x float> %p1, <4 x float> %p2) nounwind
+
+define <4 x float> @test5(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test5:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test7(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test7:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test8(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test8:
+; CHECK: ptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test10(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test10:
+; CHECK: ptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test11(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test11:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp eq i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test12(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test12:
+; CHECK: ptest
+; CHECK-NEXT:	je
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
diff --git a/test/CodeGen/X86/code_placement_align_all.ll b/test/CodeGen/X86/code_placement_align_all.ll
new file mode 100644
index 000000000000..1e5e8f780b70
--- /dev/null
+++ b/test/CodeGen/X86/code_placement_align_all.ll
@@ -0,0 +1,22 @@
+; RUN: llc  -mcpu=corei7 -mtriple=x86_64-linux -align-all-blocks=16 < %s | FileCheck %s
+
+;CHECK: foo
+;CHECK: .align  65536, 0x90
+;CHECK: .align  65536, 0x90
+;CHECK: .align  65536, 0x90
+;CHECK: ret
+define i32 @foo(i32 %t, i32 %l) nounwind readnone ssp uwtable {
+  %1 = icmp eq i32 %t, 0
+  br i1 %1, label %4, label %2
+
+; <label>:2                                       ; preds = %0
+  %3 = add nsw i32 %t, 2
+  ret i32 %3
+
+; <label>:4                                       ; preds = %0
+  %5 = icmp eq i32 %l, 0
+  %. = select i1 %5, i32 0, i32 5
+  ret i32 %.
+}
+
+
diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll
index 7e7acaa98a76..8ad38a4ee5c0 100644
--- a/test/CodeGen/X86/complex-fca.ll
+++ b/test/CodeGen/X86/complex-fca.ll
@@ -1,5 +1,8 @@
 ; RUN: llc < %s -march=x86 | grep mov | count 2
 
+; Skip this on Windows as there is no ccosl and sret behaves differently.
+; XFAIL: pc-win32
+
 define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind {
 entry:
 	%z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0
diff --git a/test/CodeGen/X86/dagcombine_unsafe_math.ll b/test/CodeGen/X86/dagcombine_unsafe_math.ll
index a3221dea5b3f..592cf1bec2e5 100644
--- a/test/CodeGen/X86/dagcombine_unsafe_math.ll
+++ b/test/CodeGen/X86/dagcombine_unsafe_math.ll
@@ -40,3 +40,17 @@ define float @test4(float %x, float %y) {
 ; CHECK: test4
 ; CHECK: vaddss
 }
+
+; rdar://13445387
+; "x + x + x => 3.0 * x" should be disabled after legalization because 
+; Instruction-Selection dosen't know how to handle "3.0"
+; 
+define float @test5() {
+  %mul.i.i151 = fmul <4 x float> zeroinitializer, zeroinitializer
+  %vecext.i8.i152 = extractelement <4 x float> %mul.i.i151, i32 1
+  %vecext1.i9.i153 = extractelement <4 x float> %mul.i.i151, i32 0
+  %add.i10.i154 = fadd float %vecext1.i9.i153, %vecext.i8.i152
+  %vecext.i7.i155 = extractelement <4 x float> %mul.i.i151, i32 2
+  %add.i.i156 = fadd float %vecext.i7.i155, %add.i10.i154
+  ret float %add.i.i156
+}
diff --git a/test/CodeGen/X86/dbg-byval-parameter.ll b/test/CodeGen/X86/dbg-byval-parameter.ll
index 859bc94b6e55..aca06a27a1df 100644
--- a/test/CodeGen/X86/dbg-byval-parameter.ll
+++ b/test/CodeGen/X86/dbg-byval-parameter.ll
@@ -28,22 +28,22 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!3}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !7}
-!6 = metadata !{i32 786468, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786451, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
+!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
 !8 = metadata !{metadata !9, metadata !14}
-!9 = metadata !{i32 786445, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786451, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
 !11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 786445, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!13 = metadata !{i32 786445, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
-!14 = metadata !{i32 786445, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
+!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
 !15 = metadata !{i32 11, i32 0, metadata !1, null}
 !16 = metadata !{i32 12, i32 0, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{i32 786443, metadata !2, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
 !18 = metadata !{metadata !1}
 !19 = metadata !{metadata !"b2.c", metadata !"/tmp/"}
diff --git a/test/CodeGen/X86/dbg-const-int.ll b/test/CodeGen/X86/dbg-const-int.ll
index 7e85d665ab76..aabc2068068d 100644
--- a/test/CodeGen/X86/dbg-const-int.ll
+++ b/test/CodeGen/X86/dbg-const-int.ll
@@ -14,14 +14,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 786688, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!7 = metadata !{i32 786443, metadata !1, i32 1, i32 11, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
 !8 = metadata !{i32 42}
 !9 = metadata !{i32 2, i32 12, metadata !7, null}
 !10 = metadata !{i32 3, i32 2, metadata !7, null}
diff --git a/test/CodeGen/X86/dbg-const.ll b/test/CodeGen/X86/dbg-const.ll
index 9215c139744e..a9b8f1fdc4f3 100644
--- a/test/CodeGen/X86/dbg-const.ll
+++ b/test/CodeGen/X86/dbg-const.ll
@@ -18,14 +18,14 @@ declare i32 @bar() nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0}
+!0 = metadata !{i32 786478, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, null, null, metadata !13, null, metadata !""}
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null}
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
 !6 = metadata !{i32 786688, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5, i32 0, null}
-!7 = metadata !{i32 786443, metadata !0, i32 12, i32 52, metadata !1, i32 0}
+!7 = metadata !{i32 786443, metadata !1, metadata !0, i32 12, i32 52, i32 0} ; [ DW_TAG_lexical_block ]
 !8 = metadata !{i32 42}
 !9 = metadata !{i32 15, i32 12, metadata !7, null}
 !10 = metadata !{i32 23, i32 3, metadata !7, null}
diff --git a/test/CodeGen/X86/dbg-declare-arg.ll b/test/CodeGen/X86/dbg-declare-arg.ll
index 650671b90020..f7e0c91cdff2 100644
--- a/test/CodeGen/X86/dbg-declare-arg.ll
+++ b/test/CodeGen/X86/dbg-declare-arg.ll
@@ -71,9 +71,9 @@ entry:
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"~A", metadata !"~A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 589826, metadata !2, metadata !"A", metadata !3, i32 2, i64 128, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ]
-!2 = metadata !{i32 786449, i32 0, i32 4, metadata !3, metadata !"clang version 3.0 (trunk 130127)", i1 false, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 4, metadata !3, metadata !"clang version 3.0 (trunk 130127)", i1 false, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ]
 !4 = metadata !{metadata !5, metadata !7, metadata !8, metadata !9, metadata !0, metadata !10, metadata !14}
 !5 = metadata !{i32 786445, metadata !3, metadata !"x", metadata !3, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
@@ -81,22 +81,22 @@ entry:
 !7 = metadata !{i32 786445, metadata !3, metadata !"y", metadata !3, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !6} ; [ DW_TAG_member ]
 !8 = metadata !{i32 786445, metadata !3, metadata !"z", metadata !3, i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
 !9 = metadata !{i32 786445, metadata !3, metadata !"o", metadata !3, i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786478, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"A", metadata !"A", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
 !11 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{null, metadata !13}
 !13 = metadata !{i32 786447, metadata !2, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 786478, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
+!14 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"A", metadata !"A", metadata !3, i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
 !15 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !16 = metadata !{null, metadata !13, metadata !17}
 !17 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_reference_type ]
 !18 = metadata !{i32 786470, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ]
-!19 = metadata !{i32 786478, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", metadata !3, i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null} ; [ DW_TAG_subprogram ]
+!19 = metadata !{i32 786478, metadata !"_Z3fooi", i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !3, i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null} ; [ DW_TAG_subprogram ]
 !20 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !21 = metadata !{metadata !1}
-!22 = metadata !{i32 786478, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD1Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null} ; [ DW_TAG_subprogram ]
+!22 = metadata !{i32 786478, metadata !"_ZN1AD1Ev", i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null} ; [ DW_TAG_subprogram ]
 !23 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !24 = metadata !{null}
-!25 = metadata !{i32 786478, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null} ; [ DW_TAG_subprogram ]
+!25 = metadata !{i32 786478, metadata !"_ZN1AD2Ev", i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null} ; [ DW_TAG_subprogram ]
 !26 = metadata !{i32 786689, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !27 = metadata !{i32 4, i32 11, metadata !19, null}
 !28 = metadata !{i32 786688, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
diff --git a/test/CodeGen/X86/dbg-declare.ll b/test/CodeGen/X86/dbg-declare.ll
index 0b29dc3705be..6ac397ac42e1 100644
--- a/test/CodeGen/X86/dbg-declare.ll
+++ b/test/CodeGen/X86/dbg-declare.ll
@@ -29,10 +29,10 @@ declare void @llvm.stackrestore(i8*) nounwind
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 153698)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 153698)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9, metadata !10}
diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/CodeGen/X86/dbg-file-name.ll
index e1534093346c..1bd3d77522a3 100644
--- a/test/CodeGen/X86/dbg-file-name.ll
+++ b/test/CodeGen/X86/dbg-file-name.ll
@@ -12,7 +12,7 @@ define i32 @main() nounwind {
 !llvm.dbg.cu = !{!2}
 
 !1 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 1, metadata !1, metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !9, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !10, i32 1, metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !9, null} ; [ DW_TAG_compile_unit ]
 !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !7 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/X86/dbg-i128-const.ll b/test/CodeGen/X86/dbg-i128-const.ll
index 075762c4fec5..17d645757d99 100644
--- a/test/CodeGen/X86/dbg-i128-const.ll
+++ b/test/CodeGen/X86/dbg-i128-const.ll
@@ -16,15 +16,15 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i128 42 }
 !1 = metadata !{i32 786688, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
-!2 = metadata !{i32 786443, metadata !3, i32 26, i32 0, metadata !4, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786443, metadata !4, metadata !3, i32 26, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!3 = metadata !{i32 786478, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ]
 !4 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786449, i32 0, i32 1, metadata !4, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 786453, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786449, i32 1, metadata !4, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 786453, metadata !13, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !7 = metadata !{metadata !8, metadata !8, metadata !8}
-!8 = metadata !{i32 786454, metadata !4, metadata !"ti_int", metadata !9, i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!8 = metadata !{i32 786454, metadata !14, metadata !4, metadata !"ti_int", i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
 !9 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
-!10 = metadata !{i32 786468, metadata !4, metadata !"", metadata !4, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786468, metadata !13, metadata !4, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !11 = metadata !{i32 29, i32 0, metadata !2, null}
 !12 = metadata !{metadata !3}
 !13 = metadata !{metadata !"foo.c", metadata !"/tmp"}
diff --git a/test/CodeGen/X86/dbg-large-unsigned-const.ll b/test/CodeGen/X86/dbg-large-unsigned-const.ll
index f8a06c47671c..ff16318efcec 100644
--- a/test/CodeGen/X86/dbg-large-unsigned-const.ll
+++ b/test/CodeGen/X86/dbg-large-unsigned-const.ll
@@ -30,13 +30,13 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !30 = metadata !{metadata !7, metadata !11}
 !31 = metadata !{metadata !12}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 135593)", i1 true, metadata !"", i32 0, null, null, metadata !29, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"ise", metadata !"ise", metadata !"_Z3iseRKxS0_", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null, metadata !30, i32 2} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 135593)", i1 true, metadata !"", i32 0, null, null, metadata !29, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !"_Z3iseRKxS0_", i32 0, metadata !2, metadata !"ise", metadata !"ise", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null, metadata !30, i32 2} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !"lli.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, i32 0, metadata !2, metadata !"fn", metadata !"fn", metadata !"_Z2fnx", metadata !2, i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null, metadata !31, i32 6} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786478, metadata !"_Z2fnx", i32 0, metadata !2, metadata !"fn", metadata !"fn", metadata !2, i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null, metadata !31, i32 6} ; [ DW_TAG_subprogram ]
 !7 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !8 = metadata !{i32 786448, metadata !0, null, null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
 !9 = metadata !{i32 786470, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ]
@@ -46,14 +46,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !13 = metadata !{i32 2, i32 27, metadata !1, null}
 !14 = metadata !{i32 2, i32 49, metadata !1, null}
 !15 = metadata !{i32 3, i32 3, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !1, i32 2, i32 54, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 54, i32 0} ; [ DW_TAG_lexical_block ]
 !17 = metadata !{metadata !"long long", metadata !18}
 !18 = metadata !{metadata !"omnipotent char", metadata !19}
 !19 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !20 = metadata !{i32 6, i32 19, metadata !6, null}
 !21 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
 !22 = metadata !{i32 7, i32 10, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !6, i32 6, i32 22, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !2, metadata !6, i32 6, i32 22, i32 1} ; [ DW_TAG_lexical_block ]
 !24 = metadata !{i32 2, i32 27, metadata !1, metadata !22}
 !25 = metadata !{i64 9223372036854775807}         
 !26 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index bf9045c5815b..baad6c0b60e6 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -45,32 +45,32 @@ declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8}
-!5 = metadata !{i32 786454, metadata !6, metadata !"UTItype", metadata !6, i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!5 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"UTItype", i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
 !6 = metadata !{i32 786473, metadata !30} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786468, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
-!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3, null, null, null, i32 1094} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786447, metadata !29, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3, null, null, null, i32 1094} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !11 = metadata !{metadata !12, metadata !12, metadata !12}
-!12 = metadata !{i32 786454, metadata !6, metadata !"TItype", metadata !6, i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
-!13 = metadata !{i32 786468, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"TItype", i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
+!13 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 786689, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
 !15 = metadata !{i32 1093, i32 0, metadata !9, null}
 !16 = metadata !{i64 0}
 !17 = metadata !{i32 786688, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0, null} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 786443, metadata !9, i32 1094, i32 0, metadata !1, i32 13} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 786454, metadata !6, metadata !"word_type", metadata !6, i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
-!20 = metadata !{i32 786468, metadata !1, metadata !"long int", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!18 = metadata !{i32 786443, metadata !1, metadata !9, i32 1094, i32 0, i32 13} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"word_type", i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
+!20 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !21 = metadata !{i32 1095, i32 0, metadata !18, null}
 !22 = metadata !{i32 1103, i32 0, metadata !18, null}
 !23 = metadata !{i32 1104, i32 0, metadata !18, null}
 !24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26}
-!25 = metadata !{i32 786443, metadata !0, i32 879, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 786443, metadata !1, metadata !0, i32 879, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !26 = metadata !{i32 1107, i32 0, metadata !18, null}
 !27 = metadata !{i32 1111, i32 0, metadata !18, null}
 !28 = metadata !{metadata !0, metadata !9}
diff --git a/test/CodeGen/X86/dbg-prolog-end.ll b/test/CodeGen/X86/dbg-prolog-end.ll
index 705c2f2aa068..26bac2e08286 100644
--- a/test/CodeGen/X86/dbg-prolog-end.ll
+++ b/test/CodeGen/X86/dbg-prolog-end.ll
@@ -35,21 +35,21 @@ entry:
 !llvm.dbg.cu = !{!0}
 !18 = metadata !{metadata !1, metadata !6}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
 !7 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !8 = metadata !{i32 1, i32 13, metadata !1, null}
 !9 = metadata !{i32 786688, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 786443, metadata !1, i32 1, i32 16, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
 !11 = metadata !{i32 2, i32 6, metadata !10, null}
 !12 = metadata !{i32 2, i32 11, metadata !10, null}
 !13 = metadata !{i32 3, i32 2, metadata !10, null}
 !14 = metadata !{i32 4, i32 2, metadata !10, null}
 !15 = metadata !{i32 5, i32 2, metadata !10, null}
 !16 = metadata !{i32 8, i32 2, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !6, i32 7, i32 12, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{i32 786443, metadata !2, metadata !6, i32 7, i32 12, i32 1} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll
index 6a069917642a..6090185dc10e 100644
--- a/test/CodeGen/X86/dbg-subrange.ll
+++ b/test/CodeGen/X86/dbg-subrange.ll
@@ -14,10 +14,10 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"small.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null}
diff --git a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/CodeGen/X86/dbg-value-dag-combine.ll
index d284182db302..fcbf64f42378 100644
--- a/test/CodeGen/X86/dbg-value-dag-combine.ll
+++ b/test/CodeGen/X86/dbg-value-dag-combine.ll
@@ -25,11 +25,9 @@ entry:
 }
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata
-!"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata
-!"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 1, metadata !1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null, metadata !5}
 !5 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
diff --git a/test/CodeGen/X86/dbg-value-isel.ll b/test/CodeGen/X86/dbg-value-isel.ll
index 5993b8cef09f..55be3b1f222b 100644
--- a/test/CodeGen/X86/dbg-value-isel.ll
+++ b/test/CodeGen/X86/dbg-value-isel.ll
@@ -80,14 +80,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 1, metadata !1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !19, null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !19, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
-!6 = metadata !{i32 589846, metadata !2, metadata !"uint", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
-!7 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
+!6 = metadata !{i32 589846, metadata !20, metadata !2, metadata !"uint", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!7 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !9 = metadata !{i32 1, i32 32, metadata !0, null}
 !10 = metadata !{i32 786688, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
index a1030b44552e..2a1916f26c97 100644
--- a/test/CodeGen/X86/dbg-value-location.ll
+++ b/test/CodeGen/X86/dbg-value-location.ll
@@ -47,25 +47,25 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !25, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", metadata !1, i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", metadata !1, i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ]
 !9 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 786468, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
 !12 = metadata !{i32 786689, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !13 = metadata !{i32 19509, i32 20, metadata !0, null}
 !14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17}
-!15 = metadata !{i32 786443, metadata !16, i32 18086, i32 1, metadata !1, i32 748} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", metadata !1, i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 786443, metadata !1, metadata !16, i32 18086, i32 1, i32 748} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
 !17 = metadata !{i32 19514, i32 2, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !0, i32 19510, i32 1, metadata !1, i32 99} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786443, metadata !1, metadata !0, i32 19510, i32 1, i32 99} ; [ DW_TAG_lexical_block ]
 !22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17}
 !23 = metadata !{i32 19524, i32 1, metadata !18, null}
 !24 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8}
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
index db25436f5d51..6766dbe9edb0 100644
--- a/test/CodeGen/X86/dbg-value-range.ll
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -19,9 +19,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, null, null, metadata !20, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, null, null, metadata !20, null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
@@ -31,7 +31,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !9 = metadata !{metadata !10}
 !10 = metadata !{i32 786445, metadata !1, metadata !"c", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
 !11 = metadata !{i32 786688, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!12 = metadata !{i32 786443, metadata !0, i32 5, i32 22, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 786443, metadata !22, metadata !0, i32 5, i32 22, i32 0} ; [ DW_TAG_lexical_block ]
 !13 = metadata !{i32 5, i32 19, metadata !0, null}
 !14 = metadata !{i32 6, i32 14, metadata !12, null}
 !15 = metadata !{metadata !"int", metadata !16}
diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll
index 0dad832e9183..3bc2957963eb 100644
--- a/test/CodeGen/X86/dwarf-comp-dir.ll
+++ b/test/CodeGen/X86/dwarf-comp-dir.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !3, metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, metadata !4, i32 12, metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
 !3 = metadata !{i32 786473, metadata !4} ; [ DW_TAG_file_type ]
diff --git a/test/CodeGen/X86/early-ifcvt-crash.ll b/test/CodeGen/X86/early-ifcvt-crash.ll
index c8280269689d..d9580503e91e 100644
--- a/test/CodeGen/X86/early-ifcvt-crash.ll
+++ b/test/CodeGen/X86/early-ifcvt-crash.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -x86-early-ifcvt -verify-machineinstrs
 ; RUN: llc < %s -x86-early-ifcvt -stress-early-ifcvt -verify-machineinstrs
+; CPU without a scheduling model:
+; RUN: llc < %s -x86-early-ifcvt -mcpu=pentium3 -verify-machineinstrs
 ;
 ; Run these tests with and without -stress-early-ifcvt to exercise heuristics.
 ;
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index d591f9408b14..5121ed13228d 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep "add	ESP, 8"
+; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | FileCheck %s
+; CHECK: add ESP, 8
 
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/fast-isel-args-fail.ll b/test/CodeGen/X86/fast-isel-args-fail.ll
index 45a2b384de41..e748e1cad1fd 100644
--- a/test/CodeGen/X86/fast-isel-args-fail.ll
+++ b/test/CodeGen/X86/fast-isel-args-fail.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-apple-darwin10
-; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-pc-win32 | FileCheck %s 
+; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-pc-win64 | FileCheck %s -check-prefix=WIN64
 ; Requires: Asserts
 
 ; Previously, this would cause an assert.
@@ -13,8 +14,10 @@ entry:
 ; We don't handle the Windows CC, yet.
 define i32 @foo(i32* %p) {
 entry:
-; CHECK: foo
-; CHECK: movl (%rcx), %eax
+; WIN32: foo
+; WIN32: movl (%rcx), %eax
+; WIN64: foo
+; WIN64: movl (%rdi), %eax
   %0 = load i32* %p, align 4
   ret i32 %0
 }
diff --git a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
index 9233d3f7c1a0..21fae4a82051 100644
--- a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
+++ b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -relocation-model=pic < %s | not grep call
+; RUN: llc -O0 -relocation-model=pic < %s | FileCheck %s
+; CHECK-NOT: call
 ; rdar://8396318
 
 ; Don't emit a PIC base register if no addresses are needed.
diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll
index b3adb802a8c5..bbbaeb233919 100644
--- a/test/CodeGen/X86/fast-isel-constpool.ll
+++ b/test/CodeGen/X86/fast-isel-constpool.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -fast-isel | grep "LCPI0_0(%rip)"
+; RUN: llc < %s -fast-isel | FileCheck %s
+; CHECK: LCPI0_0(%rip)
+
 ; Make sure fast isel uses rip-relative addressing when required.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.0"
diff --git a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
new file mode 100644
index 000000000000..45494f139e24
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define i64 @test_sdiv64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = sdiv i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_sdiv64:
+; CHECK: cqto
+; CHECK: idivq
+
+define i64 @test_srem64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = srem i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_srem64:
+; CHECK: cqto
+; CHECK: idivq
+
+define i64 @test_udiv64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = udiv i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_udiv64:
+; CHECK: xorl
+; CHECK: divq
+
+define i64 @test_urem64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = urem i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_urem64:
+; CHECK: xorl
+; CHECK: divq
diff --git a/test/CodeGen/X86/fast-isel-divrem.ll b/test/CodeGen/X86/fast-isel-divrem.ll
new file mode 100644
index 000000000000..7aba7f7b7953
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-divrem.ll
@@ -0,0 +1,122 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define i8 @test_sdiv8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = sdiv i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_sdiv8:
+; CHECK: movsbw
+; CHECK: idivb
+
+define i8 @test_srem8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = srem i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_srem8:
+; CHECK: movsbw
+; CHECK: idivb
+
+define i8 @test_udiv8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = udiv i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_udiv8:
+; CHECK: movzbw
+; CHECK: divb
+
+define i8 @test_urem8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = urem i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_urem8:
+; CHECK: movzbw
+; CHECK: divb
+
+define i16 @test_sdiv16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = sdiv i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_sdiv16:
+; CHECK: cwtd
+; CHECK: idivw
+
+define i16 @test_srem16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = srem i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_srem16:
+; CHECK: cwtd
+; CHECK: idivw
+
+define i16 @test_udiv16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = udiv i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_udiv16:
+; CHECK: xorl
+; CHECK: divw
+
+define i16 @test_urem16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = urem i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_urem16:
+; CHECK: xorl
+; CHECK: divw
+
+define i32 @test_sdiv32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = sdiv i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_sdiv32:
+; CHECK: cltd
+; CHECK: idivl
+
+define i32 @test_srem32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = srem i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_srem32:
+; CHECK: cltd
+; CHECK: idivl
+
+define i32 @test_udiv32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = udiv i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_udiv32:
+; CHECK: xorl
+; CHECK: divl
+
+define i32 @test_urem32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = urem i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_urem32:
+; CHECK: xorl
+; CHECK: divl
diff --git a/test/CodeGen/X86/fast-isel-expect.ll b/test/CodeGen/X86/fast-isel-expect.ll
index 1f53348bd07b..c4be7f364f30 100644
--- a/test/CodeGen/X86/fast-isel-expect.ll
+++ b/test/CodeGen/X86/fast-isel-expect.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @glbl = extern_weak constant i8
 
-declare i64 @llvm.expect.i64(i64, i64) #0
+declare i64 @llvm.expect.i64(i64, i64)
 
 define void @test() {
 ; CHECK: movl $glbl
@@ -19,5 +19,3 @@ bb1:
 bb2:
   unreachable
 }
-
-attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/X86/fast-isel-fneg.ll b/test/CodeGen/X86/fast-isel-fneg.ll
index f42a4a245bc8..67fdad299369 100644
--- a/test/CodeGen/X86/fast-isel-fneg.ll
+++ b/test/CodeGen/X86/fast-isel-fneg.ll
@@ -1,5 +1,9 @@
 ; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | grep xor | count 2
+; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | FileCheck --check-prefix=SSE2 %s
+
+; SSE2: xor
+; SSE2: xor
+; SSE2-NOT: xor
 
 ; CHECK: doo:
 ; CHECK: xor
diff --git a/test/CodeGen/X86/fast-isel-gv.ll b/test/CodeGen/X86/fast-isel-gv.ll
index cb2464e746b1..de7509568907 100644
--- a/test/CodeGen/X86/fast-isel-gv.ll
+++ b/test/CodeGen/X86/fast-isel-gv.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -fast-isel | grep "_kill@GOTPCREL(%rip)"
+; RUN: llc < %s -fast-isel | FileCheck %s
+; CHECK: _kill@GOTPCREL(%rip)
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin10.0"
 @f = global i8 (...)* @kill		; <i8 (...)**> [#uses=1]
diff --git a/test/CodeGen/X86/fast-isel-tailcall.ll b/test/CodeGen/X86/fast-isel-tailcall.ll
index c3e527c4e5b4..79ff79d4bca5 100644
--- a/test/CodeGen/X86/fast-isel-tailcall.ll
+++ b/test/CodeGen/X86/fast-isel-tailcall.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -fast-isel -tailcallopt -march=x86 | not grep add
+; RUN: llc < %s -fast-isel -tailcallopt -march=x86 | FileCheck %s
+; CHECK-NOT: add
 ; PR4154
 
 ; On x86, -tailcallopt changes the ABI so the caller shouldn't readjust
diff --git a/test/CodeGen/X86/fast-isel-unaligned-store.ll b/test/CodeGen/X86/fast-isel-unaligned-store.ll
new file mode 100644
index 000000000000..7ce7f676add0
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-unaligned-store.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
+entry:
+  store i32 %value, i32* %addr, align 1
+  ret i32 %value
+}
+
+; CHECK: ret
+
+define i16 @test_store_16(i16* nocapture %addr, i16 %value) {
+entry:
+  store i16 %value, i16* %addr, align 1
+  ret i16 %value
+}
+
+; CHECK: ret
diff --git a/test/CodeGen/X86/fastcall-correct-mangling.ll b/test/CodeGen/X86/fastcall-correct-mangling.ll
index 33b18bb8cc6e..3569d36541f7 100644
--- a/test/CodeGen/X86/fastcall-correct-mangling.ll
+++ b/test/CodeGen/X86/fastcall-correct-mangling.ll
@@ -7,3 +7,8 @@ define x86_fastcallcc void @func(i64 %X, i8 %Y, i8 %G, i16 %Z) {
         ret void
 }
 
+define x86_fastcallcc i32 @"\01DoNotMangle"(i32 %a) {
+; CHECK: DoNotMangle:
+entry:
+  ret i32 %a
+}
diff --git a/test/CodeGen/X86/fastcc-2.ll b/test/CodeGen/X86/fastcc-2.ll
index d044a2ad9e84..e11cdd19723a 100644
--- a/test/CodeGen/X86/fastcc-2.ll
+++ b/test/CodeGen/X86/fastcc-2.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep movsd
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 1
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
+; CHECK: movsd
+; CHECK-NOT: mov
 
 define i32 @foo() nounwind {
 entry:
diff --git a/test/CodeGen/X86/fastcc-byval.ll b/test/CodeGen/X86/fastcc-byval.ll
index f1204d677a55..08f4564fc69f 100644
--- a/test/CodeGen/X86/fastcc-byval.ll
+++ b/test/CodeGen/X86/fastcc-byval.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -tailcallopt=false | grep "movl[[:space:]]*8(%esp), %eax" | count 2
+; RUN: llc < %s -tailcallopt=false | FileCheck %s
+; CHECK: movl 8(%esp), %eax 
+; CHECK: movl 8(%esp), %eax 
+
 ; PR3122
 ; rdar://6400815
 
diff --git a/test/CodeGen/X86/fastcc-sret.ll b/test/CodeGen/X86/fastcc-sret.ll
index d45741884c7d..05705b9431ba 100644
--- a/test/CodeGen/X86/fastcc-sret.ll
+++ b/test/CodeGen/X86/fastcc-sret.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86 -tailcallopt=false | grep ret | not grep 4
+; RUN: llc < %s -march=x86 -tailcallopt=false | FileCheck %s
+; CHECK: ret{{[^4]*$}}
 
 	%struct.foo = type { [4 x i32] }
 
diff --git a/test/CodeGen/X86/fastcc3struct.ll b/test/CodeGen/X86/fastcc3struct.ll
index 84f8ef6cf360..e2c54ac8a133 100644
--- a/test/CodeGen/X86/fastcc3struct.ll
+++ b/test/CodeGen/X86/fastcc3struct.ll
@@ -1,7 +1,8 @@
 ; RUN: llc < %s -march=x86 -o %t
-; RUN: grep "movl	.48, %ecx" %t
-; RUN: grep "movl	.24, %edx" %t
-; RUN: grep "movl	.12, %eax" %t
+; RUN: FileCheck %s -input-file %t
+; CHECK: movl {{.}}12, %eax
+; CHECK: movl {{.}}24, %edx
+; CHECK: movl {{.}}48, %ecx
 
 %0 = type { i32, i32, i32 }
 
diff --git a/test/CodeGen/X86/handle-move.ll b/test/CodeGen/X86/handle-move.ll
index 93441cd7e819..ba96275569b3 100644
--- a/test/CodeGen/X86/handle-move.ll
+++ b/test/CodeGen/X86/handle-move.ll
@@ -1,7 +1,6 @@
 ; RUN: llc -march=x86-64 -mcpu=core2 -fast-isel -enable-misched -misched=shuffle -misched-bottomup -verify-machineinstrs < %s
 ; RUN: llc -march=x86-64 -mcpu=core2 -fast-isel -enable-misched -misched=shuffle -misched-topdown -verify-machineinstrs < %s
 ; REQUIRES: asserts
-; XFAIL: cygwin,mingw32
 ;
 ; Test the LiveIntervals::handleMove() function.
 ;
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
index a8ad0f1a28b2..9d285bf4e238 100644
--- a/test/CodeGen/X86/lit.local.cfg
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
 
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
diff --git a/test/CodeGen/X86/misched-copy.ll b/test/CodeGen/X86/misched-copy.ll
new file mode 100644
index 000000000000..ecdde826c796
--- /dev/null
+++ b/test/CodeGen/X86/misched-copy.ll
@@ -0,0 +1,49 @@
+; REQUIRES: asserts
+; RUN: llc < %s -march=x86 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched 2>&1 > /dev/null | FileCheck %s
+;
+; Test scheduling of copy instructions.
+;
+; Argument copies should be hoisted to the top of the block.
+; Return copies should be sunk to the end.
+; MUL_HiLo PhysReg use copies should be just above the mul.
+; MUL_HiLo PhysReg def copies should be just below the mul.
+;
+; CHECK:      *** Final schedule for BB#1 ***
+; CHECK-NEXT: %EAX<def> = COPY
+; CHECK:      MUL32r %vreg{{[0-9]+}}, %EAX<imp-def>, %EDX<imp-def>, %EFLAGS<imp-def,dead>, %EAX<imp-use>;
+; CHECK-NEXT: COPY %E{{[AD]}}X;
+; CHECK-NEXT: COPY %E{{[AD]}}X;
+; CHECK:      DIVSSrm
+define i64 @mulhoist(i32 %a, i32 %b) #0 {
+entry:
+  br label %body
+
+body:
+  %convb = sitofp i32 %b to float
+  ; Generates an iMUL64r to legalize types.
+  %aa = zext i32 %a to i64
+  %mul = mul i64 %aa, 74383
+  ; Do some dependent long latency stuff.
+  %trunc = trunc i64 %mul to i32
+  %convm = sitofp i32 %trunc to float
+  %divm = fdiv float %convm, 0.75
+  ;%addmb = fadd float %divm, %convb
+  ;%divmb = fdiv float %addmb, 0.125
+  ; Do some independent long latency stuff.
+  %conva = sitofp i32 %a to float
+  %diva = fdiv float %conva, 0.75
+  %addab = fadd float %diva, %convb
+  %divab = fdiv float %addab, 0.125
+  br label %end
+
+end:
+  %val = fptosi float %divab to i64
+  %add = add i64 %mul, %val
+  ret i64 %add
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/misched-ilp.ll b/test/CodeGen/X86/misched-ilp.ll
index c6cedb7be871..4ca296ca92e5 100644
--- a/test/CodeGen/X86/misched-ilp.ll
+++ b/test/CodeGen/X86/misched-ilp.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s
 ;
 ; Basic verification of the ScheduleDAGILP metric.
 ;
diff --git a/test/CodeGen/X86/misched-matmul.ll b/test/CodeGen/X86/misched-matmul.ll
new file mode 100644
index 000000000000..0f6e442b1a8d
--- /dev/null
+++ b/test/CodeGen/X86/misched-matmul.ll
@@ -0,0 +1,228 @@
+; REQUIRES: asserts
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -stats 2>&1 | FileCheck %s
+;
+; Verify that register pressure heuristics are working in MachineScheduler.
+;
+; When we enable subtree scheduling heuristics on X86, we may need a
+; flag to disable it for this test case.
+;
+; CHECK: @wrap_mul4
+; CHECK: 30 regalloc - Number of spills inserted
+
+define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
+entry:
+  %arrayidx1.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 0
+  %0 = load double* %arrayidx1.i, align 8, !tbaa !0
+  %arrayidx3.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 0
+  %1 = load double* %arrayidx3.i, align 8, !tbaa !0
+  %mul.i = fmul double %0, %1
+  %arrayidx5.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 1
+  %2 = load double* %arrayidx5.i, align 8, !tbaa !0
+  %arrayidx7.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 0
+  %3 = load double* %arrayidx7.i, align 8, !tbaa !0
+  %mul8.i = fmul double %2, %3
+  %add.i = fadd double %mul.i, %mul8.i
+  %arrayidx10.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 2
+  %4 = load double* %arrayidx10.i, align 8, !tbaa !0
+  %arrayidx12.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 0
+  %5 = load double* %arrayidx12.i, align 8, !tbaa !0
+  %mul13.i = fmul double %4, %5
+  %add14.i = fadd double %add.i, %mul13.i
+  %arrayidx16.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 3
+  %6 = load double* %arrayidx16.i, align 8, !tbaa !0
+  %arrayidx18.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 0
+  %7 = load double* %arrayidx18.i, align 8, !tbaa !0
+  %mul19.i = fmul double %6, %7
+  %add20.i = fadd double %add14.i, %mul19.i
+  %arrayidx25.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 1
+  %8 = load double* %arrayidx25.i, align 8, !tbaa !0
+  %mul26.i = fmul double %0, %8
+  %arrayidx30.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 1
+  %9 = load double* %arrayidx30.i, align 8, !tbaa !0
+  %mul31.i = fmul double %2, %9
+  %add32.i = fadd double %mul26.i, %mul31.i
+  %arrayidx36.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 1
+  %10 = load double* %arrayidx36.i, align 8, !tbaa !0
+  %mul37.i = fmul double %4, %10
+  %add38.i = fadd double %add32.i, %mul37.i
+  %arrayidx42.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 1
+  %11 = load double* %arrayidx42.i, align 8, !tbaa !0
+  %mul43.i = fmul double %6, %11
+  %add44.i = fadd double %add38.i, %mul43.i
+  %arrayidx49.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 2
+  %12 = load double* %arrayidx49.i, align 8, !tbaa !0
+  %mul50.i = fmul double %0, %12
+  %arrayidx54.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 2
+  %13 = load double* %arrayidx54.i, align 8, !tbaa !0
+  %mul55.i = fmul double %2, %13
+  %add56.i = fadd double %mul50.i, %mul55.i
+  %arrayidx60.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 2
+  %14 = load double* %arrayidx60.i, align 8, !tbaa !0
+  %mul61.i = fmul double %4, %14
+  %add62.i = fadd double %add56.i, %mul61.i
+  %arrayidx66.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 2
+  %15 = load double* %arrayidx66.i, align 8, !tbaa !0
+  %mul67.i = fmul double %6, %15
+  %add68.i = fadd double %add62.i, %mul67.i
+  %arrayidx73.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 3
+  %16 = load double* %arrayidx73.i, align 8, !tbaa !0
+  %mul74.i = fmul double %0, %16
+  %arrayidx78.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 3
+  %17 = load double* %arrayidx78.i, align 8, !tbaa !0
+  %mul79.i = fmul double %2, %17
+  %add80.i = fadd double %mul74.i, %mul79.i
+  %arrayidx84.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 3
+  %18 = load double* %arrayidx84.i, align 8, !tbaa !0
+  %mul85.i = fmul double %4, %18
+  %add86.i = fadd double %add80.i, %mul85.i
+  %arrayidx90.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 3
+  %19 = load double* %arrayidx90.i, align 8, !tbaa !0
+  %mul91.i = fmul double %6, %19
+  %add92.i = fadd double %add86.i, %mul91.i
+  %arrayidx95.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 0
+  %20 = load double* %arrayidx95.i, align 8, !tbaa !0
+  %mul98.i = fmul double %1, %20
+  %arrayidx100.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 1
+  %21 = load double* %arrayidx100.i, align 8, !tbaa !0
+  %mul103.i = fmul double %3, %21
+  %add104.i = fadd double %mul98.i, %mul103.i
+  %arrayidx106.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 2
+  %22 = load double* %arrayidx106.i, align 8, !tbaa !0
+  %mul109.i = fmul double %5, %22
+  %add110.i = fadd double %add104.i, %mul109.i
+  %arrayidx112.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 3
+  %23 = load double* %arrayidx112.i, align 8, !tbaa !0
+  %mul115.i = fmul double %7, %23
+  %add116.i = fadd double %add110.i, %mul115.i
+  %mul122.i = fmul double %8, %20
+  %mul127.i = fmul double %9, %21
+  %add128.i = fadd double %mul122.i, %mul127.i
+  %mul133.i = fmul double %10, %22
+  %add134.i = fadd double %add128.i, %mul133.i
+  %mul139.i = fmul double %11, %23
+  %add140.i = fadd double %add134.i, %mul139.i
+  %mul146.i = fmul double %12, %20
+  %mul151.i = fmul double %13, %21
+  %add152.i = fadd double %mul146.i, %mul151.i
+  %mul157.i = fmul double %14, %22
+  %add158.i = fadd double %add152.i, %mul157.i
+  %mul163.i = fmul double %15, %23
+  %add164.i = fadd double %add158.i, %mul163.i
+  %mul170.i = fmul double %16, %20
+  %mul175.i = fmul double %17, %21
+  %add176.i = fadd double %mul170.i, %mul175.i
+  %mul181.i = fmul double %18, %22
+  %add182.i = fadd double %add176.i, %mul181.i
+  %mul187.i = fmul double %19, %23
+  %add188.i = fadd double %add182.i, %mul187.i
+  %arrayidx191.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 0
+  %24 = load double* %arrayidx191.i, align 8, !tbaa !0
+  %mul194.i = fmul double %1, %24
+  %arrayidx196.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 1
+  %25 = load double* %arrayidx196.i, align 8, !tbaa !0
+  %mul199.i = fmul double %3, %25
+  %add200.i = fadd double %mul194.i, %mul199.i
+  %arrayidx202.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 2
+  %26 = load double* %arrayidx202.i, align 8, !tbaa !0
+  %mul205.i = fmul double %5, %26
+  %add206.i = fadd double %add200.i, %mul205.i
+  %arrayidx208.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 3
+  %27 = load double* %arrayidx208.i, align 8, !tbaa !0
+  %mul211.i = fmul double %7, %27
+  %add212.i = fadd double %add206.i, %mul211.i
+  %mul218.i = fmul double %8, %24
+  %mul223.i = fmul double %9, %25
+  %add224.i = fadd double %mul218.i, %mul223.i
+  %mul229.i = fmul double %10, %26
+  %add230.i = fadd double %add224.i, %mul229.i
+  %mul235.i = fmul double %11, %27
+  %add236.i = fadd double %add230.i, %mul235.i
+  %mul242.i = fmul double %12, %24
+  %mul247.i = fmul double %13, %25
+  %add248.i = fadd double %mul242.i, %mul247.i
+  %mul253.i = fmul double %14, %26
+  %add254.i = fadd double %add248.i, %mul253.i
+  %mul259.i = fmul double %15, %27
+  %add260.i = fadd double %add254.i, %mul259.i
+  %mul266.i = fmul double %16, %24
+  %mul271.i = fmul double %17, %25
+  %add272.i = fadd double %mul266.i, %mul271.i
+  %mul277.i = fmul double %18, %26
+  %add278.i = fadd double %add272.i, %mul277.i
+  %mul283.i = fmul double %19, %27
+  %add284.i = fadd double %add278.i, %mul283.i
+  %arrayidx287.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 0
+  %28 = load double* %arrayidx287.i, align 8, !tbaa !0
+  %mul290.i = fmul double %1, %28
+  %arrayidx292.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 1
+  %29 = load double* %arrayidx292.i, align 8, !tbaa !0
+  %mul295.i = fmul double %3, %29
+  %add296.i = fadd double %mul290.i, %mul295.i
+  %arrayidx298.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 2
+  %30 = load double* %arrayidx298.i, align 8, !tbaa !0
+  %mul301.i = fmul double %5, %30
+  %add302.i = fadd double %add296.i, %mul301.i
+  %arrayidx304.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 3
+  %31 = load double* %arrayidx304.i, align 8, !tbaa !0
+  %mul307.i = fmul double %7, %31
+  %add308.i = fadd double %add302.i, %mul307.i
+  %mul314.i = fmul double %8, %28
+  %mul319.i = fmul double %9, %29
+  %add320.i = fadd double %mul314.i, %mul319.i
+  %mul325.i = fmul double %10, %30
+  %add326.i = fadd double %add320.i, %mul325.i
+  %mul331.i = fmul double %11, %31
+  %add332.i = fadd double %add326.i, %mul331.i
+  %mul338.i = fmul double %12, %28
+  %mul343.i = fmul double %13, %29
+  %add344.i = fadd double %mul338.i, %mul343.i
+  %mul349.i = fmul double %14, %30
+  %add350.i = fadd double %add344.i, %mul349.i
+  %mul355.i = fmul double %15, %31
+  %add356.i = fadd double %add350.i, %mul355.i
+  %mul362.i = fmul double %16, %28
+  %mul367.i = fmul double %17, %29
+  %add368.i = fadd double %mul362.i, %mul367.i
+  %mul373.i = fmul double %18, %30
+  %add374.i = fadd double %add368.i, %mul373.i
+  %mul379.i = fmul double %19, %31
+  %add380.i = fadd double %add374.i, %mul379.i
+  store double %add20.i, double* %Out, align 8
+  %Res.i.sroa.1.8.idx2 = getelementptr inbounds double* %Out, i64 1
+  store double %add44.i, double* %Res.i.sroa.1.8.idx2, align 8
+  %Res.i.sroa.2.16.idx4 = getelementptr inbounds double* %Out, i64 2
+  store double %add68.i, double* %Res.i.sroa.2.16.idx4, align 8
+  %Res.i.sroa.3.24.idx6 = getelementptr inbounds double* %Out, i64 3
+  store double %add92.i, double* %Res.i.sroa.3.24.idx6, align 8
+  %Res.i.sroa.4.32.idx8 = getelementptr inbounds double* %Out, i64 4
+  store double %add116.i, double* %Res.i.sroa.4.32.idx8, align 8
+  %Res.i.sroa.5.40.idx10 = getelementptr inbounds double* %Out, i64 5
+  store double %add140.i, double* %Res.i.sroa.5.40.idx10, align 8
+  %Res.i.sroa.6.48.idx12 = getelementptr inbounds double* %Out, i64 6
+  store double %add164.i, double* %Res.i.sroa.6.48.idx12, align 8
+  %Res.i.sroa.7.56.idx14 = getelementptr inbounds double* %Out, i64 7
+  store double %add188.i, double* %Res.i.sroa.7.56.idx14, align 8
+  %Res.i.sroa.8.64.idx16 = getelementptr inbounds double* %Out, i64 8
+  store double %add212.i, double* %Res.i.sroa.8.64.idx16, align 8
+  %Res.i.sroa.9.72.idx18 = getelementptr inbounds double* %Out, i64 9
+  store double %add236.i, double* %Res.i.sroa.9.72.idx18, align 8
+  %Res.i.sroa.10.80.idx20 = getelementptr inbounds double* %Out, i64 10
+  store double %add260.i, double* %Res.i.sroa.10.80.idx20, align 8
+  %Res.i.sroa.11.88.idx22 = getelementptr inbounds double* %Out, i64 11
+  store double %add284.i, double* %Res.i.sroa.11.88.idx22, align 8
+  %Res.i.sroa.12.96.idx24 = getelementptr inbounds double* %Out, i64 12
+  store double %add308.i, double* %Res.i.sroa.12.96.idx24, align 8
+  %Res.i.sroa.13.104.idx26 = getelementptr inbounds double* %Out, i64 13
+  store double %add332.i, double* %Res.i.sroa.13.104.idx26, align 8
+  %Res.i.sroa.14.112.idx28 = getelementptr inbounds double* %Out, i64 14
+  store double %add356.i, double* %Res.i.sroa.14.112.idx28, align 8
+  %Res.i.sroa.15.120.idx30 = getelementptr inbounds double* %Out, i64 15
+  store double %add380.i, double* %Res.i.sroa.15.120.idx30, align 8
+  ret void
+}
+
+attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index 65ee7b1d8e00..bb42734833dd 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=sse41 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=sse41 | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 -mattr=sse41 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
 
 define i32 @test1() nounwind readonly {
 entry:
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll
index 9f7d036cf141..29b9f34464f0 100644
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s
+; RUN: llc -asm-verbose=false -disable-branch-fold -disable-block-placement -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s
 ; rdar://7236213
 ;
 ; The scheduler's 2-address hack has been disabled, so there is
diff --git a/test/CodeGen/X86/no-compact-unwind.ll b/test/CodeGen/X86/no-compact-unwind.ll
new file mode 100644
index 000000000000..96712814d5b8
--- /dev/null
+++ b/test/CodeGen/X86/no-compact-unwind.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -mtriple x86_64-apple-macosx10.8.0 -disable-cfi | FileCheck %s
+
+%"struct.dyld::MappedRanges" = type { [400 x %struct.anon], %"struct.dyld::MappedRanges"* }
+%struct.anon = type { %class.ImageLoader*, i64, i64 }
+%class.ImageLoader = type { i32 (...)**, i8*, i8*, i32, i64, i64, i32, i32, %"struct.ImageLoader::recursive_lock"*, i16, i16, [4 x i8] }
+%"struct.ImageLoader::recursive_lock" = type { i32, i32 }
+
+@G1 = external hidden global %"struct.dyld::MappedRanges", align 8
+
+declare void @OSMemoryBarrier() optsize
+
+; This compact unwind encoding indicates that we could not generate correct
+; compact unwind encodings for this function. This then defaults to using the
+; DWARF EH frame.
+;
+; CHECK: .section __LD,__compact_unwind,regular,debug
+; CHECK: .quad _func
+; CHECK: .long 67108864                ## Compact Unwind Encoding: 0x4000000
+; CHECK: .quad 0                       ## Personality Function
+; CHECK: .quad 0                       ## LSDA
+;
+define void @func(%class.ImageLoader* %image) optsize ssp uwtable {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc10, %entry
+  %p.019 = phi %"struct.dyld::MappedRanges"* [ @G1, %entry ], [ %1, %for.inc10 ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.inc, %for.cond1.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
+  %image4 = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 0, i64 %indvars.iv, i32 0
+  %0 = load %class.ImageLoader** %image4, align 8, !tbaa !0
+  %cmp5 = icmp eq %class.ImageLoader* %0, %image
+  br i1 %cmp5, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body3
+  tail call void @OSMemoryBarrier() optsize
+  store %class.ImageLoader* null, %class.ImageLoader** %image4, align 8, !tbaa !0
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 400
+  br i1 %exitcond, label %for.inc10, label %for.body3
+
+for.inc10:                                        ; preds = %for.inc
+  %next = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 1
+  %1 = load %"struct.dyld::MappedRanges"** %next, align 8, !tbaa !0
+  %cmp = icmp eq %"struct.dyld::MappedRanges"* %1, null
+  br i1 %cmp, label %for.end11, label %for.cond1.preheader
+
+for.end11:                                        ; preds = %for.inc10
+  ret void
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/pr15296.ll b/test/CodeGen/X86/pr15296.ll
new file mode 100644
index 000000000000..1187d80cdf75
--- /dev/null
+++ b/test/CodeGen/X86/pr15296.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7-avx | FileCheck %s
+
+define <8 x i32> @shiftInput___vyuunu(<8 x i32> %input, i32 %shiftval, <8 x i32> %__mask) nounwind {
+allocas:
+  %smear.0 = insertelement <8 x i32> undef, i32 %shiftval, i32 0
+  %smear.1 = insertelement <8 x i32> %smear.0, i32 %shiftval, i32 1
+  %smear.2 = insertelement <8 x i32> %smear.1, i32 %shiftval, i32 2
+  %smear.3 = insertelement <8 x i32> %smear.2, i32 %shiftval, i32 3
+  %smear.4 = insertelement <8 x i32> %smear.3, i32 %shiftval, i32 4
+  %smear.5 = insertelement <8 x i32> %smear.4, i32 %shiftval, i32 5
+  %smear.6 = insertelement <8 x i32> %smear.5, i32 %shiftval, i32 6
+  %smear.7 = insertelement <8 x i32> %smear.6, i32 %shiftval, i32 7
+  %bitop = lshr <8 x i32> %input, %smear.7
+  ret <8 x i32> %bitop
+}
+
+; CHECK: shiftInput___vyuunu
+; CHECK: psrld
+; CHECK: psrld
+; CHECK: ret
+
+define <8 x i32> @shiftInput___canonical(<8 x i32> %input, i32 %shiftval, <8 x i32> %__mask) nounwind {
+allocas:
+  %smear.0 = insertelement <8 x i32> undef, i32 %shiftval, i32 0
+  %smear.7 = shufflevector <8 x i32> %smear.0, <8 x i32> undef, <8 x i32> zeroinitializer
+  %bitop = lshr <8 x i32> %input, %smear.7
+  ret <8 x i32> %bitop
+}
+
+; CHECK: shiftInput___canonical
+; CHECK: psrld
+; CHECK: psrld
+; CHECK: ret
+
+define <4 x i64> @shiftInput___64in32bitmode(<4 x i64> %input, i64 %shiftval, <4 x i64> %__mask) nounwind {
+allocas:
+  %smear.0 = insertelement <4 x i64> undef, i64 %shiftval, i32 0
+  %smear.7 = shufflevector <4 x i64> %smear.0, <4 x i64> undef, <4 x i32> zeroinitializer
+  %bitop = lshr <4 x i64> %input, %smear.7
+  ret <4 x i64> %bitop
+}
+
+; CHECK: shiftInput___64in32bitmode
+; CHECK: psrlq
+; CHECK: psrlq
+; CHECK: ret
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index ec2f302b1499..efb51913c5c1 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
 ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW
 
 ; rdar://10538297
 
@@ -9,10 +10,12 @@ entry:
 ; CHECK: prefetcht1
 ; CHECK: prefetcht0
 ; CHECK: prefetchnta
+; PRFCHW: prefetchw
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 )
+	tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 )
 	ret void
 }
 
diff --git a/test/CodeGen/X86/rdseed.ll b/test/CodeGen/X86/rdseed.ll
new file mode 100644
index 000000000000..35de7ebf7430
--- /dev/null
+++ b/test/CodeGen/X86/rdseed.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx-i -mattr=+rdseed | FileCheck %s
+
+declare {i16, i32} @llvm.x86.rdseed.16()
+declare {i32, i32} @llvm.x86.rdseed.32()
+declare {i64, i32} @llvm.x86.rdseed.64()
+
+define i32 @_rdseed16_step(i16* %random_val) {
+  %call = call {i16, i32} @llvm.x86.rdseed.16()
+  %randval = extractvalue {i16, i32} %call, 0
+  store i16 %randval, i16* %random_val
+  %isvalid = extractvalue {i16, i32} %call, 1
+  ret i32 %isvalid
+; CHECK: _rdseed16_step:
+; CHECK: rdseedw	%ax
+; CHECK: movw	%ax, (%r[[A0:di|cx]])
+; CHECK: movzwl	%ax, %ecx
+; CHECK: movl	$1, %eax
+; CHECK: cmovael	%ecx, %eax
+; CHECK: ret
+}
+
+define i32 @_rdseed32_step(i32* %random_val) {
+  %call = call {i32, i32} @llvm.x86.rdseed.32()
+  %randval = extractvalue {i32, i32} %call, 0
+  store i32 %randval, i32* %random_val
+  %isvalid = extractvalue {i32, i32} %call, 1
+  ret i32 %isvalid
+; CHECK: _rdseed32_step:
+; CHECK: rdseedl	%e[[T0:[a-z]+]]
+; CHECK: movl	%e[[T0]], (%r[[A0]])
+; CHECK: movl	$1, %eax
+; CHECK: cmovael	%e[[T0]], %eax
+; CHECK: ret
+}
+
+define i32 @_rdseed64_step(i64* %random_val) {
+  %call = call {i64, i32} @llvm.x86.rdseed.64()
+  %randval = extractvalue {i64, i32} %call, 0
+  store i64 %randval, i64* %random_val
+  %isvalid = extractvalue {i64, i32} %call, 1
+  ret i32 %isvalid
+; CHECK: _rdseed64_step:
+; CHECK: rdseedq	%r[[T1:[a-z]+]]
+; CHECK: movq	%r[[T1]], (%r[[A0]])
+; CHECK: movl	$1, %eax
+; CHECK: cmovael	%e[[T1]], %eax
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/select-with-and-or.ll b/test/CodeGen/X86/select-with-and-or.ll
new file mode 100644
index 000000000000..1ccf30bf2083
--- /dev/null
+++ b/test/CodeGen/X86/select-with-and-or.ll
@@ -0,0 +1,72 @@
+; RUN: opt < %s -O3 | \
+; RUN:	llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i32> @test1(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> %c, <4 x i32> zeroinitializer
+  ret <4 x i32> %r
+; CHECK: test1
+; CHECK: cmpnle
+; CHECK-NEXT: andps
+; CHECK: ret
+}
+
+define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c
+  ret <4 x i32> %r
+; CHECK: test2
+; CHECK: cmpnle
+; CHECK-NEXT: orps
+; CHECK: ret
+}
+
+define <4 x i32> @test3(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> zeroinitializer, <4 x i32> %c
+  ret <4 x i32> %r
+; CHECK: test3
+; CHECK: cmple
+; CHECK-NEXT: andps
+; CHECK: ret
+}
+
+define <4 x i32> @test4(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %r
+; CHECK: test4
+; CHECK: cmple
+; CHECK-NEXT: orps
+; CHECK: ret
+}
+
+define <4 x i32> @test5(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer
+  ret <4 x i32> %r
+; CHECK: test5
+; CHECK: cmpnle
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test6(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %r
+; CHECK: test6
+; CHECK: cmple
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test7(<4 x float> %a, <4 x float> %b, <4 x i32>* %p) {
+  %f = fcmp ult <4 x float> %a, %b
+  %s = sext <4 x i1> %f to <4 x i32>
+  %l = load <4 x i32>* %p
+  %r = and <4 x i32> %l, %s
+  ret <4 x i32> %r
+; CHECK: test7
+; CHECK: cmpnle
+; CHECK-NEXT: andps
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index 2af355905dc3..ceb79ea927a1 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i686-linux   -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
+; RUN: llc < %s -mtriple=i686-linux   -mcpu=core2 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core2 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
 
 define void @t1(i32 %x) nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/sincos-opt.ll b/test/CodeGen/X86/sincos-opt.ll
index f364d1fc2dc8..333c4663eb0b 100644
--- a/test/CodeGen/X86/sincos-opt.ll
+++ b/test/CodeGen/X86/sincos-opt.ll
@@ -4,6 +4,7 @@
 
 ; Combine sin / cos into a single call.
 ; rdar://13087969
+; rdar://13599493
 
 define float @test1(float %x) nounwind {
 entry:
@@ -14,7 +15,8 @@ entry:
 
 ; OSX_SINCOS: test1:
 ; OSX_SINCOS: callq ___sincosf_stret
-; OSX_SINCOS: addss %xmm1, %xmm0
+; OSX_SINCOS: pshufd $1, %xmm0, %xmm1
+; OSX_SINCOS: addss %xmm0, %xmm1
 
 ; OSX_NOOPT: test1
 ; OSX_NOOPT: callq _cosf
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index 649cd61ab78c..2aca5b897d35 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true -schedmodel=false | FileCheck %s
 
 ; Currently, floating-point selects are lowered to CFG triangles.
 ; This means that one side of the select is always unconditionally
diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll
index a7c2517e7dbe..73826ed0b29d 100644
--- a/test/CodeGen/X86/stdcall.ll
+++ b/test/CodeGen/X86/stdcall.ll
@@ -1,16 +1,24 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mtriple="i386-pc-mingw32" < %s | FileCheck %s
 ; PR5851
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-mingw32"
-
 %0 = type { void (...)* }
 
-@B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4
-; CHECK: _B:
-; CHECK: .long _MyFunc@0
-
 define internal x86_stdcallcc void @MyFunc() nounwind {
 entry:
+; CHECK: MyFunc@0:
+; CHECK: ret
   ret void
 }
+
+; PR14410
+define x86_stdcallcc i32 @"\01DoNotMangle"(i32 %a) {
+; CHECK: DoNotMangle:
+; CHECK: ret $4
+entry:
+  ret i32 %a
+}
+
+@B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4
+; CHECK: _B:
+; CHECK: .long _MyFunc@0
+
diff --git a/test/CodeGen/X86/thiscall-struct-return.ll b/test/CodeGen/X86/thiscall-struct-return.ll
deleted file mode 100644
index 0507cb890cd2..000000000000
--- a/test/CodeGen/X86/thiscall-struct-return.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc < %s -mtriple=i386-PC-Win32 | FileCheck %s
-
-%class.C = type { i8 }
-%struct.S = type { i32 }
-%struct.M = type { i32, i32 }
-
-declare void @_ZN1CC1Ev(%class.C* %this) unnamed_addr nounwind align 2
-declare x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* noalias sret %agg.result, %class.C* %this) nounwind align 2
-declare x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* noalias sret %agg.result, %class.C* %this) nounwind align 2
-
-define void @testv() nounwind {
-; CHECK: testv:
-; CHECK: leal 16(%esp), %esi
-; CHECK-NEXT: movl	%esi, (%esp)
-; CHECK-NEXT: calll _ZN1CC1Ev
-; CHECK: leal 8(%esp), %eax
-; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: calll _ZNK1C5SmallEv
-entry:
-  %c = alloca %class.C, align 1
-  %tmp = alloca %struct.S, align 4
-  call void @_ZN1CC1Ev(%class.C* %c)
-  ; This call should put the return structure as a pointer
-  ; into EAX instead of returning directly in EAX.  The this
-  ; pointer should go into ECX
-  call x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* sret %tmp, %class.C* %c)
-  ret void
-}
-
-define void @test2v() nounwind {
-; CHECK: test2v:
-; CHECK: leal 16(%esp), %esi
-; CHECK-NEXT: movl	%esi, (%esp)
-; CHECK-NEXT: calll _ZN1CC1Ev
-; CHECK: leal 8(%esp), %eax
-; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: calll _ZNK1C6MediumEv
-entry:
-  %c = alloca %class.C, align 1
-  %tmp = alloca %struct.M, align 4
-  call void @_ZN1CC1Ev(%class.C* %c)
-  ; This call should put the return structure as a pointer
-  ; into EAX instead of returning directly in EAX/EDX.  The this
-  ; pointer should go into ECX
-  call x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* sret %tmp, %class.C* %c)
-  ret void
-}
diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll
index e8a79bfa6ee3..8cdecd81bff5 100644
--- a/test/CodeGen/X86/tls.ll
+++ b/test/CodeGen/X86/tls.ll
@@ -22,13 +22,13 @@ define i32 @f1() {
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movl _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: movl _i1@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
 ; X64_WIN: f1:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movl i1@SECREL(%rax), %eax
+; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -49,13 +49,13 @@ define i32* @f2() {
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: leal _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: leal _i1@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
 ; X64_WIN: f2:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: leaq i1@SECREL(%rax), %rax
+; X64_WIN-NEXT: leaq i1@SECREL32(%rax), %rax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -75,13 +75,13 @@ define i32 @f3() nounwind {
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movl _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: movl _i2@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
 ; X64_WIN: f3:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movl i2@SECREL(%rax), %eax
+; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -102,13 +102,13 @@ define i32* @f4() {
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: leal _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: leal _i2@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
 ; X64_WIN: f4:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: leaq i2@SECREL(%rax), %rax
+; X64_WIN-NEXT: leaq i2@SECREL32(%rax), %rax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -126,13 +126,13 @@ define i32 @f5() nounwind {
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movl _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: movl _i3@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
 ; X64_WIN: f5:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movl i3@SECREL(%rax), %eax
+; X64_WIN-NEXT: movl i3@SECREL32(%rax), %eax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -153,13 +153,13 @@ define i32* @f6() {
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: leal _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: leal _i3@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
 ; X64_WIN: f6:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: leaq i3@SECREL(%rax), %rax
+; X64_WIN-NEXT: leaq i3@SECREL32(%rax), %rax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -234,14 +234,14 @@ define i16 @f11() {
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movzwl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: movzwl _s1@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: # kill
 ; X32_WIN-NEXT: ret
 ; X64_WIN: f11:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movzwl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax
 ; X64_WIN-NEXT: # kill
 ; X64_WIN-NEXT: ret
 
@@ -261,13 +261,13 @@ define i32 @f12() {
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movswl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: movswl _s1@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
 ; X64_WIN: f12:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movswl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: movswl s1@SECREL32(%rax), %eax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -287,13 +287,13 @@ define i8 @f13() {
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movb _b1@SECREL(%eax), %al
+; X32_WIN-NEXT: movb _b1@SECREL32(%eax), %al
 ; X32_WIN-NEXT: ret
 ; X64_WIN: f13:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movb b1@SECREL(%rax), %al
+; X64_WIN-NEXT: movb b1@SECREL32(%rax), %al
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -312,13 +312,13 @@ define i32 @f14() {
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movsbl _b1@SECREL(%eax), %eax
+; X32_WIN-NEXT: movsbl _b1@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
 ; X64_WIN: f14:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movsbl b1@SECREL(%rax), %eax
+; X64_WIN-NEXT: movsbl b1@SECREL32(%rax), %eax
 ; X64_WIN-NEXT: ret
 
 entry:
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
index b89372a3c943..e02e3b54752b 100644
--- a/test/CodeGen/X86/unknown-location.ll
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -21,9 +21,9 @@ entry:
 !llvm.dbg.cu = !{!3}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, i32, i32, i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, i32, i32, i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"producer", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, metadata !10, i32 12, metadata !"producer", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6}
 !6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
index b6d91a3f770e..85d8b2cea367 100644
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -65,3 +65,139 @@ define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
 	%D = sext <2 x i1> %C to <2 x i64>
 	ret <2 x i64> %D
 }
+
+define <2 x i64> @test7(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test7:
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp sgt <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test8(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test8:
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp slt <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test9:
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp sge <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test10:
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp sle <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test11(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test11:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp ugt <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test12(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test12:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp ult <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test13:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp uge <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test14:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp ule <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll
index e4a8f46cbc79..863712ff48b3 100644
--- a/test/CodeGen/X86/vec_fpext.ll
+++ b/test/CodeGen/X86/vec_fpext.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
-; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck --check-prefix=AVX %s
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s
 
 ; PR11674
 define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
diff --git a/test/CodeGen/X86/wide-fma-contraction.ll b/test/CodeGen/X86/wide-fma-contraction.ll
new file mode 100644
index 000000000000..d93f33ba0e58
--- /dev/null
+++ b/test/CodeGen/X86/wide-fma-contraction.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+; CHECK: fmafunc
+define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+; CHECK: vfmaddps
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+; CHECK: vfmaddps
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+  %ret = tail call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
+  ret <16 x float> %ret
+}
+
+declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
+
+
+
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
index 878c6db99286..2bfe5fb1007b 100644
--- a/test/CodeGen/X86/win32_sret.ll
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -1,28 +1,126 @@
-; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
-; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; We specify -mcpu explicitly to avoid instruction reordering that happens on
+; some setups (e.g., Atom) from affecting the output.
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
 ; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
-; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
-; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
 ; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
 
 ; The SysV ABI used by most Unixes and Mingw on x86 specifies that an sret pointer
 ; is callee-cleanup. However, in MSVC's cdecl calling convention, sret pointer
 ; arguments are caller-cleanup like normal arguments.
 
-define void @sret1(i8* sret) nounwind {
+define void @sret1(i8* sret %x) nounwind {
 entry:
-; WIN_X32:    {{ret$}}
-; MINGW_X32:  ret $4
+; WIN32:      sret1
+; WIN32:      movb $42, (%eax)
+; WIN32-NOT:  popl %eax
+; WIN32:    {{ret$}}
+
+; MINGW_X86:  sret1
+; MINGW_X86:  ret $4
+
+; LINUX:      sret1
 ; LINUX:      ret $4
+
+  store i8 42, i8* %x, align 4
   ret void
 }
 
-define void @sret2(i32* sret %x, i32 %y) nounwind {
+define void @sret2(i8* sret %x, i8 %y) nounwind {
 entry:
-; WIN_X32:    {{ret$}}
-; MINGW_X32:  ret $4
+; WIN32:      sret2
+; WIN32:      movb {{.*}}, (%eax)
+; WIN32-NOT:  popl %eax
+; WIN32:    {{ret$}}
+
+; MINGW_X86:  sret2
+; MINGW_X86:  ret $4
+
+; LINUX:      sret2
 ; LINUX:      ret $4
-  store i32 %y, i32* %x
+
+  store i8 %y, i8* %x
   ret void
 }
 
+define void @sret3(i8* sret %x, i8* %y) nounwind {
+entry:
+; WIN32:      sret3
+; WIN32:      movb $42, (%eax)
+; WIN32-NOT:  movb $13, (%eax)
+; WIN32-NOT:  popl %eax
+; WIN32:    {{ret$}}
+
+; MINGW_X86:  sret3
+; MINGW_X86:  ret $4
+
+; LINUX:      sret3
+; LINUX:      ret $4
+
+  store i8 42, i8* %x
+  store i8 13, i8* %y
+  ret void
+}
+
+; PR15556
+%struct.S4 = type { i32, i32, i32 }
+
+define void @sret4(%struct.S4* noalias sret %agg.result) {
+entry:
+; WIN32:     sret4
+; WIN32:     movl $42, (%eax)
+; WIN32-NOT: popl %eax
+; WIN32:   {{ret$}}
+
+; MINGW_X86: sret4
+; MINGW_X86: ret $4
+
+; LINUX:     sret4
+; LINUX:     ret $4
+
+  %x = getelementptr inbounds %struct.S4* %agg.result, i32 0, i32 0
+  store i32 42, i32* %x, align 4
+  ret void
+}
+
+%struct.S5 = type { i32 }
+%class.C5 = type { i8 }
+
+define x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* noalias sret %agg.result, %class.C5* %this) {
+entry:
+  %this.addr = alloca %class.C5*, align 4
+  store %class.C5* %this, %class.C5** %this.addr, align 4
+  %this1 = load %class.C5** %this.addr
+  %x = getelementptr inbounds %struct.S5* %agg.result, i32 0, i32 0
+  store i32 42, i32* %x, align 4
+  ret void
+; WIN32:     {{^}}"?foo@C5@@QAE?AUS5@@XZ":
+
+; The address of the return structure is passed as an implicit parameter.
+; In the -O0 build, %eax is spilled at the beginning of the function, hence we
+; should match both 4(%esp) and 8(%esp).
+; WIN32:     {{[48]}}(%esp), %eax
+; WIN32:     movl $42, (%eax)
+; WIN32:     ret $4
+}
+
+define void @call_foo5() {
+entry:
+  %c = alloca %class.C5, align 1
+  %s = alloca %struct.S5, align 4
+  call x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* sret %s, %class.C5* %c)
+; WIN32:      {{^}}_call_foo5:
+
+; Load the address of the result and put it onto stack
+; (through %ecx in the -O0 build).
+; WIN32:      leal {{[0-9]+}}(%esp), %eax
+; WIN32:      movl %eax, (%e{{[sc][px]}})
+
+; The this pointer goes to ECX.
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
+; WIN32:      ret
+  ret void
+}
diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll
index 7f8ae07463a6..14591248f354 100644
--- a/test/CodeGen/X86/win_ftol2.ll
+++ b/test/CodeGen/X86/win_ftol2.ll
@@ -63,9 +63,9 @@ define i64 @double_ui64_2(double %x, double %y, double %z) nounwind {
 
   %1 = fdiv double %x, %y
   %2 = fsub double %x, %z
-  %3 = fptoui double %1 to i64
-  %4 = fptoui double %2 to i64
-  %5 = sub i64 %3, %4
+  %3 = fptoui double %2 to i64
+  %4 = fptoui double %1 to i64
+  %5 = sub i64 %4, %3
   ret i64 %5
 }
 
@@ -121,9 +121,9 @@ define {double, i64} @double_ui64_4(double %x, double %y) nounwind {
 ; FTOL_2: calll __ftol2
 ;; stack is %x
 
-  %1 = fptoui double %x to i64
-  %2 = fptoui double %y to i64
-  %3 = sub i64 %1, %2
+  %1 = fptoui double %y to i64
+  %2 = fptoui double %x to i64
+  %3 = sub i64 %2, %1
   %4 = insertvalue {double, i64} undef, double %x, 0
   %5 = insertvalue {double, i64} %4, i64 %3, 1
   ret {double, i64} %5
diff --git a/test/CodeGen/X86/xtest.ll b/test/CodeGen/X86/xtest.ll
new file mode 100644
index 000000000000..e85565edcd55
--- /dev/null
+++ b/test/CodeGen/X86/xtest.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64 -mattr=+rtm | FileCheck %s
+
+declare i32 @llvm.x86.xtest() nounwind
+
+define i32 @test_xtest() nounwind uwtable {
+entry:
+  %0 = tail call i32 @llvm.x86.xtest() nounwind
+  ret i32 %0
+; CHECK: test_xtest
+; CHECK: xtest
+}
diff --git a/test/CodeGen/XCore/DbgValueOtherTargets.test b/test/CodeGen/XCore/DbgValueOtherTargets.test
index 93ab7e0f1591..7c2ecd0312c6 100644
--- a/test/CodeGen/XCore/DbgValueOtherTargets.test
+++ b/test/CodeGen/XCore/DbgValueOtherTargets.test
@@ -1 +1 @@
-; RUN: llc -O0 -march=xcore -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=xcore -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg
index f8726af57f79..8756f37fe8a1 100644
--- a/test/CodeGen/XCore/lit.local.cfg
+++ b/test/CodeGen/XCore/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
 
 targets = set(config.root.targets_to_build.split())
 if not 'XCore' in targets:
diff --git a/test/CodeGen/XCore/global_negative_offset.ll b/test/CodeGen/XCore/offset_folding.ll
similarity index 58%
rename from test/CodeGen/XCore/global_negative_offset.ll
rename to test/CodeGen/XCore/offset_folding.ll
index 0328fb0460f3..30edfe695c3f 100644
--- a/test/CodeGen/XCore/global_negative_offset.ll
+++ b/test/CodeGen/XCore/offset_folding.ll
@@ -1,23 +1,40 @@
 ; RUN: llc < %s -march=xcore | FileCheck %s
 
-; Don't fold negative offsets into cp / dp accesses to avoid a relocation
-; error if the address + addend is less than the start of the cp / dp.
-
 @a = external constant [0 x i32], section ".cp.rodata"
 @b = external global [0 x i32]
 
-define i32 *@f() nounwind {
+define i32 *@f1() nounwind {
+entry:
+; CHECK: f1:
+; CHECK: ldaw r11, cp[a+4]
+; CHECK: mov r0, r11
+	%0 = getelementptr [0 x i32]* @a, i32 0, i32 1
+	ret i32* %0
+}
+
+define i32 *@f2() nounwind {
+entry:
+; CHECK: f2:
+; CHECK: ldaw r0, dp[b+4]
+	%0 = getelementptr [0 x i32]* @b, i32 0, i32 1
+	ret i32* %0
+}
+
+; Don't fold negative offsets into cp / dp accesses to avoid a relocation
+; error if the address + addend is less than the start of the cp / dp.
+
+define i32 *@f3() nounwind {
 entry:
-; CHECK: f:
+; CHECK: f3:
 ; CHECK: ldaw r11, cp[a]
 ; CHECK: sub r0, r11, 4
 	%0 = getelementptr [0 x i32]* @a, i32 0, i32 -1
 	ret i32* %0
 }
 
-define i32 *@g() nounwind {
+define i32 *@f4() nounwind {
 entry:
-; CHECK: g:
+; CHECK: f4:
 ; CHECK: ldaw [[REG:r[0-9]+]], dp[b]
 ; CHECK: sub r0, [[REG]], 4
 	%0 = getelementptr [0 x i32]* @b, i32 0, i32 -1
diff --git a/test/CodeGen/XCore/unaligned_load.ll b/test/CodeGen/XCore/unaligned_load.ll
index 0ee8e1c32667..772a847bd220 100644
--- a/test/CodeGen/XCore/unaligned_load.ll
+++ b/test/CodeGen/XCore/unaligned_load.ll
@@ -1,20 +1,19 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl __misaligned_load" %t1.s | count 1
-; RUN: grep ld16s %t1.s | count 2
-; RUN: grep ldw %t1.s | count 2
-; RUN: grep shl %t1.s | count 2
-; RUN: grep shr %t1.s | count 1
-; RUN: grep zext %t1.s | count 1
-; RUN: grep "or " %t1.s | count 2
+; RUN: llc < %s -march=xcore | FileCheck %s
 
-; Byte aligned load. Expands to call to __misaligned_load.
+; Byte aligned load.
+; CHECK: align1
+; CHECK: bl __misaligned_load
 define i32 @align1(i32* %p) nounwind {
 entry:
 	%0 = load i32* %p, align 1		; <i32> [#uses=1]
 	ret i32 %0
 }
 
-; Half word aligned load. Expands to two 16bit loads.
+; Half word aligned load.
+; CHECK: align2:
+; CHECK: ld16s
+; CHECK: ld16s
+; CHECK: or
 define i32 @align2(i32* %p) nounwind {
 entry:
 	%0 = load i32* %p, align 2		; <i32> [#uses=1]
@@ -23,7 +22,11 @@ entry:
 
 @a = global [5 x i8] zeroinitializer, align 4
 
-; Constant offset from word aligned base. Expands to two 32bit loads.
+; Constant offset from word aligned base.
+; CHECK: align3:
+; CHECK: ldw {{r[0-9]+}}, dp
+; CHECK: ldw {{r[0-9]+}}, dp
+; CHECK: or
 define i32 @align3() nounwind {
 entry:
 	%0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
diff --git a/test/CodeGen/XCore/unaligned_store.ll b/test/CodeGen/XCore/unaligned_store.ll
index 62078e6f6077..94e1852faea7 100644
--- a/test/CodeGen/XCore/unaligned_store.ll
+++ b/test/CodeGen/XCore/unaligned_store.ll
@@ -1,16 +1,18 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl __misaligned_store" %t1.s | count 1
-; RUN: grep st16 %t1.s | count 2
-; RUN: grep shr %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 
-; Byte aligned store. Expands to call to __misaligned_store.
+; Byte aligned store.
+; CHECK: align1:
+; CHECK: bl __misaligned_store
 define void @align1(i32* %p, i32 %val) nounwind {
 entry:
 	store i32 %val, i32* %p, align 1
 	ret void
 }
 
-; Half word aligned store. Expands to two 16bit stores.
+; Half word aligned store.
+; CHECK: align2
+; CHECK: st16
+; CHECK: st16
 define void @align2(i32* %p, i32 %val) nounwind {
 entry:
 	store i32 %val, i32* %p, align 2
diff --git a/test/DebugInfo/2009-11-03-InsertExtractValue.ll b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
index a7b43b3247ff..5bfca21b3ecb 100644
--- a/test/DebugInfo/2009-11-03-InsertExtractValue.ll
+++ b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
@@ -1,7 +1,7 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
 
 !dbg = !{!0}
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !1, i32 3, metadata !2, i1 false, i1 false, i32 0, i32 0, null, i32 258, i1 false, null, null, i32 0, metadata !1, i32 3} 
+!0 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !1, i32 3, metadata !2, i1 false, i1 false, i32 0, i32 0, null, i32 258, i1 false, null, null, i32 0, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 41, metadata !4} ; [ DW_TAG_file_type ]
 !2 = metadata !{i32 21, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !3 = metadata !{null}
diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
index 6f6ccd6066a0..13bd31039522 100644
--- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
+++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
@@ -9,10 +9,10 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !6, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"fb.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9}
diff --git a/test/DebugInfo/2009-11-10-CurrentFn.ll b/test/DebugInfo/2009-11-10-CurrentFn.ll
index e862eb0b284c..83d6ac28223e 100644
--- a/test/DebugInfo/2009-11-10-CurrentFn.ll
+++ b/test/DebugInfo/2009-11-10-CurrentFn.ll
@@ -12,10 +12,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !6, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"cf.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null}
diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll
index 1f7a889c7d70..4f491035b72a 100644
--- a/test/DebugInfo/2010-03-19-DbgDeclare.ll
+++ b/test/DebugInfo/2010-03-19-DbgDeclare.ll
@@ -1,12 +1,17 @@
-; RUN: llvm-as < %s | opt -verify -disable-output
+; RUN: llvm-as < %s | opt -verify -S -asm-verbose | FileCheck %s
+
+; CHECK: lang 0x8001
 
 define void @Foo(i32 %a, i32 %b) {
 entry:
   call void @llvm.dbg.declare(metadata !{i32* null}, metadata !1)
   ret void
 }
-
+!llvm.dbg.cu = !{!2}
+!2 = metadata !{i32 786449, metadata !4, i32 32769, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp] [lang 0x8001]
+!3 = metadata !{}
 !0 = metadata !{i32 662302, i32 26, metadata !1, null}
 !1 = metadata !{i32 4, metadata !"foo"}
+!4 = metadata !{metadata !"scratch.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
diff --git a/test/DebugInfo/2010-03-24-MemberFn.ll b/test/DebugInfo/2010-03-24-MemberFn.ll
index 65e1143e2d5d..06c2367913da 100644
--- a/test/DebugInfo/2010-03-24-MemberFn.ll
+++ b/test/DebugInfo/2010-03-24-MemberFn.ll
@@ -41,24 +41,24 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !0 = metadata !{i32 786688, metadata !1, metadata !"s1", metadata !4, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
 !1 = metadata !{i32 786443, metadata !2, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
 !2 = metadata !{i32 786443, metadata !3, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"bar", metadata !"bar", metadata !"_Z3barv", metadata !4, i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @_Z3barv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"bar", metadata !"bar", metadata !"_Z3barv", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @_Z3barv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
 !4 = metadata !{i32 786473, metadata !25} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786449, i32 0, i32 4, metadata !4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !24, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 786453, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786449, i32 4, metadata !4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !24, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, metadata !4, metadata !"int", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786451, metadata !4, metadata !"S", metadata !10, i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!8 = metadata !{i32 786468, metadata !25, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786451, metadata !26, metadata !4, metadata !"S", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
 !10 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
 !11 = metadata !{metadata !12}
-!12 = metadata !{i32 786478, i32 0, metadata !9, metadata !"foo", metadata !"foo", metadata !"_ZN1S3fooEv", metadata !10, i32 3, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%struct.S*)* @_ZN1S3fooEv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 786453, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{i32 786478, metadata !10, metadata !9, metadata !"foo", metadata !"foo", metadata !"_ZN1S3fooEv", i32 3, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%struct.S*)* @_ZN1S3fooEv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !14 = metadata !{metadata !8, metadata !15}
-!15 = metadata !{i32 786447, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 786447, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
 !16 = metadata !{i32 3, i32 0, metadata !1, null}
 !17 = metadata !{i32 3, i32 0, metadata !3, null}
 !18 = metadata !{i32 786689, metadata !12, metadata !"this", metadata !10, i32 3, metadata !19, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786470, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_const_type ]
-!20 = metadata !{i32 786447, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
+!19 = metadata !{i32 786470, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_const_type ]
+!20 = metadata !{i32 786447, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
 !21 = metadata !{i32 3, i32 0, metadata !12, null}
 !22 = metadata !{i32 3, i32 0, metadata !23, null}
 !23 = metadata !{i32 786443, metadata !12, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
index 949ebdd7f9eb..dd98db904509 100644
--- a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
+++ b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
@@ -55,18 +55,18 @@ entry:
 
 !0 = metadata !{i32 786688, metadata !1, metadata !"b", metadata !3, i32 16, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
 !1 = metadata !{i32 786443, metadata !2, i32 15, i32 12} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 786478, i32 0, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @main, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786478, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @main, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
 !3 = metadata !{i32 786473, metadata !"one.cc", metadata !"/tmp", metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 786449, i32 0, i32 4, metadata !3, metadata !"clang 1.5", i1 false, metadata !"", i32 0, null, null, metadata !37, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{i32 786449, i32 4, metadata !3, metadata !"clang 1.5", i1 false, metadata !"", i32 0, null, null, metadata !37, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !6 = metadata !{metadata !7}
-!7 = metadata !{i32 786468, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786434, metadata !3, metadata !"B", metadata !3, i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_class_type ]
+!7 = metadata !{i32 786468, metadata !3, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786434, metadata !3, metadata !3, metadata !"B", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_class_type ]
 !9 = metadata !{metadata !10}
-!10 = metadata !{i32 786478, i32 0, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", metadata !3, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZN1B2fnEv, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{i32 786478, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", metadata !3, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZN1B2fnEv, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{metadata !7, metadata !13}
-!13 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786447, metadata !3, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
 !14 = metadata !{i32 16, i32 5, metadata !1, null}
 !15 = metadata !{i32 17, i32 3, metadata !1, null}
 !16 = metadata !{i32 18, i32 1, metadata !2, null}
@@ -74,12 +74,12 @@ entry:
 !18 = metadata !{i32 4, i32 7, metadata !10, null}
 !19 = metadata !{i32 786688, metadata !20, metadata !"a", metadata !3, i32 9, metadata !21, i32 0, null} ; [ DW_TAG_auto_variable ]
 !20 = metadata !{i32 786443, metadata !10, i32 4, i32 12} ; [ DW_TAG_lexical_block ]
-!21 = metadata !{i32 786434, metadata !10, metadata !"A", metadata !3, i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_class_type ]
+!21 = metadata !{i32 786434, metadata !3, metadata !10, metadata !"A", i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_class_type ]
 !22 = metadata !{metadata !23}
-!23 = metadata !{i32 786478, i32 0, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", metadata !3, i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZZN1B2fnEvEN1A3fooEv, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
-!24 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!23 = metadata !{i32 786478, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", metadata !3, i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZZN1B2fnEvEN1A3fooEv, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
+!24 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !25 = metadata !{metadata !7, metadata !26}
-!26 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ]
+!26 = metadata !{i32 786447, metadata !3, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ]
 !27 = metadata !{i32 9, i32 7, metadata !20, null}
 !28 = metadata !{i32 786688, metadata !20, metadata !"i", metadata !3, i32 10, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
 !29 = metadata !{i32 10, i32 9, metadata !20, null}
diff --git a/test/DebugInfo/2010-04-19-FramePtr.ll b/test/DebugInfo/2010-04-19-FramePtr.ll
index bdfa6e601252..f9e90cd1b3d1 100644
--- a/test/DebugInfo/2010-04-19-FramePtr.ll
+++ b/test/DebugInfo/2010-04-19-FramePtr.ll
@@ -23,9 +23,9 @@ return:                                           ; preds = %entry
 !9 = metadata !{metadata !1}
 
 !0 = metadata !{i32 2, i32 0, metadata !1, null}
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @foo, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @foo, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !"a.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6}
 !6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/2010-05-10-MultipleCU.ll b/test/DebugInfo/2010-05-10-MultipleCU.ll
index 35af20c097c3..75e4389afef8 100644
--- a/test/DebugInfo/2010-05-10-MultipleCU.ll
+++ b/test/DebugInfo/2010-05-10-MultipleCU.ll
@@ -32,17 +32,17 @@ return:
 
 !0 = metadata !{i32 3, i32 0, metadata !1, null}
 !1 = metadata !{i32 786443, metadata !2, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 786478, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786478, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !3 = metadata !{i32 786473, metadata !"a.c", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 786449, i32 0, i32 1, metadata !3, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !16, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786449, i32 1, metadata !3, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !16, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !5 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !6 = metadata !{metadata !7}
 !7 = metadata !{i32 786468, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 3, i32 0, metadata !9, null}
 !9 = metadata !{i32 786443, metadata !10, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 786478, i32 0, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", metadata !11, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786478, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", metadata !11, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !11 = metadata !{i32 786473, metadata !"b.c", metadata !"/tmp/", metadata !12} ; [ DW_TAG_file_type ]
-!12 = metadata !{i32 786449, i32 0, i32 1, metadata !11, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!12 = metadata !{i32 786449, i32 1, metadata !11, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !13 = metadata !{i32 786453, metadata !11, metadata !"", metadata !11, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !14 = metadata !{metadata !15}
 !15 = metadata !{i32 786468, metadata !11, metadata !"int", metadata !11, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
index a305c573794e..e44362d4129e 100644
--- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
@@ -23,25 +23,25 @@ entry:
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 9, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !24, i32 9} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 9, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !24, i32 9} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !25, metadata !26, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !25, metadata !26, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5, metadata !5}
-!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786468, metadata !27, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !5}
 !9 = metadata !{i32 786689, metadata !0, metadata !"j", metadata !1, i32 9, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !10 = metadata !{i32 786688, metadata !11, metadata !"xyz", metadata !1, i32 10, metadata !12, i32 0, null} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 786443, metadata !0, i32 9, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 786451, metadata !0, metadata !"X", metadata !1, i32 10, i64 64, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null} ; [ DW_TAG_structure_type ]
+!11 = metadata !{i32 786443, metadata !1, metadata !0, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 786451, metadata !27, metadata !0, metadata !"X", i32 10, i64 64, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null} ; [ DW_TAG_structure_type ]
 !13 = metadata !{metadata !14, metadata !15}
-!14 = metadata !{i32 786445, metadata !12, metadata !"a", metadata !1, i32 10, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!15 = metadata !{i32 786445, metadata !12, metadata !"b", metadata !1, i32 10, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"a", i32 10, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!15 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"b", i32 10, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
 !16 = metadata !{i32 786484, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"", metadata !1, i32 5, metadata !5, i1 false, i1 true, i32* @i} ; [ DW_TAG_variable ]
 !17 = metadata !{i32 15, i32 0, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !6, i32 14, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786443, metadata !1, metadata !6, i32 14, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
 !19 = metadata !{i32 9, i32 0, metadata !0, metadata !17}
 !20 = metadata !{null}
 !21 = metadata !{i32 9, i32 0, metadata !11, metadata !17}
diff --git a/test/DebugInfo/2010-10-01-crash.ll b/test/DebugInfo/2010-10-01-crash.ll
index e61f63f40d8f..c4161b49426d 100644
--- a/test/DebugInfo/2010-10-01-crash.ll
+++ b/test/DebugInfo/2010-10-01-crash.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -O0 %s -o /dev/null
+; XFAIL: hexagon
 ; PR 8235
 
 define void @CGRectStandardize(i32* sret %agg.result, i32* byval %rect) nounwind ssp {
diff --git a/test/DebugInfo/AArch64/dwarfdump.ll b/test/DebugInfo/AArch64/dwarfdump.ll
index 06e8e2424dbb..673c789fe62c 100644
--- a/test/DebugInfo/AArch64/dwarfdump.ll
+++ b/test/DebugInfo/AArch64/dwarfdump.ll
@@ -22,10 +22,10 @@ attributes #0 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/llvm/build/tmp.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/llvm/build/tmp.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !2 = metadata !{metadata !3}
-!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"main", metadata !"main", metadata !"", metadata !4, i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!3 = metadata !{i32 786478, metadata !4, metadata !"main", metadata !"main", metadata !"", metadata !4, i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
 !4 = metadata !{i32 786473, metadata !9} ; [ DW_TAG_file_type ]
 !5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !7}
diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll
index 3c492a120846..ac3037e04b4b 100644
--- a/test/DebugInfo/AArch64/variable-loc.ll
+++ b/test/DebugInfo/AArch64/variable-loc.ll
@@ -69,24 +69,24 @@ declare i32 @printf(i8*, ...)
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !11, metadata !14}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
+!5 = metadata !{i32 786478, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
 !6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9, metadata !10}
-!9 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!11 = metadata !{i32 786478, i32 0, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", metadata !6, i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = metadata !{i32 786478, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", metadata !6, i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
 !12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !13 = metadata !{metadata !10, metadata !9, metadata !10}
-!14 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
+!14 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
 !15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !16 = metadata !{metadata !10}
 !17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19]
-!18 = metadata !{i32 786443, metadata !14, i32 18, i32 16, metadata !6, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
-!19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
+!18 = metadata !{i32 786443, metadata !6, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
+!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
 !20 = metadata !{i32 786465, i64 0, i64 99}       ; [ DW_TAG_subrange_type ] [0, 99]
 !22 = metadata !{i32 19, i32 7, metadata !18, null}
 !23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20]
diff --git a/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 space
similarity index 100%
rename from test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64
rename to test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 space
diff --git a/test/DebugInfo/X86/2010-04-13-PubType.ll b/test/DebugInfo/X86/2010-04-13-PubType.ll
index 1a60b8f11d8d..5169647fa41d 100644
--- a/test/DebugInfo/X86/2010-04-13-PubType.ll
+++ b/test/DebugInfo/X86/2010-04-13-PubType.ll
@@ -31,18 +31,18 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!3}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 7, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.X*, %struct.Y*)* @foo, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.X*, %struct.Y*)* @foo, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !7, metadata !9}
-!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 786451, metadata !2, metadata !"X", metadata !2, i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ]
-!9 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 786451, metadata !2, metadata !"Y", metadata !2, i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!6 = metadata !{i32 786468, metadata !18, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"X", i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"Y", i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
 !11 = metadata !{metadata !12}
-!12 = metadata !{i32 786445, metadata !10, metadata !"x", metadata !2, i32 5, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!12 = metadata !{i32 786445, metadata !18, metadata !10, metadata !"x", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
 !13 = metadata !{i32 7, i32 0, metadata !1, null}
 !14 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 7, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !15 = metadata !{i32 7, i32 0, metadata !16, null}
diff --git a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
index 3aedf91a6b98..d05dfc6c32be 100644
--- a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
+++ b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
@@ -13,9 +13,9 @@ declare void @bar(i32)
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang 2.8", i1 false, metadata !"", i32 0, null, null, metadata !10, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang 2.8", i1 false, metadata !"", i32 0, null, null, metadata !10, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null}
 !5 = metadata !{i32 786471, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201, null} ; [ DW_TAG_constant ]
diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
index 12603bb1f31c..ad55db05a70e 100644
--- a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
+++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -18,20 +18,20 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.0 (trunk)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"clang version 3.0 (trunk)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !20} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !12 = metadata !{metadata !14}
 !14 = metadata !{i32 720948, i32 0, null, metadata !"GLB", metadata !"GLB", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @GLB, null} ; [ DW_TAG_variable ]
 !15 = metadata !{i32 786688, metadata !16, metadata !"LOC", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!16 = metadata !{i32 786443, metadata !5, i32 3, i32 9, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786443, metadata !20, metadata !5, i32 3, i32 9, i32 0} ; [ DW_TAG_lexical_block ]
 !17 = metadata !{i32 4, i32 9, metadata !16, null}
 !18 = metadata !{i32 4, i32 23, metadata !16, null}
 !19 = metadata !{i32 5, i32 5, metadata !16, null}
diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
index 1ce9db69a478..e248aa60295e 100644
--- a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
+++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -88,50 +88,50 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 720913, i32 0, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 146596)", i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 146596)", i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !9}
-!5 = metadata !{i32 720898, null, metadata !"bar", metadata !6, i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ]
+!5 = metadata !{i32 720898, metadata !82, null, metadata !"bar", i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ]
 !6 = metadata !{i32 720937, metadata !82} ; [ DW_TAG_file_type ]
 !7 = metadata !{metadata !8, metadata !19, metadata !21}
-!8 = metadata !{i32 720909, metadata !5, metadata !"b", metadata !6, i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
-!9 = metadata !{i32 720898, null, metadata !"baz", metadata !6, i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ]
+!8 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b", i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!9 = metadata !{i32 720898, metadata !82, null, metadata !"baz", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ]
 !10 = metadata !{metadata !11, metadata !13}
-!11 = metadata !{i32 720909, metadata !9, metadata !"h", metadata !6, i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
-!12 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!13 = metadata !{i32 720942, i32 0, metadata !9, metadata !"baz", metadata !"baz", metadata !"", metadata !6, i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 720909, metadata !82, metadata !9, metadata !"h", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
+!12 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 720942, metadata !6, metadata !9, metadata !"baz", metadata !"baz", metadata !"", i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17} ; [ DW_TAG_subprogram ]
 !14 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !15 = metadata !{null, metadata !16, metadata !12}
 !16 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
 !17 = metadata !{metadata !18}
 !18 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!19 = metadata !{i32 720909, metadata !5, metadata !"b_ref", metadata !6, i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
+!19 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b_ref", i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
 !20 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
-!21 = metadata !{i32 720942, i32 0, metadata !5, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ]
+!21 = metadata !{i32 720942, metadata !6, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ]
 !22 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !23 = metadata !{null, metadata !24, metadata !12}
 !24 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ]
 !25 = metadata !{metadata !26}
 !26 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !27 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46}
-!29 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35} ; [ DW_TAG_subprogram ]
+!29 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35} ; [ DW_TAG_subprogram ]
 !30 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !31, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !31 = metadata !{metadata !12, metadata !12, metadata !32}
-!32 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ]
-!33 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ]
-!34 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!32 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ]
+!33 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ]
+!34 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !35 = metadata !{metadata !36}
 !36 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!37 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !38} ; [ DW_TAG_subprogram ]
+!37 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !38} ; [ DW_TAG_subprogram ]
 !38 = metadata !{metadata !39}
 !39 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!40 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !41} ; [ DW_TAG_subprogram ]
+!40 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !41} ; [ DW_TAG_subprogram ]
 !41 = metadata !{metadata !42}
 !42 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!43 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !44} ; [ DW_TAG_subprogram ]
+!43 = metadata !{i32 720942, metadata !6, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !44} ; [ DW_TAG_subprogram ]
 !44 = metadata !{metadata !45}
 !45 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!46 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47} ; [ DW_TAG_subprogram ]
+!46 = metadata !{i32 720942, metadata !6, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47} ; [ DW_TAG_subprogram ]
 !47 = metadata !{metadata !48}
 !48 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !49 = metadata !{i32 721153, metadata !29, metadata !"argc", metadata !6, i32 16777232, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll
index 6a68a055c32f..84e3f630976a 100644
--- a/test/DebugInfo/X86/DW_AT_byte_size.ll
+++ b/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -24,22 +24,22 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 150996)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 150996)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14, i32 3} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14, i32 3} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
 !12 = metadata !{metadata !13}
-!13 = metadata !{i32 786445, metadata !11, metadata !"b", metadata !6, i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!13 = metadata !{i32 786445, metadata !20, metadata !11, metadata !"b", i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
 !14 = metadata !{metadata !15}
 !15 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
 !16 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !17 = metadata !{i32 3, i32 13, metadata !5, null}
 !18 = metadata !{i32 4, i32 3, metadata !19, null}
-!19 = metadata !{i32 786443, metadata !5, i32 3, i32 16, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786443, metadata !6, metadata !5, i32 3, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
 !20 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"}
diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll
index ecc96899d1ff..356360b09834 100644
--- a/test/DebugInfo/X86/DW_AT_location-reference.ll
+++ b/test/DebugInfo/X86/DW_AT_location-reference.ll
@@ -87,13 +87,13 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, metadata !22, i32 4} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, metadata !22, i32 4} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang version 3.0 (trunk)", i1 true, metadata !"", i32 0, null, null, metadata !21, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk)", i1 true, metadata !"", i32 0, null, null, metadata !21, null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null}
 !5 = metadata !{i32 786688, metadata !6, metadata !"x", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!6 = metadata !{i32 786443, metadata !0, i32 4, i32 14, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!6 = metadata !{i32 786443, metadata !1, metadata !0, i32 4, i32 14, i32 0} ; [ DW_TAG_lexical_block ]
 !7 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 6, i32 3, metadata !6, null}
 !9 = metadata !{metadata !"int", metadata !10}
diff --git a/test/DebugInfo/X86/DW_AT_object_pointer.ll b/test/DebugInfo/X86/DW_AT_object_pointer.ll
index 37fb3068572c..a3ad26cf82bc 100644
--- a/test/DebugInfo/X86/DW_AT_object_pointer.ll
+++ b/test/DebugInfo/X86/DW_AT_object_pointer.ll
@@ -47,38 +47,38 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !10, metadata !20}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", metadata !6, i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3fooi, null, null, metadata !1, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [foo]
+!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3fooi, null, null, metadata !1, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [foo]
 !6 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, i32 0, null, metadata !"A", metadata !"A", metadata !"_ZN1AC1Ev", metadata !6, i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC1Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, metadata !6, null, metadata !"A", metadata !"A", metadata !"_ZN1AC1Ev", i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC1Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
 !11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{null, metadata !13}
 !13 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!14 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [from ]
+!14 = metadata !{i32 786434, metadata !37, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [from ]
 !15 = metadata !{metadata !16, metadata !17}
-!16 = metadata !{i32 786445, metadata !14, metadata !"m_a", metadata !6, i32 4, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [m_a] [line 4, size 32, align 32, offset 0] [from int]
-!17 = metadata !{i32 786478, i32 0, metadata !14, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A]
+!16 = metadata !{i32 786445, metadata !37, metadata !14, metadata !"m_a", i32 4, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [m_a] [line 4, size 32, align 32, offset 0] [from int]
+!17 = metadata !{i32 786478, metadata !6, metadata !14, metadata !"A", metadata !"A", metadata !"", i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A]
 !18 = metadata !{metadata !19}
 !19 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!20 = metadata !{i32 786478, i32 0, null, metadata !"A", metadata !"A", metadata !"_ZN1AC2Ev", metadata !6, i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC2Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
+!20 = metadata !{i32 786478, metadata !6, null, metadata !"A", metadata !"A", metadata !"_ZN1AC2Ev", i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC2Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
 !21 = metadata !{i32 786688, metadata !22, metadata !"a", metadata !6, i32 8, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 8]
-!22 = metadata !{i32 786443, metadata !5, i32 7, i32 11, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
+!22 = metadata !{i32 786443, metadata !6, metadata !5, i32 7, i32 11, i32 0} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
 !23 = metadata !{i32 8, i32 5, metadata !22, null}
 !24 = metadata !{i32 8, i32 6, metadata !22, null}
 !25 = metadata !{i32 9, i32 3, metadata !22, null}
 !26 = metadata !{i32 786689, metadata !10, metadata !"this", metadata !6, i32 16777219, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 3]
-!27 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
 !28 = metadata !{i32 3, i32 3, metadata !10, null}
 !29 = metadata !{i32 3, i32 18, metadata !10, null}
 !30 = metadata !{i32 786689, metadata !20, metadata !"this", metadata !6, i32 16777219, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 3]
 !31 = metadata !{i32 3, i32 3, metadata !20, null}
 !32 = metadata !{i32 3, i32 9, metadata !33, null}
-!33 = metadata !{i32 786443, metadata !20, i32 3, i32 7, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
+!33 = metadata !{i32 786443, metadata !6, metadata !20, i32 3, i32 7, i32 1} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
 !34 = metadata !{i32 3, i32 18, metadata !33, null}
 !35 = metadata !{i32 7, i32 0, metadata !5, null}
 !36 = metadata !{i32 786689, metadata !5, metadata !"", metadata !6, i32 16777223, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 7]
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
index d897ff2c4dbd..07849f352268 100644
--- a/test/DebugInfo/X86/DW_AT_specification.ll
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -16,17 +16,17 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !27, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16, i32 4} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16, i32 4} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !27} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null, metadata !9}
 !9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!11 = metadata !{i32 720942, i32 0, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14, i32 2} ; [ DW_TAG_subprogram ]
-!12 = metadata !{i32 720898, null, metadata !"foo", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ]
+!10 = metadata !{i32 786451, metadata !27, null, metadata !"foo", i32 1, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!11 = metadata !{i32 720942, metadata !6, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14, i32 2} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 720898, metadata !27, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ]
 !13 = metadata !{metadata !11}
 !14 = metadata !{metadata !15}
 !15 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
@@ -34,8 +34,8 @@ entry:
 !17 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !18 = metadata !{metadata !20}
 !20 = metadata !{i32 720948, i32 0, metadata !5, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 5, metadata !21, i32 1, i32 1, i32* @_ZZN3foo3barEvE1x, null} ; [ DW_TAG_variable ]
-!21 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ]
-!22 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ]
+!22 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !25 = metadata !{i32 6, i32 1, metadata !26, null}
 !26 = metadata !{i32 786443, metadata !5, i32 4, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
 !27 = metadata !{metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library"}
diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll
index 460d92118b0c..f60175fb69aa 100644
--- a/test/DebugInfo/X86/DW_TAG_friend.ll
+++ b/test/DebugInfo/X86/DW_TAG_friend.ll
@@ -17,26 +17,26 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !17}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 10, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ]
+!7 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ]
 !8 = metadata !{metadata !9, metadata !11}
-!9 = metadata !{i32 786445, metadata !7, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 786478, i32 0, metadata !7, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 1, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !15, i32 1} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786445, metadata !28, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !15, i32 1} ; [ DW_TAG_subprogram ]
 !12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !13 = metadata !{null, metadata !14}
 !14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !7} ; [ DW_TAG_pointer_type ]
 !15 = metadata !{metadata !16}
 !16 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
 !17 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !6, i32 11, metadata !18, i32 0, i32 1, %class.B* @b, null} ; [ DW_TAG_variable ]
-!18 = metadata !{i32 786434, null, metadata !"B", metadata !6, i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_class_type ]
+!18 = metadata !{i32 786434, metadata !28, null, metadata !"B", i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_class_type ]
 !19 = metadata !{metadata !20, metadata !21, metadata !27}
-!20 = metadata !{i32 786445, metadata !18, metadata !"b", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
-!21 = metadata !{i32 786478, i32 0, metadata !18, metadata !"B", metadata !"B", metadata !"", metadata !6, i32 5, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !25, i32 5} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 786445, metadata !28, metadata !18, metadata !"b", i32 7, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
+!21 = metadata !{i32 786478, metadata !6, metadata !18, metadata !"B", metadata !"B", metadata !"", i32 5, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !25, i32 5} ; [ DW_TAG_subprogram ]
 !22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !23 = metadata !{null, metadata !24}
 !24 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !18} ; [ DW_TAG_pointer_type ]
diff --git a/test/DebugInfo/X86/aligned_stack_var.ll b/test/DebugInfo/X86/aligned_stack_var.ll
index 9990daac0622..a8f6cca750c0 100644
--- a/test/DebugInfo/X86/aligned_stack_var.ll
+++ b/test/DebugInfo/X86/aligned_stack_var.ll
@@ -26,15 +26,15 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/debuginfo", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null}
 !9 = metadata !{i32 786688, metadata !10, metadata !"x", metadata !6, i32 2, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 786443, metadata !5, i32 1, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786443, metadata !6, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
 !11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !12 = metadata !{i32 2, i32 7, metadata !10, null}
 !13 = metadata !{i32 3, i32 1, metadata !10, null}
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
index 111e9f6c518f..fadea775aadf 100644
--- a/test/DebugInfo/X86/block-capture.ll
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -62,55 +62,55 @@ declare i32 @__objc_personality_v0(...)
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!35, !36, !37, !38}
 
-!0 = metadata !{i32 786449, i32 0, i32 16, metadata !6, metadata !"clang version 3.1 (trunk 151227)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !63, i32 16, metadata !"clang version 3.1 (trunk 151227)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 5} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 5} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !63} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786454, null, metadata !"dispatch_block_t", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
-!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_generic", metadata !6, i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 786454, metadata !63, null, metadata !"dispatch_block_t", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_generic", i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !12 = metadata !{metadata !13, metadata !15, metadata !17, metadata !18, metadata !19}
-!13 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
-!14 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!15 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
-!16 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
-!18 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
-!19 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ]
-!20 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
-!21 = metadata !{i32 786451, metadata !6, metadata !"__block_descriptor", metadata !6, i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!13 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
+!16 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__reserved", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
+!18 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
+!19 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ]
+!20 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
+!21 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_descriptor", i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !22 = metadata !{metadata !23, metadata !25}
-!23 = metadata !{i32 786445, metadata !6, metadata !"reserved", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ]
-!24 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!25 = metadata !{i32 786445, metadata !6, metadata !"Size", metadata !6, i32 0, i64 64, i64 64, i64 64, i32 0, metadata !24} ; [ DW_TAG_member ]
+!23 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ]
+!24 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!25 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"Size", i32 0, i64 64, i64 64, i64 64, i32 0, metadata !24} ; [ DW_TAG_member ]
 !26 = metadata !{metadata !27}
 !27 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!28 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", metadata !6, i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26, i32 7} ; [ DW_TAG_subprogram ]
+!28 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26, i32 7} ; [ DW_TAG_subprogram ]
 !29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !30 = metadata !{null, metadata !14}
-!31 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", metadata !6, i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ]
+!31 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ]
 !32 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !33 = metadata !{null, metadata !14, metadata !14}
-!34 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", metadata !6, i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ]
+!34 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ]
 !35 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
 !36 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
 !37 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
 !38 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
 !39 = metadata !{i32 786689, metadata !28, metadata !".block_descriptor", metadata !6, i32 16777223, metadata !40, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!40 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ]
-!41 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_1", metadata !6, i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!40 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ]
+!41 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_1", i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !42 = metadata !{metadata !43, metadata !44, metadata !45, metadata !46, metadata !47, metadata !50}
-!43 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
-!44 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
-!45 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 7, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
-!46 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
-!47 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ]
-!48 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ]
-!49 = metadata !{i32 786451, null, metadata !"__block_descriptor_withcopydispose", metadata !6, i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ]
-!50 = metadata !{i32 786445, metadata !6, metadata !"block", metadata !6, i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ]
+!43 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
+!44 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__flags", i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
+!45 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__reserved", i32 7, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
+!46 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
+!47 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ]
+!48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ]
+!49 = metadata !{i32 786451, metadata !63, null, metadata !"__block_descriptor_withcopydispose", i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ]
+!50 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"block", i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ]
 !51 = metadata !{i32 7, i32 18, metadata !28, null}
 !52 = metadata !{i32 7, i32 19, metadata !28, null}
 !53 = metadata !{i32 786688, metadata !28, metadata !"block", metadata !6, i32 5, metadata !9, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ]
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index b036b5e6cff6..48e1defd4c95 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -34,41 +34,41 @@ declare void @_Z8moz_freePv(i8*)
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !59, i32 4, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31}
-!5 = metadata !{i32 720942, i32 0, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", metadata !6, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !59} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786451, null, metadata !"nsAutoRefCnt", metadata !6, i32 10, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!12 = metadata !{i32 720942, i32 0, metadata !13, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", metadata !6, i32 11, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 11} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 720898, null, metadata !"nsAutoRefCnt", metadata !6, i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ]
+!11 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!12 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 11, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 11} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ]
 !14 = metadata !{metadata !12, metadata !15}
-!15 = metadata !{i32 720942, i32 0, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", metadata !6, i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 12} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 12} ; [ DW_TAG_subprogram ]
 !16 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !17 = metadata !{null, metadata !10}
 !18 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !20 = metadata !{metadata !22}
 !22 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ]
+!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ]
 !24 = metadata !{metadata !26}
 !26 = metadata !{i32 786689, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!27 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ]
+!27 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ]
 !28 = metadata !{metadata !30}
 !30 = metadata !{i32 786689, metadata !27, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 720942, i32 0, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", metadata !6, i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, metadata !36, metadata !43, i32 4} ; [ DW_TAG_subprogram ]
+!31 = metadata !{i32 720942, metadata !6, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, metadata !36, metadata !43, i32 4} ; [ DW_TAG_subprogram ]
 !32 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !33 = metadata !{metadata !9, metadata !34, metadata !9}
 !34 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !35} ; [ DW_TAG_pointer_type ]
-!35 = metadata !{i32 786451, null, metadata !"nsAutoRefCnt", metadata !6, i32 2, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!36 = metadata !{i32 720942, i32 0, metadata !37, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", metadata !6, i32 4, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 4} ; [ DW_TAG_subprogram ]
-!37 = metadata !{i32 720898, null, metadata !"nsAutoRefCnt", metadata !6, i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null} ; [ DW_TAG_class_type ]
+!35 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!36 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 4} ; [ DW_TAG_subprogram ]
+!37 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null} ; [ DW_TAG_class_type ]
 !38 = metadata !{metadata !39, metadata !40, metadata !36}
-!39 = metadata !{i32 786445, metadata !37, metadata !"mValue", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
-!40 = metadata !{i32 720942, i32 0, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", metadata !6, i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ]
+!39 = metadata !{i32 786445, metadata !59, metadata !37, metadata !"mValue", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!40 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ]
 !41 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !42 = metadata !{null, metadata !34}
 !43 = metadata !{metadata !45, metadata !46}
@@ -77,11 +77,11 @@ declare void @_Z8moz_freePv(i8*)
 !47 = metadata !{metadata !49}
 !49 = metadata !{i32 720948, i32 0, null, metadata !"mRefCnt", metadata !"mRefCnt", metadata !"", metadata !6, i32 9, metadata !37, i32 0, i32 1, i32* null, null} ; [ DW_TAG_variable ]
 !50 = metadata !{i32 5, i32 5, metadata !51, metadata !52}
-!51 = metadata !{i32 786443, metadata !31, i32 4, i32 29, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!51 = metadata !{i32 786443, metadata !6, metadata !31, i32 4, i32 29, i32 2} ; [ DW_TAG_lexical_block ]
 !52 = metadata !{i32 15, i32 0, metadata !53, null}
-!53 = metadata !{i32 786443, metadata !5, i32 14, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!53 = metadata !{i32 786443, metadata !6, metadata !5, i32 14, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
 !54 = metadata !{i32 19, i32 3, metadata !55, metadata !56}
-!55 = metadata !{i32 786443, metadata !27, i32 18, i32 41, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!55 = metadata !{i32 786443, metadata !6, metadata !27, i32 18, i32 41, i32 1} ; [ DW_TAG_lexical_block ]
 !56 = metadata !{i32 18, i32 41, metadata !23, metadata !52}
 !57 = metadata !{i32 19, i32 3, metadata !55, metadata !58}
 !58 = metadata !{i32 18, i32 41, metadata !23, null}
diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
index 436821394e34..e7a554ff868d 100644
--- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
+++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
@@ -47,30 +47,30 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo, null, null, metadata !41, i32 8} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo, null, null, metadata !41, i32 8} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !42} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 125693)", i1 true, metadata !"", i32 0, null, null, metadata !39, metadata !40, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !42, i32 12, metadata !"clang version 2.9 (trunk 125693)", i1 true, metadata !"", i32 0, null, null, metadata !39, metadata !40, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", metadata !1, i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null}
 !9 = metadata !{i32 786689, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0, metadata !32} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786454, metadata !2, metadata !"S1", metadata !1, i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 786451, metadata !2, metadata !"S1", metadata !1, i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!10 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786454, metadata !42, metadata !2, metadata !"S1", i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
+!12 = metadata !{i32 786451, metadata !42, metadata !2, metadata !"S1", i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !13 = metadata !{metadata !14, metadata !17}
-!14 = metadata !{i32 786445, metadata !1, metadata !"m", metadata !1, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ]
-!15 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
-!16 = metadata !{i32 786468, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 786445, metadata !1, metadata !"nums", metadata !1, i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"m", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ]
+!15 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
+!16 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"nums", i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
 !18 = metadata !{i32 786689, metadata !0, metadata !"nums", metadata !1, i32 7, metadata !5, i32 0, metadata !32} ; [ DW_TAG_arg_variable ]
 !19 = metadata !{i32 786484, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p, null} ; [ DW_TAG_variable ]
 !20 = metadata !{i32 7, i32 13, metadata !0, null}
 !21 = metadata !{i32 7, i32 21, metadata !0, null}
 !22 = metadata !{i32 9, i32 3, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !0, i32 8, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !1, metadata !0, i32 8, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
 !24 = metadata !{metadata !"int", metadata !25}
 !25 = metadata !{metadata !"omnipotent char", metadata !26}
 !26 = metadata !{metadata !"Simple C/C++ TBAA", null}
@@ -80,7 +80,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !30 = metadata !{%struct.S1* @p}
 !31 = metadata !{i32 7, i32 13, metadata !0, metadata !32}
 !32 = metadata !{i32 16, i32 3, metadata !33, null}
-!33 = metadata !{i32 786443, metadata !6, i32 15, i32 15, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!33 = metadata !{i32 786443, metadata !1, metadata !6, i32 15, i32 15, i32 1} ; [ DW_TAG_lexical_block ]
 !34 = metadata !{i32 1}
 !35 = metadata !{i32 7, i32 21, metadata !0, metadata !32}
 !36 = metadata !{i32 9, i32 3, metadata !23, metadata !32}
diff --git a/test/DebugInfo/debug-info-block-captured-self.ll b/test/DebugInfo/X86/debug-info-block-captured-self.ll
similarity index 58%
rename from test/DebugInfo/debug-info-block-captured-self.ll
rename to test/DebugInfo/X86/debug-info-block-captured-self.ll
index 2a5f324fefbe..77e02c62aada 100644
--- a/test/DebugInfo/debug-info-block-captured-self.ll
+++ b/test/DebugInfo/X86/debug-info-block-captured-self.ll
@@ -77,26 +77,26 @@ define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_
 }
 
 !llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 16, metadata !1, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !4, metadata !23, metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m] [DW_LANG_ObjC]
+!0 = metadata !{i32 786449, i32 16, metadata !1, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !4, metadata !23, metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m] [DW_LANG_ObjC]
 !1 = metadata !{i32 786473, metadata !107} ; [ DW_TAG_file_type ]
 !2 = metadata !{metadata !3}
-!3 = metadata !{i32 786436, null, metadata !"", metadata !1, i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [from ]
+!3 = metadata !{i32 786436, metadata !107, null, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [from ]
 !4 = metadata !{}
 !15 = metadata !{i32 0}
 !23 = metadata !{metadata !38, metadata !42}
-!27 = metadata !{i32 786454, null, metadata !"id", metadata !1, i32 31, i64 0, i64 0, i64 0, i32 0, metadata !28} ; [ DW_TAG_typedef ] [id] [line 31, size 0, align 0, offset 0] [from ]
-!28 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
-!29 = metadata !{i32 786451, null, metadata !"objc_object", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !30, i32 0, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!27 = metadata !{i32 786454, metadata !107, null, metadata !"id", i32 31, i64 0, i64 0, i64 0, i32 0, metadata !28} ; [ DW_TAG_typedef ] [id] [line 31, size 0, align 0, offset 0] [from ]
+!28 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!29 = metadata !{i32 786451, metadata !107, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !30, i32 0, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
 !30 = metadata !{metadata !31}
-!31 = metadata !{i32 786445, metadata !29, metadata !"isa", metadata !1, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
-!32 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
-!33 = metadata !{i32 786451, null, metadata !"objc_class", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
-!34 = metadata !{i32 786451, i32 0, metadata !"Main", metadata !1, i32 23, i64 0, i64 0, i32 0, i32 1092, i32 0, i32 0, i32 16} ; [ DW_TAG_structure_type ] [Main] [line 23, size 0, align 0, offset 0] [artificial] [fwd] [from ]
-!38 = metadata !{i32 786478, i32 0, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"", metadata !1, i32 33, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke", null, null, metadata !15, i32 33} ; [ DW_TAG_subprogram ] [line 33] [local] [def] [__24-[Main initWithContext:]_block_invoke]
-!39 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!31 = metadata !{i32 786445, metadata !107, metadata !29, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!32 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!33 = metadata !{i32 786451, metadata !107, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!34 = metadata !{i32 786451, metadata !107, null, metadata !"Main", i32 23, i64 0, i64 0, i32 0, i32 1092, i32 0, i32 0, i32 16} ; [ DW_TAG_structure_type ] [Main] [line 23, size 0, align 0, offset 0] [artificial] [fwd] [from ]
+!38 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"", i32 33, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke", null, null, metadata !15, i32 33} ; [ DW_TAG_subprogram ] [line 33] [local] [def] [__24-[Main initWithContext:]_block_invoke]
+!39 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !40 = metadata !{null, metadata !41, metadata !27}
-!41 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!42 = metadata !{i32 786478, i32 0, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"", metadata !1, i32 35, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke_2", null, null, metadata !15, i32 35} ; [ DW_TAG_subprogram ] [line 35] [local] [def] [__24-[Main initWithContext:]_block_invoke_2]
+!41 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!42 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"", i32 35, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke_2", null, null, metadata !15, i32 35} ; [ DW_TAG_subprogram ] [line 35] [local] [def] [__24-[Main initWithContext:]_block_invoke_2]
 !84 = metadata !{i32 33, i32 0, metadata !38, null}
 !86 = metadata !{i32 786688, metadata !38, metadata !"self", metadata !1, i32 41, metadata !34, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 41]
 !87 = metadata !{i32 41, i32 0, metadata !38, null}
diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll
new file mode 100644
index 000000000000..36ab61100856
--- /dev/null
+++ b/test/DebugInfo/X86/debug-info-blocks.ll
@@ -0,0 +1,372 @@
+; RUN: llc -mtriple x86_64-apple-darwin -filetype=obj -o %t.o < %s
+; RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
+
+; Generated from llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m
+; rdar://problem/9279956
+; test that the DW_AT_location of self is at ( fbreg +{{[0-9]+}}, deref, +{{[0-9]+}} )
+
+; CHECK: DW_AT_name{{.*}}_block_invoke
+; CHECK-NOT: DW_TAG_subprogram
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: .block_descriptor
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_location
+; CHECK-NOT: DW_TAG_subprogram
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: DW_AT_name{{.*}}"self"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_type{{.*}}{[[APTR:.*]]}
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_artificial
+; CHECK-NOT: DW_TAG
+; 0x06 = DW_OP_deref
+; 0x23 = DW_OP_uconst
+; 0x91 = DW_OP_fbreg
+; CHECK: DW_AT_location{{.*}}91 {{[0-9]+}} 06 23 {{[0-9]+}} )
+; CHECK: DW_TAG_structure_type
+; CHECK: [[A:.*]]:   DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_APPLE_objc_complete_type
+; CHECK-NEXT: DW_AT_name{{.*}}"A"
+; CHECK: [[APTR]]:   DW_TAG_pointer_type [5]
+; CHECK-NEXT: {[[A]]}
+
+
+; ModuleID = 'llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+%0 = type opaque
+%1 = type opaque
+%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* }
+%struct._objc_cache = type opaque
+%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* }
+%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] }
+%struct._objc_method = type { i8*, i8*, i8* }
+%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] }
+%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32, i8** }
+%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
+%struct._prop_t = type { i8*, i8* }
+%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] }
+%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 }
+%struct._message_ref_t = type { i8*, i8* }
+%struct._objc_super = type { i8*, i8* }
+%struct.__block_descriptor = type { i64, i64 }
+%struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* }
+
+@"OBJC_CLASS_$_A" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_A", %struct._class_t* @"OBJC_CLASS_$_NSObject", %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** @_objc_empty_vtable, %struct._class_ro_t* @"\01l_OBJC_CLASS_RO_$_A" }, section "__DATA, __objc_data", align 8
+@"\01L_OBJC_CLASSLIST_SUP_REFS_$_" = internal global %struct._class_t* @"OBJC_CLASS_$_A", section "__DATA, __objc_superrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [5 x i8] c"init\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal externally_initialized global i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"OBJC_CLASS_$_NSMutableDictionary" = external global %struct._class_t
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_" = internal global %struct._class_t* @"OBJC_CLASS_$_NSMutableDictionary", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_METH_VAR_NAME_1" = internal global [6 x i8] c"alloc\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01l_objc_msgSend_fixup_alloc" = weak hidden global { i8* (i8*, %struct._message_ref_t*, ...)*, i8* } { i8* (i8*, %struct._message_ref_t*, ...)* @objc_msgSend_fixup, i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0) }, section "__DATA, __objc_msgrefs, coalesced", align 16
+@"\01L_OBJC_METH_VAR_NAME_2" = internal global [6 x i8] c"count\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01l_objc_msgSend_fixup_count" = weak hidden global { i8* (i8*, %struct._message_ref_t*, ...)*, i8* } { i8* (i8*, %struct._message_ref_t*, ...)* @objc_msgSend_fixup, i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_2", i32 0, i32 0) }, section "__DATA, __objc_msgrefs, coalesced", align 16
+@"OBJC_IVAR_$_A.ivar" = global i64 0, section "__DATA, __objc_ivar", align 8
+@_NSConcreteStackBlock = external global i8*
+@.str = private unnamed_addr constant [6 x i8] c"v8@?0\00", align 1
+@__block_descriptor_tmp = internal constant { i64, i64, i8*, i8*, i8*, i64 } { i64 0, i64 40, i8* bitcast (void (i8*, i8*)* @__copy_helper_block_ to i8*), i8* bitcast (void (i8*)* @__destroy_helper_block_ to i8*), i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i64 256 }
+@_objc_empty_cache = external global %struct._objc_cache
+@_objc_empty_vtable = external global i8* (i8*, i8*)*
+@"OBJC_METACLASS_$_NSObject" = external global %struct._class_t
+@"\01L_OBJC_CLASS_NAME_" = internal global [2 x i8] c"A\00", section "__TEXT,__objc_classname,cstring_literals", align 1
+@"\01l_OBJC_METACLASS_RO_$_A" = internal global %struct._class_ro_t { i32 1, i32 40, i32 40, i8* null, i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct.__method_list_t* null, %struct._objc_protocol_list* null, %struct._ivar_list_t* null, i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8
+@"OBJC_METACLASS_$_A" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_NSObject", %struct._class_t* @"OBJC_METACLASS_$_NSObject", %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** @_objc_empty_vtable, %struct._class_ro_t* @"\01l_OBJC_METACLASS_RO_$_A" }, section "__DATA, __objc_data", align 8
+@"OBJC_CLASS_$_NSObject" = external global %struct._class_t
+@"\01L_OBJC_METH_VAR_TYPE_" = internal global [8 x i8] c"@16@0:8\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
+@"\01l_OBJC_$_INSTANCE_METHODS_A" = internal global { i32, i32, [1 x %struct._objc_method] } { i32 24, i32 1, [1 x %struct._objc_method] [%struct._objc_method { i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([8 x i8]* @"\01L_OBJC_METH_VAR_TYPE_", i32 0, i32 0), i8* bitcast (i8* (%0*, i8*)* @"\01-[A init]" to i8*) }] }, section "__DATA, __objc_const", align 8
+@"\01L_OBJC_METH_VAR_NAME_3" = internal global [5 x i8] c"ivar\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_METH_VAR_TYPE_4" = internal global [2 x i8] c"i\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
+@"\01l_OBJC_$_INSTANCE_VARIABLES_A" = internal global { i32, i32, [1 x %struct._ivar_t] } { i32 32, i32 1, [1 x %struct._ivar_t] [%struct._ivar_t { i64* @"OBJC_IVAR_$_A.ivar", i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_METH_VAR_TYPE_4", i32 0, i32 0), i32 2, i32 4 }] }, section "__DATA, __objc_const", align 8
+@"\01l_OBJC_CLASS_RO_$_A" = internal global %struct._class_ro_t { i32 0, i32 0, i32 4, i8* null, i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct.__method_list_t* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01l_OBJC_$_INSTANCE_METHODS_A" to %struct.__method_list_t*), %struct._objc_protocol_list* null, %struct._ivar_list_t* bitcast ({ i32, i32, [1 x %struct._ivar_t] }* @"\01l_OBJC_$_INSTANCE_VARIABLES_A" to %struct._ivar_list_t*), i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_5" = internal global %struct._class_t* @"OBJC_CLASS_$_A", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_LABEL_CLASS_$" = internal global [1 x i8*] [i8* bitcast (%struct._class_t* @"OBJC_CLASS_$_A" to i8*)], section "__DATA, __objc_classlist, regular, no_dead_strip", align 8
+@llvm.used = appending global [14 x i8*] [i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_SUP_REFS_$_" to i8*), i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_" to i8*), i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_2", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([8 x i8]* @"\01L_OBJC_METH_VAR_TYPE_", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01l_OBJC_$_INSTANCE_METHODS_A" to i8*), i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_METH_VAR_TYPE_4", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._ivar_t] }* @"\01l_OBJC_$_INSTANCE_VARIABLES_A" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_5" to i8*), i8* bitcast ([1 x i8*]* @"\01L_OBJC_LABEL_CLASS_$" to i8*)], section "llvm.metadata"
+
+define internal i8* @"\01-[A init]"(%0* %self, i8* %_cmd) #0 {
+  %1 = alloca %0*, align 8
+  %2 = alloca i8*, align 8
+  %3 = alloca %struct._objc_super
+  %4 = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8
+  store %0* %self, %0** %1, align 8
+  call void @llvm.dbg.declare(metadata !{%0** %1}, metadata !60), !dbg !62
+  store i8* %_cmd, i8** %2, align 8
+  call void @llvm.dbg.declare(metadata !{i8** %2}, metadata !63), !dbg !62
+  %5 = load %0** %1, !dbg !65
+  %6 = bitcast %0* %5 to i8*, !dbg !65
+  %7 = getelementptr inbounds %struct._objc_super* %3, i32 0, i32 0, !dbg !65
+  store i8* %6, i8** %7, !dbg !65
+  %8 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_SUP_REFS_$_", !dbg !65
+  %9 = bitcast %struct._class_t* %8 to i8*, !dbg !65
+  %10 = getelementptr inbounds %struct._objc_super* %3, i32 0, i32 1, !dbg !65
+  store i8* %9, i8** %10, !dbg !65
+  %11 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !65, !invariant.load !67
+  %12 = call i8* bitcast (i8* (%struct._objc_super*, i8*, ...)* @objc_msgSendSuper2 to i8* (%struct._objc_super*, i8*)*)(%struct._objc_super* %3, i8* %11), !dbg !65
+  %13 = bitcast i8* %12 to %0*, !dbg !65
+  store %0* %13, %0** %1, align 8, !dbg !65
+  %14 = icmp ne %0* %13, null, !dbg !65
+  br i1 %14, label %15, label %24, !dbg !65
+
+; <label>:15                                      ; preds = %0
+  %16 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 0, !dbg !68
+  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %16, !dbg !68
+  %17 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 1, !dbg !68
+  store i32 -1040187392, i32* %17, !dbg !68
+  %18 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 2, !dbg !68
+  store i32 0, i32* %18, !dbg !68
+  %19 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 3, !dbg !68
+  store i8* bitcast (void (i8*)* @"__9-[A init]_block_invoke" to i8*), i8** %19, !dbg !68
+  %20 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 4, !dbg !68
+  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i64 }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %20, !dbg !68
+  %21 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !68
+  %22 = load %0** %1, align 8, !dbg !68
+  store %0* %22, %0** %21, align 8, !dbg !68
+  %23 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4 to void ()*, !dbg !68
+  call void @run(void ()* %23), !dbg !68
+  br label %24, !dbg !70
+
+; <label>:24                                      ; preds = %15, %0
+  %25 = load %0** %1, align 8, !dbg !71
+  %26 = bitcast %0* %25 to i8*, !dbg !71
+  ret i8* %26, !dbg !71
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare i8* @objc_msgSendSuper2(%struct._objc_super*, i8*, ...)
+
+define internal void @run(void ()* %block) #0 {
+  %1 = alloca void ()*, align 8
+  store void ()* %block, void ()** %1, align 8
+  call void @llvm.dbg.declare(metadata !{void ()** %1}, metadata !72), !dbg !73
+  %2 = load void ()** %1, align 8, !dbg !74
+  %3 = bitcast void ()* %2 to %struct.__block_literal_generic*, !dbg !74
+  %4 = getelementptr inbounds %struct.__block_literal_generic* %3, i32 0, i32 3, !dbg !74
+  %5 = bitcast %struct.__block_literal_generic* %3 to i8*, !dbg !74
+  %6 = load i8** %4, !dbg !74
+  %7 = bitcast i8* %6 to void (i8*)*, !dbg !74
+  call void %7(i8* %5), !dbg !74
+  ret void, !dbg !75
+}
+
+define internal void @"__9-[A init]_block_invoke"(i8* %.block_descriptor) #0 {
+  %1 = alloca i8*, align 8
+  %2 = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, align 8
+  %d = alloca %1*, align 8
+  store i8* %.block_descriptor, i8** %1, align 8
+  %3 = load i8** %1
+  call void @llvm.dbg.value(metadata !{i8* %3}, i64 0, metadata !76), !dbg !88
+  call void @llvm.dbg.declare(metadata !{i8* %.block_descriptor}, metadata !76), !dbg !88
+  %4 = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !88
+  store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>** %2, align 8, !dbg !88
+  %5 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !88
+  call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>** %2}, metadata !89), !dbg !90
+  call void @llvm.dbg.declare(metadata !{%1** %d}, metadata !91), !dbg !100
+  %6 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", !dbg !100
+  %7 = bitcast %struct._class_t* %6 to i8*, !dbg !100
+  %8 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !100
+  %9 = bitcast i8* %8 to i8* (i8*, i8*)*, !dbg !100
+  %10 = call i8* %9(i8* %7, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to i8*)), !dbg !100
+  %11 = bitcast i8* %10 to %1*, !dbg !100
+  %12 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !100, !invariant.load !67
+  %13 = bitcast %1* %11 to i8*, !dbg !100
+  %14 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %13, i8* %12), !dbg !100
+  %15 = bitcast i8* %14 to %1*, !dbg !100
+  store %1* %15, %1** %d, align 8, !dbg !100
+  %16 = load %1** %d, align 8, !dbg !101
+  %17 = bitcast %1* %16 to i8*, !dbg !101
+  %18 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_count" to %struct._message_ref_t*), i32 0, i32 0), !dbg !101
+  %19 = bitcast i8* %18 to i32 (i8*, i8*)*, !dbg !101
+  %20 = call i32 %19(i8* %17, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_count" to i8*)), !dbg !101
+  %21 = add nsw i32 42, %20, !dbg !101
+  %22 = load %0** %5, align 8, !dbg !101
+  %23 = load i64* @"OBJC_IVAR_$_A.ivar", !dbg !101, !invariant.load !67
+  %24 = bitcast %0* %22 to i8*, !dbg !101
+  %25 = getelementptr inbounds i8* %24, i64 %23, !dbg !101
+  %26 = bitcast i8* %25 to i32*, !dbg !101
+  store i32 %21, i32* %26, align 4, !dbg !101
+  ret void, !dbg !90
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+declare i8* @objc_msgSend_fixup(i8*, %struct._message_ref_t*, ...)
+
+declare i8* @objc_msgSend(i8*, i8*, ...) #2
+
+define internal void @__copy_helper_block_(i8*, i8*) {
+  %3 = alloca i8*, align 8
+  %4 = alloca i8*, align 8
+  store i8* %0, i8** %3, align 8
+  call void @llvm.dbg.declare(metadata !{i8** %3}, metadata !102), !dbg !103
+  store i8* %1, i8** %4, align 8
+  call void @llvm.dbg.declare(metadata !{i8** %4}, metadata !104), !dbg !103
+  %5 = load i8** %4, !dbg !103
+  %6 = bitcast i8* %5 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103
+  %7 = load i8** %3, !dbg !103
+  %8 = bitcast i8* %7 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103
+  %9 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %6, i32 0, i32 5, !dbg !103
+  %10 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %8, i32 0, i32 5, !dbg !103
+  %11 = load %0** %9, !dbg !103
+  %12 = bitcast %0* %11 to i8*, !dbg !103
+  %13 = bitcast %0** %10 to i8*, !dbg !103
+  call void @_Block_object_assign(i8* %13, i8* %12, i32 3) #3, !dbg !103
+  ret void, !dbg !103
+}
+
+declare void @_Block_object_assign(i8*, i8*, i32)
+
+define internal void @__destroy_helper_block_(i8*) {
+  %2 = alloca i8*, align 8
+  store i8* %0, i8** %2, align 8
+  call void @llvm.dbg.declare(metadata !{i8** %2}, metadata !105), !dbg !106
+  %3 = load i8** %2, !dbg !106
+  %4 = bitcast i8* %3 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !106
+  %5 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !106
+  %6 = load %0** %5, !dbg !106
+  %7 = bitcast %0* %6 to i8*, !dbg !106
+  call void @_Block_object_dispose(i8* %7, i32 3) #3, !dbg !106
+  ret void, !dbg !106
+}
+
+declare void @_Block_object_dispose(i8*, i32)
+
+define i32 @main() #0 {
+  %1 = alloca i32, align 4
+  %a = alloca %0*, align 8
+  store i32 0, i32* %1
+  call void @llvm.dbg.declare(metadata !{%0** %a}, metadata !107), !dbg !108
+  %2 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_5", !dbg !108
+  %3 = bitcast %struct._class_t* %2 to i8*, !dbg !108
+  %4 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !108
+  %5 = bitcast i8* %4 to i8* (i8*, i8*)*, !dbg !108
+  %6 = call i8* %5(i8* %3, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to i8*)), !dbg !108
+  %7 = bitcast i8* %6 to %0*, !dbg !108
+  %8 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !108, !invariant.load !67
+  %9 = bitcast %0* %7 to i8*, !dbg !108
+  %10 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %9, i8* %8), !dbg !108
+  %11 = bitcast i8* %10 to %0*, !dbg !108
+  store %0* %11, %0** %a, align 8, !dbg !108
+  ret i32 0, !dbg !109
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nonlazybind }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!56, !57, !58, !59}
+
+!0 = metadata !{i32 786449, metadata !1, i32 16, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !12, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/<unknown>] [DW_LANG_ObjC]
+!1 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/<unknown>", metadata !"llvm/_build.ninja.Debug"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"A", i32 33, i64 32, i64 32, i32 0, i32 512, null, metadata !7, i32 16, null, null} ; [ DW_TAG_structure_type ] [A] [line 33, size 32, align 32, offset 0] [from ]
+!5 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m", metadata !"llvm/_build.ninja.Debug"}
+!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!7 = metadata !{metadata !8, metadata !10}
+!8 = metadata !{i32 786460, null, metadata !4, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject]
+!9 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSObject", i32 21, i64 0, i64 8, i32 0, i32 0, null, metadata !2, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSObject] [line 21, size 0, align 8, offset 0] [from ]
+!10 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"ivar", i32 35, i64 32, i64 32, i64 0, i32 0, metadata !11, null} ; [ DW_TAG_member ] [ivar] [line 35, size 32, align 32, offset 0] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{metadata !13, metadata !27, metadata !31, metadata !35, metadata !36, metadata !39}
+!13 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"-[A init]", metadata !"-[A init]", metadata !"", i32 46, metadata !14, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, i8* (%0*, i8*)* @"\01-[A init]", null, null, metadata !2, i32 46} ; [ DW_TAG_subprogram ] [line 46] [local] [def] [-[A init]]
+!14 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{metadata !16, metadata !23, metadata !24}
+!16 = metadata !{i32 786454, metadata !5, null, metadata !"id", i32 46, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [id] [line 46, size 0, align 0, offset 0] [from ]
+!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!18 = metadata !{i32 786451, metadata !1, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!22 = metadata !{i32 786451, metadata !1, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!23 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
+!24 = metadata !{i32 786454, metadata !5, i32 0, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 64, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [artificial] [from ]
+!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !26} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector]
+!26 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!27 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"__9-[A init]_block_invoke", metadata !"__9-[A init]_block_invoke", metadata !"", i32 49, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @"__9-[A init]_block_invoke", null, null, metadata !2, i32 49} ; [ DW_TAG_subprogram ] [line 49] [local] [def] [__9-[A init]_block_invoke]
+!28 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = metadata !{null, metadata !30}
+!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!31 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 52, metadata !33, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*, i8*)* @__copy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__copy_helper_block_]
+!32 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/<unknown>]
+!33 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!34 = metadata !{null, metadata !30, metadata !30}
+!35 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 52, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*)* @__destroy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__destroy_helper_block_]
+!36 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !37, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 60} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 60] [main]
+!37 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !38, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!38 = metadata !{metadata !11}
+!39 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 39, metadata !40, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (void ()*)* @run, null, null, metadata !2, i32 40} ; [ DW_TAG_subprogram ] [line 39] [local] [def] [scope 40] [run]
+!40 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !41, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!41 = metadata !{null, metadata !42}
+!42 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !43} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_generic]
+!43 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_generic", i32 40, i64 256, i64 0, i32 0, i32 8, null, metadata !44, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 40, size 256, align 0, offset 0] [from ]
+!44 = metadata !{metadata !45, metadata !46, metadata !47, metadata !48, metadata !49}
+!45 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 0, size 64, align 64, offset 0] [from ]
+!46 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 0, size 32, align 32, offset 64] [from int]
+!47 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__reserved", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !11} ; [ DW_TAG_member ] [__reserved] [line 0, size 32, align 32, offset 96] [from int]
+!48 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 0, size 64, align 64, offset 128] [from ]
+!49 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 40, i64 64, i64 64, i64 192, i32 0, metadata !50} ; [ DW_TAG_member ] [__descriptor] [line 40, size 64, align 64, offset 192] [from ]
+!50 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !51} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_descriptor]
+!51 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_descriptor", i32 40, i64 128, i64 0, i32 0, i32 8, null, metadata !52, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 40, size 128, align 0, offset 0] [from ]
+!52 = metadata !{metadata !53, metadata !55}
+!53 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !54} ; [ DW_TAG_member ] [reserved] [line 0, size 64, align 64, offset 0] [from long unsigned int]
+!54 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!55 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"Size", i32 0, i64 64, i64 64, i64 64, i32 0, metadata !54} ; [ DW_TAG_member ] [Size] [line 0, size 64, align 64, offset 64] [from long unsigned int]
+!56 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!57 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!58 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!59 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!60 = metadata !{i32 786689, metadata !13, metadata !"self", metadata !32, i32 16777262, metadata !61, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [self] [line 46]
+!61 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!62 = metadata !{i32 46, i32 0, metadata !13, null}
+!63 = metadata !{i32 786689, metadata !13, metadata !"_cmd", metadata !32, i32 33554478, metadata !64, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [_cmd] [line 46]
+!64 = metadata !{i32 786454, metadata !5, null, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 0, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [from ]
+!65 = metadata !{i32 48, i32 0, metadata !66, null}
+!66 = metadata !{i32 786443, metadata !5, metadata !13, i32 47, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!67 = metadata !{}
+!68 = metadata !{i32 49, i32 0, metadata !69, null}
+!69 = metadata !{i32 786443, metadata !5, metadata !66, i32 48, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!70 = metadata !{i32 53, i32 0, metadata !69, null}
+!71 = metadata !{i32 54, i32 0, metadata !66, null}
+!72 = metadata !{i32 786689, metadata !39, metadata !"block", metadata !6, i32 16777255, metadata !42, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [block] [line 39]
+!73 = metadata !{i32 39, i32 0, metadata !39, null}
+!74 = metadata !{i32 41, i32 0, metadata !39, null}
+!75 = metadata !{i32 42, i32 0, metadata !39, null}
+!76 = metadata !{i32 786689, metadata !27, metadata !".block_descriptor", metadata !6, i32 16777265, metadata !77, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [.block_descriptor] [line 49]
+!77 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_1]
+!78 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_1", i32 49, i64 320, i64 64, i32 0, i32 0, null, metadata !79, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 49, size 320, align 64, offset 0] [from ]
+!79 = metadata !{metadata !80, metadata !81, metadata !82, metadata !83, metadata !84, metadata !87}
+!80 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 49, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 49, size 64, align 64, offset 0] [from ]
+!81 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 49, i64 32, i64 32, i64 64, i32 0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 49, size 32, align 32, offset 64] [from int]
+!82 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__reserved", i32 49, i64 32, i64 32, i64 96, i32 0, metadata !11} ; [ DW_TAG_member ] [__reserved] [line 49, size 32, align 32, offset 96] [from int]
+!83 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 49, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 49, size 64, align 64, offset 128] [from ]
+!84 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 49, i64 64, i64 64, i64 192, i32 0, metadata !85} ; [ DW_TAG_member ] [__descriptor] [line 49, size 64, align 64, offset 192] [from ]
+!85 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !86} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from __block_descriptor_withcopydispose]
+!86 = metadata !{i32 786451, metadata !1, null, metadata !"__block_descriptor_withcopydispose", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 49, size 0, align 0, offset 0] [fwd] [from ]
+!87 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"self", i32 49, i64 64, i64 64, i64 256, i32 0, metadata !61} ; [ DW_TAG_member ] [self] [line 49, size 64, align 64, offset 256] [from ]
+!88 = metadata !{i32 49, i32 0, metadata !27, null}
+!89 = metadata !{i32 786688, metadata !27, metadata !"self", metadata !32, i32 52, metadata !23, i32 0, i32 0, i64 2, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 52]
+!90 = metadata !{i32 52, i32 0, metadata !27, null}
+!91 = metadata !{i32 786688, metadata !92, metadata !"d", metadata !6, i32 50, metadata !93, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 50]
+!92 = metadata !{i32 786443, metadata !5, metadata !27, i32 49, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!93 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !94} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from NSMutableDictionary]
+!94 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSMutableDictionary", i32 30, i64 0, i64 8, i32 0, i32 0, null, metadata !95, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSMutableDictionary] [line 30, size 0, align 8, offset 0] [from ]
+!95 = metadata !{metadata !96}
+!96 = metadata !{i32 786460, null, metadata !94, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !97} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSDictionary]
+!97 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSDictionary", i32 26, i64 0, i64 8, i32 0, i32 0, null, metadata !98, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSDictionary] [line 26, size 0, align 8, offset 0] [from ]
+!98 = metadata !{metadata !99}
+!99 = metadata !{i32 786460, null, metadata !97, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject]
+!100 = metadata !{i32 50, i32 0, metadata !92, null}
+!101 = metadata !{i32 51, i32 0, metadata !92, null}
+!102 = metadata !{i32 786689, metadata !31, metadata !"", metadata !32, i32 16777268, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [line 52]
+!103 = metadata !{i32 52, i32 0, metadata !31, null}
+!104 = metadata !{i32 786689, metadata !31, metadata !"", metadata !32, i32 33554484, metadata !30, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [line 52]
+!105 = metadata !{i32 786689, metadata !35, metadata !"", metadata !32, i32 16777268, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [line 52]
+!106 = metadata !{i32 52, i32 0, metadata !35, null}
+!107 = metadata !{i32 786688, metadata !36, metadata !"a", metadata !6, i32 61, metadata !61, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 61]
+!108 = metadata !{i32 61, i32 0, metadata !36, null}
+!109 = metadata !{i32 62, i32 0, metadata !36, null}
diff --git a/test/DebugInfo/X86/debug-info-static-member.ll b/test/DebugInfo/X86/debug-info-static-member.ll
index 7a4af0468d7c..50a2b3fa5163 100644
--- a/test/DebugInfo/X86/debug-info-static-member.ll
+++ b/test/DebugInfo/X86/debug-info-static-member.ll
@@ -58,30 +58,30 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.3 (trunk 171914)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !10, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 (trunk 171914)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !10, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 23} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main]
+!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 23} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main]
 !6 = metadata !{i32 786473, metadata !33} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{metadata !12, metadata !27, metadata !28}
 !12 = metadata !{i32 786484, i32 0, metadata !13, metadata !"a", metadata !"a", metadata !"_ZN1C1aE", metadata !6, i32 14, metadata !9, i32 0, i32 1, i32* @_ZN1C1aE, metadata !15} ; [ DW_TAG_variable ] [a] [line 14] [def]
-!13 = metadata !{i32 786434, null, metadata !"C", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [from ]
+!13 = metadata !{i32 786434, metadata !33, null, metadata !"C", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [from ]
 !14 = metadata !{metadata !15, metadata !16, metadata !19, metadata !20, metadata !23, metadata !24, metadata !26}
-!15 = metadata !{i32 786445, metadata !13, metadata !"a", metadata !6, i32 3, i64 0, i64 0, i64 0, i32 4097, metadata !9, null} ; [ DW_TAG_member ] [a] [line 3, size 0, align 0, offset 0] [private] [static] [from int]
-!16 = metadata !{i32 786445, metadata !13, metadata !"const_a", metadata !6, i32 4, i64 0, i64 0, i64 0, i32 4097, metadata !17, i1 true} ; [ DW_TAG_member ] [const_a] [line 4, size 0, align 0, offset 0] [private] [static] [from ]
-!17 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from bool]
-!18 = metadata !{i32 786468, null, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
-!19 = metadata !{i32 786445, metadata !13, metadata !"b", metadata !6, i32 6, i64 0, i64 0, i64 0, i32 4098, metadata !9, null} ; [ DW_TAG_member ] [b] [line 6, size 0, align 0, offset 0] [protected] [static] [from int]
-!20 = metadata !{i32 786445, metadata !13, metadata !"const_b", metadata !6, i32 7, i64 0, i64 0, i64 0, i32 4098, metadata !21, float 0x40091EB860000000} ; [ DW_TAG_member ] [const_b] [line 7, size 0, align 0, offset 0] [protected] [static] [from ]
-!21 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from float]
-!22 = metadata !{i32 786468, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
-!23 = metadata !{i32 786445, metadata !13, metadata !"c", metadata !6, i32 9, i64 0, i64 0, i64 0, i32 4096, metadata !9, null} ; [ DW_TAG_member ] [c] [line 9, size 0, align 0, offset 0] [static] [from int]
-!24 = metadata !{i32 786445, metadata !13, metadata !"const_c", metadata !6, i32 10, i64 0, i64 0, i64 0, i32 4096, metadata !25, i32 18} ; [ DW_TAG_member ] [const_c] [line 10, size 0, align 0, offset 0] [static] [from ]
-!25 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
-!26 = metadata !{i32 786445, metadata !13, metadata !"d", metadata !6, i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [d] [line 11, size 32, align 32, offset 0] [from int]
+!15 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"a", i32 3, i64 0, i64 0, i64 0, i32 4097, metadata !9, null} ; [ DW_TAG_member ] [a] [line 3, size 0, align 0, offset 0] [private] [static] [from int]
+!16 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_a", i32 4, i64 0, i64 0, i64 0, i32 4097, metadata !17, i1 true} ; [ DW_TAG_member ] [const_a] [line 4, size 0, align 0, offset 0] [private] [static] [from ]
+!17 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from bool]
+!18 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!19 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"b", i32 6, i64 0, i64 0, i64 0, i32 4098, metadata !9, null} ; [ DW_TAG_member ] [b] [line 6, size 0, align 0, offset 0] [protected] [static] [from int]
+!20 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_b", i32 7, i64 0, i64 0, i64 0, i32 4098, metadata !21, float 0x40091EB860000000} ; [ DW_TAG_member ] [const_b] [line 7, size 0, align 0, offset 0] [protected] [static] [from ]
+!21 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from float]
+!22 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!23 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"c", i32 9, i64 0, i64 0, i64 0, i32 4096, metadata !9, null} ; [ DW_TAG_member ] [c] [line 9, size 0, align 0, offset 0] [static] [from int]
+!24 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_c", i32 10, i64 0, i64 0, i64 0, i32 4096, metadata !25, i32 18} ; [ DW_TAG_member ] [const_c] [line 10, size 0, align 0, offset 0] [static] [from ]
+!25 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
+!26 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"d", i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [d] [line 11, size 32, align 32, offset 0] [from int]
 !27 = metadata !{i32 786484, i32 0, metadata !13, metadata !"b", metadata !"b", metadata !"_ZN1C1bE", metadata !6, i32 15, metadata !9, i32 0, i32 1, i32* @_ZN1C1bE, metadata !19} ; [ DW_TAG_variable ] [b] [line 15] [def]
 !28 = metadata !{i32 786484, i32 0, metadata !13, metadata !"c", metadata !"c", metadata !"_ZN1C1cE", metadata !6, i32 16, metadata !9, i32 0, i32 1, i32* @_ZN1C1cE, metadata !23} ; [ DW_TAG_variable ] [c] [line 16] [def]
 !29 = metadata !{i32 786688, metadata !5, metadata !"instance_C", metadata !6, i32 20, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [instance_C] [line 20]
diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll
index 687bab4cc9b6..30e8c2e27430 100644
--- a/test/DebugInfo/X86/elf-names.ll
+++ b/test/DebugInfo/X86/elf-names.ll
@@ -58,36 +58,36 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !53, i32 4, metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !31}
-!5 = metadata !{i32 786478, i32 0, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2Ev", metadata !6, i32 12, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*)* @_ZN1DC2Ev, null, metadata !17, metadata !27, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [D]
+!5 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2Ev", i32 12, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*)* @_ZN1DC2Ev, null, metadata !17, metadata !27, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [D]
 !6 = metadata !{i32 786473, metadata !53} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9}
 !9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
-!10 = metadata !{i32 786434, null, metadata !"D", metadata !6, i32 1, i64 128, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [from ]
+!10 = metadata !{i32 786434, metadata !53, null, metadata !"D", i32 1, i64 128, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [from ]
 !11 = metadata !{metadata !12, metadata !14, metadata !15, metadata !16, metadata !17, metadata !20}
-!12 = metadata !{i32 786445, metadata !10, metadata !"c1", metadata !6, i32 6, i64 32, i64 32, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ] [c1] [line 6, size 32, align 32, offset 0] [private] [from int]
-!13 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!14 = metadata !{i32 786445, metadata !10, metadata !"c2", metadata !6, i32 7, i64 32, i64 32, i64 32, i32 1, metadata !13} ; [ DW_TAG_member ] [c2] [line 7, size 32, align 32, offset 32] [private] [from int]
-!15 = metadata !{i32 786445, metadata !10, metadata !"c3", metadata !6, i32 8, i64 32, i64 32, i64 64, i32 1, metadata !13} ; [ DW_TAG_member ] [c3] [line 8, size 32, align 32, offset 64] [private] [from int]
-!16 = metadata !{i32 786445, metadata !10, metadata !"c4", metadata !6, i32 9, i64 32, i64 32, i64 96, i32 1, metadata !13} ; [ DW_TAG_member ] [c4] [line 9, size 32, align 32, offset 96] [private] [from int]
-!17 = metadata !{i32 786478, i32 0, metadata !10, metadata !"D", metadata !"D", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [D]
+!12 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c1", i32 6, i64 32, i64 32, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ] [c1] [line 6, size 32, align 32, offset 0] [private] [from int]
+!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c2", i32 7, i64 32, i64 32, i64 32, i32 1, metadata !13} ; [ DW_TAG_member ] [c2] [line 7, size 32, align 32, offset 32] [private] [from int]
+!15 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c3", i32 8, i64 32, i64 32, i64 64, i32 1, metadata !13} ; [ DW_TAG_member ] [c3] [line 8, size 32, align 32, offset 64] [private] [from int]
+!16 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c4", i32 9, i64 32, i64 32, i64 96, i32 1, metadata !13} ; [ DW_TAG_member ] [c4] [line 9, size 32, align 32, offset 96] [private] [from int]
+!17 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"D", metadata !"D", metadata !"", i32 3, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [D]
 !18 = metadata !{metadata !19}
 !19 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!20 = metadata !{i32 786478, i32 0, metadata !10, metadata !"D", metadata !"D", metadata !"", metadata !6, i32 4, metadata !21, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !25, i32 4} ; [ DW_TAG_subprogram ] [line 4] [D]
+!20 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"D", metadata !"D", metadata !"", i32 4, metadata !21, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !25, i32 4} ; [ DW_TAG_subprogram ] [line 4] [D]
 !21 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !22 = metadata !{null, metadata !9, metadata !23}
 !23 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!24 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from D]
+!24 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from D]
 !25 = metadata !{metadata !26}
 !26 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
 !27 = metadata !{metadata !28}
 !28 = metadata !{metadata !29}
 !29 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777228, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 12]
-!30 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
-!31 = metadata !{i32 786478, i32 0, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2ERKS_", metadata !6, i32 19, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*, %class.D*)* @_ZN1DC2ERKS_, null, metadata !20, metadata !32, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [D]
+!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
+!31 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2ERKS_", i32 19, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*, %class.D*)* @_ZN1DC2ERKS_, null, metadata !20, metadata !32, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [D]
 !32 = metadata !{metadata !33}
 !33 = metadata !{metadata !34, metadata !35}
 !34 = metadata !{i32 786689, metadata !31, metadata !"this", metadata !6, i32 16777235, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 19]
diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll
index d506e3ff4bdd..6e59915fe13e 100644
--- a/test/DebugInfo/X86/empty-and-one-elem-array.ll
+++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll
@@ -59,30 +59,30 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"func", metadata !"func", metadata !"", metadata !6, i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @func, null, null, metadata !1, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [func]
+!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"func", metadata !"func", metadata !"", i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @func, null, null, metadata !1, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [func]
 !6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 786688, metadata !11, metadata !"my_foo", metadata !6, i32 12, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_foo] [line 12]
-!11 = metadata !{i32 786443, metadata !5, i32 11, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Sandbox/llvm/test.c]
-!12 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [from ]
+!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Sandbox/llvm/test.c]
+!12 = metadata !{i32 786451, metadata !32, null, metadata !"foo", i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [from ]
 !13 = metadata !{metadata !14, metadata !15}
-!14 = metadata !{i32 786445, metadata !12, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
-!15 = metadata !{i32 786445, metadata !12, metadata !"b", metadata !6, i32 3, i64 32, i64 32, i64 32, i32 0, metadata !16} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from ]
-!16 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32, i64 32, i32 0, i32 0, metadata !9, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int]
+!14 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!15 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"b", i32 3, i64 32, i64 32, i64 32, i32 0, metadata !16} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from ]
+!16 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 32, i32 0, i32 0, metadata !9, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int]
 !17 = metadata !{metadata !18}
 !18 = metadata !{i32 786465, i64 0, i64 1} ; [ DW_TAG_subrange_type ] [0, 1]
 !19 = metadata !{i32 12, i32 0, metadata !11, null}
 !20 = metadata !{i32 786688, metadata !11, metadata !"my_bar", metadata !6, i32 13, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_bar] [line 13]
-!21 = metadata !{i32 786451, null, metadata !"bar", metadata !6, i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !22, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] [from ]
+!21 = metadata !{i32 786451, metadata !32, null, metadata !"bar", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !22, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] [from ]
 !22 = metadata !{metadata !23, metadata !24}
-!23 = metadata !{i32 786445, metadata !21, metadata !"a", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 7, size 32, align 32, offset 0] [from int]
-!24 = metadata !{i32 786445, metadata !21, metadata !"b", metadata !6, i32 8, i64 0, i64 32, i64 32, i32 0, metadata !25} ; [ DW_TAG_member ] [b] [line 8, size 0, align 32, offset 32] [from ]
-!25 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!23 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"a", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 7, size 32, align 32, offset 0] [from int]
+!24 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"b", i32 8, i64 0, i64 32, i64 32, i32 0, metadata !25} ; [ DW_TAG_member ] [b] [line 8, size 0, align 32, offset 32] [from ]
+!25 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
 !26 = metadata !{metadata !27}
 !27 = metadata !{i32 786465, i64 0, i64 0} ; [ DW_TAG_subrange_type ] [0, 0]
 !28 = metadata !{i32 13, i32 0, metadata !11, null}
diff --git a/test/DebugInfo/X86/empty-array.ll b/test/DebugInfo/X86/empty-array.ll
index 0d8c245757dc..ace115610ebc 100644
--- a/test/DebugInfo/X86/empty-array.ll
+++ b/test/DebugInfo/X86/empty-array.ll
@@ -24,19 +24,19 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
 !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
+!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !14}
-!9 = metadata !{i32 786445, metadata !7, metadata !"x", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
-!10 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
-!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
+!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !12 = metadata !{metadata !13}
 !13 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] [unbound]
-!14 = metadata !{i32 786478, i32 0, metadata !7, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
+!14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
 !15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !16 = metadata !{null, metadata !17}
 !17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll
index 457f38e46487..6de15f6404cb 100644
--- a/test/DebugInfo/X86/ending-run.ll
+++ b/test/DebugInfo/X86/ending-run.ll
@@ -28,10 +28,10 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"callee", metadata !"callee", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !19, metadata !"callee", metadata !"callee", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9, metadata !9}
@@ -41,7 +41,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !12 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !13 = metadata !{i32 5, i32 5, metadata !5, null}
 !14 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !6, i32 8, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786443, metadata !5, i32 7, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 786443, metadata !19, metadata !5, i32 7, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
 !16 = metadata !{i32 8, i32 9, metadata !15, null}
 !17 = metadata !{i32 8, i32 18, metadata !15, null}
 !18 = metadata !{i32 9, i32 5, metadata !15, null}
diff --git a/test/DebugInfo/X86/enum-class.ll b/test/DebugInfo/X86/enum-class.ll
index b30b1c10d4f3..22728116d9bb 100644
--- a/test/DebugInfo/X86/enum-class.ll
+++ b/test/DebugInfo/X86/enum-class.ll
@@ -7,18 +7,18 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !4, metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !22, i32 4, metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !3, metadata !8, metadata !12}
-!3 = metadata !{i32 786436, null, metadata !"A", metadata !4, i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!3 = metadata !{i32 786436, metadata !4, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
 !4 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{metadata !7}
 !7 = metadata !{i32 786472, metadata !"A1", i64 1} ; [ DW_TAG_enumerator ]
-!8 = metadata !{i32 786436, null, metadata !"B", metadata !4, i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
-!9 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786436, metadata !4, null, metadata !"B", i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 786472, metadata !"B1", i64 1} ; [ DW_TAG_enumerator ]
-!12 = metadata !{i32 786436, null, metadata !"C", metadata !4, i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!12 = metadata !{i32 786436, metadata !4, null, metadata !"C", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
 !13 = metadata !{metadata !14}
 !14 = metadata !{i32 786472, metadata !"C1", i64 1} ; [ DW_TAG_enumerator ]
 !15 = metadata !{i32 0}
diff --git a/test/DebugInfo/X86/enum-fwd-decl.ll b/test/DebugInfo/X86/enum-fwd-decl.ll
index c2ab1bf355c2..33d807e30548 100644
--- a/test/DebugInfo/X86/enum-fwd-decl.ll
+++ b/test/DebugInfo/X86/enum-fwd-decl.ll
@@ -5,12 +5,12 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"e", metadata !"e", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i16* @e, null} ; [ DW_TAG_variable ] [e] [line 2] [def]
 !6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786436, null, metadata !"E", metadata !6, i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [fwd] [from ]
+!7 = metadata !{i32 786436, metadata !6, null, metadata !"E", i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [fwd] [from ]
 
 ; CHECK: DW_TAG_enumeration_type
 ; CHECK-NEXT: DW_AT_name
diff --git a/test/DebugInfo/X86/fission-cu.ll b/test/DebugInfo/X86/fission-cu.ll
index f72944995537..71381cec1158 100644
--- a/test/DebugInfo/X86/fission-cu.ll
+++ b/test/DebugInfo/X86/fission-cu.ll
@@ -1,16 +1,17 @@
 ; RUN: llc -split-dwarf=Enable -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
 ; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
+; RUN: llvm-readobj --relocations %t | FileCheck --check-prefix=OBJ %s
 
 @a = common global i32 0, align 4
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !"baz.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !"baz.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
 !6 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !8 = metadata !{metadata !"baz.c", metadata !"/usr/local/google/home/echristo/tmp"}
 
 ; Check that the skeleton compile unit contains the proper attributes:
@@ -48,9 +49,9 @@
 ; CHECK: DW_AT_producer  DW_FORM_GNU_str_index
 ; CHECK: DW_AT_language  DW_FORM_data2
 ; CHECK: DW_AT_name      DW_FORM_GNU_str_index
-; CHECK: DW_AT_low_pc    DW_FORM_GNU_addr_index
-; CHECK: DW_AT_stmt_list DW_FORM_data4
-; CHECK: DW_AT_comp_dir  DW_FORM_GNU_str_index
+; CHECK-NOT: DW_AT_low_pc
+; CHECK-NOT: DW_AT_stmt_list
+; CHECK-NOT: DW_AT_comp_dir
 ; CHECK: DW_AT_GNU_dwo_id        DW_FORM_data8
 
 ; CHECK: [2] DW_TAG_base_type    DW_CHILDREN_no
@@ -72,29 +73,40 @@
 ; CHECK: DW_AT_producer [DW_FORM_GNU_str_index] ( indexed (00000000) string = "clang version 3.3 (trunk 169021) (llvm/trunk 169020)")
 ; CHECK: DW_AT_language [DW_FORM_data2]        (0x000c)
 ; CHECK: DW_AT_name [DW_FORM_GNU_str_index]    ( indexed (00000001) string = "baz.c")
-; CHECK: DW_AT_low_pc [DW_FORM_GNU_addr_index]     ( indexed (00000000) address = 0x0000000000000000)
+; CHECK-NOT: DW_AT_low_pc
+; CHECK-NOT: DW_AT_stmt_list
+; CHECK-NOT: DW_AT_comp_dir
 ; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8]  (0x0000000000000000)
 ; CHECK: DW_TAG_base_type
-; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000004) string = "int")
+; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000003) string = "int")
 ; CHECK: DW_TAG_variable
-; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000003) string = "a")
-; CHECK: DW_AT_type [DW_FORM_ref4]       (cu + 0x001e => {0x0000001e})
+; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000002) string = "a")
+; CHECK: DW_AT_type [DW_FORM_ref4]       (cu + 0x0018 => {0x00000018})
 ; CHECK: DW_AT_external [DW_FORM_flag_present]   (true)
 ; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01)
 ; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01)
-; CHECK: DW_AT_location [DW_FORM_block1] (<0x02> fb 01 )
+; CHECK: DW_AT_location [DW_FORM_block1] (<0x02> fb 00 )
 
 
 ; CHECK: .debug_str.dwo contents:
 ; CHECK: 0x00000000: "clang version 3.3 (trunk 169021) (llvm/trunk 169020)"
 ; CHECK: 0x00000035: "baz.c"
-; CHECK: 0x0000003b: "/usr/local/google/home/echristo/tmp"
-; CHECK: 0x0000005f: "a"
-; CHECK: 0x00000061: "int"
+; CHECK: 0x0000003b: "a"
+; CHECK: 0x0000003d: "int"
 
 ; CHECK: .debug_str_offsets.dwo contents:
 ; CHECK: 0x00000000: 00000000
 ; CHECK: 0x00000004: 00000035
 ; CHECK: 0x00000008: 0000003b
-; CHECK: 0x0000000c: 0000005f
-; CHECK: 0x00000010: 00000061
+; CHECK: 0x0000000c: 0000003d
+
+; Object file checks
+; For x86-64-linux we should have this set of relocations for the debug info section
+;
+; OBJ: .debug_info
+; OBJ-NEXT: R_X86_64_32 .debug_abbrev
+; OBJ-NEXT: R_X86_64_32 .debug_str
+; OBJ-NEXT: R_X86_64_32 .debug_addr
+; OBJ-NEXT: R_X86_64_32 .debug_line
+; OBJ-NEXT: R_X86_64_32 .debug_str
+; OBJ-NEXT: }
diff --git a/test/DebugInfo/X86/line-info.ll b/test/DebugInfo/X86/line-info.ll
index 42875eed0f27..0c0a7ab51d4b 100644
--- a/test/DebugInfo/X86/line-info.ll
+++ b/test/DebugInfo/X86/line-info.ll
@@ -1,7 +1,8 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin -filetype=obj -O0 < %s > %t
 ; RUN: llvm-dwarfdump %t | FileCheck %s
 
-; CHECK: 2      0      1   0  is_stmt
+; CHECK: [[FILEID:[0-9]+]]]{{.*}}list0.h
+; CHECK: [[FILEID]]      0      1   0  is_stmt{{$}}
 
 ; IR generated from clang -g -emit-llvm with the following source:
 ; list0.h:
@@ -13,7 +14,7 @@
 ; int main() {
 ; }
 
-define i32 @foo(i32 %x) nounwind uwtable {
+define i32 @foo(i32 %x) #0 {
 entry:
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
@@ -24,29 +25,34 @@ entry:
   ret i32 %inc, !dbg !16
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
 
-define i32 @main() nounwind uwtable {
+define i32 @main() #0 {
 entry:
   ret i32 0, !dbg !17
 }
 
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/pr14566/list0.c] [DW_LANG_C99]
-!1 = metadata !{i32 0}
-!3 = metadata !{metadata !5, metadata !10}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!6 = metadata !{i32 786473, metadata !"./list0.h", metadata !"/usr/local/google/home/blaikie/dev/scratch/pr14566", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !10}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{metadata !"./list0.h", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
+!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/./list0.h]
+!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, i32 0, metadata !11, metadata !"main", metadata !"main", metadata !"", metadata !11, i32 2, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
-!11 = metadata !{i32 786473, metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch/pr14566", null} ; [ DW_TAG_file_type ]
-!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
+!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/list0.c]
+!12 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !13 = metadata !{metadata !9}
-!14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1]
-!15 = metadata !{i32 1, i32 0, metadata !5, null}
-!16 = metadata !{i32 2, i32 0, metadata !5, null}
+!14 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1]
+!15 = metadata !{i32 1, i32 0, metadata !4, null}
+!16 = metadata !{i32 2, i32 0, metadata !4, null}
 !17 = metadata !{i32 3, i32 0, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !10, metadata !11} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/pr14566/list0.c]
+!18 = metadata !{i32 786443, metadata !11, metadata !10} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/list0.c]
diff --git a/test/DebugInfo/X86/linkage-name.ll b/test/DebugInfo/X86/linkage-name.ll
index c33a91ca88ec..9440f3a994e1 100644
--- a/test/DebugInfo/X86/linkage-name.ll
+++ b/test/DebugInfo/X86/linkage-name.ll
@@ -26,18 +26,18 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16, i32 5} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !6, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16, i32 5} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9, metadata !10, metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
+!11 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
 !12 = metadata !{metadata !13}
-!13 = metadata !{i32 786478, i32 0, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", metadata !6, i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 786478, metadata !6, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ]
 !14 = metadata !{metadata !15}
 !15 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
 !16 = metadata !{metadata !17}
@@ -45,10 +45,10 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !18 = metadata !{metadata !20}
 !20 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 9, metadata !11, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ]
 !21 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777221, metadata !22, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
 !23 = metadata !{i32 5, i32 8, metadata !5, null}
 !24 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554437, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !25 = metadata !{i32 5, i32 14, metadata !5, null}
 !26 = metadata !{i32 6, i32 4, metadata !27, null}
-!27 = metadata !{i32 786443, metadata !5, i32 5, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 786443, metadata !6, metadata !5, i32 5, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
 !28 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"}
diff --git a/test/DebugInfo/X86/low-pc-cu.ll b/test/DebugInfo/X86/low-pc-cu.ll
index 1e57fcd4a864..4dd5aafe18ea 100644
--- a/test/DebugInfo/X86/low-pc-cu.ll
+++ b/test/DebugInfo/X86/low-pc-cu.ll
@@ -14,16 +14,16 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !12}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"q", metadata !"q", metadata !"_Z1qv", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !"_Z1qv", i32 0, metadata !6, metadata !"q", metadata !"q", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 786478, i32 0, metadata !6, metadata !"t", metadata !"t", metadata !"", metadata !6, i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 786478, metadata !"", i32 0, metadata !6, metadata !"t", metadata !"t", metadata !6, i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !10} ; [ DW_TAG_subprogram ]
 !13 = metadata !{i32 7, i32 1, metadata !14, null}
 !14 = metadata !{i32 786443, metadata !5, i32 5, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/misched-dbg-value.ll b/test/DebugInfo/X86/misched-dbg-value.ll
index 7fe8441102ec..0980e23b7517 100644
--- a/test/DebugInfo/X86/misched-dbg-value.ll
+++ b/test/DebugInfo/X86/misched-dbg-value.ll
@@ -89,9 +89,9 @@ attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !3, metadata !"clang version 3.3 (trunk 175015)", i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, i32 12, metadata !3, metadata !"clang version 3.3 (trunk 175015)", i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99]
 !1 = metadata !{metadata !2}
-!2 = metadata !{i32 786436, null, metadata !"", metadata !3, i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [from ]
+!2 = metadata !{i32 786436, metadata !82, null, metadata !"", i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [from ]
 !3 = metadata !{i32 786473, metadata !82} ; [ DW_TAG_file_type ]
 !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8, metadata !9}
 !5 = metadata !{i32 786472, metadata !"Ident1", i64 0} ; [ DW_TAG_enumerator ] [Ident1 :: 0]
@@ -101,16 +101,16 @@ attributes #1 = { nounwind readnone }
 !9 = metadata !{i32 786472, metadata !"Ident5", i64 10003} ; [ DW_TAG_enumerator ] [Ident5 :: 10003]
 !10 = metadata !{i32 0}
 !11 = metadata !{metadata !12}
-!12 = metadata !{i32 786478, i32 0, metadata !3, metadata !"Proc8", metadata !"Proc8", metadata !"", metadata !3, i32 180, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void (i32*, [51 x i32]*, i32, i32)* @Proc8, null, null, metadata !22, i32 185} ; [ DW_TAG_subprogram ] [line 180] [def] [scope 185] [Proc8]
+!12 = metadata !{i32 786478, metadata !3, metadata !"Proc8", metadata !"Proc8", metadata !"", metadata !3, i32 180, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void (i32*, [51 x i32]*, i32, i32)* @Proc8, null, null, metadata !22, i32 185} ; [ DW_TAG_subprogram ] [line 180] [def] [scope 185] [Proc8]
 !13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !14 = metadata !{null, metadata !15, metadata !17, metadata !21, metadata !21}
-!15 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!16 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!17 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!18 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 1632, i64 32, i32 0, i32 0, metadata !16, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int]
+!15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!16 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!18 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1632, i64 32, i32 0, i32 0, metadata !16, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int]
 !19 = metadata !{metadata !20}
 !20 = metadata !{i32 786465, i64 0, i64 51}       ; [ DW_TAG_subrange_type ] [0, 50]
-!21 = metadata !{i32 786454, null, metadata !"OneToFifty", metadata !3, i32 132, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [OneToFifty] [line 132, size 0, align 0, offset 0] [from int]
+!21 = metadata !{i32 786454, metadata !82, null, metadata !"OneToFifty", i32 132, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [OneToFifty] [line 132, size 0, align 0, offset 0] [from int]
 !22 = metadata !{metadata !23, metadata !24, metadata !25, metadata !26, metadata !27, metadata !28}
 !23 = metadata !{i32 786689, metadata !12, metadata !"Array1Par", metadata !3, i32 16777397, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [Array1Par] [line 181]
 !24 = metadata !{i32 786689, metadata !12, metadata !"Array2Par", metadata !3, i32 33554614, metadata !17, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [Array2Par] [line 182]
@@ -120,36 +120,36 @@ attributes #1 = { nounwind readnone }
 !28 = metadata !{i32 786688, metadata !12, metadata !"IntIndex", metadata !3, i32 187, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [IntIndex] [line 187]
 !29 = metadata !{metadata !30, metadata !35, metadata !36, metadata !38, metadata !39, metadata !40, metadata !42, metadata !46, metadata !63}
 !30 = metadata !{i32 786484, i32 0, null, metadata !"Version", metadata !"Version", metadata !"", metadata !3, i32 111, metadata !31, i32 0, i32 1, [4 x i8]* @Version, null} ; [ DW_TAG_variable ] [Version] [line 111] [def]
-!31 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32, i64 8, i32 0, i32 0, metadata !32, metadata !33, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char]
-!32 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!31 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 8, i32 0, i32 0, metadata !32, metadata !33, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char]
+!32 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
 !33 = metadata !{metadata !34}
 !34 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
 !35 = metadata !{i32 786484, i32 0, null, metadata !"IntGlob", metadata !"IntGlob", metadata !"", metadata !3, i32 171, metadata !16, i32 0, i32 1, i32* @IntGlob, null} ; [ DW_TAG_variable ] [IntGlob] [line 171] [def]
 !36 = metadata !{i32 786484, i32 0, null, metadata !"BoolGlob", metadata !"BoolGlob", metadata !"", metadata !3, i32 172, metadata !37, i32 0, i32 1, i32* @BoolGlob, null} ; [ DW_TAG_variable ] [BoolGlob] [line 172] [def]
-!37 = metadata !{i32 786454, null, metadata !"boolean", metadata !3, i32 149, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [boolean] [line 149, size 0, align 0, offset 0] [from int]
+!37 = metadata !{i32 786454, metadata !82, null, metadata !"boolean", i32 149, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [boolean] [line 149, size 0, align 0, offset 0] [from int]
 !38 = metadata !{i32 786484, i32 0, null, metadata !"Char1Glob", metadata !"Char1Glob", metadata !"", metadata !3, i32 173, metadata !32, i32 0, i32 1, i8* @Char1Glob, null} ; [ DW_TAG_variable ] [Char1Glob] [line 173] [def]
 !39 = metadata !{i32 786484, i32 0, null, metadata !"Char2Glob", metadata !"Char2Glob", metadata !"", metadata !3, i32 174, metadata !32, i32 0, i32 1, i8* @Char2Glob, null} ; [ DW_TAG_variable ] [Char2Glob] [line 174] [def]
 !40 = metadata !{i32 786484, i32 0, null, metadata !"Array1Glob", metadata !"Array1Glob", metadata !"", metadata !3, i32 175, metadata !41, i32 0, i32 1, [51 x i32]* @Array1Glob, null} ; [ DW_TAG_variable ] [Array1Glob] [line 175] [def]
-!41 = metadata !{i32 786454, null, metadata !"Array1Dim", metadata !3, i32 135, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [Array1Dim] [line 135, size 0, align 0, offset 0] [from ]
+!41 = metadata !{i32 786454, metadata !82, null, metadata !"Array1Dim", i32 135, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [Array1Dim] [line 135, size 0, align 0, offset 0] [from ]
 !42 = metadata !{i32 786484, i32 0, null, metadata !"Array2Glob", metadata !"Array2Glob", metadata !"", metadata !3, i32 176, metadata !43, i32 0, i32 1, [51 x [51 x i32]]* @Array2Glob, null} ; [ DW_TAG_variable ] [Array2Glob] [line 176] [def]
-!43 = metadata !{i32 786454, null, metadata !"Array2Dim", metadata !3, i32 136, i64 0, i64 0, i64 0, i32 0, metadata !44} ; [ DW_TAG_typedef ] [Array2Dim] [line 136, size 0, align 0, offset 0] [from ]
-!44 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 83232, i64 32, i32 0, i32 0, metadata !16, metadata !45, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 83232, align 32, offset 0] [from int]
+!43 = metadata !{i32 786454, metadata !82, null, metadata !"Array2Dim", i32 136, i64 0, i64 0, i64 0, i32 0, metadata !44} ; [ DW_TAG_typedef ] [Array2Dim] [line 136, size 0, align 0, offset 0] [from ]
+!44 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 83232, i64 32, i32 0, i32 0, metadata !16, metadata !45, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 83232, align 32, offset 0] [from int]
 !45 = metadata !{metadata !20, metadata !20}
 !46 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlb", metadata !"PtrGlb", metadata !"", metadata !3, i32 177, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlb, null} ; [ DW_TAG_variable ] [PtrGlb] [line 177] [def]
-!47 = metadata !{i32 786454, null, metadata !"RecordPtr", metadata !3, i32 148, i64 0, i64 0, i64 0, i32 0, metadata !48} ; [ DW_TAG_typedef ] [RecordPtr] [line 148, size 0, align 0, offset 0] [from ]
-!48 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from RecordType]
-!49 = metadata !{i32 786454, null, metadata !"RecordType", metadata !3, i32 147, i64 0, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_typedef ] [RecordType] [line 147, size 0, align 0, offset 0] [from Record]
-!50 = metadata !{i32 786451, null, metadata !"Record", metadata !3, i32 138, i64 448, i64 64, i32 0, i32 0, null, metadata !51, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [from ]
+!47 = metadata !{i32 786454, metadata !82, null, metadata !"RecordPtr", i32 148, i64 0, i64 0, i64 0, i32 0, metadata !48} ; [ DW_TAG_typedef ] [RecordPtr] [line 148, size 0, align 0, offset 0] [from ]
+!48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from RecordType]
+!49 = metadata !{i32 786454, metadata !82, null, metadata !"RecordType", i32 147, i64 0, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_typedef ] [RecordType] [line 147, size 0, align 0, offset 0] [from Record]
+!50 = metadata !{i32 786451, metadata !82, null, metadata !"Record", i32 138, i64 448, i64 64, i32 0, i32 0, null, metadata !51, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [from ]
 !51 = metadata !{metadata !52, metadata !54, metadata !56, metadata !57, metadata !58}
-!52 = metadata !{i32 786445, metadata !50, metadata !"PtrComp", metadata !3, i32 140, i64 64, i64 64, i64 0, i32 0, metadata !53} ; [ DW_TAG_member ] [PtrComp] [line 140, size 64, align 64, offset 0] [from ]
-!53 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Record]
-!54 = metadata !{i32 786445, metadata !50, metadata !"Discr", metadata !3, i32 141, i64 32, i64 32, i64 64, i32 0, metadata !55} ; [ DW_TAG_member ] [Discr] [line 141, size 32, align 32, offset 64] [from Enumeration]
-!55 = metadata !{i32 786454, null, metadata !"Enumeration", metadata !3, i32 128, i64 0, i64 0, i64 0, i32 0, metadata !2} ; [ DW_TAG_typedef ] [Enumeration] [line 128, size 0, align 0, offset 0] [from ]
-!56 = metadata !{i32 786445, metadata !50, metadata !"EnumComp", metadata !3, i32 142, i64 32, i64 32, i64 96, i32 0, metadata !55} ; [ DW_TAG_member ] [EnumComp] [line 142, size 32, align 32, offset 96] [from Enumeration]
-!57 = metadata !{i32 786445, metadata !50, metadata !"IntComp", metadata !3, i32 143, i64 32, i64 32, i64 128, i32 0, metadata !21} ; [ DW_TAG_member ] [IntComp] [line 143, size 32, align 32, offset 128] [from OneToFifty]
-!58 = metadata !{i32 786445, metadata !50, metadata !"StringComp", metadata !3, i32 144, i64 248, i64 8, i64 160, i32 0, metadata !59} ; [ DW_TAG_member ] [StringComp] [line 144, size 248, align 8, offset 160] [from String30]
-!59 = metadata !{i32 786454, null, metadata !"String30", metadata !3, i32 134, i64 0, i64 0, i64 0, i32 0, metadata !60} ; [ DW_TAG_typedef ] [String30] [line 134, size 0, align 0, offset 0] [from ]
-!60 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 248, i64 8, i32 0, i32 0, metadata !32, metadata !61, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char]
+!52 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"PtrComp", i32 140, i64 64, i64 64, i64 0, i32 0, metadata !53} ; [ DW_TAG_member ] [PtrComp] [line 140, size 64, align 64, offset 0] [from ]
+!53 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Record]
+!54 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"Discr", i32 141, i64 32, i64 32, i64 64, i32 0, metadata !55} ; [ DW_TAG_member ] [Discr] [line 141, size 32, align 32, offset 64] [from Enumeration]
+!55 = metadata !{i32 786454, metadata !82, null, metadata !"Enumeration", i32 128, i64 0, i64 0, i64 0, i32 0, metadata !2} ; [ DW_TAG_typedef ] [Enumeration] [line 128, size 0, align 0, offset 0] [from ]
+!56 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"EnumComp", i32 142, i64 32, i64 32, i64 96, i32 0, metadata !55} ; [ DW_TAG_member ] [EnumComp] [line 142, size 32, align 32, offset 96] [from Enumeration]
+!57 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"IntComp", i32 143, i64 32, i64 32, i64 128, i32 0, metadata !21} ; [ DW_TAG_member ] [IntComp] [line 143, size 32, align 32, offset 128] [from OneToFifty]
+!58 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"StringComp", i32 144, i64 248, i64 8, i64 160, i32 0, metadata !59} ; [ DW_TAG_member ] [StringComp] [line 144, size 248, align 8, offset 160] [from String30]
+!59 = metadata !{i32 786454, metadata !82, null, metadata !"String30", i32 134, i64 0, i64 0, i64 0, i32 0, metadata !60} ; [ DW_TAG_typedef ] [String30] [line 134, size 0, align 0, offset 0] [from ]
+!60 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 248, i64 8, i32 0, i32 0, metadata !32, metadata !61, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char]
 !61 = metadata !{metadata !62}
 !62 = metadata !{i32 786465, i64 0, i64 31}       ; [ DW_TAG_subrange_type ] [0, 30]
 !63 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlbNext", metadata !"PtrGlbNext", metadata !"", metadata !3, i32 178, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlbNext, null} ; [ DW_TAG_variable ] [PtrGlbNext] [line 178] [def]
diff --git a/test/DebugInfo/X86/multiple-at-const-val.ll b/test/DebugInfo/X86/multiple-at-const-val.ll
index c7e5e949b241..f6ca10bcc4ca 100644
--- a/test/DebugInfo/X86/multiple-at-const-val.ll
+++ b/test/DebugInfo/X86/multiple-at-const-val.ll
@@ -31,25 +31,25 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !961, metadata !"clang version 3.3 (trunk 174207)", i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786, metadata !""} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !961, metadata !"clang version 3.3 (trunk 174207)", i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786, metadata !""} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !26}
 !4 = metadata !{i32 786489, null, metadata !"std", metadata !5, i32 48} ; [ DW_TAG_namespace ]
 !5 = metadata !{i32 786473, metadata !1801} ; [ DW_TAG_file_type ]
 !25 = metadata !{i32 786472, metadata !"_S_os_fmtflags_end", i64 65536} ; [ DW_TAG_enumerator ]
-!26 = metadata !{i32 786436, metadata !4, metadata !"_Ios_Iostate", metadata !5, i32 146, i64 32, i64 32, i32 0, i32 0, null, metadata !27, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!26 = metadata !{i32 786436, metadata !1801, metadata !4, metadata !"_Ios_Iostate", i32 146, i64 32, i64 32, i32 0, i32 0, null, metadata !27, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
 !27 = metadata !{metadata !28, metadata !29, metadata !30, metadata !31, metadata !32}
 !28 = metadata !{i32 786472, metadata !"_S_goodbit", i64 0} ; [ DW_TAG_enumerator ] [_S_goodbit :: 0]
 !29 = metadata !{i32 786472, metadata !"_S_badbit", i64 1} ; [ DW_TAG_enumerator ] [_S_badbit :: 1]
 !30 = metadata !{i32 786472, metadata !"_S_eofbit", i64 2} ; [ DW_TAG_enumerator ] [_S_eofbit :: 2]
 !31 = metadata !{i32 786472, metadata !"_S_failbit", i64 4} ; [ DW_TAG_enumerator ] [_S_failbit :: 4]
 !32 = metadata !{i32 786472, metadata !"_S_os_ostate_end", i64 65536} ; [ DW_TAG_enumerator ] [_S_os_ostate_end :: 65536]
-!49 = metadata !{i32 786434, metadata !4, metadata !"os_base", metadata !5, i32 200, i64 1728, i64 64, i32 0, i32 0, null, metadata !50, i32 0, metadata !49, null} ; [ DW_TAG_class_type ]
+!49 = metadata !{i32 786434, metadata !1801, metadata !4, metadata !"os_base", i32 200, i64 1728, i64 64, i32 0, i32 0, null, metadata !50, i32 0, metadata !49, null} ; [ DW_TAG_class_type ]
 !50 = metadata !{metadata !77}
 !54 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !55, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !55 = metadata !{metadata !56}
-!56 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!77 = metadata !{i32 786445, metadata !49, metadata !"badbit", metadata !5, i32 331, i64 0, i64 0, i64 0, i32 4096, metadata !78, i32 1} ; [ DW_TAG_member ]
-!78 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !79} ; [ DW_TAG_const_type ]
+!56 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!77 = metadata !{i32 786445, metadata !1801, metadata !49, metadata !"badbit", i32 331, i64 0, i64 0, i64 0, i32 4096, metadata !78, i32 1} ; [ DW_TAG_member ]
+!78 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !79} ; [ DW_TAG_const_type ]
 !79 = metadata !{i32 786454, metadata !49, metadata !"ostate", metadata !5, i32 327, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_typedef ]
 !955 = metadata !{i32 0}
 !956 = metadata !{metadata !960}
diff --git a/test/DebugInfo/X86/nondefault-subrange-array.ll b/test/DebugInfo/X86/nondefault-subrange-array.ll
index c09b1ea2c4af..33a6f8ba9e50 100644
--- a/test/DebugInfo/X86/nondefault-subrange-array.ll
+++ b/test/DebugInfo/X86/nondefault-subrange-array.ll
@@ -27,19 +27,19 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
 !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
+!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !14}
-!9 = metadata !{i32 786445, metadata !7, metadata !"x", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
-!10 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
-!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
+!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !12 = metadata !{metadata !13}
 !13 = metadata !{i32 786465, i64 -3, i64 42} ; [ DW_TAG_subrange_type ] [-3, 39]
-!14 = metadata !{i32 786478, i32 0, metadata !7, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
+!14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
 !15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !16 = metadata !{null, metadata !17}
 !17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll
index 6e64742af772..1847d2c10fdf 100644
--- a/test/DebugInfo/X86/objc-fwd-decl.ll
+++ b/test/DebugInfo/X86/objc-fwd-decl.ll
@@ -12,13 +12,13 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10, !11, !12}
 
-!0 = metadata !{i32 786449, i32 0, i32 16, metadata !6, metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !13, i32 16, metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a, null} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 786451, null, metadata !"FooBarBaz", metadata !6, i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16} ; [ DW_TAG_structure_type ]
+!7 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 786451, metadata !13, null, metadata !"FooBarBaz", i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16} ; [ DW_TAG_structure_type ]
 !9 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
 !10 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
 !11 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll
index 308620708ee6..3bb93e7251b8 100644
--- a/test/DebugInfo/X86/op_deref.ll
+++ b/test/DebugInfo/X86/op_deref.ll
@@ -59,29 +59,29 @@ declare void @llvm.stackrestore(i8*) nounwind
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 786689, metadata !5, metadata !"s", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !11 = metadata !{i32 1, i32 26, metadata !5, null}
 !12 = metadata !{i32 3, i32 13, metadata !13, null}
-!13 = metadata !{i32 786443, metadata !5, i32 2, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 786443, metadata !6, metadata !5, i32 2, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
 !14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 0, i32 0, i64 2} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !16 = metadata !{metadata !17}
 !17 = metadata !{i32 786465, i64 0, i64 -1}        ; [ DW_TAG_subrange_type ]
 !18 = metadata !{i32 3, i32 7, metadata !13, null}
 !19 = metadata !{i32 786688, metadata !13, metadata !"i", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
 !20 = metadata !{i32 4, i32 7, metadata !13, null}
 !21 = metadata !{i32 5, i32 8, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !13, i32 5, i32 3, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !6, metadata !13, i32 5, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{i32 6, i32 5, metadata !24, null}
-!24 = metadata !{i32 786443, metadata !22, i32 5, i32 27, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{i32 786443, metadata !6, metadata !22, i32 5, i32 27, i32 2} ; [ DW_TAG_lexical_block ]
 !25 = metadata !{i32 7, i32 3, metadata !24, null}
 !26 = metadata !{i32 5, i32 22, metadata !22, null}
 !27 = metadata !{i32 8, i32 1, metadata !13, null}
diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll
index a38a96bb1c88..aa560587a602 100644
--- a/test/DebugInfo/X86/pointer-type-size.ll
+++ b/test/DebugInfo/X86/pointer-type-size.ll
@@ -10,15 +10,15 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 147882)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 147882)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass, null} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 720937, metadata !13} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786451, null, metadata !"crass", metadata !6, i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!7 = metadata !{i32 786451, metadata !13, null, metadata !"crass", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786445, metadata !7, metadata !"ptr", metadata !6, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
-!11 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786445, metadata !13, metadata !7, metadata !"ptr", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
+!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !13 = metadata !{metadata !"foo.c", metadata !"/Users/echristo/tmp"}
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
index 29042e2c9384..61df4ad0baa6 100644
--- a/test/DebugInfo/X86/pr11300.ll
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -31,17 +31,17 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !20}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !18, i32 4} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !18, i32 4} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !32} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 720898, null, metadata !"foo", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ]
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 720898, metadata !32, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ]
 !11 = metadata !{metadata !12}
-!12 = metadata !{i32 720942, i32 0, metadata !10, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 2} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 720942, metadata !6, metadata !10, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 2} ; [ DW_TAG_subprogram ]
 !13 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !14 = metadata !{null, metadata !15}
 !15 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
@@ -49,16 +49,16 @@ entry:
 !17 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !18 = metadata !{metadata !19}
 !19 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!20 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21, i32 2} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21, i32 2} ; [ DW_TAG_subprogram ]
 !21 = metadata !{metadata !22}
 !22 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !23 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !24 = metadata !{i32 4, i32 15, metadata !5, null}
 !25 = metadata !{i32 4, i32 20, metadata !26, null}
-!26 = metadata !{i32 786443, metadata !5, i32 4, i32 18, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 786443, metadata !6, metadata !5, i32 4, i32 18, i32 0} ; [ DW_TAG_lexical_block ]
 !27 = metadata !{i32 4, i32 30, metadata !26, null}
 !28 = metadata !{i32 786689, metadata !20, metadata !"this", metadata !6, i32 16777218, metadata !15, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
 !29 = metadata !{i32 2, i32 8, metadata !20, null}
 !30 = metadata !{i32 2, i32 15, metadata !31, null}
-!31 = metadata !{i32 786443, metadata !20, i32 2, i32 14, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!31 = metadata !{i32 786443, metadata !6, metadata !20, i32 2, i32 14, i32 1} ; [ DW_TAG_lexical_block ]
 !32 = metadata !{metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build"}
diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll
index a24e477d5245..295c018c5e13 100644
--- a/test/DebugInfo/X86/pr12831.ll
+++ b/test/DebugInfo/X86/pr12831.ll
@@ -77,10 +77,10 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !159, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !159, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !106, metadata !107, metadata !126, metadata !127}
-!5 = metadata !{i32 786478, i32 0, null, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", metadata !6, i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 0, null, metadata !"writeExpr", metadata !"writeExpr", metadata !6, i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"BPLFunctionWriter2.ii", metadata !"/home/peter/crashdelta", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null, metadata !9}
@@ -91,32 +91,32 @@ entry:
 !13 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
 !14 = metadata !{i32 786434, null, metadata !"BPLModuleWriter", metadata !6, i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ]
 !15 = metadata !{metadata !16}
-!16 = metadata !{i32 786478, i32 0, metadata !14, metadata !"writeIntrinsic", metadata !"writeIntrinsic", metadata !"_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", metadata !6, i32 13, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !101, i32 13} ; [ DW_TAG_subprogram ]
+!16 = metadata !{i32 786478, metadata !"_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", i32 0, metadata !14, metadata !"writeIntrinsic", metadata !"writeIntrinsic", metadata !6, i32 13, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !101, i32 13} ; [ DW_TAG_subprogram ]
 !17 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !18 = metadata !{null, metadata !19, metadata !20}
 !19 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !14} ; [ DW_TAG_pointer_type ]
 !20 = metadata !{i32 786434, null, metadata !"function<void ()>", metadata !6, i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, metadata !97} ; [ DW_TAG_class_type ]
 !21 = metadata !{metadata !22, metadata !51, metadata !58, metadata !86, metadata !92}
-!22 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"", metadata !6, i32 8, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !47, i32 0, metadata !49, i32 8} ; [ DW_TAG_subprogram ]
+!22 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 8, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !47, i32 0, metadata !49, i32 8} ; [ DW_TAG_subprogram ]
 !23 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !24 = metadata !{null, metadata !25, metadata !26}
 !25 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ]
 !26 = metadata !{i32 786434, metadata !5, metadata !"", metadata !6, i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !27, i32 0, null, null} ; [ DW_TAG_class_type ]
 !27 = metadata !{metadata !28, metadata !35, metadata !41}
-!28 = metadata !{i32 786478, i32 0, metadata !26, metadata !"operator()", metadata !"operator()", metadata !"", metadata !6, i32 20, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !33, i32 20} ; [ DW_TAG_subprogram ]
+!28 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"operator()", metadata !"operator()", metadata !6, i32 20, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !33, i32 20} ; [ DW_TAG_subprogram ]
 !29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !30 = metadata !{null, metadata !31}
 !31 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !32} ; [ DW_TAG_pointer_type ]
 !32 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_const_type ]
 !33 = metadata !{metadata !34}
 !34 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!35 = metadata !{i32 786478, i32 0, metadata !26, metadata !"~", metadata !"~", metadata !"", metadata !6, i32 20, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !39, i32 20} ; [ DW_TAG_subprogram ]
+!35 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"~", metadata !"~", metadata !6, i32 20, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !39, i32 20} ; [ DW_TAG_subprogram ]
 !36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !37 = metadata !{null, metadata !38}
 !38 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !26} ; [ DW_TAG_pointer_type ]
 !39 = metadata !{metadata !40}
 !40 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!41 = metadata !{i32 786478, i32 0, metadata !26, metadata !"", metadata !"", metadata !"", metadata !6, i32 20, metadata !42, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !45, i32 20} ; [ DW_TAG_subprogram ]
+!41 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"", metadata !"", metadata !6, i32 20, metadata !42, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !45, i32 20} ; [ DW_TAG_subprogram ]
 !42 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !43 = metadata !{null, metadata !38, metadata !44}
 !44 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_rvalue_reference_type ]
@@ -126,32 +126,32 @@ entry:
 !48 = metadata !{i32 786479, null, metadata !"_Functor", metadata !26, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
 !49 = metadata !{metadata !50}
 !50 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!51 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<function<void ()> >", metadata !"function<function<void ()> >", metadata !"", metadata !6, i32 8, metadata !52, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !54, i32 0, metadata !56, i32 8} ; [ DW_TAG_subprogram ]
+!51 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<function<void ()> >", metadata !"function<function<void ()> >", metadata !6, i32 8, metadata !52, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !54, i32 0, metadata !56, i32 8} ; [ DW_TAG_subprogram ]
 !52 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !53, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !53 = metadata !{null, metadata !25, metadata !20}
 !54 = metadata !{metadata !55}
 !55 = metadata !{i32 786479, null, metadata !"_Functor", metadata !20, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
 !56 = metadata !{metadata !57}
 !57 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!58 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"", metadata !6, i32 8, metadata !59, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !82, i32 0, metadata !84, i32 8} ; [ DW_TAG_subprogram ]
+!58 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 8, metadata !59, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !82, i32 0, metadata !84, i32 8} ; [ DW_TAG_subprogram ]
 !59 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !60, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !60 = metadata !{null, metadata !25, metadata !61}
 !61 = metadata !{i32 786434, metadata !5, metadata !"", metadata !6, i32 23, i64 8, i64 8, i32 0, i32 0, null, metadata !62, i32 0, null, null} ; [ DW_TAG_class_type ]
 !62 = metadata !{metadata !63, metadata !70, metadata !76}
-!63 = metadata !{i32 786478, i32 0, metadata !61, metadata !"operator()", metadata !"operator()", metadata !"", metadata !6, i32 23, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !68, i32 23} ; [ DW_TAG_subprogram ]
+!63 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"operator()", metadata !"operator()", metadata !6, i32 23, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !68, i32 23} ; [ DW_TAG_subprogram ]
 !64 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !65 = metadata !{null, metadata !66}
 !66 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !67} ; [ DW_TAG_pointer_type ]
 !67 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_const_type ]
 !68 = metadata !{metadata !69}
 !69 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!70 = metadata !{i32 786478, i32 0, metadata !61, metadata !"~", metadata !"~", metadata !"", metadata !6, i32 23, metadata !71, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !74, i32 23} ; [ DW_TAG_subprogram ]
+!70 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"~", metadata !"~", metadata !6, i32 23, metadata !71, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !74, i32 23} ; [ DW_TAG_subprogram ]
 !71 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !72, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !72 = metadata !{null, metadata !73}
 !73 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !61} ; [ DW_TAG_pointer_type ]
 !74 = metadata !{metadata !75}
 !75 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!76 = metadata !{i32 786478, i32 0, metadata !61, metadata !"", metadata !"", metadata !"", metadata !6, i32 23, metadata !77, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !80, i32 23} ; [ DW_TAG_subprogram ]
+!76 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"", metadata !"", metadata !6, i32 23, metadata !77, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !80, i32 23} ; [ DW_TAG_subprogram ]
 !77 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !78, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !78 = metadata !{null, metadata !73, metadata !79}
 !79 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_rvalue_reference_type ]
@@ -161,13 +161,13 @@ entry:
 !83 = metadata !{i32 786479, null, metadata !"_Functor", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
 !84 = metadata !{metadata !85}
 !85 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!86 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function", metadata !"function", metadata !"", metadata !6, i32 6, metadata !87, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !90, i32 6} ; [ DW_TAG_subprogram ]
+!86 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function", metadata !"function", metadata !6, i32 6, metadata !87, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !90, i32 6} ; [ DW_TAG_subprogram ]
 !87 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !88, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !88 = metadata !{null, metadata !25, metadata !89}
 !89 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_rvalue_reference_type ]
 !90 = metadata !{metadata !91}
 !91 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!92 = metadata !{i32 786478, i32 0, metadata !20, metadata !"~function", metadata !"~function", metadata !"", metadata !6, i32 6, metadata !93, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !95, i32 6} ; [ DW_TAG_subprogram ]
+!92 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"~function", metadata !"~function", metadata !6, i32 6, metadata !93, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !95, i32 6} ; [ DW_TAG_subprogram ]
 !93 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !94, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !94 = metadata !{null, metadata !25}
 !95 = metadata !{metadata !96}
@@ -178,20 +178,20 @@ entry:
 !100 = metadata !{null}
 !101 = metadata !{metadata !102}
 !102 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
-!103 = metadata !{i32 786478, i32 0, metadata !10, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", metadata !6, i32 17, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !104, i32 17} ; [ DW_TAG_subprogram ]
+!103 = metadata !{i32 786478, metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 0, metadata !10, metadata !"writeExpr", metadata !"writeExpr", metadata !6, i32 17, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !104, i32 17} ; [ DW_TAG_subprogram ]
 !104 = metadata !{metadata !105}
 !105 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
-!106 = metadata !{i32 786478, i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !6, i32 8, metadata !59, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !82, metadata !58, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
-!107 = metadata !{i32 786478, i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !6, i32 3, metadata !108, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !111, metadata !113, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
+!106 = metadata !{i32 786478, metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 8, metadata !59, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !82, metadata !58, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
+!107 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 3, metadata !108, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !111, metadata !113, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
 !108 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !109, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !109 = metadata !{null, metadata !110}
 !110 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_reference_type ]
 !111 = metadata !{metadata !112}
 !112 = metadata !{i32 786479, null, metadata !"_Tp", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!113 = metadata !{i32 786478, i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !6, i32 3, metadata !108, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !111, i32 0, metadata !124, i32 3} ; [ DW_TAG_subprogram ]
+!113 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 3, metadata !108, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !111, i32 0, metadata !124, i32 3} ; [ DW_TAG_subprogram ]
 !114 = metadata !{i32 786434, null, metadata !"_Base_manager", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !115, i32 0, null, null} ; [ DW_TAG_class_type ]
 !115 = metadata !{metadata !116, metadata !113}
-!116 = metadata !{i32 786478, i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !6, i32 3, metadata !117, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !120, i32 0, metadata !122, i32 3} ; [ DW_TAG_subprogram ]
+!116 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 3, metadata !117, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !120, i32 0, metadata !122, i32 3} ; [ DW_TAG_subprogram ]
 !117 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !118, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !118 = metadata !{null, metadata !119}
 !119 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_reference_type ]
@@ -201,8 +201,8 @@ entry:
 !123 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
 !124 = metadata !{metadata !125}
 !125 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
-!126 = metadata !{i32 786478, i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !6, i32 8, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !47, metadata !22, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
-!127 = metadata !{i32 786478, i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !6, i32 3, metadata !117, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !120, metadata !116, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
+!126 = metadata !{i32 786478, metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 8, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !47, metadata !22, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
+!127 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 3, metadata !117, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !120, metadata !116, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
 !128 = metadata !{metadata !130}
 !130 = metadata !{i32 786484, i32 0, metadata !114, metadata !"__stored_locally", metadata !"__stored_locally", metadata !"__stored_locally", metadata !6, i32 2, metadata !131, i32 1, i32 1, i1 true} ; [ DW_TAG_variable ]
 !131 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !132} ; [ DW_TAG_const_type ]
diff --git a/test/DebugInfo/X86/pr13303.ll b/test/DebugInfo/X86/pr13303.ll
index 52ab413bfd68..34956237ae0b 100644
--- a/test/DebugInfo/X86/pr13303.ll
+++ b/test/DebugInfo/X86/pr13303.ll
@@ -14,13 +14,13 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 160143)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 160143)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
 !6 = metadata !{i32 786473, metadata !"PR13303.c", metadata !"/home/probinson", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 1, i32 14, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !5, i32 1, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c]
+!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c]
diff --git a/test/DebugInfo/X86/prologue-stack.ll b/test/DebugInfo/X86/prologue-stack.ll
index 30cca604e7b9..6e4917747c14 100644
--- a/test/DebugInfo/X86/prologue-stack.ll
+++ b/test/DebugInfo/X86/prologue-stack.ll
@@ -20,14 +20,14 @@ declare i32 @callme(i32)
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2]
+!5 = metadata !{i32 786478, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2]
 !6 = metadata !{i32 786473, metadata !"bar.c", metadata !"/usr/local/google/home/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 5, i32 3, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/bar.c]
+!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 4, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/bar.c]
 !12 = metadata !{i32 6, i32 3, metadata !11, null}
diff --git a/test/DebugInfo/X86/rvalue-ref.ll b/test/DebugInfo/X86/rvalue-ref.ll
index 8e194c48f165..ae2e3d4578c5 100644
--- a/test/DebugInfo/X86/rvalue-ref.ll
+++ b/test/DebugInfo/X86/rvalue-ref.ll
@@ -22,10 +22,10 @@ declare i32 @printf(i8*, ...)
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !16} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null, metadata !9}
@@ -34,6 +34,6 @@ declare i32 @printf(i8*, ...)
 !11 = metadata !{i32 786689, metadata !5, metadata !"i", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !12 = metadata !{i32 4, i32 17, metadata !5, null}
 !13 = metadata !{i32 6, i32 3, metadata !14, null}
-!14 = metadata !{i32 786443, metadata !5, i32 5, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 786443, metadata !6, metadata !5, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{i32 7, i32 1, metadata !14, null}
 !16 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"}
diff --git a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
index 2f3c64826b66..39a026c35494 100644
--- a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
+++ b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
@@ -42,26 +42,26 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0, !10}
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.3", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !23, i32 12, metadata !"clang version 3.3", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test", metadata !"test", metadata !"", metadata !6, i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @test, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [test]
+!5 = metadata !{i32 786478, metadata !23, metadata !"test", metadata !"test", metadata !"", metadata !6, i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @test, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [test]
 !6 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9}
 !9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786449, i32 0, i32 12, metadata !14, metadata !"clang version 3.3 (trunk 172862)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !11, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!10 = metadata !{i32 786449, metadata !24, i32 12, metadata !"clang version 3.3 (trunk 172862)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !11, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !11 = metadata !{metadata !13}
-!13 = metadata !{i32 786478, i32 0, metadata !14, metadata !"fn", metadata !"fn", metadata !"", metadata !14, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @fn, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [fn]
+!13 = metadata !{i32 786478, metadata !24, metadata !"fn", metadata !"fn", metadata !"", metadata !14, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @fn, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [fn]
 !14 = metadata !{i32 786473, metadata !24} ; [ DW_TAG_file_type ]
 !15 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777218, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 2]
 !16 = metadata !{i32 2, i32 0, metadata !5, null}
 !17 = metadata !{i32 4, i32 0, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !5, i32 3, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786443, metadata !23, metadata !5, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !19 = metadata !{i32 786689, metadata !13, metadata !"a", metadata !14, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
 !20 = metadata !{i32 1, i32 0, metadata !13, null}
 !21 = metadata !{i32 2, i32 0, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !13, i32 1, i32 0, metadata !14, i32 0} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !24, metadata !13, i32 1, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{metadata !"simple.c", metadata !"/private/tmp"}
 !24 = metadata !{metadata !"simple2.c", metadata !"/private/tmp"}
diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll
index 3bcce1ceff54..8df281d08ea6 100644
--- a/test/DebugInfo/X86/stringpool.ll
+++ b/test/DebugInfo/X86/stringpool.ll
@@ -5,12 +5,12 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 143009)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.1 (trunk 143009)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 720948, i32 0, null, metadata !"yyyy", metadata !"yyyy", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @yyyy, null} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 720937, metadata !8} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !8 = metadata !{metadata !"z.c", metadata !"/home/nicholas"}
 
 ; Verify that we refer to 'yyyy' with a relocation.
diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll
index 7a94154451f6..bdf104f07e39 100644
--- a/test/DebugInfo/X86/struct-loc.ll
+++ b/test/DebugInfo/X86/struct-loc.ll
@@ -13,13 +13,13 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!7 = metadata !{i32 786451, metadata !11, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786445, metadata !7, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786445, metadata !11, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !11 = metadata !{metadata !"struct_bug.c", metadata !"/Users/echristo/tmp"}
diff --git a/test/DebugInfo/X86/subrange-type.ll b/test/DebugInfo/X86/subrange-type.ll
index bd7fdab1f631..efc5bf0417f1 100644
--- a/test/DebugInfo/X86/subrange-type.ll
+++ b/test/DebugInfo/X86/subrange-type.ll
@@ -20,17 +20,17 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.3 (trunk 171472) (llvm/trunk 171487)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !17, i32 12, metadata !"clang version 3.3 (trunk 171472) (llvm/trunk 171487)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [main]
+!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [main]
 !6 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 786688, metadata !11, metadata !"i", metadata !6, i32 4, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 4]
-!11 = metadata !{i32 786443, metadata !5, i32 3, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.c]
-!12 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 64, i64 32, i32 0, i32 0, metadata !9, metadata !13, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from int]
+!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.c]
+!12 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 64, i64 32, i32 0, i32 0, metadata !9, metadata !13, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from int]
 !13 = metadata !{metadata !14}
 !14 = metadata !{i32 786465, i64 0, i64 2}        ; [ DW_TAG_subrange_type ] [0, 1]
 !15 = metadata !{i32 4, i32 0, metadata !11, null}
diff --git a/test/DebugInfo/X86/subreg.ll b/test/DebugInfo/X86/subreg.ll
index 1666ebbf4a1e..027589b3d995 100644
--- a/test/DebugInfo/X86/subreg.ll
+++ b/test/DebugInfo/X86/subreg.ll
@@ -20,11 +20,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !9 = metadata !{metadata !1}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"zzz", metadata !2, i32 16777219, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i16 (i16)* @f, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i16 (i16)* @f, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{null}
 !6 = metadata !{i32 786468, metadata !3, metadata !"short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 4, i32 22, metadata !8, null}
-!8 = metadata !{i32 786443, metadata !1, i32 3, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 786443, metadata !2, metadata !1, i32 3, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/union-template.ll b/test/DebugInfo/X86/union-template.ll
new file mode 100644
index 000000000000..0f5538e8b40e
--- /dev/null
+++ b/test/DebugInfo/X86/union-template.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O0 -mtriple=x86_64-linux-gnu %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+; Verify that we've emitted template arguments for the union
+; CHECK: DW_TAG_union_type
+; CHECK-NEXT: "Value<float>"
+; CHECK: DW_TAG_template_type_parameter
+; CHECK: "T"
+
+%"union.PR15637::Value" = type { i32 }
+
+@_ZN7PR156371fE = global %"union.PR15637::Value" zeroinitializer, align 4
+
+define void @_ZN7PR156371gEf(float %value) #0 {
+entry:
+  %value.addr = alloca float, align 4
+  %tempValue = alloca %"union.PR15637::Value", align 4
+  store float %value, float* %value.addr, align 4
+  call void @llvm.dbg.declare(metadata !{float* %value.addr}, metadata !23), !dbg !24
+  call void @llvm.dbg.declare(metadata !{%"union.PR15637::Value"* %tempValue}, metadata !25), !dbg !26
+  ret void, !dbg !27
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 (trunk 178499) (llvm/trunk 178472)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"foo.cc", metadata !"/usr/local/google/home/echristo/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"g", metadata !"g", metadata !"_ZN7PR156371gEf", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (float)* @_ZN7PR156371gEf, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [g]
+!5 = metadata !{i32 786489, metadata !1, null, metadata !"PR15637", i32 1} ; [ DW_TAG_namespace ] [PR15637] [line 1]
+!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786484, i32 0, metadata !5, metadata !"f", metadata !"f", metadata !"_ZN7PR156371fE", metadata !11, i32 6, metadata !12, i32 0, i32 1, %"union.PR15637::Value"* @_ZN7PR156371fE, null} ; [ DW_TAG_variable ] [f] [line 6] [def]
+!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.cc]
+!12 = metadata !{i32 786455, metadata !1, metadata !5, metadata !"Value<float>", i32 2, i64 32, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null, metadata !21} ; [ DW_TAG_union_type ] [Value<float>] [line 2, size 32, align 32, offset 0] [from ]
+!13 = metadata !{metadata !14, metadata !16}
+!14 = metadata !{i32 786445, metadata !1, metadata !12, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!15 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!16 = metadata !{i32 786478, metadata !1, metadata !12, metadata !"Value", metadata !"Value", metadata !"", i32 2, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !20, i32 2} ; [ DW_TAG_subprogram ] [line 2] [Value]
+!17 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = metadata !{null, metadata !19}
+!19 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Value<float>]
+!20 = metadata !{i32 786468}
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 786479, null, metadata !"T", metadata !8, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!23 = metadata !{i32 786689, metadata !4, metadata !"value", metadata !11, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [value] [line 3]
+!24 = metadata !{i32 3, i32 0, metadata !4, null}
+!25 = metadata !{i32 786688, metadata !4, metadata !"tempValue", metadata !11, i32 4, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [tempValue] [line 4]
+!26 = metadata !{i32 4, i32 0, metadata !4, null}
+!27 = metadata !{i32 5, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/X86/vector.ll b/test/DebugInfo/X86/vector.ll
index 3d2573cf79d4..570adf9e4329 100644
--- a/test/DebugInfo/X86/vector.ll
+++ b/test/DebugInfo/X86/vector.ll
@@ -11,14 +11,14 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, <4 x i32>* @a, null} ; [ DW_TAG_variable ] [a] [line 3] [def]
 !6 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786454, null, metadata !"v4si", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [v4si] [line 1, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = metadata !{i32 786454, metadata !12, null, metadata !"v4si", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [v4si] [line 1, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
 !12 = metadata !{metadata !"foo.c", metadata !"/Users/echristo"}
diff --git a/test/DebugInfo/array.ll b/test/DebugInfo/array.ll
index 5b7d6492defe..30771104912f 100644
--- a/test/DebugInfo/array.ll
+++ b/test/DebugInfo/array.ll
@@ -14,14 +14,14 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129138)", i1 false, metadata !"", i32 0, null, null, metadata !13, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.0 (trunk 129138)", i1 false, metadata !"", i32 0, null, null, metadata !13, null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 786688, metadata !7, metadata !"a", metadata !1, i32 4, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
-!7 = metadata !{i32 786443, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 786443, metadata !1, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
 !8 = metadata !{i32 786433, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !5, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !9 = metadata !{metadata !10}
 ;CHECK: DW_TAG_subrange_type
diff --git a/test/DebugInfo/dwarf-public-names.ll b/test/DebugInfo/dwarf-public-names.ll
index d2dfdd978e28..ec07c232fafa 100644
--- a/test/DebugInfo/dwarf-public-names.ll
+++ b/test/DebugInfo/dwarf-public-names.ll
@@ -1,6 +1,7 @@
+; REQUIRES: object-emission
+
 ; RUN: llc -generate-dwarf-pubnames -filetype=obj -o %t.o < %s
 ; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck %s
-;
 ; ModuleID = 'dwarf-public-names.cpp'
 ;
 ; Generated from:
@@ -85,27 +86,27 @@ attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !2 = metadata !{metadata !3, metadata !18, metadata !19, metadata !20}
-!3 = metadata !{i32 786478, i32 0, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", metadata !4, i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
+!3 = metadata !{i32 786478, metadata !4, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
 !4 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ]
 !5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{null, metadata !7}
 !7 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C]
-!8 = metadata !{i32 786451, null, metadata !"C", metadata !4, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [from ]
+!8 = metadata !{i32 786451, metadata !37, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [from ]
 !9 = metadata !{metadata !10, metadata !12, metadata !14}
-!10 = metadata !{i32 786445, metadata !8, metadata !"static_member_variable", metadata !4, i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
-!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!12 = metadata !{i32 786478, i32 0, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", metadata !4, i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
+!10 = metadata !{i32 786445, metadata !37, metadata !8, metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
 !13 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!14 = metadata !{i32 786478, i32 0, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", metadata !4, i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
+!14 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
 !15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !16 = metadata !{metadata !11}
 !17 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!18 = metadata !{i32 786478, i32 0, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", metadata !4, i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
-!19 = metadata !{i32 786478, i32 0, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", metadata !4, i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
-!20 = metadata !{i32 786478, i32 0, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", metadata !4, i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
+!18 = metadata !{i32 786478, metadata !4, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
+!19 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
+!20 = metadata !{i32 786478, metadata !4, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
 !21 = metadata !{i32 786489, null, metadata !"ns", metadata !4, i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
 !22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !23 = metadata !{null}
@@ -114,7 +115,7 @@ attributes #1 = { nounwind readnone }
 !26 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !4, i32 17, metadata !8, i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def]
 !27 = metadata !{i32 786484, i32 0, metadata !21, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !4, i32 27, metadata !11, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def]
 !28 = metadata !{i32 786689, metadata !3, metadata !"this", metadata !4, i32 16777225, metadata !29, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 9]
-!29 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C]
+!29 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C]
 !30 = metadata !{i32 9, i32 0, metadata !3, null}
 !31 = metadata !{i32 10, i32 0, metadata !3, null}
 !32 = metadata !{i32 11, i32 0, metadata !3, null}
diff --git a/test/DebugInfo/dwarfdump-test.test b/test/DebugInfo/dwarfdump-test.test
index 355445e6495b..058d6a36981a 100644
--- a/test/DebugInfo/dwarfdump-test.test
+++ b/test/DebugInfo/dwarfdump-test.test
@@ -8,11 +8,11 @@ RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \
 RUN:   --address=0x4004e8 --functions | FileCheck %s -check-prefix MANY_CU_1
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \
 RUN:   --address=0x4004f4 --functions | FileCheck %s -check-prefix MANY_CU_2
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
+RUN: llvm-dwarfdump "%p/Inputs/dwarfdump-test3.elf-x86-64 space" \
 RUN:   --address=0x640 --functions | FileCheck %s -check-prefix ABS_ORIGIN_1
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
+RUN: llvm-dwarfdump "%p/Inputs/dwarfdump-test3.elf-x86-64 space" \
 RUN:   --address=0x633 --functions | FileCheck %s -check-prefix INCLUDE_TEST_1
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
+RUN: llvm-dwarfdump "%p/Inputs/dwarfdump-test3.elf-x86-64 space" \
 RUN:   --address=0x62d --functions | FileCheck %s -check-prefix INCLUDE_TEST_2
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \
 RUN:   --address=0x62c --functions \
diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll
index 1698c7fb47e4..f302294031c0 100644
--- a/test/DebugInfo/inlined-vars.ll
+++ b/test/DebugInfo/inlined-vars.ll
@@ -17,16 +17,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 159419)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 159419)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !10}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202"} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786478, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", metadata !6, i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786478, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", metadata !6, i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{metadata !9, metadata !9}
 !13 = metadata !{metadata !14}
 !14 = metadata !{metadata !15, metadata !16}
@@ -51,3 +51,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !23 = metadata !{i32 4, i32 16, metadata !10, metadata !19}
 !24 = metadata !{i32 5, i32 3, metadata !10, metadata !19}
 !25 = metadata !{i32 6, i32 3, metadata !10, metadata !19}
+!26 = metadata !{metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202"}
diff --git a/test/DebugInfo/llvm-symbolizer.test b/test/DebugInfo/llvm-symbolizer.test
new file mode 100644
index 000000000000..163bd8ecb30b
--- /dev/null
+++ b/test/DebugInfo/llvm-symbolizer.test
@@ -0,0 +1,25 @@
+RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400559" > %t.input
+RUN: echo "%p/Inputs/dwarfdump-test4.elf-x86-64 0x62c" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x710" >> %t.input
+RUN: echo "\"%p/Inputs/dwarfdump-test3.elf-x86-64 space\" 0x633" >> %t.input
+
+RUN: llvm-symbolizer --functions --inlining --demangle=false < %t.input \
+RUN:    | FileCheck %s
+
+REQUIRES: shell
+
+CHECK:       main
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16
+CHECK:      _Z1cv
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test4-part1.cc:2
+CHECK:      inlined_h
+CHECK-NEXT: dwarfdump-inl-test.h:2
+CHECK-NEXT: inlined_g
+CHECK-NEXT: dwarfdump-inl-test.h:7
+CHECK-NEXT: inlined_f
+CHECK-NEXT: dwarfdump-inl-test.cc:3
+CHECK-NEXT: main
+CHECK-NEXT: dwarfdump-inl-test.cc:
+
+CHECK:       _Z3do1v
+CHECK-NEXT: dwarfdump-test3-decl.h:7
diff --git a/test/DebugInfo/member-pointers.ll b/test/DebugInfo/member-pointers.ll
index 031056e72d90..428986590425 100644
--- a/test/DebugInfo/member-pointers.ll
+++ b/test/DebugInfo/member-pointers.ll
@@ -1,3 +1,5 @@
+; REQUIRES: object-emission
+
 ; RUN: llc -filetype=obj -O0 < %s > %t
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 ; CHECK: DW_TAG_ptr_to_member_type
@@ -8,6 +10,7 @@
 ; CHECK: DW_TAG_ptr_to_member_type
 ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]       (cu + {{.*}} => {[[TYPE]]})
 ; IR generated from clang -g with the following source:
+; XFAIL: hexagon
 ; struct S {
 ; };
 ;
@@ -19,14 +22,14 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !10}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 4, metadata !7, i32 0, i32 1, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
 !6 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int]
-!8 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786451, null, metadata !"S", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !1, i32 0, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [from ]
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786451, metadata !15, null, metadata !"S", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !1, i32 0, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [from ]
 !10 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !6, i32 5, metadata !11, i32 0, i32 1, { i64, i64 }* @y, null} ; [ DW_TAG_variable ] [y] [line 5] [def]
 !11 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !12, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
diff --git a/test/DebugInfo/namespace.ll b/test/DebugInfo/namespace.ll
new file mode 100644
index 000000000000..4ca777c3e5d8
--- /dev/null
+++ b/test/DebugInfo/namespace.ll
@@ -0,0 +1,44 @@
+; REQUIRES: object-emission
+
+; RUN: llc -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+; CHECK: debug_info contents
+; CHECK: DW_TAG_namespace
+; CHECK-NEXT: DW_AT_name{{.*}} = "A"
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1:[0-9]]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x03)
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_namespace
+; CHECK-NEXT: DW_AT_name{{.*}} = "B"
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2:[0-9]]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x01)
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: DW_AT_name{{.*}}= "i"
+; CHECK: file_names[  [[F1]]]{{.*}}debug-info-namespace.cpp
+; CHECK: file_names[  [[F2]]]{{.*}}foo.cpp
+
+; IR generated from clang/test/CodeGenCXX/debug-info-namespace.cpp, file paths
+; changed to protect the guilty. The C++ source code is simply:
+; namespace A {
+; #line 1 "foo.cpp"
+; namespace B {
+; int i;
+; }
+; }
+
+@_ZN1A1B1iE = global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !2, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !3, metadata !4, metadata !""} ; [ DW_TAG_compile_unit ] [/home/foo/debug-info-namespace.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 786473, metadata !2}          ; [ DW_TAG_file_type ] [/home/foo/debug-info-namespace.cpp]
+!2 = metadata !{metadata !"debug-info-namespace.cpp", metadata !"/home/foo"}
+!3 = metadata !{i32 0}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, metadata !6, metadata !"i", metadata !"i", metadata !"_ZN1A1B1iE", metadata !7, i32 2, metadata !10, i32 0, i32 1, i32* @_ZN1A1B1iE, null} ; [ DW_TAG_variable ] [i] [line 2] [def]
+!6 = metadata !{i32 786489, metadata !8, metadata !9, metadata !"B", i32 1} ; [ DW_TAG_namespace ] [B] [line 1]
+!7 = metadata !{i32 786473, metadata !8}          ; [ DW_TAG_file_type ] [/home/foo/foo.cpp]
+!8 = metadata !{metadata !"foo.cpp", metadata !"/home/foo"}
+!9 = metadata !{i32 786489, metadata !2, null, metadata !"A", i32 3} ; [ DW_TAG_namespace ] [A] [line 3]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
diff --git a/test/DebugInfo/two-cus-from-same-file.ll b/test/DebugInfo/two-cus-from-same-file.ll
index c751713244e7..784df8d25b4a 100644
--- a/test/DebugInfo/two-cus-from-same-file.ll
+++ b/test/DebugInfo/two-cus-from-same-file.ll
@@ -3,6 +3,8 @@
 ;   blow llc up and produces something reasonable.
 ;
 
+; REQUIRES: object-emission
+
 ; RUN: llc %s -o %t -filetype=obj -O0
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
@@ -32,16 +34,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0, !9}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null}
-!9 = metadata !{i32 786449, i32 0, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!9 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !10 = metadata !{metadata !12}
-!12 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ]
 !13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !14 = metadata !{metadata !15, metadata !15, metadata !16}
 !15 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
index 0ab02747ba1d..349db69e4c63 100644
--- a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
@@ -1,4 +1,5 @@
-; RUN: %lli -force-interpreter=true %s | grep 1
+; RUN: %lli -force-interpreter=true %s | FileCheck %s
+; CHECK: 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
index 0912897c05fa..9897602250a6 100644
--- a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
@@ -1,4 +1,5 @@
-; RUN: %lli_mcjit -force-interpreter=true %s | grep 1
+; RUN: %lli_mcjit -force-interpreter=true %s | FileCheck %s
+; CHECK: 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll b/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll
new file mode 100644
index 000000000000..3f402c593116
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll
@@ -0,0 +1,25 @@
+; RUN: %lli_mcjit %s
+;
+; Verify relocations to global symbols with addend work correctly.
+;
+; Compiled from this C code:
+;
+; int test[2] = { -1, 0 };
+; int *p = &test[1];
+; 
+; int main (void)
+; {
+;   return *p;
+; }
+; 
+
+@test = global [2 x i32] [i32 -1, i32 0], align 4
+@p = global i32* getelementptr inbounds ([2 x i32]* @test, i64 0, i64 1), align 8
+
+define i32 @main() {
+entry:
+  %0 = load i32** @p, align 8
+  %1 = load i32* %0, align 4
+  ret i32 %1
+}
+
diff --git a/test/ExecutionEngine/MCJIT/fpbitcast.ll b/test/ExecutionEngine/MCJIT/fpbitcast.ll
index fb5ab6f24215..ea39617547ab 100644
--- a/test/ExecutionEngine/MCJIT/fpbitcast.ll
+++ b/test/ExecutionEngine/MCJIT/fpbitcast.ll
@@ -1,5 +1,6 @@
-; RUN: %lli_mcjit -force-interpreter=true %s | grep 40091eb8
-;
+; RUN: %lli_mcjit -force-interpreter=true %s | FileCheck %s
+; CHECK: 40091eb8
+
 define i32 @test(double %x) {
 entry:
 	%x46.i = bitcast double %x to i64	
diff --git a/test/ExecutionEngine/fpbitcast.ll b/test/ExecutionEngine/fpbitcast.ll
index fa84be441010..e6d06f83db49 100644
--- a/test/ExecutionEngine/fpbitcast.ll
+++ b/test/ExecutionEngine/fpbitcast.ll
@@ -1,5 +1,6 @@
-; RUN: %lli -force-interpreter=true %s | grep 40091eb8
-;
+; RUN: %lli -force-interpreter=true %s | FileCheck %s
+; CHECK: 40091eb8
+
 define i32 @test(double %x) {
 entry:
 	%x46.i = bitcast double %x to i64	
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
index dd6a5bb240ec..1f8ae69b9868 100644
--- a/test/ExecutionEngine/lit.local.cfg
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -10,3 +10,5 @@ root = getRoot(config)
 if root.host_arch in ['PowerPC', 'AArch64']:
     config.unsupported = True
 
+if 'hexagon' in root.target_triple:
+    config.unsupported = True
diff --git a/test/ExecutionEngine/test-interp-vec-loadstore.ll b/test/ExecutionEngine/test-interp-vec-loadstore.ll
new file mode 100644
index 000000000000..e5007114c070
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-loadstore.ll
@@ -0,0 +1,85 @@
+; RUN: %lli -force-interpreter=true %s | FileCheck %s
+; XFAIL: mips
+; CHECK: 1
+; CHECK: 2
+; CHECK: 3
+; CHECK: 4
+; CHECK: 5.{{[0]+}}e+{{[0]+}}
+; CHECK: 6.{{[0]+}}e+{{[0]+}}
+; CHECK: 7.{{[0]+}}e+{{[0]+}}
+; CHECK: 8.{{[0]+}}e+{{[0]+}}
+; CHECK: 9.{{[0]+}}e+{{[0]+}}
+; CHECK: 1.{{[0]+}}e+{{[0]+}}1
+; CHECK: 1.1{{[0]+}}e+{{[0]+}}1
+; CHECK: 1.2{{[0]+}}e+{{[0]+}}1
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+
+@format_i32 = internal global [4 x i8] c"%d\0A\00"
+@format_float = internal global [4 x i8] c"%e\0A\00"
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+  %a = alloca <4 x i32>, align 16
+  %b = alloca <4 x double>, align 16
+  %c = alloca <4 x float>, align 16
+  
+  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %a, align 16
+
+  %val0 = load <4 x i32> *%a, align 16
+
+  %res_i32_0 = extractelement <4 x i32> %val0, i32 0
+  %res_i32_1 = extractelement <4 x i32> %val0, i32 1
+  %res_i32_2 = extractelement <4 x i32> %val0, i32 2
+  %res_i32_3 = extractelement <4 x i32> %val0, i32 3
+  
+  %ptr0 = getelementptr [4 x i8]* @format_i32, i32 0, i32 0
+  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_0)
+  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_1)
+  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_2)
+  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_3)
+
+  store <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, <4 x double>* %b, align 16
+
+  %val1 = load <4 x double> *%b, align 16
+
+  %res_double_0 = extractelement <4 x double> %val1, i32 0
+  %res_double_1 = extractelement <4 x double> %val1, i32 1
+  %res_double_2 = extractelement <4 x double> %val1, i32 2
+  %res_double_3 = extractelement <4 x double> %val1, i32 3
+  
+  %ptr1 = getelementptr [4 x i8]* @format_float, i32 0, i32 0
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_0)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_1)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_2)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_3)
+
+
+  store <4 x float> <float 9.0, float 10.0, float 11.0, float 12.0>, <4 x float>* %c, align 16
+  
+  %val2 = load <4 x float> *%c, align 16
+  
+  %ptr2 = getelementptr [4 x i8]* @format_float, i32 0, i32 0
+
+  ; by some reason printf doesn't print float correctly, so
+  ; floats are casted to doubles and are printed as doubles
+  
+  %res_serv_0 = extractelement <4 x float> %val2, i32 0
+  %res_float_0 = fpext float %res_serv_0 to double
+  %res_serv_1 = extractelement <4 x float> %val2, i32 1
+  %res_float_1 = fpext float %res_serv_1 to double
+  %res_serv_2 = extractelement <4 x float> %val2, i32 2
+  %res_float_2 = fpext float %res_serv_2 to double
+  %res_serv_3 = extractelement <4 x float> %val2, i32 3
+  %res_float_3 = fpext float %res_serv_3 to double
+
+ 
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_0)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_1)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_2)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_3)
+ 
+  
+  ret i32 0
+}
diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll
index fa4213e9a079..ec51caeb5868 100644
--- a/test/Instrumentation/AddressSanitizer/debug_info.ll
+++ b/test/Instrumentation/AddressSanitizer/debug_info.ll
@@ -37,7 +37,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"zzz", metadata !"zzz", metadata !"_Z3zzzi", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3zzzi, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [zzz]
+!5 = metadata !{i32 786478, metadata !6, metadata !"zzz", metadata !"zzz", metadata !"_Z3zzzi", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3zzzi, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [zzz]
 !6 = metadata !{i32 786473, metadata !16} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9}
diff --git a/test/Instrumentation/MemorySanitizer/unreachable.ll b/test/Instrumentation/MemorySanitizer/unreachable.ll
index 66a9575d3f59..c8130717c7da 100644
--- a/test/Instrumentation/MemorySanitizer/unreachable.ll
+++ b/test/Instrumentation/MemorySanitizer/unreachable.ll
@@ -21,3 +21,19 @@ exit:
 ; CHECK: @Func
 ; CHECK: store i32 0, {{.*}} @__msan_retval_tls
 ; CHECK: ret i32 42
+
+
+define i32 @UnreachableLoop() nounwind uwtable {
+entry:
+  ret i32 0
+
+zzz:
+  br label %xxx
+
+xxx:
+  br label %zzz
+}
+
+; CHECK: @UnreachableLoop
+; CHECK: store i32 0, {{.*}} @__msan_retval_tls
+; CHECK: ret i32 0
diff --git a/test/Instrumentation/ThreadSanitizer/read_from_global.ll b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
index a08453ac4a94..7b6b94edf1b1 100644
--- a/test/Instrumentation/ThreadSanitizer/read_from_global.ll
+++ b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
@@ -48,7 +48,7 @@ entry:
 }
 
 ; CHECK: define void @call_virtual_func
-; CHECK: __tsan_read
+; CHECK: __tsan_vptr_read
 ; CHECK: = load
 ; CHECK-NOT: __tsan_read
 ; CHECK: = load
diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
index 33c703b4c9bd..19dd45bda230 100644
--- a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
+++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
@@ -20,3 +20,36 @@ entry:
 ; CHECK: ret i32
 
 
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
+
+; Check that tsan converts mem intrinsics back to function calls.
+
+define void @MemCpyTest(i8* nocapture %x, i8* nocapture %y) {
+entry:
+    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
+    ret void
+; CHECK: define void @MemCpyTest
+; CHECK: call i8* @memcpy
+; CHECK: ret void
+}
+
+define void @MemMoveTest(i8* nocapture %x, i8* nocapture %y) {
+entry:
+    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
+    ret void
+; CHECK: define void @MemMoveTest
+; CHECK: call i8* @memmove
+; CHECK: ret void
+}
+
+define void @MemSetTest(i8* nocapture %x)  {
+entry:
+    tail call void @llvm.memset.p0i8.i64(i8* %x, i8 77, i64 16, i32 4, i1 false)
+    ret void
+; CHECK: define void @MemSetTest
+; CHECK: call i8* @memset
+; CHECK: ret void
+}
diff --git a/test/Instrumentation/ThreadSanitizer/vptr_read.ll b/test/Instrumentation/ThreadSanitizer/vptr_read.ll
new file mode 100644
index 000000000000..404ca3ffe50f
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/vptr_read.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+; Check that vptr reads are treated in a special way.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define i8 @Foo(i8* %a) nounwind uwtable {
+entry:
+; CHECK: call void @__tsan_vptr_read
+  %0 = load i8* %a, align 8, !tbaa !0
+  ret i8 %0
+}
+!0 = metadata !{metadata !"vtable pointer", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
+
diff --git a/test/Integer/2007-01-19-TruncSext.ll b/test/Integer/2007-01-19-TruncSext.ll
index 3fee6bc8b6e3..e6d89ddaf33e 100644
--- a/test/Integer/2007-01-19-TruncSext.ll
+++ b/test/Integer/2007-01-19-TruncSext.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
 ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
 ; RUN: diff %t1.ll %t2.ll
-; RUN: llvm-as < %s | lli --force-interpreter=true | grep -- -255
+; RUN: llvm-as < %s | lli --force-interpreter=true | FileCheck %s
+; CHECK: -255
 
 @ARRAY   = global [ 20 x i17 ] zeroinitializer
 @FORMAT  = constant [ 4 x i8 ] c"%d\0A\00"
diff --git a/test/Integer/fold-fpcast_bt.ll b/test/Integer/fold-fpcast_bt.ll
index 8e5f8386d775..0ce776dbf851 100644
--- a/test/Integer/fold-fpcast_bt.ll
+++ b/test/Integer/fold-fpcast_bt.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llvm-dis | not grep bitcast
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; CHECK-NOT: bitcast
 
 define i60 @test1() {
    ret i60 fptoui(float 0x400D9999A0000000 to i60)
diff --git a/test/Integer/packed_struct_bt.ll b/test/Integer/packed_struct_bt.ll
index 257c1c66ebac..b8301bab545c 100644
--- a/test/Integer/packed_struct_bt.ll
+++ b/test/Integer/packed_struct_bt.ll
@@ -1,9 +1,9 @@
 ; RUN: llvm-as < %s | llvm-dis > %t1.ll
 ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
 ; RUN: diff %t1.ll %t2.ll
-; RUN: not grep cast %t2.ll
-; RUN: grep "}>" %t2.ll
-; END.
+; RUN: FileCheck %s --input-file=%t2.ll
+; CHECK-NOT: cast
+; CHECK: }>
 
 %struct.anon = type <{ i8, i35, i35, i35 }>
 @foos = external global %struct.anon 
diff --git a/test/Linker/2003-01-30-LinkerRename.ll b/test/Linker/2003-01-30-LinkerRename.ll
index e7431ec15886..cbf754133411 100644
--- a/test/Linker/2003-01-30-LinkerRename.ll
+++ b/test/Linker/2003-01-30-LinkerRename.ll
@@ -3,7 +3,8 @@
 
 ; RUN: echo "define internal i32 @foo() { ret i32 7 } " | llvm-as > %t.1.bc
 ; RUN: llvm-as %s -o %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "@foo()" | grep -v internal
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: internal{{.*}}@foo{{[0-9]}}()
 
 define i32 @foo() { ret i32 0 }
 
diff --git a/test/Linker/2003-01-30-LinkerTypeRename.ll b/test/Linker/2003-01-30-LinkerTypeRename.ll
index 94fb5e0826c5..d61eb6d7abb3 100644
--- a/test/Linker/2003-01-30-LinkerTypeRename.ll
+++ b/test/Linker/2003-01-30-LinkerTypeRename.ll
@@ -3,8 +3,9 @@
 
 ; RUN: echo "%%Ty = type opaque @GV = external global %%Ty*" | llvm-as > %t.1.bc
 ; RUN: llvm-as < %s > %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "%%Ty " | not grep opaque
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: = global %Ty
 
 %Ty = type {i32}
 
-@GV = global %Ty* null
\ No newline at end of file
+@GV = global %Ty* null
diff --git a/test/Linker/2003-04-23-LinkOnceLost.ll b/test/Linker/2003-04-23-LinkOnceLost.ll
index 98a943a5344b..e4528906e024 100644
--- a/test/Linker/2003-04-23-LinkOnceLost.ll
+++ b/test/Linker/2003-04-23-LinkOnceLost.ll
@@ -4,7 +4,8 @@
 ; RUN: echo " define linkonce void @foo() { ret void } " | \
 ; RUN:   llvm-as -o %t.2.bc
 ; RUN: llvm-as %s -o %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep foo | grep linkonce
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: linkonce{{.*}}foo
 
 declare void @foo()
 
diff --git a/test/Linker/2003-05-31-LinkerRename.ll b/test/Linker/2003-05-31-LinkerRename.ll
index dff861dc4bb1..2e734beba6d9 100644
--- a/test/Linker/2003-05-31-LinkerRename.ll
+++ b/test/Linker/2003-05-31-LinkerRename.ll
@@ -6,7 +6,8 @@
 
 ; RUN: echo " define internal i32 @foo() { ret i32 7 } " | llvm-as > %t.1.bc
 ; RUN: llvm-as < %s > %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep internal | not grep "@foo("
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: internal {{.*}} @foo{{[0-9]}}(
 
 declare i32 @foo() 
 
diff --git a/test/Linker/2003-08-23-GlobalVarLinking.ll b/test/Linker/2003-08-23-GlobalVarLinking.ll
index e934836a6135..122bc41f554e 100644
--- a/test/Linker/2003-08-23-GlobalVarLinking.ll
+++ b/test/Linker/2003-08-23-GlobalVarLinking.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as < %s > %t.out1.bc
 ; RUN: echo "%%T1 = type opaque %%T2 = type opaque @S = external global { i32, %%T1* } declare void @F(%%T2*)"\
 ; RUN:   | llvm-as > %t.out2.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep opaque
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | FileCheck %s
+; CHECK-NOT: opaque
 
 ; After linking this testcase, there should be no opaque types left.  The two
 ; S's should cause the opaque type to be resolved to 'int'.
diff --git a/test/Linker/2003-08-24-InheritPtrSize.ll b/test/Linker/2003-08-24-InheritPtrSize.ll
index 51d544b83f90..dbaf9bc111b7 100644
--- a/test/Linker/2003-08-24-InheritPtrSize.ll
+++ b/test/Linker/2003-08-24-InheritPtrSize.ll
@@ -3,7 +3,8 @@
 
 ; RUN: llvm-as < %s > %t.out1.bc
 ; RUN: echo "" | llvm-as > %t.out2.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc 2>&1 | not grep warning
+; RUN: llvm-link %t.out1.bc %t.out2.bc 2>&1 | FileCheck %s 
+; CHECK-NOT: warning
 
 target datalayout = "e-p:64:64"
 
diff --git a/test/Linker/2004-12-03-DisagreeingType.ll b/test/Linker/2004-12-03-DisagreeingType.ll
index 73d7a4055046..63e15299252d 100644
--- a/test/Linker/2004-12-03-DisagreeingType.ll
+++ b/test/Linker/2004-12-03-DisagreeingType.ll
@@ -1,7 +1,8 @@
 ; RUN: echo "@G = weak global {{{{double}}}} zeroinitializer " | \
 ; RUN:   llvm-as > %t.out2.bc
 ; RUN: llvm-as < %s > %t.out1.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep "}"
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | FileCheck %s
+; CHECK-NOT: }
 
 ; When linked, the global above should be eliminated, being merged with the 
 ; global below.
diff --git a/test/Linker/2005-02-12-ConstantGlobals-2.ll b/test/Linker/2005-02-12-ConstantGlobals-2.ll
index 30bfafeb13b5..7d2e81314651 100644
--- a/test/Linker/2005-02-12-ConstantGlobals-2.ll
+++ b/test/Linker/2005-02-12-ConstantGlobals-2.ll
@@ -3,6 +3,7 @@
 
 ; RUN: echo "@X = external constant i32" | llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "global i32 7"
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: global i32 7
 
 @X = global i32 7
diff --git a/test/Linker/2005-02-12-ConstantGlobals.ll b/test/Linker/2005-02-12-ConstantGlobals.ll
index 93709cf50bed..db990604d9a8 100644
--- a/test/Linker/2005-02-12-ConstantGlobals.ll
+++ b/test/Linker/2005-02-12-ConstantGlobals.ll
@@ -3,6 +3,7 @@
 
 ; RUN: echo "@X = global i32 7" | llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "global i32 7"
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: global i32 7
 
 @X = external constant i32		; <i32*> [#uses=0]
diff --git a/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll b/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
index d7a34c841e63..b99b3a823755 100644
--- a/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
+++ b/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
@@ -1,7 +1,8 @@
 ; RUN: echo " @G = appending global [0 x i32] zeroinitializer " | \
 ; RUN:   llvm-as > %t.out2.bc
 ; RUN: llvm-as < %s > %t.out1.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc -S | grep "@G ="
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | FileCheck %s
+; CHECK: @G =
 
 ; When linked, the globals should be merged, and the result should still 
 ; be named '@G'.
diff --git a/test/Linker/2006-06-15-GlobalVarAlignment.ll b/test/Linker/2006-06-15-GlobalVarAlignment.ll
index eec8f637be0d..c9f9b0e2ab43 100644
--- a/test/Linker/2006-06-15-GlobalVarAlignment.ll
+++ b/test/Linker/2006-06-15-GlobalVarAlignment.ll
@@ -2,6 +2,7 @@
 
 ; RUN: echo "@X = global i32 7, align 8" | llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "align 8"
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: align 8
 
 @X = weak global i32 7, align 4
diff --git a/test/Linker/2008-03-07-DroppedSection_a.ll b/test/Linker/2008-03-07-DroppedSection_a.ll
index ec9d5c26ed3d..58baad95e498 100644
--- a/test/Linker/2008-03-07-DroppedSection_a.ll
+++ b/test/Linker/2008-03-07-DroppedSection_a.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: llvm-as < %p/2008-03-07-DroppedSection_b.ll > %t2.bc
 ; RUN: llvm-link %t.bc %t2.bc -o %t3.bc
-; RUN: llvm-dis < %t3.bc | grep ".data.init_task"
+; RUN: llvm-dis < %t3.bc | FileCheck %s
+; CHECK: .data.init_task
 
 ; ModuleID = 't.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Linker/2008-03-07-DroppedSection_b.ll b/test/Linker/2008-03-07-DroppedSection_b.ll
index 63b64f6aa1aa..9bcb80d796d1 100644
--- a/test/Linker/2008-03-07-DroppedSection_b.ll
+++ b/test/Linker/2008-03-07-DroppedSection_b.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: llvm-as < %p/2008-03-07-DroppedSection_a.ll > %t2.bc
 ; RUN: llvm-link %t.bc %t2.bc -o %t3.bc
-; RUN: llvm-dis < %t3.bc | grep ".data.init_task"
+; RUN: llvm-dis < %t3.bc | FileCheck %s
+; CHECK: .data.init_task
 
 ; ModuleID = 'u.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Linker/2008-06-26-AddressSpace.ll b/test/Linker/2008-06-26-AddressSpace.ll
index e1d35741e7f1..d4310bc816e2 100644
--- a/test/Linker/2008-06-26-AddressSpace.ll
+++ b/test/Linker/2008-06-26-AddressSpace.ll
@@ -2,8 +2,9 @@
 ; in different modules.
 ; RUN: llvm-as %s -o %t.foo1.bc
 ; RUN: echo | llvm-as -o %t.foo2.bc
-; RUN: llvm-link %t.foo2.bc %t.foo1.bc -S | grep "addrspace(2)"
-; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S | grep "addrspace(2)"
+; RUN: llvm-link %t.foo2.bc %t.foo1.bc -S | FileCheck %s
+; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S | FileCheck %s
+; CHECK: addrspace(2)
 ; rdar://6038021
 
 @G = addrspace(2) global i32 256 
diff --git a/test/Linker/2011-08-18-unique-class-type.ll b/test/Linker/2011-08-18-unique-class-type.ll
index cae1245522ef..328e83bd07b9 100644
--- a/test/Linker/2011-08-18-unique-class-type.ll
+++ b/test/Linker/2011-08-18-unique-class-type.ll
@@ -1,4 +1,6 @@
-; RUN: llvm-link %s %p/2011-08-18-unique-class-type2.ll -S -o - | grep DW_TAG_class_type | count 1
+; RUN: llvm-link %s %p/2011-08-18-unique-class-type2.ll -S -o - | FileCheck %s
+; CHECK: DW_TAG_class_type
+; CHECK-NOT: DW_TAG_class_type
 ; Test to check there is only one MDNode for class A after linking.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Linker/2011-08-18-unique-debug-type.ll b/test/Linker/2011-08-18-unique-debug-type.ll
index 696fdb3108b8..cc0df4d0165e 100644
--- a/test/Linker/2011-08-18-unique-debug-type.ll
+++ b/test/Linker/2011-08-18-unique-debug-type.ll
@@ -1,6 +1,6 @@
-
-; RUN: llvm-link %s %p/2011-08-18-unique-debug-type2.ll -S -o - | grep "int" | grep -v "^; ModuleID" | count 1
+; RUN: llvm-link %s %p/2011-08-18-unique-debug-type2.ll -S -o - | FileCheck %s
 ; Test to check only one MDNode for "int" after linking.
+; CHECK: !"int"
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.7.0"
 
diff --git a/test/Linker/AppendingLinkage.ll b/test/Linker/AppendingLinkage.ll
index 014ead91bd10..5beff5a10777 100644
--- a/test/Linker/AppendingLinkage.ll
+++ b/test/Linker/AppendingLinkage.ll
@@ -3,7 +3,8 @@
 ; RUN: echo "@X = appending global [1 x i32] [i32 8] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep 7 | grep 4 | grep 8
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: [i32 7, i32 4, i32 8]
 
 @X = appending global [2 x i32] [ i32 7, i32 4 ]		; <[2 x i32]*> [#uses=2]
 @Y = global i32* getelementptr ([2 x i32]* @X, i64 0, i64 0)		; <i32**> [#uses=0]
diff --git a/test/Linker/AppendingLinkage2.ll b/test/Linker/AppendingLinkage2.ll
index 7385efb1f9b0..341ca1606b81 100644
--- a/test/Linker/AppendingLinkage2.ll
+++ b/test/Linker/AppendingLinkage2.ll
@@ -3,6 +3,7 @@
 ; RUN: echo "@X = appending global [1 x i32] [i32 8] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep 7 | grep 8
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: [i32 7, i32 8]
 
 @X = appending global [1 x i32] [ i32 7 ]		; <[1 x i32]*> [#uses=0]
diff --git a/test/Linker/ConstantGlobals1.ll b/test/Linker/ConstantGlobals1.ll
index 716eb3d02e92..a2bb6fbfba8a 100644
--- a/test/Linker/ConstantGlobals1.ll
+++ b/test/Linker/ConstantGlobals1.ll
@@ -3,7 +3,8 @@
 ; RUN: echo "@X = constant [1 x i32] [i32 8] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: constant
 
 @X = external global [1 x i32]		; <[1 x i32]*> [#uses=0]
 
diff --git a/test/Linker/ConstantGlobals2.ll b/test/Linker/ConstantGlobals2.ll
index ad0f8e25f2c6..471377998713 100644
--- a/test/Linker/ConstantGlobals2.ll
+++ b/test/Linker/ConstantGlobals2.ll
@@ -3,7 +3,8 @@
 ; RUN: echo "@X = external global [1 x i32] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: constant
 
 @X = constant [1 x i32] [ i32 12 ]		; <[1 x i32]*> [#uses=0]
 
diff --git a/test/Linker/ConstantGlobals3.ll b/test/Linker/ConstantGlobals3.ll
index 5aa26bc29b09..6b4ed24c31cf 100644
--- a/test/Linker/ConstantGlobals3.ll
+++ b/test/Linker/ConstantGlobals3.ll
@@ -3,6 +3,7 @@
 ; RUN: echo "@X = external constant [1 x i32] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: constant
 
 @X = external global [1 x i32]		; <[1 x i32]*> [#uses=0]
diff --git a/test/Linker/link-global-to-func.ll b/test/Linker/link-global-to-func.ll
index 9d969d768dff..4d83fe59e894 100644
--- a/test/Linker/link-global-to-func.ll
+++ b/test/Linker/link-global-to-func.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as %s -o %t1.bc
 ; RUN: echo "declare void @__eprintf(i8*, i8*, i32, i8*) noreturn     define void @foo() {      tail call void @__eprintf( i8* undef, i8* undef, i32 4, i8* null ) noreturn nounwind       unreachable }" | llvm-as -o %t2.bc
-; RUN: llvm-link %t2.bc %t1.bc -S | grep __eprintf
-; RUN: llvm-link %t1.bc %t2.bc -S | grep __eprintf
+; RUN: llvm-link %t2.bc %t1.bc -S | FileCheck %s
+; RUN: llvm-link %t1.bc %t2.bc -S | FileCheck %s
+; CHECK: __eprintf
 
 ; rdar://6072702
 
diff --git a/test/Linker/linknamedmdnode.ll b/test/Linker/linknamedmdnode.ll
index e6b779f1fc5d..73e7554a9077 100644
--- a/test/Linker/linknamedmdnode.ll
+++ b/test/Linker/linknamedmdnode.ll
@@ -1,6 +1,7 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: llvm-as < %p/linknamedmdnode2.ll > %t2.bc
-; RUN: llvm-link %t.bc %t2.bc -S | grep "!llvm.stuff = !{!0, !1}"
+; RUN: llvm-link %t.bc %t2.bc -S | FileCheck %s
+; CHECK: !llvm.stuff = !{!0, !1}
 
 !0 = metadata !{i32 42}
 !llvm.stuff = !{!0}
diff --git a/test/Linker/redefinition.ll b/test/Linker/redefinition.ll
index 23ba6a100f06..64a8c341fedb 100644
--- a/test/Linker/redefinition.ll
+++ b/test/Linker/redefinition.ll
@@ -3,8 +3,7 @@
 ; RUN: llvm-as %s -o %t.foo1.bc
 ; RUN: llvm-as %s -o %t.foo2.bc
 ; RUN: echo "define void @foo(i32 %x) { ret void }" | llvm-as -o %t.foo3.bc
-; RUN: not llvm-link %t.foo1.bc %t.foo2.bc -o %t.bc 2>&1 | \
-; RUN:   grep "symbol multiply defined"
-; RUN: not llvm-link %t.foo1.bc %t.foo3.bc -o %t.bc 2>&1 | \
-; RUN:   grep "symbol multiply defined"
+; RUN: not llvm-link %t.foo1.bc %t.foo2.bc -o %t.bc 2>&1 | FileCheck %s
+; RUN: not llvm-link %t.foo1.bc %t.foo3.bc -o %t.bc 2>&1 | FileCheck %s
+; CHECK: symbol multiply defined
 define void @foo() { ret void }
diff --git a/test/Linker/weakextern.ll b/test/Linker/weakextern.ll
index 3a72a48aae00..b9f2584c7eef 100644
--- a/test/Linker/weakextern.ll
+++ b/test/Linker/weakextern.ll
@@ -1,9 +1,10 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: llvm-as < %p/testlink1.ll > %t2.bc
 ; RUN: llvm-link %t.bc %t.bc %t2.bc -o %t1.bc
-; RUN: llvm-dis < %t1.bc | grep "kallsyms_names = extern_weak"
-; RUN: llvm-dis < %t1.bc | grep "MyVar = external global i32"
-; RUN: llvm-dis < %t1.bc | grep "Inte = global i32"
+; RUN: llvm-dis < %t1.bc | FileCheck %s
+; CHECK: kallsyms_names = extern_weak
+; CHECK: Inte = global i32
+; CHECK: MyVar = external global i32
 
 @kallsyms_names = extern_weak global [0 x i8]		; <[0 x i8]*> [#uses=0]
 @MyVar = extern_weak global i32		; <i32*> [#uses=0]
diff --git a/test/MC/AArch64/elf-globaladdress.ll b/test/MC/AArch64/elf-globaladdress.ll
index 190439d8fe48..942920be4de2 100644
--- a/test/MC/AArch64/elf-globaladdress.ll
+++ b/test/MC/AArch64/elf-globaladdress.ll
@@ -1,10 +1,10 @@
 ;; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
-;; RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+;; RUN:   llvm-readobj -h -r | FileCheck -check-prefix=OBJ %s
 
 ; Also take it on a round-trip through llvm-mc to stretch assembly-parsing's legs:
 ;; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | \
-;; RUN:     llvm-mc -arch=aarch64 -filetype=obj -o - | \
-;; RUN:     elf-dump | FileCheck -check-prefix=OBJ %s
+;; RUN:     llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj -o - | \
+;; RUN:     llvm-readobj -h -r | FileCheck -check-prefix=OBJ %s
 
 @var8 = global i8 0
 @var16 = global i16 0
@@ -35,77 +35,28 @@ define void @address() {
 }
 
 ; Check we're using EM_AARCH64
-; OBJ: 'e_machine', 0x00
-
-; OBJ: .rela.text
-
-; var8
-; R_AARCH64_ADR_PREL_PG_HI21 against var8
-; OBJ: 'r_sym', 0x0000000f
-; OBJ-NEXT: 'r_type', 0x00000113
-
-; R_AARCH64_LDST8_ABS_LO12_NC against var8
-; OBJ: 'r_sym', 0x0000000f
-; OBJ-NEXT: 'r_type', 0x00000116
-
-
-; var16
-; R_AARCH64_ADR_PREL_PG_HI21 against var16
-; OBJ: 'r_sym', 0x0000000c
-; OBJ-NEXT: 'r_type', 0x00000113
-
-; R_AARCH64_LDST16_ABS_LO12_NC against var16
-; OBJ: 'r_sym', 0x0000000c
-; OBJ-NEXT: 'r_type', 0x0000011c
-
-
-; var32
-; R_AARCH64_ADR_PREL_PG_HI21 against var32
-; OBJ: 'r_sym', 0x0000000d
-; OBJ-NEXT: 'r_type', 0x00000113
-
-; R_AARCH64_LDST32_ABS_LO12_NC against var32
-; OBJ: 'r_sym', 0x0000000d
-; OBJ-NEXT: 'r_type', 0x0000011d
-
-
-; var64
-; R_AARCH64_ADR_PREL_PG_HI21 against var64
-; OBJ: 'r_sym', 0x0000000e
-; OBJ-NEXT: 'r_type', 0x00000113
-
-; R_AARCH64_LDST64_ABS_LO12_NC against var64
-; OBJ: 'r_sym', 0x0000000e
-; OBJ-NEXT: 'r_type', 0x0000011e
+; OBJ: ElfHeader {
+; OBJ:   Machine: EM_AARCH64
+; OBJ: }
+
+; OBJ: Relocations [
+; OBJ:   Section (1) .text {
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var8
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_LDST8_ABS_LO12_NC  var8
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var16
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_LDST16_ABS_LO12_NC var16
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var32
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_LDST32_ABS_LO12_NC var32
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var64
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_LDST64_ABS_LO12_NC var64
 
 ; This is on the store, so not really important, but it stops the next
 ; match working.
-; R_AARCH64_LDST64_ABS_LO12_NC against var64
-; OBJ: 'r_sym', 0x0000000e
-; OBJ-NEXT: 'r_type', 0x0000011e
-
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_LDST64_ABS_LO12_NC var64
 
 ; Pure address-calculation against var64
-; R_AARCH64_ADR_PREL_PG_HI21 against var64
-; OBJ: 'r_sym', 0x0000000e
-; OBJ-NEXT: 'r_type', 0x00000113
-
-; R_AARCH64_ADD_ABS_LO12_NC against var64
-; OBJ: 'r_sym', 0x0000000e
-; OBJ-NEXT: 'r_type', 0x00000115
-
-
-; Make sure the symbols don't move around, otherwise relocation info
-; will be wrong:
-
-; OBJ: Symbol 12
-; OBJ-NEXT: var16
-
-; OBJ: Symbol 13
-; OBJ-NEXT: var32
-
-; OBJ: Symbol 14
-; OBJ-NEXT: var64
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var64
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADD_ABS_LO12_NC    var64
 
-; OBJ: Symbol 15
-; OBJ-NEXT: var8
+; OBJ:   }
+; OBJ: ]
diff --git a/test/MC/AArch64/elf-objdump.s b/test/MC/AArch64/elf-objdump.s
index c5aa5b19899e..51d444a36def 100644
--- a/test/MC/AArch64/elf-objdump.s
+++ b/test/MC/AArch64/elf-objdump.s
@@ -1,5 +1,5 @@
 // 64 bit little endian
-// RUN: llvm-mc -filetype=obj -arch=aarch64 -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d
+// RUN: llvm-mc -filetype=obj -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d
 
 // We just want to see if llvm-objdump works at all.
 // CHECK: .text
diff --git a/test/MC/AArch64/elf-reloc-addsubimm.s b/test/MC/AArch64/elf-reloc-addsubimm.s
index 7fa6e90b5d0d..0321dda332c2 100644
--- a/test/MC/AArch64/elf-reloc-addsubimm.s
+++ b/test/MC/AArch64/elf-reloc-addsubimm.s
@@ -1,13 +1,10 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         add x2, x3, #:lo12:some_label
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000115
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
\ No newline at end of file
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0 R_AARCH64_ADD_ABS_LO12_NC some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-condbr.s b/test/MC/AArch64/elf-reloc-condbr.s
index 283d3b95d0db..684e75a33cb7 100644
--- a/test/MC/AArch64/elf-reloc-condbr.s
+++ b/test/MC/AArch64/elf-reloc-condbr.s
@@ -1,13 +1,10 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         b.eq somewhere
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000118
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: somewhere
\ No newline at end of file
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0 R_AARCH64_CONDBR19 somewhere 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s
index ce9ff49db448..de43c4feac98 100644
--- a/test/MC/AArch64/elf-reloc-ldrlit.s
+++ b/test/MC/AArch64/elf-reloc-ldrlit.s
@@ -1,28 +1,16 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         ldr x0, some_label
         ldr w3, some_label
         ldrsw x9, some_label
         prfm pldl3keep, some_label
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000111
-
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000111
-
-// OBJ: 'r_offset', 0x0000000000000008
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000111
-
-// OBJ: 'r_offset', 0x000000000000000c
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000111
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
\ No newline at end of file
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0 R_AARCH64_LD_PREL_LO19 some_label 0x0
+// OBJ-NEXT:     0x4 R_AARCH64_LD_PREL_LO19 some_label 0x0
+// OBJ-NEXT:     0x8 R_AARCH64_LD_PREL_LO19 some_label 0x0
+// OBJ-NEXT:     0xC R_AARCH64_LD_PREL_LO19 some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s
index 345fc8247d0e..e1f841bd20f7 100644
--- a/test/MC/AArch64/elf-reloc-ldstunsimm.s
+++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         ldrb w0, [sp, #:lo12:some_label]
         ldrh w0, [sp, #:lo12:some_label]
@@ -7,28 +7,12 @@
         ldr x0, [sp, #:lo12:some_label]
         str q0, [sp, #:lo12:some_label]
 
-// OBJ: .rela.text
-
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000116
-
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000011c
-
-// OBJ: 'r_offset', 0x0000000000000008
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000011d
-
-// OBJ: 'r_offset', 0x000000000000000c
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000011e
-
-// OBJ: 'r_offset', 0x0000000000000010
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000012b
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0  R_AARCH64_LDST8_ABS_LO12_NC   some_label 0x0
+// OBJ-NEXT:     0x4  R_AARCH64_LDST16_ABS_LO12_NC  some_label 0x0
+// OBJ-NEXT:     0x8  R_AARCH64_LDST32_ABS_LO12_NC  some_label 0x0
+// OBJ-NEXT:     0xC  R_AARCH64_LDST64_ABS_LO12_NC  some_label 0x0
+// OBJ-NEXT:     0x10 R_AARCH64_LDST128_ABS_LO12_NC some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-movw.s b/test/MC/AArch64/elf-reloc-movw.s
index cb7dc6768e32..8a7e532cdd23 100644
--- a/test/MC/AArch64/elf-reloc-movw.s
+++ b/test/MC/AArch64/elf-reloc-movw.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         movz x0, #:abs_g0:some_label
         movk x0, #:abs_g0_nc:some_label
@@ -21,78 +21,22 @@
 
         movz x19, #:abs_g2_s:some_label
         movn x19, #:abs_g2_s:some_label
-// OBJ: .rela.text
 
-// :abs_g0: => R_AARCH64_MOVW_UABS_G0
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000107
-
-// :abs_g0_nc: => R_AARCH64_MOVW_UABS_G0_NC
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000108
-
-// :abs_g1: => R_AARCH64_MOVW_UABS_G1
-// OBJ: 'r_offset', 0x0000000000000008
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000109
-
-// :abs_g1_nc: => R_AARCH64_MOVW_UABS_G1_NC
-// OBJ: 'r_offset', 0x000000000000000c
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010a
-
-// :abs_g2: => R_AARCH64_MOVW_UABS_G2
-// OBJ: 'r_offset', 0x0000000000000010
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010b
-
-// :abs_g2_nc: => R_AARCH64_MOVW_UABS_G2_NC
-// OBJ: 'r_offset', 0x0000000000000014
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010c
-
-// :abs_g3: => R_AARCH64_MOVW_UABS_G3
-// OBJ: 'r_offset', 0x0000000000000018
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010d
-
-// :abs_g3: => R_AARCH64_MOVW_UABS_G3
-// OBJ: 'r_offset', 0x000000000000001c
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010d
-
-// :abs_g0_s: => R_AARCH64_MOVW_SABS_G0
-// OBJ: 'r_offset', 0x0000000000000020
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010e
-
-// :abs_g0_s: => R_AARCH64_MOVW_SABS_G0
-// OBJ: 'r_offset', 0x0000000000000024
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010e
-
-// :abs_g1_s: => R_AARCH64_MOVW_SABS_G1
-// OBJ: 'r_offset', 0x0000000000000028
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010f
-
-// :abs_g1_s: => R_AARCH64_MOVW_SABS_G1
-// OBJ: 'r_offset', 0x000000000000002c
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010f
-
-// :abs_g2_s: => R_AARCH64_MOVW_SABS_G2
-// OBJ: 'r_offset', 0x0000000000000030
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000110
-
-// :abs_g2_s: => R_AARCH64_MOVW_SABS_G2
-// OBJ: 'r_offset', 0x0000000000000034
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000110
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0  R_AARCH64_MOVW_UABS_G0    some_label 0x0
+// OBJ-NEXT:     0x4  R_AARCH64_MOVW_UABS_G0_NC some_label 0x0
+// OBJ-NEXT:     0x8  R_AARCH64_MOVW_UABS_G1    some_label 0x0
+// OBJ-NEXT:     0xC  R_AARCH64_MOVW_UABS_G1_NC some_label 0x0
+// OBJ-NEXT:     0x10 R_AARCH64_MOVW_UABS_G2    some_label 0x0
+// OBJ-NEXT:     0x14 R_AARCH64_MOVW_UABS_G2_NC some_label 0x0
+// OBJ-NEXT:     0x18 R_AARCH64_MOVW_UABS_G3    some_label 0x0
+// OBJ-NEXT:     0x1C R_AARCH64_MOVW_UABS_G3    some_label 0x0
+// OBJ-NEXT:     0x20 R_AARCH64_MOVW_SABS_G0    some_label 0x0
+// OBJ-NEXT:     0x24 R_AARCH64_MOVW_SABS_G0    some_label 0x0
+// OBJ-NEXT:     0x28 R_AARCH64_MOVW_SABS_G1    some_label 0x0
+// OBJ-NEXT:     0x2C R_AARCH64_MOVW_SABS_G1    some_label 0x0
+// OBJ-NEXT:     0x30 R_AARCH64_MOVW_SABS_G2    some_label 0x0
+// OBJ-NEXT:     0x34 R_AARCH64_MOVW_SABS_G2    some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing.s b/test/MC/AArch64/elf-reloc-pcreladdressing.s
index 39a8ba9402a8..b5f072712fd7 100644
--- a/test/MC/AArch64/elf-reloc-pcreladdressing.s
+++ b/test/MC/AArch64/elf-reloc-pcreladdressing.s
@@ -1,29 +1,17 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         adr x2, some_label
         adrp x5, some_label
 
         adrp x5, :got:some_label
         ldr x0, [x5, #:got_lo12:some_label]
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000112
-
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000113
-
-// OBJ: 'r_offset', 0x0000000000000008
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000137
-
-// OBJ: 'r_offset', 0x000000000000000c
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000138
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
\ No newline at end of file
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0 R_AARCH64_ADR_PREL_LO21    some_label 0x0
+// OBJ-NEXT:     0x4 R_AARCH64_ADR_PREL_PG_HI21 some_label 0x0
+// OBJ-NEXT:     0x8 R_AARCH64_ADR_GOT_PAGE     some_label 0x0
+// OBJ-NEXT:     0xC R_AARCH64_LD64_GOT_LO12_NC some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s
index c5e2981a22ef..037e89632e82 100644
--- a/test/MC/AArch64/elf-reloc-tstb.s
+++ b/test/MC/AArch64/elf-reloc-tstb.s
@@ -1,18 +1,12 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         tbz x6, #45, somewhere
         tbnz w3, #15, somewhere
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000117
-
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000117
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: somewhere
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0  R_AARCH64_TSTBR14 somewhere 0x0
+// OBJ-NEXT:     0x4  R_AARCH64_TSTBR14 somewhere 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s
index 0e97bc66695f..bead07c12d2c 100644
--- a/test/MC/AArch64/elf-reloc-uncondbrimm.s
+++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s
@@ -1,18 +1,12 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         b somewhere
         bl somewhere
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000011a
-
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000011b
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: somewhere
\ No newline at end of file
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0 R_AARCH64_JUMP26 somewhere 0x0
+// OBJ-NEXT:     0x4 R_AARCH64_CALL26 somewhere 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/gicv3-regs-diagnostics.s b/test/MC/AArch64/gicv3-regs-diagnostics.s
new file mode 100644
index 000000000000..e891adbbb375
--- /dev/null
+++ b/test/MC/AArch64/gicv3-regs-diagnostics.s
@@ -0,0 +1,61 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s
+
+        // Write-only
+        mrs x10, icc_eoir1_el1
+        mrs x7, icc_eoir0_el1
+        mrs x22, icc_dir_el1
+        mrs x24, icc_sgi1r_el1
+        mrs x8, icc_asgi1r_el1
+        mrs x28, icc_sgi0r_el1
+// CHECK: error: expected readable system register
+// CHECK-NEXT:         mrs x10, icc_eoir1_el1
+// CHECK-NEXT:                  ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT:         mrs x7, icc_eoir0_el1
+// CHECK-NEXT:                 ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT:         mrs x22, icc_dir_el1
+// CHECK-NEXT:                  ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT:         mrs x24, icc_sgi1r_el1
+// CHECK-NEXT:                  ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT:         mrs x8, icc_asgi1r_el1
+// CHECK-NEXT:                 ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT:         mrs x28, icc_sgi0r_el1
+// CHECK-NEXT:                  ^
+
+        // Read-only
+        msr icc_iar1_el1, x16
+        msr icc_iar0_el1, x19
+        msr icc_hppir1_el1, x29
+        msr icc_hppir0_el1, x14
+        msr icc_rpr_el1, x6
+        msr ich_vtr_el2, x8
+        msr ich_eisr_el2, x22
+        msr ich_elsr_el2, x8
+// CHECK: error: expected writable system register or pstate
+// CHECK-NEXT:         msr icc_iar1_el1, x16
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr icc_iar0_el1, x19
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr icc_hppir1_el1, x29
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr icc_hppir0_el1, x14
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr icc_rpr_el1, x6
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr ich_vtr_el2, x8
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr ich_eisr_el2, x22
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr ich_elsr_el2, x8
+// CHECK-NEXT:             ^
diff --git a/test/MC/AArch64/gicv3-regs.s b/test/MC/AArch64/gicv3-regs.s
new file mode 100644
index 000000000000..f7776514da09
--- /dev/null
+++ b/test/MC/AArch64/gicv3-regs.s
@@ -0,0 +1,223 @@
+ // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
+
+        mrs x8, icc_iar1_el1
+        mrs x26, icc_iar0_el1
+        mrs x2, icc_hppir1_el1
+        mrs x17, icc_hppir0_el1
+        mrs x29, icc_rpr_el1
+        mrs x4, ich_vtr_el2
+        mrs x24, ich_eisr_el2
+        mrs x9, ich_elsr_el2
+        mrs x24, icc_bpr1_el1
+        mrs x14, icc_bpr0_el1
+        mrs x19, icc_pmr_el1
+        mrs x23, icc_ctlr_el1
+        mrs x20, icc_ctlr_el3
+        mrs x28, icc_sre_el1
+        mrs x25, icc_sre_el2
+        mrs x8, icc_sre_el3
+        mrs x22, icc_igrpen0_el1
+        mrs x5, icc_igrpen1_el1
+        mrs x7, icc_igrpen1_el3
+        mrs x22, icc_seien_el1
+        mrs x4, icc_ap0r0_el1
+        mrs x11, icc_ap0r1_el1
+        mrs x27, icc_ap0r2_el1
+        mrs x21, icc_ap0r3_el1
+        mrs x2, icc_ap1r0_el1
+        mrs x21, icc_ap1r1_el1
+        mrs x10, icc_ap1r2_el1
+        mrs x27, icc_ap1r3_el1
+        mrs x20, ich_ap0r0_el2
+        mrs x21, ich_ap0r1_el2
+        mrs x5, ich_ap0r2_el2
+        mrs x4, ich_ap0r3_el2
+        mrs x15, ich_ap1r0_el2
+        mrs x12, ich_ap1r1_el2
+        mrs x27, ich_ap1r2_el2
+        mrs x20, ich_ap1r3_el2
+        mrs x10, ich_hcr_el2
+        mrs x27, ich_misr_el2
+        mrs x6, ich_vmcr_el2
+        mrs x19, ich_vseir_el2
+        mrs x3, ich_lr0_el2
+        mrs x1, ich_lr1_el2
+        mrs x22, ich_lr2_el2
+        mrs x21, ich_lr3_el2
+        mrs x6, ich_lr4_el2
+        mrs x10, ich_lr5_el2
+        mrs x11, ich_lr6_el2
+        mrs x12, ich_lr7_el2
+        mrs x0, ich_lr8_el2
+        mrs x21, ich_lr9_el2
+        mrs x13, ich_lr10_el2
+        mrs x26, ich_lr11_el2
+        mrs x1, ich_lr12_el2
+        mrs x8, ich_lr13_el2
+        mrs x2, ich_lr14_el2
+        mrs x8, ich_lr15_el2
+// CHECK: mrs      x8, icc_iar1_el1           // encoding: [0x08,0xcc,0x38,0xd5]
+// CHECK: mrs      x26, icc_iar0_el1          // encoding: [0x1a,0xc8,0x38,0xd5]
+// CHECK: mrs      x2, icc_hppir1_el1         // encoding: [0x42,0xcc,0x38,0xd5]
+// CHECK: mrs      x17, icc_hppir0_el1        // encoding: [0x51,0xc8,0x38,0xd5]
+// CHECK: mrs      x29, icc_rpr_el1           // encoding: [0x7d,0xcb,0x38,0xd5]
+// CHECK: mrs      x4, ich_vtr_el2            // encoding: [0x24,0xcb,0x3c,0xd5]
+// CHECK: mrs      x24, ich_eisr_el2          // encoding: [0x78,0xcb,0x3c,0xd5]
+// CHECK: mrs      x9, ich_elsr_el2           // encoding: [0xa9,0xcb,0x3c,0xd5]
+// CHECK: mrs      x24, icc_bpr1_el1          // encoding: [0x78,0xcc,0x38,0xd5]
+// CHECK: mrs      x14, icc_bpr0_el1          // encoding: [0x6e,0xc8,0x38,0xd5]
+// CHECK: mrs      x19, icc_pmr_el1           // encoding: [0x13,0x46,0x38,0xd5]
+// CHECK: mrs      x23, icc_ctlr_el1          // encoding: [0x97,0xcc,0x38,0xd5]
+// CHECK: mrs      x20, icc_ctlr_el3          // encoding: [0x94,0xcc,0x3e,0xd5]
+// CHECK: mrs      x28, icc_sre_el1           // encoding: [0xbc,0xcc,0x38,0xd5]
+// CHECK: mrs      x25, icc_sre_el2           // encoding: [0xb9,0xc9,0x3c,0xd5]
+// CHECK: mrs      x8, icc_sre_el3            // encoding: [0xa8,0xcc,0x3e,0xd5]
+// CHECK: mrs      x22, icc_igrpen0_el1       // encoding: [0xd6,0xcc,0x38,0xd5]
+// CHECK: mrs      x5, icc_igrpen1_el1        // encoding: [0xe5,0xcc,0x38,0xd5]
+// CHECK: mrs      x7, icc_igrpen1_el3        // encoding: [0xe7,0xcc,0x3e,0xd5]
+// CHECK: mrs      x22, icc_seien_el1         // encoding: [0x16,0xcd,0x38,0xd5]
+// CHECK: mrs      x4, icc_ap0r0_el1          // encoding: [0x84,0xc8,0x38,0xd5]
+// CHECK: mrs      x11, icc_ap0r1_el1         // encoding: [0xab,0xc8,0x38,0xd5]
+// CHECK: mrs      x27, icc_ap0r2_el1         // encoding: [0xdb,0xc8,0x38,0xd5]
+// CHECK: mrs      x21, icc_ap0r3_el1         // encoding: [0xf5,0xc8,0x38,0xd5]
+// CHECK: mrs      x2, icc_ap1r0_el1          // encoding: [0x02,0xc9,0x38,0xd5]
+// CHECK: mrs      x21, icc_ap1r1_el1         // encoding: [0x35,0xc9,0x38,0xd5]
+// CHECK: mrs      x10, icc_ap1r2_el1         // encoding: [0x4a,0xc9,0x38,0xd5]
+// CHECK: mrs      x27, icc_ap1r3_el1         // encoding: [0x7b,0xc9,0x38,0xd5]
+// CHECK: mrs      x20, ich_ap0r0_el2         // encoding: [0x14,0xc8,0x3c,0xd5]
+// CHECK: mrs      x21, ich_ap0r1_el2         // encoding: [0x35,0xc8,0x3c,0xd5]
+// CHECK: mrs      x5, ich_ap0r2_el2          // encoding: [0x45,0xc8,0x3c,0xd5]
+// CHECK: mrs      x4, ich_ap0r3_el2          // encoding: [0x64,0xc8,0x3c,0xd5]
+// CHECK: mrs      x15, ich_ap1r0_el2         // encoding: [0x0f,0xc9,0x3c,0xd5]
+// CHECK: mrs      x12, ich_ap1r1_el2         // encoding: [0x2c,0xc9,0x3c,0xd5]
+// CHECK: mrs      x27, ich_ap1r2_el2         // encoding: [0x5b,0xc9,0x3c,0xd5]
+// CHECK: mrs      x20, ich_ap1r3_el2         // encoding: [0x74,0xc9,0x3c,0xd5]
+// CHECK: mrs      x10, ich_hcr_el2           // encoding: [0x0a,0xcb,0x3c,0xd5]
+// CHECK: mrs      x27, ich_misr_el2          // encoding: [0x5b,0xcb,0x3c,0xd5]
+// CHECK: mrs      x6, ich_vmcr_el2           // encoding: [0xe6,0xcb,0x3c,0xd5]
+// CHECK: mrs      x19, ich_vseir_el2         // encoding: [0x93,0xc9,0x3c,0xd5]
+// CHECK: mrs      x3, ich_lr0_el2            // encoding: [0x03,0xcc,0x3c,0xd5]
+// CHECK: mrs      x1, ich_lr1_el2            // encoding: [0x21,0xcc,0x3c,0xd5]
+// CHECK: mrs      x22, ich_lr2_el2           // encoding: [0x56,0xcc,0x3c,0xd5]
+// CHECK: mrs      x21, ich_lr3_el2           // encoding: [0x75,0xcc,0x3c,0xd5]
+// CHECK: mrs      x6, ich_lr4_el2            // encoding: [0x86,0xcc,0x3c,0xd5]
+// CHECK: mrs      x10, ich_lr5_el2           // encoding: [0xaa,0xcc,0x3c,0xd5]
+// CHECK: mrs      x11, ich_lr6_el2           // encoding: [0xcb,0xcc,0x3c,0xd5]
+// CHECK: mrs      x12, ich_lr7_el2           // encoding: [0xec,0xcc,0x3c,0xd5]
+// CHECK: mrs      x0, ich_lr8_el2            // encoding: [0x00,0xcd,0x3c,0xd5]
+// CHECK: mrs      x21, ich_lr9_el2           // encoding: [0x35,0xcd,0x3c,0xd5]
+// CHECK: mrs      x13, ich_lr10_el2          // encoding: [0x4d,0xcd,0x3c,0xd5]
+// CHECK: mrs      x26, ich_lr11_el2          // encoding: [0x7a,0xcd,0x3c,0xd5]
+// CHECK: mrs      x1, ich_lr12_el2           // encoding: [0x81,0xcd,0x3c,0xd5]
+// CHECK: mrs      x8, ich_lr13_el2           // encoding: [0xa8,0xcd,0x3c,0xd5]
+// CHECK: mrs      x2, ich_lr14_el2           // encoding: [0xc2,0xcd,0x3c,0xd5]
+// CHECK: mrs      x8, ich_lr15_el2           // encoding: [0xe8,0xcd,0x3c,0xd5]
+
+        msr icc_eoir1_el1, x27
+        msr icc_eoir0_el1, x5
+        msr icc_dir_el1, x13
+        msr icc_sgi1r_el1, x21
+        msr icc_asgi1r_el1, x25
+        msr icc_sgi0r_el1, x28
+        msr icc_bpr1_el1, x7
+        msr icc_bpr0_el1, x9
+        msr icc_pmr_el1, x29
+        msr icc_ctlr_el1, x24
+        msr icc_ctlr_el3, x0
+        msr icc_sre_el1, x2
+        msr icc_sre_el2, x5
+        msr icc_sre_el3, x10
+        msr icc_igrpen0_el1, x22
+        msr icc_igrpen1_el1, x11
+        msr icc_igrpen1_el3, x8
+        msr icc_seien_el1, x4
+        msr icc_ap0r0_el1, x27
+        msr icc_ap0r1_el1, x5
+        msr icc_ap0r2_el1, x20
+        msr icc_ap0r3_el1, x0
+        msr icc_ap1r0_el1, x2
+        msr icc_ap1r1_el1, x29
+        msr icc_ap1r2_el1, x23
+        msr icc_ap1r3_el1, x11
+        msr ich_ap0r0_el2, x2
+        msr ich_ap0r1_el2, x27
+        msr ich_ap0r2_el2, x7
+        msr ich_ap0r3_el2, x1
+        msr ich_ap1r0_el2, x7
+        msr ich_ap1r1_el2, x12
+        msr ich_ap1r2_el2, x14
+        msr ich_ap1r3_el2, x13
+        msr ich_hcr_el2, x1
+        msr ich_misr_el2, x10
+        msr ich_vmcr_el2, x24
+        msr ich_vseir_el2, x29
+        msr ich_lr0_el2, x26
+        msr ich_lr1_el2, x9
+        msr ich_lr2_el2, x18
+        msr ich_lr3_el2, x26
+        msr ich_lr4_el2, x22
+        msr ich_lr5_el2, x26
+        msr ich_lr6_el2, x27
+        msr ich_lr7_el2, x8
+        msr ich_lr8_el2, x17
+        msr ich_lr9_el2, x19
+        msr ich_lr10_el2, x17
+        msr ich_lr11_el2, x5
+        msr ich_lr12_el2, x29
+        msr ich_lr13_el2, x2
+        msr ich_lr14_el2, x13
+        msr ich_lr15_el2, x27
+// CHECK: msr      icc_eoir1_el1, x27         // encoding: [0x3b,0xcc,0x18,0xd5]
+// CHECK: msr      icc_eoir0_el1, x5          // encoding: [0x25,0xc8,0x18,0xd5]
+// CHECK: msr      icc_dir_el1, x13           // encoding: [0x2d,0xcb,0x18,0xd5]
+// CHECK: msr      icc_sgi1r_el1, x21         // encoding: [0xb5,0xcb,0x18,0xd5]
+// CHECK: msr      icc_asgi1r_el1, x25        // encoding: [0xd9,0xcb,0x18,0xd5]
+// CHECK: msr      icc_sgi0r_el1, x28         // encoding: [0xfc,0xcb,0x18,0xd5]
+// CHECK: msr      icc_bpr1_el1, x7           // encoding: [0x67,0xcc,0x18,0xd5]
+// CHECK: msr      icc_bpr0_el1, x9           // encoding: [0x69,0xc8,0x18,0xd5]
+// CHECK: msr      icc_pmr_el1, x29           // encoding: [0x1d,0x46,0x18,0xd5]
+// CHECK: msr      icc_ctlr_el1, x24          // encoding: [0x98,0xcc,0x18,0xd5]
+// CHECK: msr      icc_ctlr_el3, x0           // encoding: [0x80,0xcc,0x1e,0xd5]
+// CHECK: msr      icc_sre_el1, x2            // encoding: [0xa2,0xcc,0x18,0xd5]
+// CHECK: msr      icc_sre_el2, x5            // encoding: [0xa5,0xc9,0x1c,0xd5]
+// CHECK: msr      icc_sre_el3, x10           // encoding: [0xaa,0xcc,0x1e,0xd5]
+// CHECK: msr      icc_igrpen0_el1, x22       // encoding: [0xd6,0xcc,0x18,0xd5]
+// CHECK: msr      icc_igrpen1_el1, x11       // encoding: [0xeb,0xcc,0x18,0xd5]
+// CHECK: msr      icc_igrpen1_el3, x8        // encoding: [0xe8,0xcc,0x1e,0xd5]
+// CHECK: msr      icc_seien_el1, x4          // encoding: [0x04,0xcd,0x18,0xd5]
+// CHECK: msr      icc_ap0r0_el1, x27         // encoding: [0x9b,0xc8,0x18,0xd5]
+// CHECK: msr      icc_ap0r1_el1, x5          // encoding: [0xa5,0xc8,0x18,0xd5]
+// CHECK: msr      icc_ap0r2_el1, x20         // encoding: [0xd4,0xc8,0x18,0xd5]
+// CHECK: msr      icc_ap0r3_el1, x0          // encoding: [0xe0,0xc8,0x18,0xd5]
+// CHECK: msr      icc_ap1r0_el1, x2          // encoding: [0x02,0xc9,0x18,0xd5]
+// CHECK: msr      icc_ap1r1_el1, x29         // encoding: [0x3d,0xc9,0x18,0xd5]
+// CHECK: msr      icc_ap1r2_el1, x23         // encoding: [0x57,0xc9,0x18,0xd5]
+// CHECK: msr      icc_ap1r3_el1, x11         // encoding: [0x6b,0xc9,0x18,0xd5]
+// CHECK: msr      ich_ap0r0_el2, x2          // encoding: [0x02,0xc8,0x1c,0xd5]
+// CHECK: msr      ich_ap0r1_el2, x27         // encoding: [0x3b,0xc8,0x1c,0xd5]
+// CHECK: msr      ich_ap0r2_el2, x7          // encoding: [0x47,0xc8,0x1c,0xd5]
+// CHECK: msr      ich_ap0r3_el2, x1          // encoding: [0x61,0xc8,0x1c,0xd5]
+// CHECK: msr      ich_ap1r0_el2, x7          // encoding: [0x07,0xc9,0x1c,0xd5]
+// CHECK: msr      ich_ap1r1_el2, x12         // encoding: [0x2c,0xc9,0x1c,0xd5]
+// CHECK: msr      ich_ap1r2_el2, x14         // encoding: [0x4e,0xc9,0x1c,0xd5]
+// CHECK: msr      ich_ap1r3_el2, x13         // encoding: [0x6d,0xc9,0x1c,0xd5]
+// CHECK: msr      ich_hcr_el2, x1            // encoding: [0x01,0xcb,0x1c,0xd5]
+// CHECK: msr      ich_misr_el2, x10          // encoding: [0x4a,0xcb,0x1c,0xd5]
+// CHECK: msr      ich_vmcr_el2, x24          // encoding: [0xf8,0xcb,0x1c,0xd5]
+// CHECK: msr      ich_vseir_el2, x29         // encoding: [0x9d,0xc9,0x1c,0xd5]
+// CHECK: msr      ich_lr0_el2, x26           // encoding: [0x1a,0xcc,0x1c,0xd5]
+// CHECK: msr      ich_lr1_el2, x9            // encoding: [0x29,0xcc,0x1c,0xd5]
+// CHECK: msr      ich_lr2_el2, x18           // encoding: [0x52,0xcc,0x1c,0xd5]
+// CHECK: msr      ich_lr3_el2, x26           // encoding: [0x7a,0xcc,0x1c,0xd5]
+// CHECK: msr      ich_lr4_el2, x22           // encoding: [0x96,0xcc,0x1c,0xd5]
+// CHECK: msr      ich_lr5_el2, x26           // encoding: [0xba,0xcc,0x1c,0xd5]
+// CHECK: msr      ich_lr6_el2, x27           // encoding: [0xdb,0xcc,0x1c,0xd5]
+// CHECK: msr      ich_lr7_el2, x8            // encoding: [0xe8,0xcc,0x1c,0xd5]
+// CHECK: msr      ich_lr8_el2, x17           // encoding: [0x11,0xcd,0x1c,0xd5]
+// CHECK: msr      ich_lr9_el2, x19           // encoding: [0x33,0xcd,0x1c,0xd5]
+// CHECK: msr      ich_lr10_el2, x17          // encoding: [0x51,0xcd,0x1c,0xd5]
+// CHECK: msr      ich_lr11_el2, x5           // encoding: [0x65,0xcd,0x1c,0xd5]
+// CHECK: msr      ich_lr12_el2, x29          // encoding: [0x9d,0xcd,0x1c,0xd5]
+// CHECK: msr      ich_lr13_el2, x2           // encoding: [0xa2,0xcd,0x1c,0xd5]
+// CHECK: msr      ich_lr14_el2, x13          // encoding: [0xcd,0xcd,0x1c,0xd5]
+// CHECK: msr      ich_lr15_el2, x27          // encoding: [0xfb,0xcd,0x1c,0xd5]
diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s
index 690fa8c00962..d0e336ecaf23 100644
--- a/test/MC/AArch64/tls-relocs.s
+++ b/test/MC/AArch64/tls-relocs.s
@@ -1,9 +1,6 @@
-// RUN: llvm-mc -arch=aarch64 -show-encoding < %s | FileCheck %s
-// RUN: llvm-mc -arch=aarch64 -filetype=obj < %s -o %t
-// RUN: elf-dump %t | FileCheck --check-prefix=CHECK-ELF %s
-// RUN: llvm-objdump -r %t | FileCheck --check-prefix=CHECK-ELF-NAMES %s
-
-// CHECK-ELF:  .rela.text
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s -o - | \
+// RUN:   llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF %s
 
         // TLS local-dynamic forms
         movz x1, #:dtprel_g2:var
@@ -12,34 +9,20 @@
         movn x4, #:dtprel_g2:var
 // CHECK: movz    x1, #:dtprel_g2:var     // encoding: [0x01'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
-// CHECK-NEXT: movn    x2, #:dtprel_g2:var     // encoding: [0x02'A',A,0xc0'A',0x92'A']
+// CHECK: movn    x2, #:dtprel_g2:var     // encoding: [0x02'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
-// CHECK-NEXT: movz    x3, #:dtprel_g2:var     // encoding: [0x03'A',A,0xc0'A',0x92'A']
+// CHECK: movz    x3, #:dtprel_g2:var     // encoding: [0x03'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
-// CHECK-NEXT: movn    x4, #:dtprel_g2:var     // encoding: [0x04'A',A,0xc0'A',0x92'A']
+// CHECK: movn    x4, #:dtprel_g2:var     // encoding: [0x04'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
 
-// CHECK-ELF: # Relocation 0
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000000)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM:0x[0-9a-f]+]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
-// CHECK-ELF: # Relocation 1
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000004)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
-// CHECK-ELF: # Relocation 2
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000008)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
-// CHECK-ELF: # Relocation 3
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000000c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
-
-// CHECK-ELF-NAMES: 0 R_AARCH64_TLSLD_MOVW_DTPREL_G2
-// CHECK-ELF-NAMES: 4 R_AARCH64_TLSLD_MOVW_DTPREL_G2
-// CHECK-ELF-NAMES: 8 R_AARCH64_TLSLD_MOVW_DTPREL_G2
-// CHECK-ELF-NAMES: 12 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+// CHECK-ELF:      Relocations [
+// CHECK-ELF-NEXT:   Section (1) .text {
+// CHECK-ELF-NEXT:     0x0 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM:[^ ]+]]
+// CHECK-ELF-NEXT:     0x4 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x8 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xC R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
+
 
         movz x5, #:dtprel_g1:var
         movn x6, #:dtprel_g1:var
@@ -54,46 +37,22 @@
 // CHECK-NEXT: movn    w8, #:dtprel_g1:var     // encoding: [0x08'A',A,0xa0'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
 
-// CHECK-ELF: # Relocation 4
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000010)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
-// CHECK-ELF: # Relocation 5
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000014)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
-// CHECK-ELF: # Relocation 6
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000018)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
-// CHECK-ELF: # Relocation 7
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000001c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
-
-// CHECK-ELF-NAMES: 16 R_AARCH64_TLSLD_MOVW_DTPREL_G1
-// CHECK-ELF-NAMES: 20 R_AARCH64_TLSLD_MOVW_DTPREL_G1
-// CHECK-ELF-NAMES: 24 R_AARCH64_TLSLD_MOVW_DTPREL_G1
-// CHECK-ELF-NAMES: 28 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+// CHECK-ELF-NEXT:     0x10 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x14 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x18 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x1C R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+
 
         movk x9, #:dtprel_g1_nc:var
         movk w10, #:dtprel_g1_nc:var
 // CHECK: movk    x9, #:dtprel_g1_nc:var  // encoding: [0x09'A',A,0xa0'A',0xf2'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
-// CHECK-NEXT: movk    w10, #:dtprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A']
+// CHECK: movk    w10, #:dtprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
 
-// CHECK-ELF: # Relocation 8
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000020)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020d)
-// CHECK-ELF: # Relocation 9
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000024)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020d)
+// CHECK-ELF-NEXT:     0x20 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x24 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 32 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC
-// CHECK-ELF-NAMES: 36 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC
 
         movz x11, #:dtprel_g0:var
         movn x12, #:dtprel_g0:var
@@ -101,275 +60,156 @@
         movn w14, #:dtprel_g0:var
 // CHECK: movz    x11, #:dtprel_g0:var    // encoding: [0x0b'A',A,0x80'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
-// CHECK-NEXT: movn    x12, #:dtprel_g0:var    // encoding: [0x0c'A',A,0x80'A',0x92'A']
+// CHECK: movn    x12, #:dtprel_g0:var    // encoding: [0x0c'A',A,0x80'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
-// CHECK-NEXT: movz    w13, #:dtprel_g0:var    // encoding: [0x0d'A',A,0x80'A',0x12'A']
+// CHECK: movz    w13, #:dtprel_g0:var    // encoding: [0x0d'A',A,0x80'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
-// CHECK-NEXT: movn    w14, #:dtprel_g0:var    // encoding: [0x0e'A',A,0x80'A',0x12'A']
-
-
-// CHECK-ELF: # Relocation 10
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000028)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
-// CHECK-ELF: # Relocation 11
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000002c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
-// CHECK-ELF: # Relocation 12
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000030)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
-// CHECK-ELF: # Relocation 13
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000034)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
-
-// CHECK-ELF-NAMES: 40 R_AARCH64_TLSLD_MOVW_DTPREL_G0
-// CHECK-ELF-NAMES: 44 R_AARCH64_TLSLD_MOVW_DTPREL_G0
-// CHECK-ELF-NAMES: 48 R_AARCH64_TLSLD_MOVW_DTPREL_G0
-// CHECK-ELF-NAMES: 52 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+// CHECK: movn    w14, #:dtprel_g0:var    // encoding: [0x0e'A',A,0x80'A',0x12'A']
+
+// CHECK-ELF-NEXT:     0x28 R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x2C R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x30 R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x34 R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
 
 
         movk x15, #:dtprel_g0_nc:var
         movk w16, #:dtprel_g0_nc:var
 // CHECK: movk    x15, #:dtprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
-// CHECK-NEXT: movk    w16, #:dtprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A']
+// CHECK: movk    w16, #:dtprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
 
-// CHECK-ELF: # Relocation 14
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000038)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020f)
-// CHECK-ELF: # Relocation 15
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000003c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020f)
+// CHECK-ELF-NEXT:     0x38 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x3C R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 56 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC
-// CHECK-ELF-NAMES: 60 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC
 
         add x17, x18, #:dtprel_hi12:var, lsl #12
         add w19, w20, #:dtprel_hi12:var, lsl #12
 // CHECK: add     x17, x18, #:dtprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
-// CHECK-NEXT: add     w19, w20, #:dtprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
+// CHECK: add     w19, w20, #:dtprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
 
-// CHECK-ELF: # Relocation 16
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000040)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000210)
-// CHECK-ELF: # Relocation 17
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000044)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000210)
-
-// CHECK-ELF-NAMES: 64 R_AARCH64_TLSLD_ADD_DTPREL_HI12
-// CHECK-ELF-NAMES: 68 R_AARCH64_TLSLD_ADD_DTPREL_HI12
+// CHECK-ELF-NEXT:     0x40 R_AARCH64_TLSLD_ADD_DTPREL_HI12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x44 R_AARCH64_TLSLD_ADD_DTPREL_HI12 [[VARSYM]]
 
 
         add x21, x22, #:dtprel_lo12:var
         add w23, w24, #:dtprel_lo12:var
 // CHECK: add     x21, x22, #:dtprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
-// CHECK-NEXT: add     w23, w24, #:dtprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
+// CHECK: add     w23, w24, #:dtprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
 
-// CHECK-ELF: # Relocation 18
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000048)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000211)
-// CHECK-ELF: # Relocation 19
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000004c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000211)
+// CHECK-ELF-NEXT:     0x48 R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x4C R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]]
 
-// CHECK-ELF-NAMES: 72 R_AARCH64_TLSLD_ADD_DTPREL_LO12
-// CHECK-ELF-NAMES: 76 R_AARCH64_TLSLD_ADD_DTPREL_LO12
 
         add x25, x26, #:dtprel_lo12_nc:var
         add w27, w28, #:dtprel_lo12_nc:var
 // CHECK: add     x25, x26, #:dtprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
-// CHECK-NEXT: add     w27, w28, #:dtprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
+// CHECK: add     w27, w28, #:dtprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
 
-// CHECK-ELF: # Relocation 20
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000050)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000212)
-// CHECK-ELF: # Relocation 21
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000054)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000212)
+// CHECK-ELF-NEXT:     0x50 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x54 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 80 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
-// CHECK-ELF-NAMES: 84 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
 
         ldrb w29, [x30, #:dtprel_lo12:var]
         ldrsb x29, [x28, #:dtprel_lo12_nc:var]
 // CHECK: ldrb    w29, [x30, #:dtprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst8_dtprel_lo12
-// CHECK-NEXT: ldrsb   x29, [x28, #:dtprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
+// CHECK: ldrsb   x29, [x28, #:dtprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst8_dtprel_lo12_nc
 
-// CHECK-ELF: # Relocation 22
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000058)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000213)
-// CHECK-ELF: # Relocation 23
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000005c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000214)
+// CHECK-ELF-NEXT:     0x58 R_AARCH64_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x5C R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 88 R_AARCH64_TLSLD_LDST8_DTPREL_LO12
-// CHECK-ELF-NAMES: 92 R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC
 
         strh w27, [x26, #:dtprel_lo12:var]
         ldrsh x25, [x24, #:dtprel_lo12_nc:var]
 // CHECK: strh    w27, [x26, #:dtprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst16_dtprel_lo12
-// CHECK-NEXT: ldrsh   x25, [x24, #:dtprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
+// CHECK: ldrsh   x25, [x24, #:dtprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst16_dtprel_lo12_n
 
-// CHECK-ELF: # Relocation 24
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000060)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000215)
-// CHECK-ELF: # Relocation 25
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000064)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000216)
+// CHECK-ELF-NEXT:     0x60 R_AARCH64_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x64 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 96 R_AARCH64_TLSLD_LDST16_DTPREL_LO12
-// CHECK-ELF-NAMES: 100 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC
 
         ldr w23, [x22, #:dtprel_lo12:var]
         ldrsw x21, [x20, #:dtprel_lo12_nc:var]
 // CHECK: ldr     w23, [x22, #:dtprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst32_dtprel_lo12
-// CHECK-NEXT: ldrsw   x21, [x20, #:dtprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
+// CHECK: ldrsw   x21, [x20, #:dtprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst32_dtprel_lo12_n
 
-// CHECK-ELF: # Relocation 26
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000068)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000217)
-// CHECK-ELF: # Relocation 27
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000006c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000218)
+// CHECK-ELF-NEXT:     0x68 R_AARCH64_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x6C R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 104 R_AARCH64_TLSLD_LDST32_DTPREL_LO12
-// CHECK-ELF-NAMES: 108 R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC
 
         ldr x19, [x18, #:dtprel_lo12:var]
         str x17, [x16, #:dtprel_lo12_nc:var]
 // CHECK: ldr     x19, [x18, #:dtprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst64_dtprel_lo12
-// CHECK-NEXT: str     x17, [x16, #:dtprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
+// CHECK: str     x17, [x16, #:dtprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst64_dtprel_lo12_nc
 
 
-// CHECK-ELF: # Relocation 28
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000070)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000219)
-// CHECK-ELF: # Relocation 29
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000074)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021a)
+// CHECK-ELF-NEXT:     0x70 R_AARCH64_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x74 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 112 R_AARCH64_TLSLD_LDST64_DTPREL_LO12
-// CHECK-ELF-NAMES: 116 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC
 
         // TLS initial-exec forms
         movz x15, #:gottprel_g1:var
         movz w14, #:gottprel_g1:var
 // CHECK: movz    x15, #:gottprel_g1:var  // encoding: [0x0f'A',A,0xa0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
-// CHECK-NEXT: movz    w14, #:gottprel_g1:var  // encoding: [0x0e'A',A,0xa0'A',0x12'A']
+// CHECK: movz    w14, #:gottprel_g1:var  // encoding: [0x0e'A',A,0xa0'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
 
-// CHECK-ELF: # Relocation 30
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000078)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021b)
-// CHECK-ELF: # Relocation 31
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000007c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021b)
+// CHECK-ELF-NEXT:     0x78 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x7C R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM]]
 
-// CHECK-ELF-NAMES: 120 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1
-// CHECK-ELF-NAMES: 124 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1
 
         movk x13, #:gottprel_g0_nc:var
         movk w12, #:gottprel_g0_nc:var
 // CHECK: movk    x13, #:gottprel_g0_nc:var // encoding: [0x0d'A',A,0x80'A',0xf2'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
-// CHECK-NEXT: movk    w12, #:gottprel_g0_nc:var // encoding: [0x0c'A',A,0x80'A',0x72'A']
+// CHECK: movk    w12, #:gottprel_g0_nc:var // encoding: [0x0c'A',A,0x80'A',0x72'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
 
-// CHECK-ELF: # Relocation 32
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000080)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021c)
-// CHECK-ELF: # Relocation 33
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000084)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021c)
+// CHECK-ELF-NEXT:     0x80 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x84 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 128 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC
-// CHECK-ELF-NAMES: 132 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC
 
         adrp x11, :gottprel:var
         ldr x10, [x0, #:gottprel_lo12:var]
         ldr x9, :gottprel:var
 // CHECK: adrp    x11, :gottprel:var      // encoding: [0x0b'A',A,A,0x90'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_adr_gottprel_page
-// CHECK-NEXT: ldr     x10, [x0, #:gottprel_lo12:var] // encoding: [0x0a'A',A,0x40'A',0xf9'A']
+// CHECK: ldr     x10, [x0, #:gottprel_lo12:var] // encoding: [0x0a'A',A,0x40'A',0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_a64_ld64_gottprel_lo12_nc
-// CHECK-NEXT: ldr     x9, :gottprel:var       // encoding: [0x09'A',A,A,0x58'A']
+// CHECK: ldr     x9, :gottprel:var       // encoding: [0x09'A',A,A,0x58'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_ld_gottprel_prel19
 
-// CHECK-ELF: # Relocation 34
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000088)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021d)
-// CHECK-ELF: # Relocation 35
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000008c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021e)
-// CHECK-ELF: # Relocation 36
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000090)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021f)
-
-// CHECK-ELF-NAMES: 136 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE
-// CHECK-ELF-NAMES: 140 R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
-// CHECK-ELF-NAMES: 144 R_AARCH64_TLSIE_LD_GOTTPREL_PREL19
+// CHECK-ELF-NEXT:     0x88 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x8C R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x90 R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 [[VARSYM]]
+
 
         // TLS local-exec forms
         movz x3, #:tprel_g2:var
         movn x4, #:tprel_g2:var
 // CHECK: movz    x3, #:tprel_g2:var      // encoding: [0x03'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
-// CHECK-NEXT: movn    x4, #:tprel_g2:var      // encoding: [0x04'A',A,0xc0'A',0x92'A']
+// CHECK: movn    x4, #:tprel_g2:var      // encoding: [0x04'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
 
-// CHECK-ELF: # Relocation 37
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000094)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000220)
-// CHECK-ELF: # Relocation 38
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000098)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000220)
+// CHECK-ELF-NEXT:     0x94 R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x98 R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]]
 
-// CHECK-ELF-NAMES: 148 R_AARCH64_TLSLE_MOVW_TPREL_G2
-// CHECK-ELF-NAMES: 152 R_AARCH64_TLSLE_MOVW_TPREL_G2
 
         movz x5, #:tprel_g1:var
         movn x6, #:tprel_g1:var
@@ -377,53 +217,29 @@
         movn w8, #:tprel_g1:var
 // CHECK: movz    x5, #:tprel_g1:var      // encoding: [0x05'A',A,0xa0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
-// CHECK-NEXT: movn    x6, #:tprel_g1:var      // encoding: [0x06'A',A,0xa0'A',0x92'A']
+// CHECK: movn    x6, #:tprel_g1:var      // encoding: [0x06'A',A,0xa0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
-// CHECK-NEXT: movz    w7, #:tprel_g1:var      // encoding: [0x07'A',A,0xa0'A',0x12'A']
+// CHECK: movz    w7, #:tprel_g1:var      // encoding: [0x07'A',A,0xa0'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
-// CHECK-NEXT: movn    w8, #:tprel_g1:var      // encoding: [0x08'A',A,0xa0'A',0x12'A']
+// CHECK: movn    w8, #:tprel_g1:var      // encoding: [0x08'A',A,0xa0'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
 
-// CHECK-ELF: # Relocation 39
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000009c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
-// CHECK-ELF: # Relocation 40
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
-// CHECK-ELF: # Relocation 41
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
-// CHECK-ELF: # Relocation 42
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
-
-// CHECK-ELF-NAMES: 156 R_AARCH64_TLSLE_MOVW_TPREL_G1
-// CHECK-ELF-NAMES: 160 R_AARCH64_TLSLE_MOVW_TPREL_G1
-// CHECK-ELF-NAMES: 164 R_AARCH64_TLSLE_MOVW_TPREL_G1
-// CHECK-ELF-NAMES: 168 R_AARCH64_TLSLE_MOVW_TPREL_G1
+// CHECK-ELF-NEXT:     0x9C R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xA0 R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xA4 R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xA8 R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+
 
         movk x9, #:tprel_g1_nc:var
         movk w10, #:tprel_g1_nc:var
 // CHECK: movk    x9, #:tprel_g1_nc:var   // encoding: [0x09'A',A,0xa0'A',0xf2'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
-// CHECK-NEXT: movk    w10, #:tprel_g1_nc:var  // encoding: [0x0a'A',A,0xa0'A',0x72'A']
+// CHECK: movk    w10, #:tprel_g1_nc:var  // encoding: [0x0a'A',A,0xa0'A',0x72'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
 
-// CHECK-ELF: # Relocation 43
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000ac)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000222)
-// CHECK-ELF: # Relocation 44
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000222)
+// CHECK-ELF-NEXT:     0xAC R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0xB0 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 172 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
-// CHECK-ELF-NAMES: 176 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
 
         movz x11, #:tprel_g0:var
         movn x12, #:tprel_g0:var
@@ -431,187 +247,104 @@
         movn w14, #:tprel_g0:var
 // CHECK: movz    x11, #:tprel_g0:var     // encoding: [0x0b'A',A,0x80'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
-// CHECK-NEXT: movn    x12, #:tprel_g0:var     // encoding: [0x0c'A',A,0x80'A',0x92'A']
+// CHECK: movn    x12, #:tprel_g0:var     // encoding: [0x0c'A',A,0x80'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
-// CHECK-NEXT: movz    w13, #:tprel_g0:var     // encoding: [0x0d'A',A,0x80'A',0x12'A']
+// CHECK: movz    w13, #:tprel_g0:var     // encoding: [0x0d'A',A,0x80'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
-// CHECK-NEXT: movn    w14, #:tprel_g0:var     // encoding: [0x0e'A',A,0x80'A',0x12'A']
+// CHECK: movn    w14, #:tprel_g0:var     // encoding: [0x0e'A',A,0x80'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
 
-// CHECK-ELF: # Relocation 45
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
-// CHECK-ELF: # Relocation 46
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
-// CHECK-ELF: # Relocation 47
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000bc)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
-// CHECK-ELF: # Relocation 48
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
-
-// CHECK-ELF-NAMES: 180 R_AARCH64_TLSLE_MOVW_TPREL_G0
-// CHECK-ELF-NAMES: 184 R_AARCH64_TLSLE_MOVW_TPREL_G0
-// CHECK-ELF-NAMES: 188 R_AARCH64_TLSLE_MOVW_TPREL_G0
-// CHECK-ELF-NAMES: 192 R_AARCH64_TLSLE_MOVW_TPREL_G0
+// CHECK-ELF-NEXT:     0xB4 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xB8 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xBC R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xC0 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+
 
         movk x15, #:tprel_g0_nc:var
         movk w16, #:tprel_g0_nc:var
 // CHECK: movk    x15, #:tprel_g0_nc:var  // encoding: [0x0f'A',A,0x80'A',0xf2'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
-// CHECK-NEXT: movk    w16, #:tprel_g0_nc:var  // encoding: [0x10'A',A,0x80'A',0x72'A']
+// CHECK: movk    w16, #:tprel_g0_nc:var  // encoding: [0x10'A',A,0x80'A',0x72'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
 
-// CHECK-ELF: # Relocation 49
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000224)
-// CHECK-ELF: # Relocation 50
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000224)
+// CHECK-ELF-NEXT:     0xC4 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0xC8 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 196 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
-// CHECK-ELF-NAMES: 200 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
 
         add x17, x18, #:tprel_hi12:var, lsl #12
         add w19, w20, #:tprel_hi12:var, lsl #12
 // CHECK: add     x17, x18, #:tprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
-// CHECK-NEXT: add     w19, w20, #:tprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
+// CHECK: add     w19, w20, #:tprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
 
-// CHECK-ELF: # Relocation 51
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000cc)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000225)
-// CHECK-ELF: # Relocation 52
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000225)
+// CHECK-ELF-NEXT:     0xCC R_AARCH64_TLSLE_ADD_TPREL_HI12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xD0 R_AARCH64_TLSLE_ADD_TPREL_HI12 [[VARSYM]]
 
-// CHECK-ELF-NAMES: 204 R_AARCH64_TLSLE_ADD_TPREL_HI12
-// CHECK-ELF-NAMES: 208 R_AARCH64_TLSLE_ADD_TPREL_HI12
 
         add x21, x22, #:tprel_lo12:var
         add w23, w24, #:tprel_lo12:var
 // CHECK: add     x21, x22, #:tprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
-// CHECK-NEXT: add     w23, w24, #:tprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
+// CHECK: add     w23, w24, #:tprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
 
-// CHECK-ELF: # Relocation 53
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000226)
-// CHECK-ELF: # Relocation 54
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000226)
+// CHECK-ELF-NEXT:     0xD4 R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xD8 R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]]
 
-// CHECK-ELF-NAMES: 212 R_AARCH64_TLSLE_ADD_TPREL_LO12
-// CHECK-ELF-NAMES: 216 R_AARCH64_TLSLE_ADD_TPREL_LO12
 
         add x25, x26, #:tprel_lo12_nc:var
         add w27, w28, #:tprel_lo12_nc:var
 // CHECK: add     x25, x26, #:tprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
-// CHECK-NEXT: add     w27, w28, #:tprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
+// CHECK: add     w27, w28, #:tprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
 
-// CHECK-ELF: # Relocation 55
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000dc)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000227)
-// CHECK-ELF: # Relocation 56
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000227)
-
+// CHECK-ELF-NEXT:     0xDC R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0xE0 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 220 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
-// CHECK-ELF-NAMES: 224 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
 
         ldrb w29, [x30, #:tprel_lo12:var]
         ldrsb x29, [x28, #:tprel_lo12_nc:var]
 // CHECK: ldrb    w29, [x30, #:tprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst8_tprel_lo12
-// CHECK-NEXT: ldrsb   x29, [x28, #:tprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
+// CHECK: ldrsb   x29, [x28, #:tprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst8_tprel_lo12_nc
 
-// CHECK-ELF: # Relocation 57
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000228)
-// CHECK-ELF: # Relocation 58
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000229)
+// CHECK-ELF-NEXT:     0xE4 R_AARCH64_TLSLE_LDST8_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xE8 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 228 R_AARCH64_TLSLE_LDST8_TPREL_LO12
-// CHECK-ELF-NAMES: 232 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC
 
         strh w27, [x26, #:tprel_lo12:var]
         ldrsh x25, [x24, #:tprel_lo12_nc:var]
 // CHECK: strh    w27, [x26, #:tprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst16_tprel_lo12
-// CHECK-NEXT: ldrsh   x25, [x24, #:tprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
+// CHECK: ldrsh   x25, [x24, #:tprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst16_tprel_lo12_n
 
-// CHECK-ELF: # Relocation 59
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000ec)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022a)
-// CHECK-ELF: # Relocation 60
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022b)
+// CHECK-ELF-NEXT:     0xEC R_AARCH64_TLSLE_LDST16_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xF0 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 236 R_AARCH64_TLSLE_LDST16_TPREL_LO12
-// CHECK-ELF-NAMES: 240 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC
 
         ldr w23, [x22, #:tprel_lo12:var]
         ldrsw x21, [x20, #:tprel_lo12_nc:var]
 // CHECK: ldr     w23, [x22, #:tprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst32_tprel_lo12
-// CHECK-NEXT: ldrsw   x21, [x20, #:tprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
+// CHECK: ldrsw   x21, [x20, #:tprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst32_tprel_lo12_n
 
-// CHECK-ELF: # Relocation 61
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022c)
-// CHECK-ELF: # Relocation 62
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022d)
-
-// CHECK-ELF-NAMES: 244 R_AARCH64_TLSLE_LDST32_TPREL_LO12
-// CHECK-ELF-NAMES: 248 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC
+// CHECK-ELF-NEXT:     0xF4 R_AARCH64_TLSLE_LDST32_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xF8 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]]
 
         ldr x19, [x18, #:tprel_lo12:var]
         str x17, [x16, #:tprel_lo12_nc:var]
 // CHECK: ldr     x19, [x18, #:tprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst64_tprel_lo12
-// CHECK-NEXT: str     x17, [x16, #:tprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
+// CHECK: str     x17, [x16, #:tprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst64_tprel_lo12_nc
 
-// CHECK-ELF: # Relocation 63
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000fc)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022e)
-// CHECK-ELF: # Relocation 64
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000100)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022f)
-
-// CHECK-ELF-NAMES: 252 R_AARCH64_TLSLE_LDST64_TPREL_LO12
-// CHECK-ELF-NAMES: 256 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC
+// CHECK-ELF-NEXT:     0xFC  R_AARCH64_TLSLE_LDST64_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x100 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]]
 
         // TLS descriptor forms
         adrp x8, :tlsdesc:var
@@ -622,41 +355,27 @@
 
 // CHECK: adrp    x8, :tlsdesc:var        // encoding: [0x08'A',A,A,0x90'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_adr_page
-// CHECK-NEXT: ldr     x7, [x6, #:tlsdesc_lo12:var] // encoding: [0xc7'A',A,0x40'A',0xf9'A']
+// CHECK: ldr     x7, [x6, #:tlsdesc_lo12:var] // encoding: [0xc7'A',A,0x40'A',0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_ld64_lo12_nc
-// CHECK-NEXT: add     x5, x4, #:tlsdesc_lo12:var // encoding: [0x85'A',A,A,0x91'A']
+// CHECK: add     x5, x4, #:tlsdesc_lo12:var // encoding: [0x85'A',A,A,0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_add_lo12_nc
-// CHECK-NEXT: .tlsdesccall var                // encoding: []
+// CHECK: .tlsdesccall var                // encoding: []
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_call
 // CHECK: blr     x3                      // encoding: [0x60,0x00,0x3f,0xd6]
 
 
-// CHECK-ELF: # Relocation 65
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000104)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000232)
-// CHECK-ELF: # Relocation 66
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000108)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000233)
-// CHECK-ELF: # Relocation 67
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000010c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000234)
-// CHECK-ELF: # Relocation 68
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000110)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000239)
-
-// CHECK-ELF-NAMES: 260 R_AARCH64_TLSDESC_ADR_PAGE
-// CHECK-ELF-NAMES: 264 R_AARCH64_TLSDESC_LD64_LO12_NC
-// CHECK-ELF-NAMES: 268 R_AARCH64_TLSDESC_ADD_LO12_NC
-// CHECK-ELF-NAMES: 272 R_AARCH64_TLSDESC_CALL
+// CHECK-ELF-NEXT:     0x104 R_AARCH64_TLSDESC_ADR_PAGE [[VARSYM]]
+// CHECK-ELF-NEXT:     0x108 R_AARCH64_TLSDESC_LD64_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x10C R_AARCH64_TLSDESC_ADD_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x110 R_AARCH64_TLSDESC_CALL [[VARSYM]]
 
 
 // Make sure symbol 5 has type STT_TLS:
 
-// CHECK-ELF: # Symbol 5
-// CHECK-ELF-NEXT: (('st_name', 0x00000006) # 'var'
-// CHECK-ELF-NEXT:  ('st_bind', 0x1)
-// CHECK-ELF-NEXT:  ('st_type', 0x6)
+// CHECK-ELF:      Symbols [
+// CHECK-ELF:        Symbol {
+// CHECK-ELF:          Name: var (6)
+// CHECK-ELF-NEXT:     Value:
+// CHECK-ELF-NEXT:     Size:
+// CHECK-ELF-NEXT:     Binding: Global
+// CHECK-ELF-NEXT:     Type: TLS
diff --git a/test/MC/AArch64/trace-regs-diagnostics.s b/test/MC/AArch64/trace-regs-diagnostics.s
new file mode 100644
index 000000000000..82ec7c0c745d
--- /dev/null
+++ b/test/MC/AArch64/trace-regs-diagnostics.s
@@ -0,0 +1,156 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s
+        // Write-only
+        mrs x12, trcoslar
+        mrs x10, trclar
+// CHECK: error: expected readable system register
+// CHECK-NEXT:         mrs x12, trcoslar
+// CHECK-NEXT:                  ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT:         mrs x10, trclar
+// CHECK-NEXT:                  ^
+
+        // Read-only
+        msr trcstatr, x0
+        msr trcidr8, x13
+        msr trcidr9, x25
+        msr trcidr10, x2
+        msr trcidr11, x19
+        msr trcidr12, x15
+        msr trcidr13, x24
+        msr trcidr0, x20
+        msr trcidr1, x5
+        msr trcidr2, x18
+        msr trcidr3, x10
+        msr trcidr4, x1
+        msr trcidr5, x10
+        msr trcidr6, x4
+        msr trcidr7, x0
+        msr trcoslsr, x23
+        msr trcpdsr, x21
+        msr trcdevaff0, x4
+        msr trcdevaff1, x17
+        msr trclsr, x18
+        msr trcauthstatus, x10
+        msr trcdevarch, x8
+        msr trcdevid, x11
+        msr trcdevtype, x1
+        msr trcpidr4, x2
+        msr trcpidr5, x7
+        msr trcpidr6, x17
+        msr trcpidr7, x5
+        msr trcpidr0, x0
+        msr trcpidr1, x16
+        msr trcpidr2, x29
+        msr trcpidr3, x1
+        msr trccidr0, x27
+        msr trccidr1, x1
+        msr trccidr2, x24
+        msr trccidr3, x8
+// CHECK: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcstatr, x0
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr8, x13
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr9, x25
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr10, x2
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr11, x19
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr12, x15
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr13, x24
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr0, x20
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr1, x5
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr2, x18
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr3, x10
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr4, x1
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr5, x10
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr6, x4
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcidr7, x0
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcoslsr, x23
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcpdsr, x21
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcdevaff0, x4
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcdevaff1, x17
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trclsr, x18
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcauthstatus, x10
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcdevarch, x8
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcdevid, x11
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcdevtype, x1
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcpidr4, x2
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcpidr5, x7
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcpidr6, x17
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcpidr7, x5
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcpidr0, x0
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcpidr1, x16
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcpidr2, x29
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trcpidr3, x1
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trccidr0, x27
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trccidr1, x1
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trccidr2, x24
+// CHECK-NEXT:             ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT:         msr trccidr3, x8
+// CHECK-NEXT:             ^
diff --git a/test/MC/AArch64/trace-regs.s b/test/MC/AArch64/trace-regs.s
new file mode 100644
index 000000000000..f9ab4c9ad975
--- /dev/null
+++ b/test/MC/AArch64/trace-regs.s
@@ -0,0 +1,766 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
+        mrs x8, trcstatr
+        mrs x9, trcidr8
+        mrs x11, trcidr9
+        mrs x25, trcidr10
+        mrs x7, trcidr11
+        mrs x7, trcidr12
+        mrs x6, trcidr13
+        mrs x27, trcidr0
+        mrs x29, trcidr1
+        mrs x4, trcidr2
+        mrs x8, trcidr3
+        mrs x15, trcidr4
+        mrs x20, trcidr5
+        mrs x6, trcidr6
+        mrs x6, trcidr7
+        mrs x24, trcoslsr
+        mrs x18, trcpdsr
+        mrs x28, trcdevaff0
+        mrs x5, trcdevaff1
+        mrs x5, trclsr
+        mrs x11, trcauthstatus
+        mrs x13, trcdevarch
+        mrs x18, trcdevid
+        mrs x22, trcdevtype
+        mrs x14, trcpidr4
+        mrs x5, trcpidr5
+        mrs x5, trcpidr6
+        mrs x9, trcpidr7
+        mrs x15, trcpidr0
+        mrs x6, trcpidr1
+        mrs x11, trcpidr2
+        mrs x20, trcpidr3
+        mrs x17, trccidr0
+        mrs x2, trccidr1
+        mrs x20, trccidr2
+        mrs x4, trccidr3
+        mrs x11, trcprgctlr
+        mrs x23, trcprocselr
+        mrs x13, trcconfigr
+        mrs x23, trcauxctlr
+        mrs x9, trceventctl0r
+        mrs x16, trceventctl1r
+        mrs x4, trcstallctlr
+        mrs x14, trctsctlr
+        mrs x24, trcsyncpr
+        mrs x28, trcccctlr
+        mrs x15, trcbbctlr
+        mrs x1, trctraceidr
+        mrs x20, trcqctlr
+        mrs x2, trcvictlr
+        mrs x12, trcviiectlr
+        mrs x16, trcvissctlr
+        mrs x8, trcvipcssctlr
+        mrs x27, trcvdctlr
+        mrs x9, trcvdsacctlr
+        mrs x0, trcvdarcctlr
+        mrs x13, trcseqevr0
+        mrs x11, trcseqevr1
+        mrs x26, trcseqevr2
+        mrs x14, trcseqrstevr
+        mrs x4, trcseqstr
+        mrs x17, trcextinselr
+        mrs x21, trccntrldvr0
+        mrs x10, trccntrldvr1
+        mrs x20, trccntrldvr2
+        mrs x5, trccntrldvr3
+        mrs x17, trccntctlr0
+        mrs x1, trccntctlr1
+        mrs x17, trccntctlr2
+        mrs x6, trccntctlr3
+        mrs x28, trccntvr0
+        mrs x23, trccntvr1
+        mrs x9, trccntvr2
+        mrs x6, trccntvr3
+        mrs x24, trcimspec0
+        mrs x24, trcimspec1
+        mrs x15, trcimspec2
+        mrs x10, trcimspec3
+        mrs x29, trcimspec4
+        mrs x18, trcimspec5
+        mrs x29, trcimspec6
+        mrs x2, trcimspec7
+        mrs x8, trcrsctlr2
+        mrs x0, trcrsctlr3
+        mrs x12, trcrsctlr4
+        mrs x26, trcrsctlr5
+        mrs x29, trcrsctlr6
+        mrs x17, trcrsctlr7
+        mrs x0, trcrsctlr8
+        mrs x1, trcrsctlr9
+        mrs x17, trcrsctlr10
+        mrs x21, trcrsctlr11
+        mrs x1, trcrsctlr12
+        mrs x8, trcrsctlr13
+        mrs x24, trcrsctlr14
+        mrs x0, trcrsctlr15
+        mrs x2, trcrsctlr16
+        mrs x29, trcrsctlr17
+        mrs x22, trcrsctlr18
+        mrs x6, trcrsctlr19
+        mrs x26, trcrsctlr20
+        mrs x26, trcrsctlr21
+        mrs x4, trcrsctlr22
+        mrs x12, trcrsctlr23
+        mrs x1, trcrsctlr24
+        mrs x0, trcrsctlr25
+        mrs x17, trcrsctlr26
+        mrs x8, trcrsctlr27
+        mrs x10, trcrsctlr28
+        mrs x25, trcrsctlr29
+        mrs x12, trcrsctlr30
+        mrs x11, trcrsctlr31
+        mrs x18, trcssccr0
+        mrs x12, trcssccr1
+        mrs x3, trcssccr2
+        mrs x2, trcssccr3
+        mrs x21, trcssccr4
+        mrs x10, trcssccr5
+        mrs x22, trcssccr6
+        mrs x23, trcssccr7
+        mrs x23, trcsscsr0
+        mrs x19, trcsscsr1
+        mrs x25, trcsscsr2
+        mrs x17, trcsscsr3
+        mrs x19, trcsscsr4
+        mrs x11, trcsscsr5
+        mrs x5, trcsscsr6
+        mrs x9, trcsscsr7
+        mrs x1, trcsspcicr0
+        mrs x12, trcsspcicr1
+        mrs x21, trcsspcicr2
+        mrs x11, trcsspcicr3
+        mrs x3, trcsspcicr4
+        mrs x9, trcsspcicr5
+        mrs x5, trcsspcicr6
+        mrs x2, trcsspcicr7
+        mrs x26, trcpdcr
+        mrs x8, trcacvr0
+        mrs x15, trcacvr1
+        mrs x19, trcacvr2
+        mrs x8, trcacvr3
+        mrs x28, trcacvr4
+        mrs x3, trcacvr5
+        mrs x25, trcacvr6
+        mrs x24, trcacvr7
+        mrs x6, trcacvr8
+        mrs x3, trcacvr9
+        mrs x24, trcacvr10
+        mrs x3, trcacvr11
+        mrs x12, trcacvr12
+        mrs x9, trcacvr13
+        mrs x14, trcacvr14
+        mrs x3, trcacvr15
+        mrs x21, trcacatr0
+        mrs x26, trcacatr1
+        mrs x8, trcacatr2
+        mrs x22, trcacatr3
+        mrs x6, trcacatr4
+        mrs x29, trcacatr5
+        mrs x5, trcacatr6
+        mrs x18, trcacatr7
+        mrs x2, trcacatr8
+        mrs x19, trcacatr9
+        mrs x13, trcacatr10
+        mrs x25, trcacatr11
+        mrs x18, trcacatr12
+        mrs x29, trcacatr13
+        mrs x9, trcacatr14
+        mrs x18, trcacatr15
+        mrs x29, trcdvcvr0
+        mrs x15, trcdvcvr1
+        mrs x15, trcdvcvr2
+        mrs x15, trcdvcvr3
+        mrs x19, trcdvcvr4
+        mrs x22, trcdvcvr5
+        mrs x27, trcdvcvr6
+        mrs x1, trcdvcvr7
+        mrs x29, trcdvcmr0
+        mrs x9, trcdvcmr1
+        mrs x1, trcdvcmr2
+        mrs x2, trcdvcmr3
+        mrs x5, trcdvcmr4
+        mrs x21, trcdvcmr5
+        mrs x5, trcdvcmr6
+        mrs x1, trcdvcmr7
+        mrs x21, trccidcvr0
+        mrs x24, trccidcvr1
+        mrs x24, trccidcvr2
+        mrs x12, trccidcvr3
+        mrs x10, trccidcvr4
+        mrs x9, trccidcvr5
+        mrs x6, trccidcvr6
+        mrs x20, trccidcvr7
+        mrs x20, trcvmidcvr0
+        mrs x20, trcvmidcvr1
+        mrs x26, trcvmidcvr2
+        mrs x1, trcvmidcvr3
+        mrs x14, trcvmidcvr4
+        mrs x27, trcvmidcvr5
+        mrs x29, trcvmidcvr6
+        mrs x17, trcvmidcvr7
+        mrs x10, trccidcctlr0
+        mrs x4, trccidcctlr1
+        mrs x9, trcvmidcctlr0
+        mrs x11, trcvmidcctlr1
+        mrs x22, trcitctrl
+        mrs x23, trcclaimset
+        mrs x14, trcclaimclr
+// CHECK: mrs      x8, trcstatr               // encoding: [0x08,0x03,0x31,0xd5]
+// CHECK: mrs      x9, trcidr8                // encoding: [0xc9,0x00,0x31,0xd5]
+// CHECK: mrs      x11, trcidr9               // encoding: [0xcb,0x01,0x31,0xd5]
+// CHECK: mrs      x25, trcidr10              // encoding: [0xd9,0x02,0x31,0xd5]
+// CHECK: mrs      x7, trcidr11               // encoding: [0xc7,0x03,0x31,0xd5]
+// CHECK: mrs      x7, trcidr12               // encoding: [0xc7,0x04,0x31,0xd5]
+// CHECK: mrs      x6, trcidr13               // encoding: [0xc6,0x05,0x31,0xd5]
+// CHECK: mrs      x27, trcidr0               // encoding: [0xfb,0x08,0x31,0xd5]
+// CHECK: mrs      x29, trcidr1               // encoding: [0xfd,0x09,0x31,0xd5]
+// CHECK: mrs      x4, trcidr2                // encoding: [0xe4,0x0a,0x31,0xd5]
+// CHECK: mrs      x8, trcidr3                // encoding: [0xe8,0x0b,0x31,0xd5]
+// CHECK: mrs      x15, trcidr4               // encoding: [0xef,0x0c,0x31,0xd5]
+// CHECK: mrs      x20, trcidr5               // encoding: [0xf4,0x0d,0x31,0xd5]
+// CHECK: mrs      x6, trcidr6                // encoding: [0xe6,0x0e,0x31,0xd5]
+// CHECK: mrs      x6, trcidr7                // encoding: [0xe6,0x0f,0x31,0xd5]
+// CHECK: mrs      x24, trcoslsr              // encoding: [0x98,0x11,0x31,0xd5]
+// CHECK: mrs      x18, trcpdsr               // encoding: [0x92,0x15,0x31,0xd5]
+// CHECK: mrs      x28, trcdevaff0            // encoding: [0xdc,0x7a,0x31,0xd5]
+// CHECK: mrs      x5, trcdevaff1             // encoding: [0xc5,0x7b,0x31,0xd5]
+// CHECK: mrs      x5, trclsr                 // encoding: [0xc5,0x7d,0x31,0xd5]
+// CHECK: mrs      x11, trcauthstatus         // encoding: [0xcb,0x7e,0x31,0xd5]
+// CHECK: mrs      x13, trcdevarch            // encoding: [0xcd,0x7f,0x31,0xd5]
+// CHECK: mrs      x18, trcdevid              // encoding: [0xf2,0x72,0x31,0xd5]
+// CHECK: mrs      x22, trcdevtype            // encoding: [0xf6,0x73,0x31,0xd5]
+// CHECK: mrs      x14, trcpidr4              // encoding: [0xee,0x74,0x31,0xd5]
+// CHECK: mrs      x5, trcpidr5               // encoding: [0xe5,0x75,0x31,0xd5]
+// CHECK: mrs      x5, trcpidr6               // encoding: [0xe5,0x76,0x31,0xd5]
+// CHECK: mrs      x9, trcpidr7               // encoding: [0xe9,0x77,0x31,0xd5]
+// CHECK: mrs      x15, trcpidr0              // encoding: [0xef,0x78,0x31,0xd5]
+// CHECK: mrs      x6, trcpidr1               // encoding: [0xe6,0x79,0x31,0xd5]
+// CHECK: mrs      x11, trcpidr2              // encoding: [0xeb,0x7a,0x31,0xd5]
+// CHECK: mrs      x20, trcpidr3              // encoding: [0xf4,0x7b,0x31,0xd5]
+// CHECK: mrs      x17, trccidr0              // encoding: [0xf1,0x7c,0x31,0xd5]
+// CHECK: mrs      x2, trccidr1               // encoding: [0xe2,0x7d,0x31,0xd5]
+// CHECK: mrs      x20, trccidr2              // encoding: [0xf4,0x7e,0x31,0xd5]
+// CHECK: mrs      x4, trccidr3               // encoding: [0xe4,0x7f,0x31,0xd5]
+// CHECK: mrs      x11, trcprgctlr            // encoding: [0x0b,0x01,0x31,0xd5]
+// CHECK: mrs      x23, trcprocselr           // encoding: [0x17,0x02,0x31,0xd5]
+// CHECK: mrs      x13, trcconfigr            // encoding: [0x0d,0x04,0x31,0xd5]
+// CHECK: mrs      x23, trcauxctlr            // encoding: [0x17,0x06,0x31,0xd5]
+// CHECK: mrs      x9, trceventctl0r          // encoding: [0x09,0x08,0x31,0xd5]
+// CHECK: mrs      x16, trceventctl1r         // encoding: [0x10,0x09,0x31,0xd5]
+// CHECK: mrs      x4, trcstallctlr           // encoding: [0x04,0x0b,0x31,0xd5]
+// CHECK: mrs      x14, trctsctlr             // encoding: [0x0e,0x0c,0x31,0xd5]
+// CHECK: mrs      x24, trcsyncpr             // encoding: [0x18,0x0d,0x31,0xd5]
+// CHECK: mrs      x28, trcccctlr             // encoding: [0x1c,0x0e,0x31,0xd5]
+// CHECK: mrs      x15, trcbbctlr             // encoding: [0x0f,0x0f,0x31,0xd5]
+// CHECK: mrs      x1, trctraceidr            // encoding: [0x21,0x00,0x31,0xd5]
+// CHECK: mrs      x20, trcqctlr              // encoding: [0x34,0x01,0x31,0xd5]
+// CHECK: mrs      x2, trcvictlr              // encoding: [0x42,0x00,0x31,0xd5]
+// CHECK: mrs      x12, trcviiectlr           // encoding: [0x4c,0x01,0x31,0xd5]
+// CHECK: mrs      x16, trcvissctlr           // encoding: [0x50,0x02,0x31,0xd5]
+// CHECK: mrs      x8, trcvipcssctlr          // encoding: [0x48,0x03,0x31,0xd5]
+// CHECK: mrs      x27, trcvdctlr             // encoding: [0x5b,0x08,0x31,0xd5]
+// CHECK: mrs      x9, trcvdsacctlr           // encoding: [0x49,0x09,0x31,0xd5]
+// CHECK: mrs      x0, trcvdarcctlr           // encoding: [0x40,0x0a,0x31,0xd5]
+// CHECK: mrs      x13, trcseqevr0            // encoding: [0x8d,0x00,0x31,0xd5]
+// CHECK: mrs      x11, trcseqevr1            // encoding: [0x8b,0x01,0x31,0xd5]
+// CHECK: mrs      x26, trcseqevr2            // encoding: [0x9a,0x02,0x31,0xd5]
+// CHECK: mrs      x14, trcseqrstevr          // encoding: [0x8e,0x06,0x31,0xd5]
+// CHECK: mrs      x4, trcseqstr              // encoding: [0x84,0x07,0x31,0xd5]
+// CHECK: mrs      x17, trcextinselr          // encoding: [0x91,0x08,0x31,0xd5]
+// CHECK: mrs      x21, trccntrldvr0          // encoding: [0xb5,0x00,0x31,0xd5]
+// CHECK: mrs      x10, trccntrldvr1          // encoding: [0xaa,0x01,0x31,0xd5]
+// CHECK: mrs      x20, trccntrldvr2          // encoding: [0xb4,0x02,0x31,0xd5]
+// CHECK: mrs      x5, trccntrldvr3           // encoding: [0xa5,0x03,0x31,0xd5]
+// CHECK: mrs      x17, trccntctlr0           // encoding: [0xb1,0x04,0x31,0xd5]
+// CHECK: mrs      x1, trccntctlr1            // encoding: [0xa1,0x05,0x31,0xd5]
+// CHECK: mrs      x17, trccntctlr2           // encoding: [0xb1,0x06,0x31,0xd5]
+// CHECK: mrs      x6, trccntctlr3            // encoding: [0xa6,0x07,0x31,0xd5]
+// CHECK: mrs      x28, trccntvr0             // encoding: [0xbc,0x08,0x31,0xd5]
+// CHECK: mrs      x23, trccntvr1             // encoding: [0xb7,0x09,0x31,0xd5]
+// CHECK: mrs      x9, trccntvr2              // encoding: [0xa9,0x0a,0x31,0xd5]
+// CHECK: mrs      x6, trccntvr3              // encoding: [0xa6,0x0b,0x31,0xd5]
+// CHECK: mrs      x24, trcimspec0            // encoding: [0xf8,0x00,0x31,0xd5]
+// CHECK: mrs      x24, trcimspec1            // encoding: [0xf8,0x01,0x31,0xd5]
+// CHECK: mrs      x15, trcimspec2            // encoding: [0xef,0x02,0x31,0xd5]
+// CHECK: mrs      x10, trcimspec3            // encoding: [0xea,0x03,0x31,0xd5]
+// CHECK: mrs      x29, trcimspec4            // encoding: [0xfd,0x04,0x31,0xd5]
+// CHECK: mrs      x18, trcimspec5            // encoding: [0xf2,0x05,0x31,0xd5]
+// CHECK: mrs      x29, trcimspec6            // encoding: [0xfd,0x06,0x31,0xd5]
+// CHECK: mrs      x2, trcimspec7             // encoding: [0xe2,0x07,0x31,0xd5]
+// CHECK: mrs      x8, trcrsctlr2             // encoding: [0x08,0x12,0x31,0xd5]
+// CHECK: mrs      x0, trcrsctlr3             // encoding: [0x00,0x13,0x31,0xd5]
+// CHECK: mrs      x12, trcrsctlr4            // encoding: [0x0c,0x14,0x31,0xd5]
+// CHECK: mrs      x26, trcrsctlr5            // encoding: [0x1a,0x15,0x31,0xd5]
+// CHECK: mrs      x29, trcrsctlr6            // encoding: [0x1d,0x16,0x31,0xd5]
+// CHECK: mrs      x17, trcrsctlr7            // encoding: [0x11,0x17,0x31,0xd5]
+// CHECK: mrs      x0, trcrsctlr8             // encoding: [0x00,0x18,0x31,0xd5]
+// CHECK: mrs      x1, trcrsctlr9             // encoding: [0x01,0x19,0x31,0xd5]
+// CHECK: mrs      x17, trcrsctlr10           // encoding: [0x11,0x1a,0x31,0xd5]
+// CHECK: mrs      x21, trcrsctlr11           // encoding: [0x15,0x1b,0x31,0xd5]
+// CHECK: mrs      x1, trcrsctlr12            // encoding: [0x01,0x1c,0x31,0xd5]
+// CHECK: mrs      x8, trcrsctlr13            // encoding: [0x08,0x1d,0x31,0xd5]
+// CHECK: mrs      x24, trcrsctlr14           // encoding: [0x18,0x1e,0x31,0xd5]
+// CHECK: mrs      x0, trcrsctlr15            // encoding: [0x00,0x1f,0x31,0xd5]
+// CHECK: mrs      x2, trcrsctlr16            // encoding: [0x22,0x10,0x31,0xd5]
+// CHECK: mrs      x29, trcrsctlr17           // encoding: [0x3d,0x11,0x31,0xd5]
+// CHECK: mrs      x22, trcrsctlr18           // encoding: [0x36,0x12,0x31,0xd5]
+// CHECK: mrs      x6, trcrsctlr19            // encoding: [0x26,0x13,0x31,0xd5]
+// CHECK: mrs      x26, trcrsctlr20           // encoding: [0x3a,0x14,0x31,0xd5]
+// CHECK: mrs      x26, trcrsctlr21           // encoding: [0x3a,0x15,0x31,0xd5]
+// CHECK: mrs      x4, trcrsctlr22            // encoding: [0x24,0x16,0x31,0xd5]
+// CHECK: mrs      x12, trcrsctlr23           // encoding: [0x2c,0x17,0x31,0xd5]
+// CHECK: mrs      x1, trcrsctlr24            // encoding: [0x21,0x18,0x31,0xd5]
+// CHECK: mrs      x0, trcrsctlr25            // encoding: [0x20,0x19,0x31,0xd5]
+// CHECK: mrs      x17, trcrsctlr26           // encoding: [0x31,0x1a,0x31,0xd5]
+// CHECK: mrs      x8, trcrsctlr27            // encoding: [0x28,0x1b,0x31,0xd5]
+// CHECK: mrs      x10, trcrsctlr28           // encoding: [0x2a,0x1c,0x31,0xd5]
+// CHECK: mrs      x25, trcrsctlr29           // encoding: [0x39,0x1d,0x31,0xd5]
+// CHECK: mrs      x12, trcrsctlr30           // encoding: [0x2c,0x1e,0x31,0xd5]
+// CHECK: mrs      x11, trcrsctlr31           // encoding: [0x2b,0x1f,0x31,0xd5]
+// CHECK: mrs      x18, trcssccr0             // encoding: [0x52,0x10,0x31,0xd5]
+// CHECK: mrs      x12, trcssccr1             // encoding: [0x4c,0x11,0x31,0xd5]
+// CHECK: mrs      x3, trcssccr2              // encoding: [0x43,0x12,0x31,0xd5]
+// CHECK: mrs      x2, trcssccr3              // encoding: [0x42,0x13,0x31,0xd5]
+// CHECK: mrs      x21, trcssccr4             // encoding: [0x55,0x14,0x31,0xd5]
+// CHECK: mrs      x10, trcssccr5             // encoding: [0x4a,0x15,0x31,0xd5]
+// CHECK: mrs      x22, trcssccr6             // encoding: [0x56,0x16,0x31,0xd5]
+// CHECK: mrs      x23, trcssccr7             // encoding: [0x57,0x17,0x31,0xd5]
+// CHECK: mrs      x23, trcsscsr0             // encoding: [0x57,0x18,0x31,0xd5]
+// CHECK: mrs      x19, trcsscsr1             // encoding: [0x53,0x19,0x31,0xd5]
+// CHECK: mrs      x25, trcsscsr2             // encoding: [0x59,0x1a,0x31,0xd5]
+// CHECK: mrs      x17, trcsscsr3             // encoding: [0x51,0x1b,0x31,0xd5]
+// CHECK: mrs      x19, trcsscsr4             // encoding: [0x53,0x1c,0x31,0xd5]
+// CHECK: mrs      x11, trcsscsr5             // encoding: [0x4b,0x1d,0x31,0xd5]
+// CHECK: mrs      x5, trcsscsr6              // encoding: [0x45,0x1e,0x31,0xd5]
+// CHECK: mrs      x9, trcsscsr7              // encoding: [0x49,0x1f,0x31,0xd5]
+// CHECK: mrs      x1, trcsspcicr0            // encoding: [0x61,0x10,0x31,0xd5]
+// CHECK: mrs      x12, trcsspcicr1           // encoding: [0x6c,0x11,0x31,0xd5]
+// CHECK: mrs      x21, trcsspcicr2           // encoding: [0x75,0x12,0x31,0xd5]
+// CHECK: mrs      x11, trcsspcicr3           // encoding: [0x6b,0x13,0x31,0xd5]
+// CHECK: mrs      x3, trcsspcicr4            // encoding: [0x63,0x14,0x31,0xd5]
+// CHECK: mrs      x9, trcsspcicr5            // encoding: [0x69,0x15,0x31,0xd5]
+// CHECK: mrs      x5, trcsspcicr6            // encoding: [0x65,0x16,0x31,0xd5]
+// CHECK: mrs      x2, trcsspcicr7            // encoding: [0x62,0x17,0x31,0xd5]
+// CHECK: mrs      x26, trcpdcr               // encoding: [0x9a,0x14,0x31,0xd5]
+// CHECK: mrs      x8, trcacvr0               // encoding: [0x08,0x20,0x31,0xd5]
+// CHECK: mrs      x15, trcacvr1              // encoding: [0x0f,0x22,0x31,0xd5]
+// CHECK: mrs      x19, trcacvr2              // encoding: [0x13,0x24,0x31,0xd5]
+// CHECK: mrs      x8, trcacvr3               // encoding: [0x08,0x26,0x31,0xd5]
+// CHECK: mrs      x28, trcacvr4              // encoding: [0x1c,0x28,0x31,0xd5]
+// CHECK: mrs      x3, trcacvr5               // encoding: [0x03,0x2a,0x31,0xd5]
+// CHECK: mrs      x25, trcacvr6              // encoding: [0x19,0x2c,0x31,0xd5]
+// CHECK: mrs      x24, trcacvr7              // encoding: [0x18,0x2e,0x31,0xd5]
+// CHECK: mrs      x6, trcacvr8               // encoding: [0x26,0x20,0x31,0xd5]
+// CHECK: mrs      x3, trcacvr9               // encoding: [0x23,0x22,0x31,0xd5]
+// CHECK: mrs      x24, trcacvr10             // encoding: [0x38,0x24,0x31,0xd5]
+// CHECK: mrs      x3, trcacvr11              // encoding: [0x23,0x26,0x31,0xd5]
+// CHECK: mrs      x12, trcacvr12             // encoding: [0x2c,0x28,0x31,0xd5]
+// CHECK: mrs      x9, trcacvr13              // encoding: [0x29,0x2a,0x31,0xd5]
+// CHECK: mrs      x14, trcacvr14             // encoding: [0x2e,0x2c,0x31,0xd5]
+// CHECK: mrs      x3, trcacvr15              // encoding: [0x23,0x2e,0x31,0xd5]
+// CHECK: mrs      x21, trcacatr0             // encoding: [0x55,0x20,0x31,0xd5]
+// CHECK: mrs      x26, trcacatr1             // encoding: [0x5a,0x22,0x31,0xd5]
+// CHECK: mrs      x8, trcacatr2              // encoding: [0x48,0x24,0x31,0xd5]
+// CHECK: mrs      x22, trcacatr3             // encoding: [0x56,0x26,0x31,0xd5]
+// CHECK: mrs      x6, trcacatr4              // encoding: [0x46,0x28,0x31,0xd5]
+// CHECK: mrs      x29, trcacatr5             // encoding: [0x5d,0x2a,0x31,0xd5]
+// CHECK: mrs      x5, trcacatr6              // encoding: [0x45,0x2c,0x31,0xd5]
+// CHECK: mrs      x18, trcacatr7             // encoding: [0x52,0x2e,0x31,0xd5]
+// CHECK: mrs      x2, trcacatr8              // encoding: [0x62,0x20,0x31,0xd5]
+// CHECK: mrs      x19, trcacatr9             // encoding: [0x73,0x22,0x31,0xd5]
+// CHECK: mrs      x13, trcacatr10            // encoding: [0x6d,0x24,0x31,0xd5]
+// CHECK: mrs      x25, trcacatr11            // encoding: [0x79,0x26,0x31,0xd5]
+// CHECK: mrs      x18, trcacatr12            // encoding: [0x72,0x28,0x31,0xd5]
+// CHECK: mrs      x29, trcacatr13            // encoding: [0x7d,0x2a,0x31,0xd5]
+// CHECK: mrs      x9, trcacatr14             // encoding: [0x69,0x2c,0x31,0xd5]
+// CHECK: mrs      x18, trcacatr15            // encoding: [0x72,0x2e,0x31,0xd5]
+// CHECK: mrs      x29, trcdvcvr0             // encoding: [0x9d,0x20,0x31,0xd5]
+// CHECK: mrs      x15, trcdvcvr1             // encoding: [0x8f,0x24,0x31,0xd5]
+// CHECK: mrs      x15, trcdvcvr2             // encoding: [0x8f,0x28,0x31,0xd5]
+// CHECK: mrs      x15, trcdvcvr3             // encoding: [0x8f,0x2c,0x31,0xd5]
+// CHECK: mrs      x19, trcdvcvr4             // encoding: [0xb3,0x20,0x31,0xd5]
+// CHECK: mrs      x22, trcdvcvr5             // encoding: [0xb6,0x24,0x31,0xd5]
+// CHECK: mrs      x27, trcdvcvr6             // encoding: [0xbb,0x28,0x31,0xd5]
+// CHECK: mrs      x1, trcdvcvr7              // encoding: [0xa1,0x2c,0x31,0xd5]
+// CHECK: mrs      x29, trcdvcmr0             // encoding: [0xdd,0x20,0x31,0xd5]
+// CHECK: mrs      x9, trcdvcmr1              // encoding: [0xc9,0x24,0x31,0xd5]
+// CHECK: mrs      x1, trcdvcmr2              // encoding: [0xc1,0x28,0x31,0xd5]
+// CHECK: mrs      x2, trcdvcmr3              // encoding: [0xc2,0x2c,0x31,0xd5]
+// CHECK: mrs      x5, trcdvcmr4              // encoding: [0xe5,0x20,0x31,0xd5]
+// CHECK: mrs      x21, trcdvcmr5             // encoding: [0xf5,0x24,0x31,0xd5]
+// CHECK: mrs      x5, trcdvcmr6              // encoding: [0xe5,0x28,0x31,0xd5]
+// CHECK: mrs      x1, trcdvcmr7              // encoding: [0xe1,0x2c,0x31,0xd5]
+// CHECK: mrs      x21, trccidcvr0            // encoding: [0x15,0x30,0x31,0xd5]
+// CHECK: mrs      x24, trccidcvr1            // encoding: [0x18,0x32,0x31,0xd5]
+// CHECK: mrs      x24, trccidcvr2            // encoding: [0x18,0x34,0x31,0xd5]
+// CHECK: mrs      x12, trccidcvr3            // encoding: [0x0c,0x36,0x31,0xd5]
+// CHECK: mrs      x10, trccidcvr4            // encoding: [0x0a,0x38,0x31,0xd5]
+// CHECK: mrs      x9, trccidcvr5             // encoding: [0x09,0x3a,0x31,0xd5]
+// CHECK: mrs      x6, trccidcvr6             // encoding: [0x06,0x3c,0x31,0xd5]
+// CHECK: mrs      x20, trccidcvr7            // encoding: [0x14,0x3e,0x31,0xd5]
+// CHECK: mrs      x20, trcvmidcvr0           // encoding: [0x34,0x30,0x31,0xd5]
+// CHECK: mrs      x20, trcvmidcvr1           // encoding: [0x34,0x32,0x31,0xd5]
+// CHECK: mrs      x26, trcvmidcvr2           // encoding: [0x3a,0x34,0x31,0xd5]
+// CHECK: mrs      x1, trcvmidcvr3            // encoding: [0x21,0x36,0x31,0xd5]
+// CHECK: mrs      x14, trcvmidcvr4           // encoding: [0x2e,0x38,0x31,0xd5]
+// CHECK: mrs      x27, trcvmidcvr5           // encoding: [0x3b,0x3a,0x31,0xd5]
+// CHECK: mrs      x29, trcvmidcvr6           // encoding: [0x3d,0x3c,0x31,0xd5]
+// CHECK: mrs      x17, trcvmidcvr7           // encoding: [0x31,0x3e,0x31,0xd5]
+// CHECK: mrs      x10, trccidcctlr0          // encoding: [0x4a,0x30,0x31,0xd5]
+// CHECK: mrs      x4, trccidcctlr1           // encoding: [0x44,0x31,0x31,0xd5]
+// CHECK: mrs      x9, trcvmidcctlr0          // encoding: [0x49,0x32,0x31,0xd5]
+// CHECK: mrs      x11, trcvmidcctlr1         // encoding: [0x4b,0x33,0x31,0xd5]
+// CHECK: mrs      x22, trcitctrl             // encoding: [0x96,0x70,0x31,0xd5]
+// CHECK: mrs      x23, trcclaimset           // encoding: [0xd7,0x78,0x31,0xd5]
+// CHECK: mrs      x14, trcclaimclr           // encoding: [0xce,0x79,0x31,0xd5]
+
+        msr trcoslar, x28
+        msr trclar, x14
+        msr trcprgctlr, x10
+        msr trcprocselr, x27
+        msr trcconfigr, x24
+        msr trcauxctlr, x8
+        msr trceventctl0r, x16
+        msr trceventctl1r, x27
+        msr trcstallctlr, x26
+        msr trctsctlr, x0
+        msr trcsyncpr, x14
+        msr trcccctlr, x8
+        msr trcbbctlr, x6
+        msr trctraceidr, x23
+        msr trcqctlr, x5
+        msr trcvictlr, x0
+        msr trcviiectlr, x0
+        msr trcvissctlr, x1
+        msr trcvipcssctlr, x0
+        msr trcvdctlr, x7
+        msr trcvdsacctlr, x18
+        msr trcvdarcctlr, x24
+        msr trcseqevr0, x28
+        msr trcseqevr1, x21
+        msr trcseqevr2, x16
+        msr trcseqrstevr, x16
+        msr trcseqstr, x25
+        msr trcextinselr, x29
+        msr trccntrldvr0, x20
+        msr trccntrldvr1, x20
+        msr trccntrldvr2, x22
+        msr trccntrldvr3, x12
+        msr trccntctlr0, x20
+        msr trccntctlr1, x4
+        msr trccntctlr2, x8
+        msr trccntctlr3, x16
+        msr trccntvr0, x5
+        msr trccntvr1, x27
+        msr trccntvr2, x21
+        msr trccntvr3, x8
+        msr trcimspec0, x6
+        msr trcimspec1, x27
+        msr trcimspec2, x23
+        msr trcimspec3, x15
+        msr trcimspec4, x13
+        msr trcimspec5, x25
+        msr trcimspec6, x19
+        msr trcimspec7, x27
+        msr trcrsctlr2, x4
+        msr trcrsctlr3, x0
+        msr trcrsctlr4, x21
+        msr trcrsctlr5, x8
+        msr trcrsctlr6, x20
+        msr trcrsctlr7, x11
+        msr trcrsctlr8, x18
+        msr trcrsctlr9, x24
+        msr trcrsctlr10, x15
+        msr trcrsctlr11, x21
+        msr trcrsctlr12, x4
+        msr trcrsctlr13, x28
+        msr trcrsctlr14, x3
+        msr trcrsctlr15, x20
+        msr trcrsctlr16, x12
+        msr trcrsctlr17, x17
+        msr trcrsctlr18, x10
+        msr trcrsctlr19, x11
+        msr trcrsctlr20, x3
+        msr trcrsctlr21, x18
+        msr trcrsctlr22, x26
+        msr trcrsctlr23, x5
+        msr trcrsctlr24, x25
+        msr trcrsctlr25, x5
+        msr trcrsctlr26, x4
+        msr trcrsctlr27, x20
+        msr trcrsctlr28, x5
+        msr trcrsctlr29, x10
+        msr trcrsctlr30, x24
+        msr trcrsctlr31, x20
+        msr trcssccr0, x23
+        msr trcssccr1, x27
+        msr trcssccr2, x27
+        msr trcssccr3, x6
+        msr trcssccr4, x3
+        msr trcssccr5, x12
+        msr trcssccr6, x7
+        msr trcssccr7, x6
+        msr trcsscsr0, x20
+        msr trcsscsr1, x17
+        msr trcsscsr2, x11
+        msr trcsscsr3, x4
+        msr trcsscsr4, x14
+        msr trcsscsr5, x22
+        msr trcsscsr6, x3
+        msr trcsscsr7, x11
+        msr trcsspcicr0, x2
+        msr trcsspcicr1, x3
+        msr trcsspcicr2, x5
+        msr trcsspcicr3, x7
+        msr trcsspcicr4, x11
+        msr trcsspcicr5, x13
+        msr trcsspcicr6, x17
+        msr trcsspcicr7, x23
+        msr trcpdcr, x3
+        msr trcacvr0, x6
+        msr trcacvr1, x20
+        msr trcacvr2, x25
+        msr trcacvr3, x1
+        msr trcacvr4, x28
+        msr trcacvr5, x15
+        msr trcacvr6, x25
+        msr trcacvr7, x12
+        msr trcacvr8, x5
+        msr trcacvr9, x25
+        msr trcacvr10, x13
+        msr trcacvr11, x10
+        msr trcacvr12, x19
+        msr trcacvr13, x10
+        msr trcacvr14, x19
+        msr trcacvr15, x2
+        msr trcacatr0, x15
+        msr trcacatr1, x13
+        msr trcacatr2, x8
+        msr trcacatr3, x1
+        msr trcacatr4, x11
+        msr trcacatr5, x8
+        msr trcacatr6, x24
+        msr trcacatr7, x6
+        msr trcacatr8, x23
+        msr trcacatr9, x5
+        msr trcacatr10, x11
+        msr trcacatr11, x11
+        msr trcacatr12, x3
+        msr trcacatr13, x28
+        msr trcacatr14, x25
+        msr trcacatr15, x4
+        msr trcdvcvr0, x6
+        msr trcdvcvr1, x3
+        msr trcdvcvr2, x5
+        msr trcdvcvr3, x11
+        msr trcdvcvr4, x9
+        msr trcdvcvr5, x14
+        msr trcdvcvr6, x10
+        msr trcdvcvr7, x12
+        msr trcdvcmr0, x8
+        msr trcdvcmr1, x8
+        msr trcdvcmr2, x22
+        msr trcdvcmr3, x22
+        msr trcdvcmr4, x5
+        msr trcdvcmr5, x16
+        msr trcdvcmr6, x27
+        msr trcdvcmr7, x21
+        msr trccidcvr0, x8
+        msr trccidcvr1, x6
+        msr trccidcvr2, x9
+        msr trccidcvr3, x8
+        msr trccidcvr4, x3
+        msr trccidcvr5, x21
+        msr trccidcvr6, x12
+        msr trccidcvr7, x7
+        msr trcvmidcvr0, x4
+        msr trcvmidcvr1, x3
+        msr trcvmidcvr2, x9
+        msr trcvmidcvr3, x17
+        msr trcvmidcvr4, x14
+        msr trcvmidcvr5, x12
+        msr trcvmidcvr6, x10
+        msr trcvmidcvr7, x3
+        msr trccidcctlr0, x14
+        msr trccidcctlr1, x22
+        msr trcvmidcctlr0, x8
+        msr trcvmidcctlr1, x15
+        msr trcitctrl, x1
+        msr trcclaimset, x7
+        msr trcclaimclr, x29
+// CHECK: msr      trcoslar, x28              // encoding: [0x9c,0x10,0x11,0xd5]
+// CHECK: msr      trclar, x14                // encoding: [0xce,0x7c,0x11,0xd5]
+// CHECK: msr      trcprgctlr, x10            // encoding: [0x0a,0x01,0x11,0xd5]
+// CHECK: msr      trcprocselr, x27           // encoding: [0x1b,0x02,0x11,0xd5]
+// CHECK: msr      trcconfigr, x24            // encoding: [0x18,0x04,0x11,0xd5]
+// CHECK: msr      trcauxctlr, x8             // encoding: [0x08,0x06,0x11,0xd5]
+// CHECK: msr      trceventctl0r, x16         // encoding: [0x10,0x08,0x11,0xd5]
+// CHECK: msr      trceventctl1r, x27         // encoding: [0x1b,0x09,0x11,0xd5]
+// CHECK: msr      trcstallctlr, x26          // encoding: [0x1a,0x0b,0x11,0xd5]
+// CHECK: msr      trctsctlr, x0              // encoding: [0x00,0x0c,0x11,0xd5]
+// CHECK: msr      trcsyncpr, x14             // encoding: [0x0e,0x0d,0x11,0xd5]
+// CHECK: msr      trcccctlr, x8              // encoding: [0x08,0x0e,0x11,0xd5]
+// CHECK: msr      trcbbctlr, x6              // encoding: [0x06,0x0f,0x11,0xd5]
+// CHECK: msr      trctraceidr, x23           // encoding: [0x37,0x00,0x11,0xd5]
+// CHECK: msr      trcqctlr, x5               // encoding: [0x25,0x01,0x11,0xd5]
+// CHECK: msr      trcvictlr, x0              // encoding: [0x40,0x00,0x11,0xd5]
+// CHECK: msr      trcviiectlr, x0            // encoding: [0x40,0x01,0x11,0xd5]
+// CHECK: msr      trcvissctlr, x1            // encoding: [0x41,0x02,0x11,0xd5]
+// CHECK: msr      trcvipcssctlr, x0          // encoding: [0x40,0x03,0x11,0xd5]
+// CHECK: msr      trcvdctlr, x7              // encoding: [0x47,0x08,0x11,0xd5]
+// CHECK: msr      trcvdsacctlr, x18          // encoding: [0x52,0x09,0x11,0xd5]
+// CHECK: msr      trcvdarcctlr, x24          // encoding: [0x58,0x0a,0x11,0xd5]
+// CHECK: msr      trcseqevr0, x28            // encoding: [0x9c,0x00,0x11,0xd5]
+// CHECK: msr      trcseqevr1, x21            // encoding: [0x95,0x01,0x11,0xd5]
+// CHECK: msr      trcseqevr2, x16            // encoding: [0x90,0x02,0x11,0xd5]
+// CHECK: msr      trcseqrstevr, x16          // encoding: [0x90,0x06,0x11,0xd5]
+// CHECK: msr      trcseqstr, x25             // encoding: [0x99,0x07,0x11,0xd5]
+// CHECK: msr      trcextinselr, x29          // encoding: [0x9d,0x08,0x11,0xd5]
+// CHECK: msr      trccntrldvr0, x20          // encoding: [0xb4,0x00,0x11,0xd5]
+// CHECK: msr      trccntrldvr1, x20          // encoding: [0xb4,0x01,0x11,0xd5]
+// CHECK: msr      trccntrldvr2, x22          // encoding: [0xb6,0x02,0x11,0xd5]
+// CHECK: msr      trccntrldvr3, x12          // encoding: [0xac,0x03,0x11,0xd5]
+// CHECK: msr      trccntctlr0, x20           // encoding: [0xb4,0x04,0x11,0xd5]
+// CHECK: msr      trccntctlr1, x4            // encoding: [0xa4,0x05,0x11,0xd5]
+// CHECK: msr      trccntctlr2, x8            // encoding: [0xa8,0x06,0x11,0xd5]
+// CHECK: msr      trccntctlr3, x16           // encoding: [0xb0,0x07,0x11,0xd5]
+// CHECK: msr      trccntvr0, x5              // encoding: [0xa5,0x08,0x11,0xd5]
+// CHECK: msr      trccntvr1, x27             // encoding: [0xbb,0x09,0x11,0xd5]
+// CHECK: msr      trccntvr2, x21             // encoding: [0xb5,0x0a,0x11,0xd5]
+// CHECK: msr      trccntvr3, x8              // encoding: [0xa8,0x0b,0x11,0xd5]
+// CHECK: msr      trcimspec0, x6             // encoding: [0xe6,0x00,0x11,0xd5]
+// CHECK: msr      trcimspec1, x27            // encoding: [0xfb,0x01,0x11,0xd5]
+// CHECK: msr      trcimspec2, x23            // encoding: [0xf7,0x02,0x11,0xd5]
+// CHECK: msr      trcimspec3, x15            // encoding: [0xef,0x03,0x11,0xd5]
+// CHECK: msr      trcimspec4, x13            // encoding: [0xed,0x04,0x11,0xd5]
+// CHECK: msr      trcimspec5, x25            // encoding: [0xf9,0x05,0x11,0xd5]
+// CHECK: msr      trcimspec6, x19            // encoding: [0xf3,0x06,0x11,0xd5]
+// CHECK: msr      trcimspec7, x27            // encoding: [0xfb,0x07,0x11,0xd5]
+// CHECK: msr      trcrsctlr2, x4             // encoding: [0x04,0x12,0x11,0xd5]
+// CHECK: msr      trcrsctlr3, x0             // encoding: [0x00,0x13,0x11,0xd5]
+// CHECK: msr      trcrsctlr4, x21            // encoding: [0x15,0x14,0x11,0xd5]
+// CHECK: msr      trcrsctlr5, x8             // encoding: [0x08,0x15,0x11,0xd5]
+// CHECK: msr      trcrsctlr6, x20            // encoding: [0x14,0x16,0x11,0xd5]
+// CHECK: msr      trcrsctlr7, x11            // encoding: [0x0b,0x17,0x11,0xd5]
+// CHECK: msr      trcrsctlr8, x18            // encoding: [0x12,0x18,0x11,0xd5]
+// CHECK: msr      trcrsctlr9, x24            // encoding: [0x18,0x19,0x11,0xd5]
+// CHECK: msr      trcrsctlr10, x15           // encoding: [0x0f,0x1a,0x11,0xd5]
+// CHECK: msr      trcrsctlr11, x21           // encoding: [0x15,0x1b,0x11,0xd5]
+// CHECK: msr      trcrsctlr12, x4            // encoding: [0x04,0x1c,0x11,0xd5]
+// CHECK: msr      trcrsctlr13, x28           // encoding: [0x1c,0x1d,0x11,0xd5]
+// CHECK: msr      trcrsctlr14, x3            // encoding: [0x03,0x1e,0x11,0xd5]
+// CHECK: msr      trcrsctlr15, x20           // encoding: [0x14,0x1f,0x11,0xd5]
+// CHECK: msr      trcrsctlr16, x12           // encoding: [0x2c,0x10,0x11,0xd5]
+// CHECK: msr      trcrsctlr17, x17           // encoding: [0x31,0x11,0x11,0xd5]
+// CHECK: msr      trcrsctlr18, x10           // encoding: [0x2a,0x12,0x11,0xd5]
+// CHECK: msr      trcrsctlr19, x11           // encoding: [0x2b,0x13,0x11,0xd5]
+// CHECK: msr      trcrsctlr20, x3            // encoding: [0x23,0x14,0x11,0xd5]
+// CHECK: msr      trcrsctlr21, x18           // encoding: [0x32,0x15,0x11,0xd5]
+// CHECK: msr      trcrsctlr22, x26           // encoding: [0x3a,0x16,0x11,0xd5]
+// CHECK: msr      trcrsctlr23, x5            // encoding: [0x25,0x17,0x11,0xd5]
+// CHECK: msr      trcrsctlr24, x25           // encoding: [0x39,0x18,0x11,0xd5]
+// CHECK: msr      trcrsctlr25, x5            // encoding: [0x25,0x19,0x11,0xd5]
+// CHECK: msr      trcrsctlr26, x4            // encoding: [0x24,0x1a,0x11,0xd5]
+// CHECK: msr      trcrsctlr27, x20           // encoding: [0x34,0x1b,0x11,0xd5]
+// CHECK: msr      trcrsctlr28, x5            // encoding: [0x25,0x1c,0x11,0xd5]
+// CHECK: msr      trcrsctlr29, x10           // encoding: [0x2a,0x1d,0x11,0xd5]
+// CHECK: msr      trcrsctlr30, x24           // encoding: [0x38,0x1e,0x11,0xd5]
+// CHECK: msr      trcrsctlr31, x20           // encoding: [0x34,0x1f,0x11,0xd5]
+// CHECK: msr      trcssccr0, x23             // encoding: [0x57,0x10,0x11,0xd5]
+// CHECK: msr      trcssccr1, x27             // encoding: [0x5b,0x11,0x11,0xd5]
+// CHECK: msr      trcssccr2, x27             // encoding: [0x5b,0x12,0x11,0xd5]
+// CHECK: msr      trcssccr3, x6              // encoding: [0x46,0x13,0x11,0xd5]
+// CHECK: msr      trcssccr4, x3              // encoding: [0x43,0x14,0x11,0xd5]
+// CHECK: msr      trcssccr5, x12             // encoding: [0x4c,0x15,0x11,0xd5]
+// CHECK: msr      trcssccr6, x7              // encoding: [0x47,0x16,0x11,0xd5]
+// CHECK: msr      trcssccr7, x6              // encoding: [0x46,0x17,0x11,0xd5]
+// CHECK: msr      trcsscsr0, x20             // encoding: [0x54,0x18,0x11,0xd5]
+// CHECK: msr      trcsscsr1, x17             // encoding: [0x51,0x19,0x11,0xd5]
+// CHECK: msr      trcsscsr2, x11             // encoding: [0x4b,0x1a,0x11,0xd5]
+// CHECK: msr      trcsscsr3, x4              // encoding: [0x44,0x1b,0x11,0xd5]
+// CHECK: msr      trcsscsr4, x14             // encoding: [0x4e,0x1c,0x11,0xd5]
+// CHECK: msr      trcsscsr5, x22             // encoding: [0x56,0x1d,0x11,0xd5]
+// CHECK: msr      trcsscsr6, x3              // encoding: [0x43,0x1e,0x11,0xd5]
+// CHECK: msr      trcsscsr7, x11             // encoding: [0x4b,0x1f,0x11,0xd5]
+// CHECK: msr      trcsspcicr0, x2            // encoding: [0x62,0x10,0x11,0xd5]
+// CHECK: msr      trcsspcicr1, x3            // encoding: [0x63,0x11,0x11,0xd5]
+// CHECK: msr      trcsspcicr2, x5            // encoding: [0x65,0x12,0x11,0xd5]
+// CHECK: msr      trcsspcicr3, x7            // encoding: [0x67,0x13,0x11,0xd5]
+// CHECK: msr      trcsspcicr4, x11           // encoding: [0x6b,0x14,0x11,0xd5]
+// CHECK: msr      trcsspcicr5, x13           // encoding: [0x6d,0x15,0x11,0xd5]
+// CHECK: msr      trcsspcicr6, x17           // encoding: [0x71,0x16,0x11,0xd5]
+// CHECK: msr      trcsspcicr7, x23           // encoding: [0x77,0x17,0x11,0xd5]
+// CHECK: msr      trcpdcr, x3                // encoding: [0x83,0x14,0x11,0xd5]
+// CHECK: msr      trcacvr0, x6               // encoding: [0x06,0x20,0x11,0xd5]
+// CHECK: msr      trcacvr1, x20              // encoding: [0x14,0x22,0x11,0xd5]
+// CHECK: msr      trcacvr2, x25              // encoding: [0x19,0x24,0x11,0xd5]
+// CHECK: msr      trcacvr3, x1               // encoding: [0x01,0x26,0x11,0xd5]
+// CHECK: msr      trcacvr4, x28              // encoding: [0x1c,0x28,0x11,0xd5]
+// CHECK: msr      trcacvr5, x15              // encoding: [0x0f,0x2a,0x11,0xd5]
+// CHECK: msr      trcacvr6, x25              // encoding: [0x19,0x2c,0x11,0xd5]
+// CHECK: msr      trcacvr7, x12              // encoding: [0x0c,0x2e,0x11,0xd5]
+// CHECK: msr      trcacvr8, x5               // encoding: [0x25,0x20,0x11,0xd5]
+// CHECK: msr      trcacvr9, x25              // encoding: [0x39,0x22,0x11,0xd5]
+// CHECK: msr      trcacvr10, x13             // encoding: [0x2d,0x24,0x11,0xd5]
+// CHECK: msr      trcacvr11, x10             // encoding: [0x2a,0x26,0x11,0xd5]
+// CHECK: msr      trcacvr12, x19             // encoding: [0x33,0x28,0x11,0xd5]
+// CHECK: msr      trcacvr13, x10             // encoding: [0x2a,0x2a,0x11,0xd5]
+// CHECK: msr      trcacvr14, x19             // encoding: [0x33,0x2c,0x11,0xd5]
+// CHECK: msr      trcacvr15, x2              // encoding: [0x22,0x2e,0x11,0xd5]
+// CHECK: msr      trcacatr0, x15             // encoding: [0x4f,0x20,0x11,0xd5]
+// CHECK: msr      trcacatr1, x13             // encoding: [0x4d,0x22,0x11,0xd5]
+// CHECK: msr      trcacatr2, x8              // encoding: [0x48,0x24,0x11,0xd5]
+// CHECK: msr      trcacatr3, x1              // encoding: [0x41,0x26,0x11,0xd5]
+// CHECK: msr      trcacatr4, x11             // encoding: [0x4b,0x28,0x11,0xd5]
+// CHECK: msr      trcacatr5, x8              // encoding: [0x48,0x2a,0x11,0xd5]
+// CHECK: msr      trcacatr6, x24             // encoding: [0x58,0x2c,0x11,0xd5]
+// CHECK: msr      trcacatr7, x6              // encoding: [0x46,0x2e,0x11,0xd5]
+// CHECK: msr      trcacatr8, x23             // encoding: [0x77,0x20,0x11,0xd5]
+// CHECK: msr      trcacatr9, x5              // encoding: [0x65,0x22,0x11,0xd5]
+// CHECK: msr      trcacatr10, x11            // encoding: [0x6b,0x24,0x11,0xd5]
+// CHECK: msr      trcacatr11, x11            // encoding: [0x6b,0x26,0x11,0xd5]
+// CHECK: msr      trcacatr12, x3             // encoding: [0x63,0x28,0x11,0xd5]
+// CHECK: msr      trcacatr13, x28            // encoding: [0x7c,0x2a,0x11,0xd5]
+// CHECK: msr      trcacatr14, x25            // encoding: [0x79,0x2c,0x11,0xd5]
+// CHECK: msr      trcacatr15, x4             // encoding: [0x64,0x2e,0x11,0xd5]
+// CHECK: msr      trcdvcvr0, x6              // encoding: [0x86,0x20,0x11,0xd5]
+// CHECK: msr      trcdvcvr1, x3              // encoding: [0x83,0x24,0x11,0xd5]
+// CHECK: msr      trcdvcvr2, x5              // encoding: [0x85,0x28,0x11,0xd5]
+// CHECK: msr      trcdvcvr3, x11             // encoding: [0x8b,0x2c,0x11,0xd5]
+// CHECK: msr      trcdvcvr4, x9              // encoding: [0xa9,0x20,0x11,0xd5]
+// CHECK: msr      trcdvcvr5, x14             // encoding: [0xae,0x24,0x11,0xd5]
+// CHECK: msr      trcdvcvr6, x10             // encoding: [0xaa,0x28,0x11,0xd5]
+// CHECK: msr      trcdvcvr7, x12             // encoding: [0xac,0x2c,0x11,0xd5]
+// CHECK: msr      trcdvcmr0, x8              // encoding: [0xc8,0x20,0x11,0xd5]
+// CHECK: msr      trcdvcmr1, x8              // encoding: [0xc8,0x24,0x11,0xd5]
+// CHECK: msr      trcdvcmr2, x22             // encoding: [0xd6,0x28,0x11,0xd5]
+// CHECK: msr      trcdvcmr3, x22             // encoding: [0xd6,0x2c,0x11,0xd5]
+// CHECK: msr      trcdvcmr4, x5              // encoding: [0xe5,0x20,0x11,0xd5]
+// CHECK: msr      trcdvcmr5, x16             // encoding: [0xf0,0x24,0x11,0xd5]
+// CHECK: msr      trcdvcmr6, x27             // encoding: [0xfb,0x28,0x11,0xd5]
+// CHECK: msr      trcdvcmr7, x21             // encoding: [0xf5,0x2c,0x11,0xd5]
+// CHECK: msr      trccidcvr0, x8             // encoding: [0x08,0x30,0x11,0xd5]
+// CHECK: msr      trccidcvr1, x6             // encoding: [0x06,0x32,0x11,0xd5]
+// CHECK: msr      trccidcvr2, x9             // encoding: [0x09,0x34,0x11,0xd5]
+// CHECK: msr      trccidcvr3, x8             // encoding: [0x08,0x36,0x11,0xd5]
+// CHECK: msr      trccidcvr4, x3             // encoding: [0x03,0x38,0x11,0xd5]
+// CHECK: msr      trccidcvr5, x21            // encoding: [0x15,0x3a,0x11,0xd5]
+// CHECK: msr      trccidcvr6, x12            // encoding: [0x0c,0x3c,0x11,0xd5]
+// CHECK: msr      trccidcvr7, x7             // encoding: [0x07,0x3e,0x11,0xd5]
+// CHECK: msr      trcvmidcvr0, x4            // encoding: [0x24,0x30,0x11,0xd5]
+// CHECK: msr      trcvmidcvr1, x3            // encoding: [0x23,0x32,0x11,0xd5]
+// CHECK: msr      trcvmidcvr2, x9            // encoding: [0x29,0x34,0x11,0xd5]
+// CHECK: msr      trcvmidcvr3, x17           // encoding: [0x31,0x36,0x11,0xd5]
+// CHECK: msr      trcvmidcvr4, x14           // encoding: [0x2e,0x38,0x11,0xd5]
+// CHECK: msr      trcvmidcvr5, x12           // encoding: [0x2c,0x3a,0x11,0xd5]
+// CHECK: msr      trcvmidcvr6, x10           // encoding: [0x2a,0x3c,0x11,0xd5]
+// CHECK: msr      trcvmidcvr7, x3            // encoding: [0x23,0x3e,0x11,0xd5]
+// CHECK: msr      trccidcctlr0, x14          // encoding: [0x4e,0x30,0x11,0xd5]
+// CHECK: msr      trccidcctlr1, x22          // encoding: [0x56,0x31,0x11,0xd5]
+// CHECK: msr      trcvmidcctlr0, x8          // encoding: [0x48,0x32,0x11,0xd5]
+// CHECK: msr      trcvmidcctlr1, x15         // encoding: [0x4f,0x33,0x11,0xd5]
+// CHECK: msr      trcitctrl, x1              // encoding: [0x81,0x70,0x11,0xd5]
+// CHECK: msr      trcclaimset, x7            // encoding: [0xc7,0x78,0x11,0xd5]
+// CHECK: msr      trcclaimclr, x29           // encoding: [0xdd,0x79,0x11,0xd5]
diff --git a/test/MC/ARM/2013-03-18-Br-to-label-named-like-reg.s b/test/MC/ARM/2013-03-18-Br-to-label-named-like-reg.s
new file mode 100644
index 000000000000..172abcf6f813
--- /dev/null
+++ b/test/MC/ARM/2013-03-18-Br-to-label-named-like-reg.s
@@ -0,0 +1,5 @@
+@ RUN: llvm-mc -arch arm %s
+@ CHECK: test:
+@ CHECK: br r1
+test:
+  bl r1
diff --git a/test/MC/ARM/arm-thumb-trustzone.s b/test/MC/ARM/arm-thumb-trustzone.s
new file mode 100644
index 000000000000..a080b3efac88
--- /dev/null
+++ b/test/MC/ARM/arm-thumb-trustzone.s
@@ -0,0 +1,25 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
+
+  .syntax unified
+  .globl _func
+
+@ Check that the assembler processes SMC instructions when TrustZone support is 
+@ active and that it rejects them when this feature is not enabled
+
+_func:
+@ CHECK: _func
+
+
+@------------------------------------------------------------------------------
+@ SMC
+@------------------------------------------------------------------------------
+        smc #0xf
+        ite eq
+        smceq #0
+
+@ NOTZ-NOT: smc 	#15
+@ NOTZ-NOT: smceq	#0
+@ TZ: smc	#15                     @ encoding: [0xff,0xf7,0x00,0x80]
+@ TZ: ite	eq                      @ encoding: [0x0c,0xbf]
+@ TZ: smceq	#0                      @ encoding: [0xf0,0xf7,0x00,0x80]
diff --git a/test/MC/ARM/arm-trustzone.s b/test/MC/ARM/arm-trustzone.s
new file mode 100644
index 000000000000..69157f60dc0a
--- /dev/null
+++ b/test/MC/ARM/arm-trustzone.s
@@ -0,0 +1,24 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
+@ RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
+
+  .syntax unified
+  .globl _func
+
+@ Check that the assembler processes SMC instructions when TrustZone support is 
+@ active and that it rejects them when this feature is not enabled
+
+_func:
+@ CHECK: _func
+
+
+@------------------------------------------------------------------------------
+@ SMC
+@------------------------------------------------------------------------------
+        smc #0xf
+        smceq #0
+
+@ NOTZ-NOT: smc 	#15
+@ NOTZ-NOT: smceq	#0
+@ TZ: smc	#15                     @ encoding: [0x7f,0x00,0x60,0xe1]
+@ TZ: smceq	#0                      @ encoding: [0x70,0x00,0x60,0x01]
+
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index 560a0d633cbe..0b7f2f9dbdb6 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -1790,15 +1790,6 @@ Lforward:
 @ CHECK: shsub8	r4, r8, r2              @ encoding: [0xf2,0x4f,0x38,0xe6]
 @ CHECK: shsub8gt	r4, r8, r2      @ encoding: [0xf2,0x4f,0x38,0xc6]
 
-@------------------------------------------------------------------------------
-@ SMC
-@------------------------------------------------------------------------------
-        smc #0xf
-        smceq #0
-
-@ CHECK: smc	#15                     @ encoding: [0x7f,0x00,0x60,0xe1]
-@ CHECK: smceq	#0                      @ encoding: [0x70,0x00,0x60,0x01]
-
 @------------------------------------------------------------------------------
 @ SMLABB/SMLABT/SMLATB/SMLATT
 @------------------------------------------------------------------------------
@@ -2318,7 +2309,7 @@ Lforward:
         strpl	r3, [r10, #0]!
 
 @ CHECK: strpl	r3, [r10, #-0]!         @ encoding: [0x00,0x30,0x2a,0x55]
-@ CHECK: strpl	r3, [r10]!              @ encoding: [0x00,0x30,0xaa,0x55]
+@ CHECK: strpl	r3, [r10, #0]!          @ encoding: [0x00,0x30,0xaa,0x55]
 
 @------------------------------------------------------------------------------
 @ SUB
@@ -2892,7 +2883,7 @@ Lforward:
 @ CHECK: wfilt                          @ encoding: [0x03,0xf0,0x20,0xb3]
 @ CHECK: yield                          @ encoding: [0x01,0xf0,0x20,0xe3]
 @ CHECK: yieldne                        @ encoding: [0x01,0xf0,0x20,0x13]
-@ CHECK: hint	#5                      @ encoding: [0x05,0xf0,0x20,0xe3]
+@ CHECK-NOT: hint	#5
 @ CHECK: sev                            @ encoding: [0x04,0xf0,0x20,0xe3]
 @ CHECK: wfi                            @ encoding: [0x03,0xf0,0x20,0xe3]
 @ CHECK: wfe                            @ encoding: [0x02,0xf0,0x20,0xe3]
diff --git a/test/MC/ARM/cxx-global-constructor.ll b/test/MC/ARM/cxx-global-constructor.ll
index e06d2c73ed93..4afd1e19ad44 100644
--- a/test/MC/ARM/cxx-global-constructor.ll
+++ b/test/MC/ARM/cxx-global-constructor.ll
@@ -1,5 +1,5 @@
 ; RUN: llc %s -mtriple=armv7-linux-gnueabi -relocation-model=pic \
-; RUN: -filetype=obj -o - | elf-dump --dump-section-data | FileCheck %s
+; RUN: -filetype=obj -o - | llvm-readobj -r | FileCheck %s
 
 
 @llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }]
@@ -9,4 +9,5 @@ define void @f() {
 }
 
 ; Check for a relocation of type R_ARM_TARGET1.
-; CHECK: ('r_type', 0x26)
+; CHECK: Relocations [
+; CHECK:   0x{{[0-9,A-F]+}} R_ARM_TARGET1
diff --git a/test/MC/ARM/data-in-code.ll b/test/MC/ARM/data-in-code.ll
index c2feec5303c3..e3325b6bf6e6 100644
--- a/test/MC/ARM/data-in-code.ll
+++ b/test/MC/ARM/data-in-code.ll
@@ -1,8 +1,8 @@
 ;; RUN: llc -O0 -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \
-;; RUN:   elf-dump | FileCheck -check-prefix=ARM %s
+;; RUN:   llvm-readobj -t | FileCheck -check-prefix=ARM %s
 
 ;; RUN: llc -O0 -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \
-;; RUN:   elf-dump --dump-section-data | FileCheck -check-prefix=TMB %s
+;; RUN:   llvm-readobj -t | FileCheck -check-prefix=TMB %s
 
 ;; Ensure that if a jump table is generated that it has Mapping Symbols
 ;; marking the data-in-code region.
@@ -108,68 +108,68 @@ exit:
   ret void
 }
 
-;; ARM:         # Symbol 2
-;; ARM-NEXT:    $a
-;; ARM-NEXT:   'st_value', 0x00000000
-;; ARM-NEXT:   'st_size', 0x00000000
-;; ARM-NEXT:   'st_bind', 0x0
-;; ARM-NEXT:   'st_type', 0x0
-;; ARM-NEXT:   'st_other'
-;; ARM-NEXT:   'st_shndx', [[MIXED_SECT:0x[0-9a-f]+]]
-
-;; ARM:         # Symbol 3
-;; ARM-NEXT:    $a
-;; ARM-NEXT:   'st_value', 0x000000ac
-;; ARM-NEXT:   'st_size', 0x00000000
-;; ARM-NEXT:   'st_bind', 0x0
-;; ARM-NEXT:   'st_type', 0x0
-;; ARM-NEXT:   'st_other'
-;; ARM-NEXT:   'st_shndx', [[MIXED_SECT]]
-
-;; ARM:         # Symbol 4
-;; ARM-NEXT:    $d
-;; ARM-NEXT:    'st_value', 0x00000000
-;; ARM-NEXT:    'st_size', 0x00000000
-;; ARM-NEXT:    'st_bind', 0x0
-;; ARM-NEXT:    'st_type', 0x0
-
-;; ARM:         # Symbol 5
-;; ARM-NEXT:    $d
-;; ARM-NEXT:   'st_value', 0x00000030
-;; ARM-NEXT:   'st_size', 0x00000000
-;; ARM-NEXT:   'st_bind', 0x0
-;; ARM-NEXT:   'st_type', 0x0
-;; ARM-NEXT:   'st_other'
-;; ARM-NEXT:   'st_shndx', [[MIXED_SECT]]
+;; ARM:        Symbol {
+;; ARM:          Name: $a
+;; ARM-NEXT:     Value: 0x0
+;; ARM-NEXT:     Size: 0
+;; ARM-NEXT:     Binding: Local
+;; ARM-NEXT:     Type: None
+;; ARM-NEXT:     Other:
+;; ARM-NEXT:     Section: [[MIXED_SECT:[^ ]+]]
+
+;; ARM:        Symbol {
+;; ARM:          Name: $a
+;; ARM-NEXT:     Value: 0xAC
+;; ARM-NEXT:     Size: 0
+;; ARM-NEXT:     Binding: Local
+;; ARM-NEXT:     Type: None
+;; ARM-NEXT:     Other:
+;; ARM-NEXT:     Section: [[MIXED_SECT]]
+
+;; ARM:        Symbol {
+;; ARM:          Name: $d
+;; ARM-NEXT:     Value: 0
+;; ARM-NEXT:     Size: 0
+;; ARM-NEXT:     Binding: Local
+;; ARM-NEXT:     Type: None
+
+;; ARM:        Symbol {
+;; ARM:          Name: $d
+;; ARM-NEXT:     Value: 0x30
+;; ARM-NEXT:     Size: 0
+;; ARM-NEXT:     Binding: Local
+;; ARM-NEXT:     Type: None
+;; ARM-NEXT:     Other:
+;; ARM-NEXT:     Section: [[MIXED_SECT]]
 
 ;; ARM-NOT:     ${{[atd]}}
 
-;; TMB:         # Symbol 3
-;; TMB-NEXT:    $d
-;; TMB-NEXT:   'st_value', 0x00000016
-;; TMB-NEXT:   'st_size', 0x00000000
-;; TMB-NEXT:   'st_bind', 0x0
-;; TMB-NEXT:   'st_type', 0x0
-;; TMB-NEXT:   'st_other'
-;; TMB-NEXT:   'st_shndx', [[MIXED_SECT:0x[0-9a-f]+]]
-
-;; TMB:         # Symbol 4
-;; TMB-NEXT:    $t
-;; TMB-NEXT:   'st_value', 0x00000000
-;; TMB-NEXT:   'st_size', 0x00000000
-;; TMB-NEXT:   'st_bind', 0x0
-;; TMB-NEXT:   'st_type', 0x0
-;; TMB-NEXT:   'st_other'
-;; TMB-NEXT:   'st_shndx', [[MIXED_SECT]]
-
-;; TMB:         # Symbol 5
-;; TMB-NEXT:    $t
-;; TMB-NEXT:   'st_value', 0x00000036
-;; TMB-NEXT:   'st_size', 0x00000000
-;; TMB-NEXT:   'st_bind', 0x0
-;; TMB-NEXT:   'st_type', 0x0
-;; TMB-NEXT:   'st_other'
-;; TMB-NEXT:   'st_shndx', [[MIXED_SECT]]
+;; TMB:        Symbol {
+;; TMB:          Name: $d.2
+;; TMB-NEXT:     Value: 0x16
+;; TMB-NEXT:     Size: 0
+;; TMB-NEXT:     Binding: Local
+;; TMB-NEXT:     Type: None
+;; TMB-NEXT:     Other:
+;; TMB-NEXT:     Section: [[MIXED_SECT:[^ ]+]]
+
+;; TMB:        Symbol {
+;; TMB:          Name: $t
+;; TMB-NEXT:     Value: 0x0
+;; TMB-NEXT:     Size: 0
+;; TMB-NEXT:     Binding: Local
+;; TMB-NEXT:     Type: None
+;; TMB-NEXT:     Other:
+;; TMB-NEXT:     Section: [[MIXED_SECT]]
+
+;; TMB:        Symbol {
+;; TMB:          Name: $t
+;; TMB-NEXT:     Value: 0x36
+;; TMB-NEXT:     Size: 0
+;; TMB-NEXT:     Binding: Local
+;; TMB-NEXT:     Type: None
+;; TMB-NEXT:     Other:
+;; TMB-NEXT:     Section: [[MIXED_SECT]]
 
 
 ;; TMB-NOT:     ${{[atd]}}
diff --git a/test/MC/ARM/elf-eflags-eabi-cg.ll b/test/MC/ARM/elf-eflags-eabi-cg.ll
index 2e86a0f36077..0b9de7f2a62a 100644
--- a/test/MC/ARM/elf-eflags-eabi-cg.ll
+++ b/test/MC/ARM/elf-eflags-eabi-cg.ll
@@ -1,7 +1,7 @@
 ; Codegen version to check for ELF header flags.
 ;
 ; RUN: llc %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic \
-; RUN: -filetype=obj -o - | elf-dump --dump-section-data | \
+; RUN: -filetype=obj -o - | llvm-readobj -h | \
 ; RUN: FileCheck %s
 
 define void @bar() nounwind {
@@ -10,4 +10,5 @@ entry:
 }
 
 ; For now the only e_flag set is EF_ARM_EABI_VER5
-;CHECK:    'e_flags', 0x05000000
+; CHECK: ElfHeader {
+; CHECK:   Flags [ (0x5000000)
diff --git a/test/MC/ARM/elf-eflags-eabi.s b/test/MC/ARM/elf-eflags-eabi.s
index ea89eacf74fb..fe0b6c071e62 100644
--- a/test/MC/ARM/elf-eflags-eabi.s
+++ b/test/MC/ARM/elf-eflags-eabi.s
@@ -1,5 +1,5 @@
 @ RUN: llvm-mc %s -triple=armv7-linux-gnueabi -filetype=obj -o - | \
-@ RUN:    elf-dump --dump-section-data  | FileCheck -check-prefix=OBJ %s
+@ RUN:    llvm-readobj -h | FileCheck -check-prefix=OBJ %s
 	.syntax unified
 	.text
 	.globl	barf
@@ -10,4 +10,5 @@ barf:                                   @ @barf
         b foo
 
 @@@ make sure the EF_ARM_EABIMASK comes out OK
-@OBJ:    'e_flags', 0x05000000
+@OBJ: ElfHeader {
+@OBJ:   Flags [ (0x5000000)
diff --git a/test/MC/ARM/elf-movt.s b/test/MC/ARM/elf-movt.s
index 02bb5a6907de..74b3c9f2f53b 100644
--- a/test/MC/ARM/elf-movt.s
+++ b/test/MC/ARM/elf-movt.s
@@ -1,6 +1,6 @@
 @ RUN: llvm-mc %s -triple=armv7-linux-gnueabi | FileCheck -check-prefix=ASM %s
 @ RUN: llvm-mc %s -triple=armv7-linux-gnueabi -filetype=obj -o - | \
-@ RUN:    elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+@ RUN:    llvm-readobj -s -sd -sr | FileCheck -check-prefix=OBJ %s
 	.syntax unified
 	.text
 	.globl	barf
@@ -15,25 +15,24 @@ barf:                                   @ @barf
 @ ASM-NEXT:     movt    r0, :upper16:(GOT-(.LPC0_2+8))
 
 @@ make sure that the text section fixups are sane too
-@ OBJ:                 '.text'
-@ OBJ-NEXT:            'sh_type', 0x00000001
-@ OBJ-NEXT:            'sh_flags', 0x00000006
-@ OBJ-NEXT:            'sh_addr', 0x00000000
-@ OBJ-NEXT:            'sh_offset', 0x00000034
-@ OBJ-NEXT:            'sh_size', 0x00000008
-@ OBJ-NEXT:            'sh_link', 0x00000000
-@ OBJ-NEXT:            'sh_info', 0x00000000
-@ OBJ-NEXT:            'sh_addralign', 0x00000004
-@ OBJ-NEXT:            'sh_entsize', 0x00000000
-@ OBJ-NEXT:            '_section_data', 'f00f0fe3 f40f4fe3'
-
-@ OBJ:              Relocation 0
-@ OBJ-NEXT:         'r_offset', 0x00000000
-@ OBJ-NEXT:         'r_sym'
-@ OBJ-NEXT:         'r_type', 0x2d
-
-@ OBJ:              Relocation 1
-@ OBJ-NEXT:         'r_offset', 0x00000004
-@ OBJ-NEXT:         'r_sym'
-@ OBJ-NEXT:         'r_type', 0x2e
-
+@ OBJ:        Section {
+@ OBJ:          Name: .text
+@ OBJ-NEXT:     Type: SHT_PROGBITS
+@ OBJ-NEXT:     Flags [ (0x6)
+@ OBJ-NEXT:       SHF_ALLOC
+@ OBJ-NEXT:       SHF_EXECINSTR
+@ OBJ-NEXT:     ]
+@ OBJ-NEXT:     Address: 0x0
+@ OBJ-NEXT:     Offset: 0x34
+@ OBJ-NEXT:     Size: 8
+@ OBJ-NEXT:     Link: 0
+@ OBJ-NEXT:     Info: 0
+@ OBJ-NEXT:     AddressAlignment: 4
+@ OBJ-NEXT:     EntrySize: 0
+@ OBJ-NEXT:     Relocations [
+@ OBJ-NEXT:       0x0 R_ARM_MOVW_PREL_NC
+@ OBJ-NEXT:       0x4 R_ARM_MOVT_PREL
+@ OBJ-NEXT:     ]
+@ OBJ-NEXT:     SectionData (
+@ OBJ-NEXT:       0000: F00F0FE3 F40F4FE3
+@ OBJ-NEXT:     )
diff --git a/test/MC/ARM/elf-reloc-01.ll b/test/MC/ARM/elf-reloc-01.ll
index 3ebd7c641b6d..9b5dbd9e99dd 100644
--- a/test/MC/ARM/elf-reloc-01.ll
+++ b/test/MC/ARM/elf-reloc-01.ll
@@ -1,7 +1,7 @@
 ;; RUN: llc -mtriple=armv7-linux-gnueabi -O3  \
 ;; RUN:    -mcpu=cortex-a8 -mattr=-neon -mattr=+vfp2  -arm-reserve-r9  \
 ;; RUN:    -filetype=obj %s -o - | \
-;; RUN:   elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+;; RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
 ;; FIXME: This file needs to be in .s form!
 ;; The args to llc are there to constrain the codegen only.
@@ -60,11 +60,8 @@ bb3:                                              ; preds = %bb, %entry
 
 declare void @exit(i32) noreturn nounwind
 
-;; OBJ:          Relocation 1
-;; OBJ-NEXT:     'r_offset',
-;; OBJ-NEXT:     'r_sym', 0x000007
-;; OBJ-NEXT:     'r_type', 0x2b
-
-;; OBJ:         Symbol 7
-;; OBJ-NEXT:    '_MergedGlobals'
-;; OBJ-NEXT:    'st_value', 0x00000010
+; OBJ: Relocations [
+; OBJ:   Section (1) .text {
+; OBJ:     0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC _MergedGlobals
+; OBJ:   }
+; OBJ: ]
diff --git a/test/MC/ARM/elf-reloc-02.ll b/test/MC/ARM/elf-reloc-02.ll
index 6b6b03c388a4..f0217644cad9 100644
--- a/test/MC/ARM/elf-reloc-02.ll
+++ b/test/MC/ARM/elf-reloc-02.ll
@@ -1,7 +1,7 @@
 ;; RUN: llc -mtriple=armv7-linux-gnueabi -O3  \
 ;; RUN:    -mcpu=cortex-a8 -mattr=-neon -mattr=+vfp2  -arm-reserve-r9  \
 ;; RUN:    -filetype=obj %s -o - | \
-;; RUN:   elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+;; RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
 ;; FIXME: This file needs to be in .s form!
 ;; The args to llc are there to constrain the codegen only.
@@ -41,10 +41,8 @@ declare i32 @write(...)
 
 declare void @exit(i32) noreturn nounwind
 
-;; OBJ:        Relocation 0
-;; OBJ-NEXT:    'r_offset',
-;; OBJ-NEXT:    'r_sym', 0x000005
-;; OBJ-NEXT:    'r_type', 0x2b
-
-;; OBJ:          Symbol 5
-;; OBJ-NEXT:    '.L.str'
+;; OBJ:      Relocations [
+;; OBJ:        Section (1) .text {
+;; OBJ-NEXT:     0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC .L.str
+;; OBJ:        }
+;; OBJ:      ]
diff --git a/test/MC/ARM/elf-reloc-03.ll b/test/MC/ARM/elf-reloc-03.ll
index 87f91c11210b..ac46e697c18a 100644
--- a/test/MC/ARM/elf-reloc-03.ll
+++ b/test/MC/ARM/elf-reloc-03.ll
@@ -1,7 +1,7 @@
 ;; RUN: llc -mtriple=armv7-linux-gnueabi -O3  \
 ;; RUN:    -mcpu=cortex-a8 -mattr=-neon -mattr=+vfp2  -arm-reserve-r9  \
 ;; RUN:    -filetype=obj %s -o - | \
-;; RUN:   elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+;; RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
 ;; FIXME: This file needs to be in .s form!
 ;; The args to llc are there to constrain the codegen only.
@@ -88,10 +88,8 @@ entry:
 
 declare void @exit(i32) noreturn nounwind
 
-;; OBJ:           Relocation 1
-;; OBJ-NEXT:     'r_offset',
-;; OBJ-NEXT:     'r_sym', 0x000010
-;; OBJ-NEXT:     'r_type', 0x2b
-
-;; OBJ:      Symbol 16
-;; OBJ-NEXT:    'vtable'
+;; OBJ: Relocations [
+;; OBJ:   Section (1) .text {
+;; OBJ:     0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC vtable
+;; OBJ:   }
+;; OBJ: ]
diff --git a/test/MC/ARM/elf-reloc-condcall.s b/test/MC/ARM/elf-reloc-condcall.s
index 3fafb43eb060..612942f2c595 100644
--- a/test/MC/ARM/elf-reloc-condcall.s
+++ b/test/MC/ARM/elf-reloc-condcall.s
@@ -1,33 +1,18 @@
 // RUN: llvm-mc -triple=armv7-linux-gnueabi -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         bleq some_label
         bl some_label
         blx some_label
         beq some_label
         b some_label
-// OBJ: .rel.text
 
-// OBJ: 'r_offset', 0x00000000
-// OBJ-NEXT:  'r_sym', 0x000005
-// OBJ-NEXT: 'r_type', 0x1d
-
-// OBJ: 'r_offset', 0x00000004
-// OBJ-NEXT:  'r_sym', 0x000005
-// OBJ-NEXT: 'r_type', 0x1c
-
-// OBJ: 'r_offset', 0x00000008
-// OBJ-NEXT:  'r_sym', 0x000005
-// OBJ-NEXT: 'r_type', 0x1c
-
-// OBJ: 'r_offset', 0x0000000c
-// OBJ-NEXT:  'r_sym', 0x000005
-// OBJ-NEXT: 'r_type', 0x1d
-
-// OBJ: 'r_offset', 0x00000010
-// OBJ-NEXT:  'r_sym', 0x000005
-// OBJ-NEXT: 'r_type', 0x1d
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0  R_ARM_JUMP24 some_label 0x0
+// OBJ-NEXT:     0x4  R_ARM_CALL   some_label 0x0
+// OBJ-NEXT:     0x8  R_ARM_CALL   some_label 0x0
+// OBJ-NEXT:     0xC  R_ARM_JUMP24 some_label 0x0
+// OBJ-NEXT:     0x10 R_ARM_JUMP24 some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.ll b/test/MC/ARM/elf-thumbfunc-reloc.ll
index b2f253d2fa95..e7d2c340d44d 100644
--- a/test/MC/ARM/elf-thumbfunc-reloc.ll
+++ b/test/MC/ARM/elf-thumbfunc-reloc.ll
@@ -1,5 +1,5 @@
 ; RUN: llc %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic \
-; RUN: -filetype=obj -o - | elf-dump --dump-section-data | \
+; RUN: -filetype=obj -o - | llvm-readobj -s -sd -r -t | \
 ; RUN: FileCheck %s
 
 ; FIXME: This file needs to be in .s form!
@@ -22,16 +22,20 @@ entry:
 
 
 ; make sure that bl 0 <foo> (fff7feff) is correctly encoded
-; CHECK: '_section_data', '704700bf 2de90048 fff7feff bde80088'
-
-;  Offset     Info    Type            Sym.Value  Sym. Name
-; 00000008  0000070a R_ARM_THM_CALL    00000001   foo
-; CHECK:           Relocation 0
-; CHECK-NEXT:      'r_offset', 0x00000008
-; CHECK-NEXT:      'r_sym', 0x000009
-; CHECK-NEXT:      'r_type', 0x0a
+; CHECK: Sections [
+; CHECK:   SectionData (
+; CHECK:     0000: 704700BF 2DE90048 FFF7FEFF BDE80088
+; CHECK:   )
+; CHECK: ]
+
+; CHECK:      Relocations [
+; CHECK-NEXT:   Section (1) .text {
+; CHECK-NEXT:     0x8 R_ARM_THM_CALL foo 0x0
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
 
 ; make sure foo is thumb function: bit 0 = 1
-; CHECK:           Symbol 9
-; CHECK-NEXT:      'foo'
-; CHECK-NEXT:      'st_value', 0x00000001
+; CHECK:      Symbols [
+; CHECK:        Symbol {
+; CHECK:          Name: foo
+; CHECK-NEXT:     Value: 0x1
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.s b/test/MC/ARM/elf-thumbfunc-reloc.s
index 4a311dd51131..87a26d8df84e 100644
--- a/test/MC/ARM/elf-thumbfunc-reloc.s
+++ b/test/MC/ARM/elf-thumbfunc-reloc.s
@@ -1,6 +1,6 @@
 @@ test st_value bit 0 of thumb function
 @ RUN: llvm-mc %s -triple=arm-freebsd-eabi -filetype=obj -o - | \
-@ RUN: elf-dump  | FileCheck %s
+@ RUN: llvm-readobj -r  | FileCheck %s
 
 
 	.syntax unified
@@ -17,7 +17,8 @@ f:
         pop     {r7, pc}
 
 @@ make sure an R_ARM_THM_CALL relocation is generated for the call to g
-@CHECK:        ('_relocations', [
-@CHECK:         (('r_offset', 0x00000004)
-@CHECK-NEXT:     ('r_sym', 0x{{[0-9a-fA-F]+}})
-@CHECK-NEXT:     ('r_type', 0x0a)
+@CHECK:      Relocations [
+@CHECK-NEXT:   Section (1) .text {
+@CHECK-NEXT:     0x4 R_ARM_THM_CALL g 0x0
+@CHECK-NEXT:   }
+@CHECK-NEXT: ]
diff --git a/test/MC/ARM/elf-thumbfunc.s b/test/MC/ARM/elf-thumbfunc.s
index 91b2eee7592b..26f5f0b159af 100644
--- a/test/MC/ARM/elf-thumbfunc.s
+++ b/test/MC/ARM/elf-thumbfunc.s
@@ -1,6 +1,6 @@
 @@ test st_value bit 0 of thumb function
 @ RUN: llvm-mc %s -triple=thumbv7-linux-gnueabi -filetype=obj -o - | \
-@ RUN: elf-dump  | FileCheck %s
+@ RUN: llvm-readobj -t | FileCheck %s
 	.syntax unified
 	.text
 	.globl	foo
@@ -12,9 +12,9 @@ foo:
 	bx	lr
 
 @@ make sure foo is thumb function: bit 0 = 1 (st_value)
-@CHECK:           Symbol 5
-@CHECK-NEXT:      'st_name', 0x00000001
-@CHECK-NEXT:      'st_value', 0x00000001
-@CHECK-NEXT:      'st_size', 0x00000000
-@CHECK-NEXT:      'st_bind', 0x1
-@CHECK-NEXT:      'st_type', 0x2
+@CHECK:        Symbol {
+@CHECK:          Name: foo
+@CHECK-NEXT:     Value: 0x1
+@CHECK-NEXT:     Size: 0
+@CHECK-NEXT:     Binding: Global
+@CHECK-NEXT:     Type: Function
diff --git a/test/MC/ARM/neon-cmp-encoding.s b/test/MC/ARM/neon-cmp-encoding.s
index b3aedb8e52ec..cffbeab644a0 100644
--- a/test/MC/ARM/neon-cmp-encoding.s
+++ b/test/MC/ARM/neon-cmp-encoding.s
@@ -174,3 +174,24 @@
 @ CHECK: vcge.u16	q8, q9, q8      @ encoding: [0xf0,0x03,0x52,0xf3]
 @ CHECK: vcge.u32	q8, q9, q8      @ encoding: [0xf0,0x03,0x62,0xf3]
 @ CHECK: vcge.f32	q8, q9, q8      @ encoding: [0xe0,0x0e,0x42,0xf3]
+
+
+@ VACLT is an alias for VACGT w/ the source operands reversed.
+@ VACLE is an alias for VACGE w/ the source operands reversed.
+	vaclt.f32 q9, q11, q12
+	vaclt.f32 d9, d11, d12
+	vaclt.f32 q11, q12
+	vaclt.f32 d11, d12
+	vacle.f32 q9, q11, q12
+	vacle.f32 d9, d11, d12
+	vacle.f32 q11, q12
+	vacle.f32 d11, d12
+
+@ CHECK: vacgt.f32	q9, q12, q11    @ encoding: [0xf6,0x2e,0x68,0xf3]
+@ CHECK: vacgt.f32	d9, d12, d11    @ encoding: [0x1b,0x9e,0x2c,0xf3]
+@ CHECK: vacgt.f32	q11, q12, q11   @ encoding: [0xf6,0x6e,0x68,0xf3]
+@ CHECK: vacgt.f32	d11, d12, d11   @ encoding: [0x1b,0xbe,0x2c,0xf3]
+@ CHECK: vacge.f32	q9, q12, q11    @ encoding: [0xf6,0x2e,0x48,0xf3]
+@ CHECK: vacge.f32	d9, d12, d11    @ encoding: [0x1b,0x9e,0x0c,0xf3]
+@ CHECK: vacge.f32	q11, q12, q11   @ encoding: [0xf6,0x6e,0x48,0xf3]
+@ CHECK: vacge.f32	d11, d12, d11   @ encoding: [0x1b,0xbe,0x0c,0xf3]
diff --git a/test/MC/ARM/xscale-attributes.ll b/test/MC/ARM/xscale-attributes.ll
index 3ccf02b95e2c..d1e9931e1448 100644
--- a/test/MC/ARM/xscale-attributes.ll
+++ b/test/MC/ARM/xscale-attributes.ll
@@ -2,7 +2,7 @@
 ; RUN: FileCheck -check-prefix=ASM %s
 
 ; RUN: llc %s -mtriple=thumbv5-linux-gnueabi -filetype=obj \
-; RUN: -mcpu=xscale -o - | elf-dump --dump-section-data | \
+; RUN: -mcpu=xscale -o - | llvm-readobj -s -sd | \
 ; RUN: FileCheck -check-prefix=OBJ %s
 
 ; FIXME: The OBJ test should be a .s to .o test and the ASM test should
@@ -17,15 +17,22 @@ entry:
 ; ASM-NEXT:      .eabi_attribute 8, 1
 ; ASM-NEXT:      .eabi_attribute 9, 1
 
-; OBJ:           Section 4
-; OBJ-NEXT:      'sh_name', 0x0000000c
-; OBJ-NEXT:      'sh_type', 0x70000003
-; OBJ-NEXT:	   'sh_flags', 0x00000000
-; OBJ-NEXT:	   'sh_addr', 0x00000000
-; OBJ-NEXT:	   'sh_offset', 0x00000038
-; OBJ-NEXT:	   'sh_size', 0x00000020
-; OBJ-NEXT:	   'sh_link', 0x00000000
-; OBJ-NEXT:	   'sh_info', 0x00000000
-; OBJ-NEXT:	   'sh_addralign', 0x00000001
-; OBJ-NEXT:	   'sh_entsize', 0x00000000
-; OBJ-NEXT:      '_section_data', '411f0000 00616561 62690001 15000000 06050801 09011401 15011703 18011901'
+; OBJ:      Sections [
+; OBJ:        Section {
+; OBJ:          Index: 4
+; OBJ-NEXT:     Name: .ARM.attributes (12)
+; OBJ-NEXT:     Type: SHT_ARM_ATTRIBUTES
+; OBJ-NEXT:     Flags [ (0x0)
+; OBJ-NEXT:     ]
+; OBJ-NEXT:     Address: 0x0
+; OBJ-NEXT:     Offset: 0x38
+; OBJ-NEXT:     Size: 32
+; OBJ-NEXT:     Link: 0
+; OBJ-NEXT:     Info: 0
+; OBJ-NEXT:     AddressAlignment: 1
+; OBJ-NEXT:     EntrySize: 0
+; OBJ-NEXT:     SectionData (
+; OBJ-NEXT:       0000: 411F0000 00616561 62690001 15000000
+; OBJ-NEXT:       0010: 06050801 09011401 15011703 18011901
+; OBJ-NEXT:     )
+; OBJ-NEXT:   }
diff --git a/test/MC/AsmParser/section.s b/test/MC/AsmParser/section.s
index 5abacc701354..0c3828d714a7 100644
--- a/test/MC/AsmParser/section.s
+++ b/test/MC/AsmParser/section.s
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -triple i386-pc-linux-gnu -filetype=obj -o %t %s
-# RUN: elf-dump --dump-section-data < %t | FileCheck %s
+# RUN: llvm-readobj -s -sd < %t | FileCheck %s
 .section test1
 .byte 1
 .section test2
@@ -45,63 +45,85 @@
 .previous
 .byte 1
 .previous
-# CHECK:       (('sh_name', 0x00000044) # 'test1'
-# CHECK-NEXT:   ('sh_type', 0x00000001)
-# CHECK-NEXT:   ('sh_flags', 0x00000000)
-# CHECK-NEXT:   ('sh_addr', 0x00000000)
-# CHECK-NEXT:   ('sh_offset', 0x00000034)
-# CHECK-NEXT:   ('sh_size', 0x00000007)
-# CHECK-NEXT:   ('sh_link', 0x00000000)
-# CHECK-NEXT:   ('sh_info', 0x00000000)
-# CHECK-NEXT:   ('sh_addralign', 0x00000001)
-# CHECK-NEXT:   ('sh_entsize', 0x00000000)
-# CHECK-NEXT:   ('_section_data', '01010101 010101')
-# CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x0000003e) # 'test2'
-# CHECK-NEXT:   ('sh_type', 0x00000001)
-# CHECK-NEXT:   ('sh_flags', 0x00000000)
-# CHECK-NEXT:   ('sh_addr', 0x00000000)
-# CHECK-NEXT:   ('sh_offset', 0x0000003b)
-# CHECK-NEXT:   ('sh_size', 0x00000006)
-# CHECK-NEXT:   ('sh_link', 0x00000000)
-# CHECK-NEXT:   ('sh_info', 0x00000000)
-# CHECK-NEXT:   ('sh_addralign', 0x00000001)
-# CHECK-NEXT:   ('sh_entsize', 0x00000000)
-# CHECK-NEXT:   ('_section_data', '02020202 0202')
-# CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x00000038) # 'test3'
-# CHECK-NEXT:   ('sh_type', 0x00000001)
-# CHECK-NEXT:   ('sh_flags', 0x00000000)
-# CHECK-NEXT:   ('sh_addr', 0x00000000)
-# CHECK-NEXT:   ('sh_offset', 0x00000041)
-# CHECK-NEXT:   ('sh_size', 0x00000005)
-# CHECK-NEXT:   ('sh_link', 0x00000000)
-# CHECK-NEXT:   ('sh_info', 0x00000000)
-# CHECK-NEXT:   ('sh_addralign', 0x00000001)
-# CHECK-NEXT:   ('sh_entsize', 0x00000000)
-# CHECK-NEXT:   ('_section_data', '03030303 03')
-# CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x00000032) # 'test4'
-# CHECK-NEXT:   ('sh_type', 0x00000001)
-# CHECK-NEXT:   ('sh_flags', 0x00000000)
-# CHECK-NEXT:   ('sh_addr', 0x00000000)
-# CHECK-NEXT:   ('sh_offset', 0x00000046)
-# CHECK-NEXT:   ('sh_size', 0x00000003)
-# CHECK-NEXT:   ('sh_link', 0x00000000)
-# CHECK-NEXT:   ('sh_info', 0x00000000)
-# CHECK-NEXT:   ('sh_addralign', 0x00000001)
-# CHECK-NEXT:   ('sh_entsize', 0x00000000)
-# CHECK-NEXT:   ('_section_data', '040404')
-# CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x0000002c) # 'test5'
-# CHECK-NEXT:   ('sh_type', 0x00000001)
-# CHECK-NEXT:   ('sh_flags', 0x00000000)
-# CHECK-NEXT:   ('sh_addr', 0x00000000)
-# CHECK-NEXT:   ('sh_offset', 0x00000049)
-# CHECK-NEXT:   ('sh_size', 0x00000001)
-# CHECK-NEXT:   ('sh_link', 0x00000000)
-# CHECK-NEXT:   ('sh_info', 0x00000000)
-# CHECK-NEXT:   ('sh_addralign', 0x00000001)
-# CHECK-NEXT:   ('sh_entsize', 0x00000000)
-# CHECK-NEXT:   ('_section_data', '05')
-# CHECK-NEXT:  ),
+
+# CHECK:      Sections [
+# CHECK:        Section {
+# CHECK:          Name: test1 (68)
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset: 0x34
+# CHECK-NEXT:     Size: 7
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 01010101 010101
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
+# CHECK:        Section {
+# CHECK:          Name: test2 (62)
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset: 0x3B
+# CHECK-NEXT:     Size: 6
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 02020202 0202
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
+# CHECK:        Section {
+# CHECK:          Name: test3 (56)
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset: 0x41
+# CHECK-NEXT:     Size: 5
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 03030303 03
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
+# CHECK:        Section {
+# CHECK:          Name: test4 (50)
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset: 0x46
+# CHECK-NEXT:     Size: 3
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 040404
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
+# CHECK:        Section {
+# CHECK:          Name: test5 (44)
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset: 0x49
+# CHECK-NEXT:     Size: 1
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 05
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
diff --git a/test/MC/AsmParser/section_names.s b/test/MC/AsmParser/section_names.s
index 332cdbe3fed5..3883e15880a5 100644
--- a/test/MC/AsmParser/section_names.s
+++ b/test/MC/AsmParser/section_names.s
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -triple i386-pc-linux-gnu -filetype=obj -o %t %s
-# RUN: elf-dump --dump-section-data < %t | FileCheck %s
+# RUN: llvm-readobj -s < %t | FileCheck %s
 .section .nobits
 .byte 1
 .section .nobits2
@@ -30,33 +30,33 @@
 .byte 1
 .section .notefoo
 .byte 1
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.nobits'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.nobits2'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.nobitsfoo'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.init_array'
-# CHECK-NEXT:  ('sh_type', 0x0000000e)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.init_array2'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.init_arrayfoo'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.fini_array'
-# CHECK-NEXT:  ('sh_type', 0x0000000f)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.fini_array2'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.fini_arrayfoo'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.preinit_array'
-# CHECK-NEXT:  ('sh_type', 0x00000010)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.preinit_array2'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.preinit_arrayfoo'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.note'
-# CHECK-NEXT:  ('sh_type', 0x00000007)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.note2'
-# CHECK-NEXT:  ('sh_type', 0x00000007)
-#CHECK:       (('sh_name', 0x00000{{...}}) # '.notefoo'
-# CHECK-NEXT:  ('sh_type', 0x00000007)
+# CHECK:        Name: .nobits
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .nobits2
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .nobitsfoo
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .init_array
+# CHECK-NEXT:   Type:  SHT_INIT_ARRAY
+# CHECK:        Name: .init_array2
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .init_arrayfoo
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .fini_array
+# CHECK-NEXT:   Type: SHT_FINI_ARRAY
+# CHECK:        Name: .fini_array2
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .fini_arrayfoo
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .preinit_array
+# CHECK-NEXT:   Type: SHT_PREINIT_ARRAY
+# CHECK:        Name: .preinit_array2
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .preinit_arrayfoo
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .note
+# CHECK-NEXT:   Type: SHT_NOTE
+# CHECK:        Name: .note2
+# CHECK-NEXT:   Type: SHT_NOTE
+# CHECK:        Name: .notefoo
+# CHECK-NEXT:   Type: SHT_NOTE
diff --git a/test/MC/COFF/align-nops.s b/test/MC/COFF/align-nops.s
index 2971ec67798e..02b488475e90 100644
--- a/test/MC/COFF/align-nops.s
+++ b/test/MC/COFF/align-nops.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -s -sd | FileCheck %s
 
 // Test that we get optimal nops in text
     .text
@@ -15,36 +15,40 @@ f0:
     .long 0
     .align  8
 
-//CHECK:         Name                     = .text
-//CHECK-NEXT:    VirtualSize
-//CHECK-NEXT:    VirtualAddress
-//CHECK-NEXT:    SizeOfRawData            = 16
-//CHECK-NEXT:    PointerToRawData
-//CHECK-NEXT:    PointerToRelocations
-//CHECK-NEXT:    PointerToLineNumbers
-//CHECK-NEXT:    NumberOfRelocations
-//CHECK-NEXT:    NumberOfLineNumbers
-//CHECK-NEXT:    Charateristics           = 0x60400020
-//CHECK-NEXT:        IMAGE_SCN_CNT_CODE
+//CHECK:          Name: .text
+//CHECK-NEXT:     VirtualSize
+//CHECK-NEXT:     VirtualAddress
+//CHECK-NEXT:     RawDataSize: 16
+//CHECK-NEXT:     PointerToRawData
+//CHECK-NEXT:     PointerToRelocations
+//CHECK-NEXT:     PointerToLineNumbers
+//CHECK-NEXT:     RelocationCount
+//CHECK-NEXT:     LineNumberCount
+//CHECK-NEXT:     Characteristics [ (0x60400020)
 //CHECK-NEXT:        IMAGE_SCN_ALIGN_8BYTES
+//CHECK-NEXT:        IMAGE_SCN_CNT_CODE
 //CHECK-NEXT:        IMAGE_SCN_MEM_EXECUTE
 //CHECK-NEXT:        IMAGE_SCN_MEM_READ
-//CHECK-NEXT:      SectionData              =
-//CHECK-NEXT:        00 00 00 00 0F 1F 40 00 - 00 00 00 00 0F 1F 40 00
+//CHECK-NEXT:     ]
+//CHECK-NEXT:     SectionData (
+//CHECK-NEXT:       0000: 00000000 0F1F4000 00000000 0F1F4000
+//CHECK-NEXT:     )
 
-//CHECK:         Name                     = .data
-//CHECK-NEXT:      VirtualSize
-//CHECK-NEXT:      VirtualAddress
-//CHECK-NEXT:      SizeOfRawData            = 16
-//CHECK-NEXT:      PointerToRawData
-//CHECK-NEXT:      PointerToRelocations
-//CHECK-NEXT:      PointerToLineNumbers
-//CHECK-NEXT:      NumberOfRelocations
-//CHECK-NEXT:      NumberOfLineNumbers
-//CHECK-NEXT:      Charateristics           = 0xC0400040
-//CHECK-NEXT:        IMAGE_SCN_CNT_INITIALIZED_DATA
-//CHECK-NEXT:        IMAGE_SCN_ALIGN_8BYTES
-//CHECK-NEXT:        IMAGE_SCN_MEM_READ
-//CHECK-NEXT:        IMAGE_SCN_MEM_WRITE
-//CHECK-NEXT:      SectionData              =
-//CHECK-NEXT:        00 00 00 00 90 90 90 90 - 00 00 00 00 00 00 00 00
+//CHECK:          Name: .data
+//CHECK-NEXT:     VirtualSize:
+//CHECK-NEXT:     VirtualAddress:
+//CHECK-NEXT:     RawDataSize: 16
+//CHECK-NEXT:     PointerToRawData:
+//CHECK-NEXT:     PointerToRelocations:
+//CHECK-NEXT:     PointerToLineNumbers:
+//CHECK-NEXT:     RelocationCount:
+//CHECK-NEXT:     LineNumberCount:
+//CHECK-NEXT:     Characteristics [ (0xC0400040)
+//CHECK-NEXT:       IMAGE_SCN_ALIGN_8BYTES
+//CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+//CHECK-NEXT:       IMAGE_SCN_MEM_READ
+//CHECK-NEXT:       IMAGE_SCN_MEM_WRITE
+//CHECK-NEXT:     ]
+//CHECK-NEXT:     SectionData (
+//CHECK-NEXT:       0000: 00000000 90909090 00000000 00000000
+//CHECK-NEXT:     )
diff --git a/test/MC/COFF/basic-coff-64.s b/test/MC/COFF/basic-coff-64.s
new file mode 100644
index 000000000000..89d17452d0d7
--- /dev/null
+++ b/test/MC/COFF/basic-coff-64.s
@@ -0,0 +1,137 @@
+// This test checks that the COFF object emitter works for the most basic
+// programs.
+
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -h -s -sr -sd -t | FileCheck %s
+
+.def	 _main;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_main
+	.align	16, 0x90
+_main:                                  # @main
+# BB#0:                                 # %entry
+	subl	$4, %esp
+	movl	$.L_.str, (%esp)
+	call	_printf
+	xorl	%eax, %eax
+	addl	$4, %esp
+	ret
+
+	.data
+.L_.str:                                # @.str
+	.asciz	"Hello World"
+
+// CHECK: ImageFileHeader {
+// CHECK:   Machine: IMAGE_FILE_MACHINE_AMD64
+// CHECK:   SectionCount: 2
+// CHECK:   TimeDateStamp: {{[0-9]+}}
+// CHECK:   PointerToSymbolTable: 0x{{[0-9A-F]+}}
+// CHECK:   SymbolCount: 6
+// CHECK:   OptionalHeaderSize: 0
+// CHECK:   Characteristics [ (0x0)
+// CHECK:   ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Number:               [[TextNum:[0-9]+]]
+// CHECK:     Name:                 .text
+// CHECK:     VirtualSize:          0
+// CHECK:     VirtualAddress:       0
+// CHECK:     RawDataSize:          [[TextSize:[0-9]+]]
+// CHECK:     PointerToRawData:     0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations: 0x{{[0-9A-F]+}}
+// CHECK:     PointerToLineNumbers: 0x0
+// CHECK:     RelocationCount:      2
+// CHECK:     LineNumberCount:      0
+// CHECK:     Characteristics [ (0x60500020)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_CODE
+// CHECK:       IMAGE_SCN_MEM_EXECUTE
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:       0x{{[0-9A-F]+}} IMAGE_REL_AMD64_ADDR32 .data
+// CHECK:       0x{{[0-9A-F]+}} IMAGE_REL_AMD64_REL32 _printf
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number:               [[DataNum:[0-9]+]]
+// CHECK:     Name:                 .data
+// CHECK:     VirtualSize:          0
+// CHECK:     VirtualAddress:       0
+// CHECK:     RawDataSize:          [[DataSize:[0-9]+]]
+// CHECK:     PointerToRawData:     0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations: 0x0
+// CHECK:     PointerToLineNumbers: 0x0
+// CHECK:     RelocationCount:      0
+// CHECK:     LineNumberCount:      0
+// CHECK:     Characteristics [ (0xC0300040)
+// CHECK:       IMAGE_SCN_ALIGN_4BYTES
+// CHECK:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:       IMAGE_SCN_MEM_WRITE
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 48656C6C 6F20576F 726C6400             |Hello World.|
+// CHECK:     )
+// CHECK:   }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Name:           .text
+// CHECK:     Value:          0
+// CHECK:     Section:        .text
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   Static
+// CHECK:     AuxSymbolCount: 1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: [[TextSize]]
+// CHECK:       RelocationCount: 2
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: [[TextNum]]
+// CHECK:       Selection: 0x0
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           .data
+// CHECK:     Value:          0
+// CHECK:     Section:        .data
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   Static
+// CHECK:     AuxSymbolCount: 1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: [[DataSize]]
+// CHECK:       RelocationCount: 0
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: [[DataNum]]
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           _main
+// CHECK:     Value:          0
+// CHECK:     Section:        .text
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Function
+// CHECK:     StorageClass:   External
+// CHECK:     AuxSymbolCount: 0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           _printf
+// CHECK:     Value:          0
+// CHECK:     Section:        (0)
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   External
+// CHECK:     AuxSymbolCount: 0
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/COFF/basic-coff.s b/test/MC/COFF/basic-coff.s
index 23156b82d2e5..9b299707a130 100644
--- a/test/MC/COFF/basic-coff.s
+++ b/test/MC/COFF/basic-coff.s
@@ -1,8 +1,7 @@
 // This test checks that the COFF object emitter works for the most basic
 // programs.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -h -s -sr -sd -t | FileCheck %s
 
 .def	 _main;
 	.scl	2;
@@ -15,119 +14,124 @@ _main:                                  # @main
 # BB#0:                                 # %entry
 	subl	$4, %esp
 	movl	$L_.str, (%esp)
-	calll	_printf
+	call	_printf
 	xorl	%eax, %eax
 	addl	$4, %esp
 	ret
 
 	.data
 L_.str:                                 # @.str
-	.asciz	 "Hello World"
+	.asciz	"Hello World"
 
-// CHECK: {
-// CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
-// CHECK:   NumberOfSections         = 2
-// CHECK:   TimeDateStamp            = {{[0-9]+}}
-// CHECK:   PointerToSymbolTable     = 0x{{[0-9A-F]+}}
-// CHECK:   NumberOfSymbols          = 6
-// CHECK:   SizeOfOptionalHeader     = 0
-// CHECK:   Characteristics          = 0x0
-// CHECK:   Sections                 = [
-// CHECK:     1 = {
-// CHECK:       Name                     = .text
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 2
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0x60500020
-// CHECK:         IMAGE_SCN_CNT_CODE
-// CHECK:         IMAGE_SCN_ALIGN_16BYTES
-// CHECK:         IMAGE_SCN_MEM_EXECUTE
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:       SectionData              =
-// CHECK:       Relocations              = [
-// CHECK:         0 = {
-// CHECK:           VirtualAddress           = 0x{{[0-9A-F]+}}
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         1 = {
-// CHECK:           VirtualAddress           = 0x{{[0-9A-F]+}}
-// CHECK:           SymbolTableIndex         = 5
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _printf
-// CHECK:         }
-// CHECK:       ]
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x0
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 0
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0xC0300040
-// CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
-// CHECK:         IMAGE_SCN_ALIGN_4BYTES
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:         IMAGE_SCN_MEM_WRITE
-// CHECK:       SectionData              =
-// CHECK:         48 65 6C 6C 6F 20 57 6F - 72 6C 64 00             |Hello World.|
-// CHECK:       Relocations              = None
-// CHECK:     }
+// CHECK: ImageFileHeader {
+// CHECK:   Machine: IMAGE_FILE_MACHINE_I386
+// CHECK:   SectionCount: 2
+// CHECK:   TimeDateStamp: {{[0-9]+}}
+// CHECK:   PointerToSymbolTable: 0x{{[0-9A-F]+}}
+// CHECK:   SymbolCount: 6
+// CHECK:   OptionalHeaderSize: 0
+// CHECK:   Characteristics [ (0x0)
 // CHECK:   ]
-// CHECK:   Symbols                  = [
-// CHECK:     0 = {
-// CHECK:       Name                     = .text
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         15 00 00 00 02 00 00 00 - 00 00 00 00 01 00 00 00 |................|
-// CHECK:         00 00                                             |..|
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 2
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         0C 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
-// CHECK:         00 00                                             |..|
-// CHECK:     }
-// CHECK:     4 = {
-// CHECK:       Name                     = _main
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Number:               [[TextNum:[0-9]+]]
+// CHECK:     Name:                 .text
+// CHECK:     VirtualSize:          0
+// CHECK:     VirtualAddress:       0
+// CHECK:     RawDataSize:          {{[0-9]+}}
+// CHECK:     PointerToRawData:     0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations: 0x{{[0-9A-F]+}}
+// CHECK:     PointerToLineNumbers: 0x0
+// CHECK:     RelocationCount:      2
+// CHECK:     LineNumberCount:      0
+// CHECK:     Characteristics [ (0x60500020)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_CODE
+// CHECK:       IMAGE_SCN_MEM_EXECUTE
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:       0x{{[0-9A-F]+}} IMAGE_REL_I386_DIR32 .data
+// CHECK:       0x{{[0-9A-F]+}} IMAGE_REL_I386_REL32 _printf
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number:               [[DataNum:[0-9]+]]
+// CHECK:     Name:                 .data
+// CHECK:     VirtualSize:          0
+// CHECK:     VirtualAddress:       0
+// CHECK:     RawDataSize:          {{[0-9]+}}
+// CHECK:     PointerToRawData:     0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations: 0x0
+// CHECK:     PointerToLineNumbers: 0x0
+// CHECK:     RelocationCount:      0
+// CHECK:     LineNumberCount:      0
+// CHECK:     Characteristics [ (0xC0300040)
+// CHECK:       IMAGE_SCN_ALIGN_4BYTES
+// CHECK:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:       IMAGE_SCN_MEM_WRITE
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 48656C6C 6F20576F 726C6400             |Hello World.|
+// CHECK:     )
+// CHECK:   }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Name:           .text
+// CHECK:     Value:          0
+// CHECK:     Section:        .text
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   Static
+// CHECK:     AuxSymbolCount: 1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 21
+// CHECK:       RelocationCount: 2
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 1
+// CHECK:       Selection: 0x0
 // CHECK:     }
-// CHECK:     5 = {
-// CHECK:       Name                     = _printf
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 0
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           .data
+// CHECK:     Value:          0
+// CHECK:     Section:        .data
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   Static
+// CHECK:     AuxSymbolCount: 1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 12
+// CHECK:       RelocationCount: 0
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 2
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
 // CHECK:     }
-// CHECK:   ]
-// CHECK: }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           _main
+// CHECK:     Value:          0
+// CHECK:     Section:        .text
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Function
+// CHECK:     StorageClass:   External
+// CHECK:     AuxSymbolCount: 0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           _printf
+// CHECK:     Value:          0
+// CHECK:     Section:        (0)
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   External
+// CHECK:     AuxSymbolCount: 0
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/COFF/bss.s b/test/MC/COFF/bss.s
index 3bed13d4aac9..86294c18683e 100644
--- a/test/MC/COFF/bss.s
+++ b/test/MC/COFF/bss.s
@@ -1,7 +1,7 @@
 // The purpose of this test is to verify that bss sections are emited correctly.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -s | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -s | FileCheck %s
 
     .bss
     .globl _g0
@@ -9,7 +9,7 @@
 _g0:
     .long 0
 
-// CHECK:      Name           = .bss
-// CHECK-NEXT: VirtualSize    = 0
-// CHECK-NEXT: VirtualAddress = 0
-// CHECK-NEXT: SizeOfRawData  = 4
+// CHECK:      Name:            .bss
+// CHECK-NEXT: VirtualSize:     0
+// CHECK-NEXT: VirtualAddress:  0
+// CHECK-NEXT: RawDataSize:     4
diff --git a/test/MC/COFF/diff.s b/test/MC/COFF/diff.s
index aa683f26a7b1..820272a40bf4 100644
--- a/test/MC/COFF/diff.s
+++ b/test/MC/COFF/diff.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | llvm-readobj -s -sr -sd | FileCheck %s
 
 	.def	 _foobar;
 	.scl	2;
@@ -21,26 +21,12 @@ _rust_crate:
 	.long	_foobar-_rust_crate
 	.long	_foobar-_rust_crate
 
-// CHECK:      Name                     = .data
-// CHECK:      SectionData              =
-// CHECK-NEXT:   00 00 00 00 00 00 00 00 - 1C 00 00 00 20 00 00 00 |............ ...|
-// CHECK:        Relocations              = [
-// CHECK-NEXT:   0 = {
-// CHECK-NEXT:     VirtualAddress           = 0x4
-// CHECK-NEXT:     SymbolTableIndex         =
-// CHECK-NEXT:     Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK-NEXT:     SymbolName               = _foobar
-// CHECK-NEXT:   }
-// CHECK-NEXT:   1 = {
-// CHECK-NEXT:     VirtualAddress           = 0x8
-// CHECK-NEXT:     SymbolTableIndex         = 0
-// CHECK-NEXT:     Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK-NEXT:     SymbolName               = .text
-// CHECK-NEXT:   }
-// CHECK-NEXT:   2 = {
-// CHECK-NEXT:     VirtualAddress           = 0xC
-// CHECK-NEXT:     SymbolTableIndex         = 0
-// CHECK-NEXT:     Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK-NEXT:     SymbolName               = .text
-// CHECK-NEXT:   }
-// CHECK-NEXT: ]
+// CHECK:        Name: .data
+// CHECK:        Relocations [
+// CHECK-NEXT:     0x4 IMAGE_REL_I386_DIR32 _foobar
+// CHECK-NEXT:     0x8 IMAGE_REL_I386_REL32 .text
+// CHECK-NEXT:     0xC IMAGE_REL_I386_REL32 .text
+// CHECK-NEXT:   ]
+// CHECK:        SectionData (
+// CHECK-NEXT:     0000: 00000000 00000000 1C000000 20000000
+// CHECK-NEXT:   )
diff --git a/test/MC/COFF/module-asm.ll b/test/MC/COFF/module-asm.ll
index 9c6d00d2f503..bf14dc695b58 100644
--- a/test/MC/COFF/module-asm.ll
+++ b/test/MC/COFF/module-asm.ll
@@ -1,26 +1,28 @@
 ; The purpose of this test is to verify that various module level assembly
 ; constructs work.
 
-; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | coff-dump.py | FileCheck %s
-; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o - | coff-dump.py | FileCheck %s
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | llvm-readobj -s -sd | FileCheck %s
+; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 module asm ".text"
 module asm "_foo:"
 module asm "  ret"
 
-; CHECK:            Name                     = .text
-; CHECK-NEXT:       VirtualSize              = 0
-; CHECK-NEXT:       VirtualAddress           = 0
-; CHECK-NEXT:       SizeOfRawData            = {{[0-9]+}}
-; CHECK-NEXT:       PointerToRawData         = 0x{{[0-9A-F]+}}
-; CHECK-NEXT:       PointerToRelocations     = 0x{{[0-9A-F]+}}
-; CHECK-NEXT:       PointerToLineNumbers     = 0x0
-; CHECK-NEXT:       NumberOfRelocations      = 0
-; CHECK-NEXT:       NumberOfLineNumbers      = 0
-; CHECK-NEXT:       Charateristics           = 0x60300020
-; CHECK-NEXT:         IMAGE_SCN_CNT_CODE
+; CHECK:            Name:                      .text
+; CHECK-NEXT:       VirtualSize:               0
+; CHECK-NEXT:       VirtualAddress:            0
+; CHECK-NEXT:       RawDataSize:               {{[0-9]+}}
+; CHECK-NEXT:       PointerToRawData:          0x{{[0-9A-F]+}}
+; CHECK-NEXT:       PointerToRelocations:      0x{{[0-9A-F]+}}
+; CHECK-NEXT:       PointerToLineNumbers:      0x0
+; CHECK-NEXT:       RelocationCount:           0
+; CHECK-NEXT:       LineNumberCount:           0
+; CHECK-NEXT:       Characteristics [ (0x60300020)
 ; CHECK-NEXT:         IMAGE_SCN_ALIGN_4BYTES
+; CHECK-NEXT:         IMAGE_SCN_CNT_CODE
 ; CHECK-NEXT:         IMAGE_SCN_MEM_EXECUTE
 ; CHECK-NEXT:         IMAGE_SCN_MEM_READ
-; CHECK-NEXT:       SectionData              =
-; CHECK-NEXT:         C3
+; CHECK-NEXT:       ]
+; CHECK-NEXT:       SectionData (
+; CHECK-NEXT:         0000: C3
+; CHECK-NEXT:       )
diff --git a/test/MC/COFF/relocation-imgrel.s b/test/MC/COFF/relocation-imgrel.s
new file mode 100644
index 000000000000..ccd19ee318b6
--- /dev/null
+++ b/test/MC/COFF/relocation-imgrel.s
@@ -0,0 +1,29 @@
+// COFF Image-relative relocations
+//
+// Test that we produce image-relative relocations (IMAGE_REL_I386_DIR32NB
+// and IMAGE_REL_AMD64_ADDR32NB) when accessing foo@imgrel.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -r | FileCheck --check-prefix=W32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -r | FileCheck --check-prefix=W64 %s
+
+.data
+foo:
+    .long 1
+
+.text
+    mov foo@IMGREL(%ebx, %ecx, 4), %eax
+    mov foo@imgrel(%ebx, %ecx, 4), %eax
+
+// W32:      Relocations [
+// W32-NEXT:   Section (1) .text {
+// W32-NEXT:     0x3 IMAGE_REL_I386_DIR32NB foo
+// W32-NEXT:     0xA IMAGE_REL_I386_DIR32NB foo
+// W32-NEXT:   }
+// W32-NEXT: ]
+
+// W64:      Relocations [
+// W64-NEXT:   Section (1) .text {
+// W64-NEXT:     0x4 IMAGE_REL_AMD64_ADDR32NB foo
+// W64-NEXT:     0xC IMAGE_REL_AMD64_ADDR32NB foo
+// W64-NEXT:   }
+// W64-NEXT: ]
diff --git a/test/MC/COFF/secrel32.s b/test/MC/COFF/secrel32.s
index ce148db9000c..deadfe09e0e4 100644
--- a/test/MC/COFF/secrel32.s
+++ b/test/MC/COFF/secrel32.s
@@ -1,14 +1,10 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -s -sr | FileCheck %s
 
 // check that we produce the correct relocation for .secrel32
 
 Lfoo:
 	.secrel32	Lfoo
 
-// CHECK:       Relocations              = [
-// CHECK-NEXT:    0 = {
-// CHECK-NEXT:       VirtualAddress           = 0x0
-// CHECK-NEXT:       SymbolTableIndex         = 0
-// CHECK-NEXT:       Type                     = IMAGE_REL_I386_SECREL (11)
-// CHECK-NEXT:       SymbolName               = .text
-// CHECK-NEXT:     }
+// CHECK:       Relocations [
+// CHECK-NEXT:    0x0 IMAGE_REL_I386_SECREL .text
+// CHECK-NEXT:  ]
diff --git a/test/MC/COFF/seh-section.s b/test/MC/COFF/seh-section.s
index 802cba5e6b31..7f05cc372e07 100644
--- a/test/MC/COFF/seh-section.s
+++ b/test/MC/COFF/seh-section.s
@@ -1,24 +1,26 @@
 // This test ensures that, if the section containing a function has a suffix
 // (e.g. .text$foo), its unwind info section also has a suffix (.xdata$foo).
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -s -sd | FileCheck %s
 // XFAIL: *
 
-// CHECK:      Name                 = .xdata$foo
+// CHECK:      Name: .xdata$foo
 // CHECK-NEXT: VirtualSize
 // CHECK-NEXT: VirtualAddress
-// CHECK-NEXT: SizeOfRawData        = 8
+// CHECK-NEXT: RawDataSize: 8
 // CHECK-NEXT: PointerToRawData
 // CHECK-NEXT: PointerToRelocations
 // CHECK-NEXT: PointerToLineNumbers
-// CHECK-NEXT: NumberOfRelocations  = 0
-// CHECK-NEXT: NumberOfLineNumbers  = 0
-// CHECK-NEXT: Charateristics
-// CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT: RelocationCount: 0
+// CHECK-NEXT: LineNumberCount: 0
+// CHECK-NEXT: Characteristics [
 // CHECK-NEXT:   IMAGE_SCN_ALIGN_4BYTES
+// CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
 // CHECK-NEXT:   IMAGE_SCN_MEM_READ
 // CHECK-NEXT:   IMAGE_SCN_MEM_WRITE
-// CHECK-NEXT: SectionData
-// CHECK-NEXT:   01 05 02 00 05 50 04 02
+// CHECK-NEXT: ]
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000: 01050200 05500402
+// CHECK-NEXT: )
 
     .section .text$foo,"x"
     .globl foo
diff --git a/test/MC/COFF/seh.s b/test/MC/COFF/seh.s
index 3f728056005b..bef425efb437 100644
--- a/test/MC/COFF/seh.s
+++ b/test/MC/COFF/seh.s
@@ -1,24 +1,105 @@
 // This test checks that the SEH directives emit the correct unwind data.
-// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | coff-dump.py | FileCheck %s
 
-// CHECK:      Name                 = .xdata
-// CHECK-NEXT: VirtualSize
-// CHECK-NEXT: VirtualAddress
-// CHECK-NEXT: SizeOfRawData        = 52
-// CHECK-NEXT: PointerToRawData
-// CHECK-NEXT: PointerToRelocations
-// CHECK-NEXT: PointerToLineNumbers
-// CHECK-NEXT: NumberOfRelocations  = 4
-// CHECK-NEXT: NumberOfLineNumbers  = 0
-// CHECK-NEXT: Charateristics
-// CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
-// CHECK-NEXT:   IMAGE_SCN_ALIGN_4BYTES
-// CHECK-NEXT:   IMAGE_SCN_MEM_READ
-// CHECK-NEXT: SectionData
-// CHECK-NEXT:   09 12 08 03 00 03 0F 30 - 0E 88 00 00 09 64 02 00
-// CHECK-NEXT:   04 22 00 1A 00 00 00 00 - 00 00 00 00 21 00 00 00
-// CHECK-NEXT:   00 00 00 00 1B 00 00 00 - 00 00 00 00 01 00 00 00
-// CHECK-NEXT:   00 00 00 00
+// TODO: Expected fail because SET_FPREG has a wrong offset.
+// XFAIL: *
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -u | FileCheck %s
+
+// CHECK:      Sections [
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK:          RelocationCount: 0
+// CHECK:          Characteristics [
+// CHECK-NEXT:       ALIGN_4BYTES
+// CHECK-NEXT:       CNT_CODE
+// CHECK-NEXT:       MEM_EXECUTE
+// CHECK-NEXT:       MEM_READ
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .xdata
+// CHECK:          RawDataSize: 52
+// CHECK:          RelocationCount: 4
+// CHECK:          Characteristics [
+// CHECK-NEXT:       ALIGN_4BYTES
+// CHECK-NEXT:       CNT_INITIALIZED_DATA
+// CHECK-NEXT:       MEM_READ
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .pdata
+// CHECK:          RelocationCount: 9
+// CHECK:          Characteristics [
+// CHECK-NEXT:       ALIGN_4BYTES
+// CHECK-NEXT:       CNT_INITIALIZED_DATA
+// CHECK-NEXT:       MEM_READ
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+
+// CHECK:      UnwindInformation [
+// CHECK-NEXT:   RuntimeFunction {
+// CHECK-NEXT:     StartAddress: [[CodeSect1:[^ ]+]] [[BeginDisp1:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     EndAddress: [[CodeSect1]] [[EndDisp1:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     UnwindInfoAddress:
+// CHECK-NEXT:     UnwindInfo {
+// CHECK-NEXT:       Version: 1
+// CHECK-NEXT:       Flags [
+// CHECK-NEXT:         ExceptionHandler
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       PrologSize: 18
+// CHECK-NEXT:       FrameRegister: RBX
+// CHECK-NEXT:       FrameOffset: 0x0
+// CHECK-NEXT:       UnwindCodeCount: 8
+// CHECK-NEXT:       UnwindCodes [
+// CHECK-NEXT:         0x12: SET_FPREG reg=RBX, offset=0x0
+// CHECK-NEXT:         0x0F: PUSH_NONVOL reg=RBX
+// CHECK-NEXT:         0x0E: SAVE_XMM128 reg=XMM8, offset=0x0
+// CHECK-NEXT:         0x09: SAVE_NONVOL reg=RSI, offset=0x10
+// CHECK-NEXT:         0x04: ALLOC_SMALL size=24
+// CHECK-NEXT:         0x00: PUSH_MACHFRAME errcode=yes
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       Handler: __C_specific_handler
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   RuntimeFunction {
+// CHECK-NEXT:     StartAddress: [[CodeSect2:[^ ]+]] [[BeginDisp2:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     EndAddress: [[CodeSect2]] [[BeginDisp2:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     UnwindInfoAddress:
+// CHECK-NEXT:     UnwindInfo {
+// CHECK-NEXT:       Version: 1
+// CHECK-NEXT:       Flags [
+// CHECK-NEXT:         ChainInfo
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       PrologSize: 0
+// CHECK-NEXT:       FrameRegister: -
+// CHECK-NEXT:       FrameOffset: -
+// CHECK-NEXT:       UnwindCodeCount: 0
+// CHECK-NEXT:       UnwindCodes [
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       Chained {
+// CHECK-NEXT:         StartAddress: [[CodeSect1]] [[BeginDisp1]]
+// CHECK-NEXT:         EndAddress: [[CodeSect1]] [[EndDisp1]]
+// CHECK-NEXT:         UnwindInfoAddress:
+// CHECK-NEXT:       }
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   RuntimeFunction {
+// CHECK-NEXT:     StartAddress: [[CodeSect3:[^ ]+]] [[BeginDisp3:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     EndAddress: [[CodeSect3]] [[BeginDisp3:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     UnwindInfoAddress:
+// CHECK-NEXT:     UnwindInfo {
+// CHECK-NEXT:       Version: 1
+// CHECK-NEXT:       Flags [
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       PrologSize: 0
+// CHECK-NEXT:       FrameRegister: -
+// CHECK-NEXT:       FrameOffset: -
+// CHECK-NEXT:       UnwindCodeCount: 0
+// CHECK-NEXT:       UnwindCodes [
+// CHECK-NEXT:       ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
     .text
     .globl func
diff --git a/test/MC/COFF/simple-fixups.s b/test/MC/COFF/simple-fixups.s
index 4c9b4d44528f..2a74f21f12d0 100644
--- a/test/MC/COFF/simple-fixups.s
+++ b/test/MC/COFF/simple-fixups.s
@@ -1,8 +1,8 @@
 // The purpose of this test is to verify that we do not produce unneeded
 // relocations when symbols are in the same section and we know their offset.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -s | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -s | FileCheck %s
 
 	.def	 _foo;
 	.scl	2;
@@ -41,10 +41,9 @@ _baz:                                   # @baz
 # BB#0:                                 # %e
 	subl	$4, %esp
 Ltmp0:
-	calll	_baz
+	call	_baz
 	addl	$4, %esp
 	ret
 
-// CHECK:     Sections = [
-// CHECK-NOT: NumberOfRelocations = {{[^0]}}
-// CHECK:     Symbols = [
+// CHECK:     Sections [
+// CHECK-NOT: RelocationCount: {{[^0]}}
diff --git a/test/MC/COFF/symbol-alias.s b/test/MC/COFF/symbol-alias.s
index 4b1772ce711b..ccada3793ca3 100644
--- a/test/MC/COFF/symbol-alias.s
+++ b/test/MC/COFF/symbol-alias.s
@@ -1,9 +1,9 @@
 // The purpose of this test is to verify that symbol aliases
-// (@foo = alias <type> @bar) generate the correct entries in the symbol table.
+// (@foo:  alias <type> @bar) generate the correct entries in the symbol table.
 // They should be identical except for the name.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -t | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -t | FileCheck %s
 
 	.def	 _foo;
 	.scl	2;
@@ -31,43 +31,43 @@ _bar_alias_alias = _bar_alias
 	.globl	_bar_alias
 _bar_alias = _bar
 
-// CHECK:      Name               = {{_?}}foo
-// CHECK-NEXT: Value              = [[FOO_VALUE:.*$]]
-// CHECK-NEXT: SectionNumber      = [[FOO_SECTION_NUMBER:.*$]]
-// CHECK-NEXT: SimpleType         = [[FOO_SIMPLE_TYPE:.*$]]
-// CHECK-NEXT: ComplexType        = [[FOO_COMPLEX_TYPE:.*$]]
-// CHECK-NEXT: StorageClass       = [[FOO_STORAGE_CLASS:.*$]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS:.*$]]
+// CHECK:      Name:                {{_?}}foo
+// CHECK-NEXT: Value:               [[FOO_VALUE:.*$]]
+// CHECK-NEXT: Section:             [[FOO_SECTION_NUMBER:.*$]]
+// CHECK-NEXT: BaseType:            [[FOO_SIMPLE_TYPE:.*$]]
+// CHECK-NEXT: ComplexType:         [[FOO_COMPLEX_TYPE:.*$]]
+// CHECK-NEXT: StorageClass:        [[FOO_STORAGE_CLASS:.*$]]
+// CHECK-NEXT: AuxSymbolCount:      [[FOO_NUMBER_OF_AUX_SYMBOLS:.*$]]
 
-// CHECK:      Name               = {{_?}}bar
-// CHECK-NEXT: Value              = [[BAR_VALUE:.*$]]
-// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER:.*$]]
-// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE:.*$]]
-// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE:.*$]]
-// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS:.*$]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS:.*$]]
+// CHECK:      Name:                {{_?}}bar
+// CHECK-NEXT: Value:               [[BAR_VALUE:.*$]]
+// CHECK-NEXT: Section:             [[BAR_SECTION_NUMBER:.*$]]
+// CHECK-NEXT: BaseType:            [[BAR_SIMPLE_TYPE:.*$]]
+// CHECK-NEXT: ComplexType:         [[BAR_COMPLEX_TYPE:.*$]]
+// CHECK-NEXT: StorageClass:        [[BAR_STORAGE_CLASS:.*$]]
+// CHECK-NEXT: AuxSymbolCount:      [[BAR_NUMBER_OF_AUX_SYMBOLS:.*$]]
 
-// CHECK:      Name               = {{_?}}foo_alias
-// CHECK-NEXT: Value              = [[FOO_VALUE]]
-// CHECK-NEXT: SectionNumber      = [[FOO_SECTION_NUMBER]]
-// CHECK-NEXT: SimpleType         = [[FOO_SIMPLE_TYPE]]
-// CHECK-NEXT: ComplexType        = [[FOO_COMPLEX_TYPE]]
-// CHECK-NEXT: StorageClass       = [[FOO_STORAGE_CLASS]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS]]
+// CHECK:      Name:                {{_?}}foo_alias
+// CHECK-NEXT: Value:               [[FOO_VALUE]]
+// CHECK-NEXT: Section:             [[FOO_SECTION_NUMBER]]
+// CHECK-NEXT: BaseType:            [[FOO_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType:         [[FOO_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass:        [[FOO_STORAGE_CLASS]]
+// CHECK-NEXT: AuxSymbolCount:      [[FOO_NUMBER_OF_AUX_SYMBOLS]]
 
-// CHECK:      Name               = {{_?}}bar_alias_alias
-// CHECK-NEXT: Value              = [[BAR_VALUE]]
-// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER]]
-// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE]]
-// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE]]
-// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS]]
+// CHECK:      Name:                {{_?}}bar_alias_alias
+// CHECK-NEXT: Value:               [[BAR_VALUE]]
+// CHECK-NEXT: Section:             [[BAR_SECTION_NUMBER]]
+// CHECK-NEXT: BaseType:            [[BAR_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType:         [[BAR_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass:        [[BAR_STORAGE_CLASS]]
+// CHECK-NEXT: AuxSymbolCount:      [[BAR_NUMBER_OF_AUX_SYMBOLS]]
 
-// CHECK:      Name               = {{_?}}bar_alias
-// CHECK-NEXT: Value              = [[BAR_VALUE]]
-// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER]]
-// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE]]
-// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE]]
-// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS]]
+// CHECK:      Name:                {{_?}}bar_alias
+// CHECK-NEXT: Value:               [[BAR_VALUE]]
+// CHECK-NEXT: Section:             [[BAR_SECTION_NUMBER]]
+// CHECK-NEXT: BaseType:            [[BAR_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType:         [[BAR_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass:        [[BAR_STORAGE_CLASS]]
+// CHECK-NEXT: AuxSymbolCount:      [[BAR_NUMBER_OF_AUX_SYMBOLS]]
 
diff --git a/test/MC/COFF/symbol-fragment-offset-64.s b/test/MC/COFF/symbol-fragment-offset-64.s
new file mode 100644
index 000000000000..b8244709aa75
--- /dev/null
+++ b/test/MC/COFF/symbol-fragment-offset-64.s
@@ -0,0 +1,168 @@
+// The purpose of this test is to see if the COFF object writer is emitting the
+// proper relocations for multiple pieces of data in a single data fragment.
+
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -h -s -sr -sd -t | FileCheck %s
+
+.def	 _main;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_main
+	.align	16, 0x90
+_main:                                  # @main
+# BB#0:                                 # %entry
+	subl	$4, %esp
+	movl	$.L_.str0, (%esp)
+	callq	_printf
+	movl	$.L_.str1, (%esp)
+	callq	_puts
+	movl	$.L_.str2, (%esp)
+	callq	_puts
+	xorl	%eax, %eax
+	addl	$4, %esp
+	ret
+
+	.data
+.L_.str0:                                # @.str0
+	.asciz	 "Hello "
+
+.L_.str1:                                # @.str1
+	.asciz	 "World!"
+
+	.align	16                      # @.str2
+.L_.str2:
+	.asciz	 "I'm The Last Line."
+
+// CHECK: {
+// CHECK:   Machine:                   IMAGE_FILE_MACHINE_AMD64
+// CHECK:   SectionCount:              2
+// CHECK:   TimeDateStamp:             {{[0-9]+}}
+// CHECK:   PointerToSymbolTable:      0x{{[0-9A-F]+}}
+// CHECK:   SymbolCount:               7
+// CHECK:   OptionalHeaderSize:        0
+// CHECK:   Characteristics [ (0x0)
+// CHECK:   ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Number:                    1
+// CHECK:     Name:                      .text
+// CHECK:     VirtualSize:               0
+// CHECK:     VirtualAddress:            0
+// CHECK:     RawDataSize:               {{[0-9]+}}
+// CHECK:     PointerToRawData:          0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations:      0x{{[0-9A-F]+}}
+// CHECK:     PointerToLineNumbers:      0x0
+// CHECK:     RelocationCount:           6
+// CHECK:     LineNumberCount:           0
+// CHECK:     Characteristics [ (0x60500020)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_CODE
+// CHECK:       IMAGE_SCN_MEM_EXECUTE
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:       0x7  IMAGE_REL_AMD64_ADDR32 .data
+// CHECK:       0xC  IMAGE_REL_AMD64_REL32 _printf
+// CHECK:       0x14 IMAGE_REL_AMD64_ADDR32 .data
+// CHECK:       0x19 IMAGE_REL_AMD64_REL32 _puts
+// CHECK:       0x21 IMAGE_REL_AMD64_ADDR32 .data
+// CHECK:       0x26 IMAGE_REL_AMD64_REL32 _puts
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 83EC0467 C7042400 000000E8 00000000
+// CHECK:       0010: 67C70424 07000000 E8000000 0067C704
+// CHECK:       0020: 24100000 00E80000 000031C0 83C404C3
+// CHECK:     )
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number:                    2
+// CHECK:     Name:                      .data
+// CHECK:     VirtualSize:               0
+// CHECK:     VirtualAddress:            0
+// CHECK:     RawDataSize:               {{[0-9]+}}
+// CHECK:     PointerToRawData:          0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations:      0x0
+// CHECK:     PointerToLineNumbers:      0x0
+// CHECK:     RelocationCount:           0
+// CHECK:     LineNumberCount:           0
+// CHECK:     Characteristics [ (0xC0500040)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:       IMAGE_SCN_MEM_WRITE
+// CHECK:     Relocations [
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 48656C6C 6F200057 6F726C64 21000000 |Hello .World!...|
+// CHECK:       0010: 49276D20 54686520 4C617374 204C696E |I'm The Last Lin|
+// CHECK:       0020: 652E00                              |e..|
+// CHECK:     )
+// CHECK:   }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Name:                      .text
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .text
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              Static
+// CHECK:     AuxSymbolCount:            1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 48
+// CHECK:       RelocationCount: 6
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 1
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      .data
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .data
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              Static
+// CHECK:     AuxSymbolCount:            1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 35
+// CHECK:       RelocationCount: 0
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 2
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _main
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .text
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Function
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _printf
+// CHECK:     Value:                     0
+// CHECK:     Section:                   (0)
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _puts
+// CHECK:     Value:                     0
+// CHECK:     Section:                   (0)
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/COFF/symbol-fragment-offset.s b/test/MC/COFF/symbol-fragment-offset.s
index 1df8baacaf22..71b1703972ab 100644
--- a/test/MC/COFF/symbol-fragment-offset.s
+++ b/test/MC/COFF/symbol-fragment-offset.s
@@ -1,8 +1,7 @@
 // The purpose of this test is to see if the COFF object writer is emitting the
 // proper relocations for multiple pieces of data in a single data fragment.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -h -s -sr -sd -t | FileCheck %s
 
 .def	 _main;
 	.scl	2;
@@ -36,152 +35,134 @@ L_.str2:
 	.asciz	 "I'm The Last Line."
 
 // CHECK: {
-// CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
-// CHECK:   NumberOfSections         = 2
-// CHECK:   TimeDateStamp            = {{[0-9]+}}
-// CHECK:   PointerToSymbolTable     = 0x{{[0-9A-F]+}}
-// CHECK:   NumberOfSymbols          = 7
-// CHECK:   SizeOfOptionalHeader     = 0
-// CHECK:   Characteristics          = 0x0
-// CHECK:   Sections                 = [
-// CHECK:     1 = {
-// CHECK:       Name                     = .text
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 6
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0x60500020
-// CHECK:         IMAGE_SCN_CNT_CODE
-// CHECK:         IMAGE_SCN_ALIGN_16BYTES
-// CHECK:         IMAGE_SCN_MEM_EXECUTE
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:       SectionData              =
-// CHECK:         83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 C7 |.....$..........|
-// CHECK:         04 24 07 00 00 00 E8 00 - 00 00 00 C7 04 24 10 00 |.$...........$..|
-// CHECK:         00 00 E8 00 00 00 00 31 - C0 83 C4 04 C3 |.......1.....|
-// CHECK:       Relocations              = [
-// CHECK:         0 = {
-// CHECK:           VirtualAddress           = 0x6
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         1 = {
-// CHECK:           VirtualAddress           = 0xB
-// CHECK:           SymbolTableIndex         = 5
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _printf
-// CHECK:         }
-// CHECK:         2 = {
-// CHECK:           VirtualAddress           = 0x12
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         3 = {
-// CHECK:           VirtualAddress           = 0x17
-// CHECK:           SymbolTableIndex         = 6
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _puts
-// CHECK:         }
-// CHECK:         4 = {
-// CHECK:           VirtualAddress           = 0x1E
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         5 = {
-// CHECK:           VirtualAddress           = 0x23
-// CHECK:           SymbolTableIndex         = 6
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _puts
-// CHECK:         }
-// CHECK:       ]
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x0
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 0
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0xC0500040
-// CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
-// CHECK:         IMAGE_SCN_ALIGN_16BYTES
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:         IMAGE_SCN_MEM_WRITE
-// CHECK:       SectionData              =
-// CHECK:         48 65 6C 6C 6F 20 00 57 - 6F 72 6C 64 21 00 00 00 |Hello .World!...|
-// CHECK:         49 27 6D 20 54 68 65 20 - 4C 61 73 74 20 4C 69 6E |I'm The Last Lin|
-// CHECK:         65 2E 00                                          |e..|
-// CHECK:       Relocations              = None
-// CHECK:     }
+// CHECK:   Machine:                   IMAGE_FILE_MACHINE_I386 (0x14C)
+// CHECK:   SectionCount:              2
+// CHECK:   TimeDateStamp:             {{[0-9]+}}
+// CHECK:   PointerToSymbolTable:      0x{{[0-9A-F]+}}
+// CHECK:   SymbolCount:               7
+// CHECK:   OptionalHeaderSize:        0
+// CHECK:   Characteristics [ (0x0)
 // CHECK:   ]
-// CHECK:   Symbols                  = [
-// CHECK:     0 = {
-// CHECK:       Name                     = .text
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         2D 00 00 00 06 00 00 00 - 00 00 00 00 01 00 00 00 |-...............|
-// CHECK:         00 00                                             |..|
-
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 2
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         23 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |#...............|
-// CHECK:         00 00                                             |..|
-
-// CHECK:     }
-// CHECK:     4 = {
-// CHECK:       Name                     = _main
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
-
-// CHECK:     5 = {
-// CHECK:       Name                     = _printf
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 0
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
-
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Number:                    1
+// CHECK:     Name:                      .text
+// CHECK:     VirtualSize:               0
+// CHECK:     VirtualAddress:            0
+// CHECK:     RawDataSize:               {{[0-9]+}}
+// CHECK:     PointerToRawData:          0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations:      0x{{[0-9A-F]+}}
+// CHECK:     PointerToLineNumbers:      0x0
+// CHECK:     RelocationCount:           6
+// CHECK:     LineNumberCount:           0
+// CHECK:     Characteristics [ (0x60500020)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_CODE
+// CHECK:       IMAGE_SCN_MEM_EXECUTE
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:       0x6  IMAGE_REL_I386_DIR32 .data
+// CHECK:       0xB  IMAGE_REL_I386_REL32 _printf
+// CHECK:       0x12 IMAGE_REL_I386_DIR32 .data
+// CHECK:       0x17 IMAGE_REL_I386_REL32 _puts
+// CHECK:       0x1E IMAGE_REL_I386_DIR32 .data
+// CHECK:       0x23 IMAGE_REL_I386_REL32 _puts
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 83EC04C7 04240000 0000E800 000000C7 |.....$..........|
+// CHECK:       0010: 04240700 0000E800 000000C7 04241000 |.$...........$..|
+// CHECK:       0020: 0000E800 00000031 C083C404 C3       |.......1.....|
+// CHECK:     )
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number:                    2
+// CHECK:     Name:                      .data
+// CHECK:     VirtualSize:               0
+// CHECK:     VirtualAddress:            0
+// CHECK:     RawDataSize:               {{[0-9]+}}
+// CHECK:     PointerToRawData:          0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations:      0x0
+// CHECK:     PointerToLineNumbers:      0x0
+// CHECK:     RelocationCount:           0
+// CHECK:     LineNumberCount:           0
+// CHECK:     Characteristics [ (0xC0500040)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:       IMAGE_SCN_MEM_WRITE
+// CHECK:     Relocations [
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 48656C6C 6F200057 6F726C64 21000000 |Hello .World!...|
+// CHECK:       0010: 49276D20 54686520 4C617374 204C696E |I'm The Last Lin|
+// CHECK:       0020: 652E00                              |e..|
+// CHECK:     )
+// CHECK:   }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Name:                      .text
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .text
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              Static
+// CHECK:     AuxSymbolCount:            1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 45
+// CHECK:       RelocationCount: 6
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 1
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
 // CHECK:     }
-// CHECK:     6 = {
-// CHECK:       Name                     = _puts
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 0
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
-
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      .data
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .data
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              Static
+// CHECK:     AuxSymbolCount:            1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 35
+// CHECK:       RelocationCount: 0
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 2
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
 // CHECK:     }
-// CHECK:   ]
-// CHECK: }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _main
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .text
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Function
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _printf
+// CHECK:     Value:                     0
+// CHECK:     Section:                   (0)
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _puts
+// CHECK:     Value:                     0
+// CHECK:     Section:                   (0)
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/COFF/weak-symbol-section-specification.ll b/test/MC/COFF/weak-symbol-section-specification.ll
index 5049372959fb..4772c929f29e 100644
--- a/test/MC/COFF/weak-symbol-section-specification.ll
+++ b/test/MC/COFF/weak-symbol-section-specification.ll
@@ -1,23 +1,25 @@
 ; The purpose of this test is to verify that weak linkage type is not ignored by backend,
 ; if section was specialized.
 
-; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | coff-dump.py | FileCheck %s
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 @a = weak unnamed_addr constant { i32, i32, i32 } { i32 0, i32 0, i32 0}, section ".data"
 
-; CHECK:           Name                     = .data$a
-; CHECK-NEXT:      VirtualSize              = 0
-; CHECK-NEXT:      VirtualAddress           = 0
-; CHECK-NEXT:      SizeOfRawData            = {{[0-9]+}}
-; CHECK-NEXT:      PointerToRawData         = 0x{{[0-9A-F]+}}
-; CHECK-NEXT:      PointerToRelocations     = 0x0
-; CHECK-NEXT:      PointerToLineNumbers     = 0x0
-; CHECK-NEXT:      NumberOfRelocations      = 0
-; CHECK-NEXT:      NumberOfLineNumbers      = 0
-; CHECK-NEXT:      Charateristics           = 0x40401040
+; CHECK:           Name:                      .data$a
+; CHECK-NEXT:      VirtualSize:               0
+; CHECK-NEXT:      VirtualAddress:            0
+; CHECK-NEXT:      RawDataSize:               {{[0-9]+}}
+; CHECK-NEXT:      PointerToRawData:          0x{{[0-9A-F]+}}
+; CHECK-NEXT:      PointerToRelocations:      0x0
+; CHECK-NEXT:      PointerToLineNumbers:      0x0
+; CHECK-NEXT:      RelocationCount:           0
+; CHECK-NEXT:      LineNumberCount:           0
+; CHECK-NEXT:      Characteristics [ (0x40401040)
+; CHECK-NEXT:        IMAGE_SCN_ALIGN_8BYTES
 ; CHECK-NEXT:        IMAGE_SCN_CNT_INITIALIZED_DATA
 ; CHECK-NEXT:        IMAGE_SCN_LNK_COMDAT
-; CHECK-NEXT:        IMAGE_SCN_ALIGN_8BYTES
 ; CHECK-NEXT:        IMAGE_SCN_MEM_READ
-; CHECK-NEXT:      SectionData              = 
-; CHECK-NEXT:        00 00 00 00 00 00 00 00 - 00 00 00 00 
+; CHECK-NEXT:      ]
+; CHECK-NEXT:      SectionData (
+; CHECK-NEXT:        0000: 00000000 00000000 00000000
+; CHECK-NEXT:      )
diff --git a/test/MC/COFF/weak.s b/test/MC/COFF/weak.s
index 0f99313c9c03..14f7c6564ab3 100644
--- a/test/MC/COFF/weak.s
+++ b/test/MC/COFF/weak.s
@@ -1,7 +1,8 @@
 // This tests that default-null weak symbols (a GNU extension) are created
 // properly via the .weak directive.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 < %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -t | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -t | FileCheck %s
 
     .def    _main;
     .scl    2;
@@ -17,7 +18,7 @@ _main:                                  # @main
     testl   %eax, %eax
     je      LBB0_2
 # BB#1:                                 # %if.then
-    calll   _test_weak
+    call    _test_weak
     movl    $1, %eax
     addl    $4, %esp
     ret
@@ -28,24 +29,29 @@ LBB0_2:                                 # %return
 
     .weak   _test_weak
 
-// CHECK: Symbols = [
+// CHECK: Symbols [
 
-// CHECK:      Name               = _test_weak
-// CHECK-NEXT: Value              = 0
-// CHECK-NEXT: SectionNumber      = 0
-// CHECK-NEXT: SimpleType         = IMAGE_SYM_TYPE_NULL (0)
-// CHECK-NEXT: ComplexType        = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK-NEXT: StorageClass       = IMAGE_SYM_CLASS_WEAK_EXTERNAL (105)
-// CHECK-NEXT: NumberOfAuxSymbols = 1
-// CHECK-NEXT: AuxillaryData      =
-// CHECK-NEXT: 05 00 00 00 02 00 00 00 - 00 00 00 00 00 00 00 00 |................|
-// CHECK-NEXT: 00 00                                             |..|
+// CHECK:      Symbol {
+// CHECK:        Name:           _test_weak
+// CHECK-NEXT:   Value:          0
+// CHECK-NEXT:   Section:        (0)
+// CHECK-NEXT:   BaseType:       Null
+// CHECK-NEXT:   ComplexType:    Null
+// CHECK-NEXT:   StorageClass:   WeakExternal
+// CHECK-NEXT:   AuxSymbolCount: 1
+// CHECK-NEXT:   AuxWeakExternal {
+// CHECK-NEXT:     Linked: .weak._test_weak.default
+// CHECK-NEXT:      Search: Library
+// CHECK-NEXT:      Unused: (00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
 
-// CHECK:      Name               = .weak._test_weak.default
-// CHECK-NEXT: Value              = 0
-// CHECK-NEXT: SectionNumber      = 65535
-// CHECK-NEXT: SimpleType         = IMAGE_SYM_TYPE_NULL (0)
-// CHECK-NEXT: ComplexType        = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK-NEXT: StorageClass       = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK-NEXT: NumberOfAuxSymbols = 0
-// CHECK-NEXT: AuxillaryData      =
+// CHECK:      Symbol {
+// CHECK:        Name:                .weak._test_weak.default
+// CHECK-NEXT:   Value:               0
+// CHECK-NEXT:   Section:             (-1)
+// CHECK-NEXT:   BaseType:            Null
+// CHECK-NEXT:   ComplexType:         Null
+// CHECK-NEXT:   StorageClass:        External
+// CHECK-NEXT:   AuxSymbolCount:      0
+// CHECK-NEXT: }
diff --git a/test/MC/Disassembler/AArch64/gicv3-regs.txt b/test/MC/Disassembler/AArch64/gicv3-regs.txt
new file mode 100644
index 000000000000..4351f6460c75
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/gicv3-regs.txt
@@ -0,0 +1,222 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -disassemble < %s | FileCheck %s
+
+0x8 0xcc 0x38 0xd5
+# CHECK: mrs      x8, icc_iar1_el1
+0x1a 0xc8 0x38 0xd5
+# CHECK: mrs      x26, icc_iar0_el1
+0x42 0xcc 0x38 0xd5
+# CHECK: mrs      x2, icc_hppir1_el1
+0x51 0xc8 0x38 0xd5
+# CHECK: mrs      x17, icc_hppir0_el1
+0x7d 0xcb 0x38 0xd5
+# CHECK: mrs      x29, icc_rpr_el1
+0x24 0xcb 0x3c 0xd5
+# CHECK: mrs      x4, ich_vtr_el2
+0x78 0xcb 0x3c 0xd5
+# CHECK: mrs      x24, ich_eisr_el2
+0xa9 0xcb 0x3c 0xd5
+# CHECK: mrs      x9, ich_elsr_el2
+0x78 0xcc 0x38 0xd5
+# CHECK: mrs      x24, icc_bpr1_el1
+0x6e 0xc8 0x38 0xd5
+# CHECK: mrs      x14, icc_bpr0_el1
+0x13 0x46 0x38 0xd5
+# CHECK: mrs      x19, icc_pmr_el1
+0x97 0xcc 0x38 0xd5
+# CHECK: mrs      x23, icc_ctlr_el1
+0x94 0xcc 0x3e 0xd5
+# CHECK: mrs      x20, icc_ctlr_el3
+0xbc 0xcc 0x38 0xd5
+# CHECK: mrs      x28, icc_sre_el1
+0xb9 0xc9 0x3c 0xd5
+# CHECK: mrs      x25, icc_sre_el2
+0xa8 0xcc 0x3e 0xd5
+# CHECK: mrs      x8, icc_sre_el3
+0xd6 0xcc 0x38 0xd5
+# CHECK: mrs      x22, icc_igrpen0_el1
+0xe5 0xcc 0x38 0xd5
+# CHECK: mrs      x5, icc_igrpen1_el1
+0xe7 0xcc 0x3e 0xd5
+# CHECK: mrs      x7, icc_igrpen1_el3
+0x16 0xcd 0x38 0xd5
+# CHECK: mrs      x22, icc_seien_el1
+0x84 0xc8 0x38 0xd5
+# CHECK: mrs      x4, icc_ap0r0_el1
+0xab 0xc8 0x38 0xd5
+# CHECK: mrs      x11, icc_ap0r1_el1
+0xdb 0xc8 0x38 0xd5
+# CHECK: mrs      x27, icc_ap0r2_el1
+0xf5 0xc8 0x38 0xd5
+# CHECK: mrs      x21, icc_ap0r3_el1
+0x2 0xc9 0x38 0xd5
+# CHECK: mrs      x2, icc_ap1r0_el1
+0x35 0xc9 0x38 0xd5
+# CHECK: mrs      x21, icc_ap1r1_el1
+0x4a 0xc9 0x38 0xd5
+# CHECK: mrs      x10, icc_ap1r2_el1
+0x7b 0xc9 0x38 0xd5
+# CHECK: mrs      x27, icc_ap1r3_el1
+0x14 0xc8 0x3c 0xd5
+# CHECK: mrs      x20, ich_ap0r0_el2
+0x35 0xc8 0x3c 0xd5
+# CHECK: mrs      x21, ich_ap0r1_el2
+0x45 0xc8 0x3c 0xd5
+# CHECK: mrs      x5, ich_ap0r2_el2
+0x64 0xc8 0x3c 0xd5
+# CHECK: mrs      x4, ich_ap0r3_el2
+0xf 0xc9 0x3c 0xd5
+# CHECK: mrs      x15, ich_ap1r0_el2
+0x2c 0xc9 0x3c 0xd5
+# CHECK: mrs      x12, ich_ap1r1_el2
+0x5b 0xc9 0x3c 0xd5
+# CHECK: mrs      x27, ich_ap1r2_el2
+0x74 0xc9 0x3c 0xd5
+# CHECK: mrs      x20, ich_ap1r3_el2
+0xa 0xcb 0x3c 0xd5
+# CHECK: mrs      x10, ich_hcr_el2
+0x5b 0xcb 0x3c 0xd5
+# CHECK: mrs      x27, ich_misr_el2
+0xe6 0xcb 0x3c 0xd5
+# CHECK: mrs      x6, ich_vmcr_el2
+0x93 0xc9 0x3c 0xd5
+# CHECK: mrs      x19, ich_vseir_el2
+0x3 0xcc 0x3c 0xd5
+# CHECK: mrs      x3, ich_lr0_el2
+0x21 0xcc 0x3c 0xd5
+# CHECK: mrs      x1, ich_lr1_el2
+0x56 0xcc 0x3c 0xd5
+# CHECK: mrs      x22, ich_lr2_el2
+0x75 0xcc 0x3c 0xd5
+# CHECK: mrs      x21, ich_lr3_el2
+0x86 0xcc 0x3c 0xd5
+# CHECK: mrs      x6, ich_lr4_el2
+0xaa 0xcc 0x3c 0xd5
+# CHECK: mrs      x10, ich_lr5_el2
+0xcb 0xcc 0x3c 0xd5
+# CHECK: mrs      x11, ich_lr6_el2
+0xec 0xcc 0x3c 0xd5
+# CHECK: mrs      x12, ich_lr7_el2
+0x0 0xcd 0x3c 0xd5
+# CHECK: mrs      x0, ich_lr8_el2
+0x35 0xcd 0x3c 0xd5
+# CHECK: mrs      x21, ich_lr9_el2
+0x4d 0xcd 0x3c 0xd5
+# CHECK: mrs      x13, ich_lr10_el2
+0x7a 0xcd 0x3c 0xd5
+# CHECK: mrs      x26, ich_lr11_el2
+0x81 0xcd 0x3c 0xd5
+# CHECK: mrs      x1, ich_lr12_el2
+0xa8 0xcd 0x3c 0xd5
+# CHECK: mrs      x8, ich_lr13_el2
+0xc2 0xcd 0x3c 0xd5
+# CHECK: mrs      x2, ich_lr14_el2
+0xe8 0xcd 0x3c 0xd5
+# CHECK: mrs      x8, ich_lr15_el2
+0x3b 0xcc 0x18 0xd5
+# CHECK: msr      icc_eoir1_el1, x27
+0x25 0xc8 0x18 0xd5
+# CHECK: msr      icc_eoir0_el1, x5
+0x2d 0xcb 0x18 0xd5
+# CHECK: msr      icc_dir_el1, x13
+0xb5 0xcb 0x18 0xd5
+# CHECK: msr      icc_sgi1r_el1, x21
+0xd9 0xcb 0x18 0xd5
+# CHECK: msr      icc_asgi1r_el1, x25
+0xfc 0xcb 0x18 0xd5
+# CHECK: msr      icc_sgi0r_el1, x28
+0x67 0xcc 0x18 0xd5
+# CHECK: msr      icc_bpr1_el1, x7
+0x69 0xc8 0x18 0xd5
+# CHECK: msr      icc_bpr0_el1, x9
+0x1d 0x46 0x18 0xd5
+# CHECK: msr      icc_pmr_el1, x29
+0x98 0xcc 0x18 0xd5
+# CHECK: msr      icc_ctlr_el1, x24
+0x80 0xcc 0x1e 0xd5
+# CHECK: msr      icc_ctlr_el3, x0
+0xa2 0xcc 0x18 0xd5
+# CHECK: msr      icc_sre_el1, x2
+0xa5 0xc9 0x1c 0xd5
+# CHECK: msr      icc_sre_el2, x5
+0xaa 0xcc 0x1e 0xd5
+# CHECK: msr      icc_sre_el3, x10
+0xd6 0xcc 0x18 0xd5
+# CHECK: msr      icc_igrpen0_el1, x22
+0xeb 0xcc 0x18 0xd5
+# CHECK: msr      icc_igrpen1_el1, x11
+0xe8 0xcc 0x1e 0xd5
+# CHECK: msr      icc_igrpen1_el3, x8
+0x4 0xcd 0x18 0xd5
+# CHECK: msr      icc_seien_el1, x4
+0x9b 0xc8 0x18 0xd5
+# CHECK: msr      icc_ap0r0_el1, x27
+0xa5 0xc8 0x18 0xd5
+# CHECK: msr      icc_ap0r1_el1, x5
+0xd4 0xc8 0x18 0xd5
+# CHECK: msr      icc_ap0r2_el1, x20
+0xe0 0xc8 0x18 0xd5
+# CHECK: msr      icc_ap0r3_el1, x0
+0x2 0xc9 0x18 0xd5
+# CHECK: msr      icc_ap1r0_el1, x2
+0x3d 0xc9 0x18 0xd5
+# CHECK: msr      icc_ap1r1_el1, x29
+0x57 0xc9 0x18 0xd5
+# CHECK: msr      icc_ap1r2_el1, x23
+0x6b 0xc9 0x18 0xd5
+# CHECK: msr      icc_ap1r3_el1, x11
+0x2 0xc8 0x1c 0xd5
+# CHECK: msr      ich_ap0r0_el2, x2
+0x3b 0xc8 0x1c 0xd5
+# CHECK: msr      ich_ap0r1_el2, x27
+0x47 0xc8 0x1c 0xd5
+# CHECK: msr      ich_ap0r2_el2, x7
+0x61 0xc8 0x1c 0xd5
+# CHECK: msr      ich_ap0r3_el2, x1
+0x7 0xc9 0x1c 0xd5
+# CHECK: msr      ich_ap1r0_el2, x7
+0x2c 0xc9 0x1c 0xd5
+# CHECK: msr      ich_ap1r1_el2, x12
+0x4e 0xc9 0x1c 0xd5
+# CHECK: msr      ich_ap1r2_el2, x14
+0x6d 0xc9 0x1c 0xd5
+# CHECK: msr      ich_ap1r3_el2, x13
+0x1 0xcb 0x1c 0xd5
+# CHECK: msr      ich_hcr_el2, x1
+0x4a 0xcb 0x1c 0xd5
+# CHECK: msr      ich_misr_el2, x10
+0xf8 0xcb 0x1c 0xd5
+# CHECK: msr      ich_vmcr_el2, x24
+0x9d 0xc9 0x1c 0xd5
+# CHECK: msr      ich_vseir_el2, x29
+0x1a 0xcc 0x1c 0xd5
+# CHECK: msr      ich_lr0_el2, x26
+0x29 0xcc 0x1c 0xd5
+# CHECK: msr      ich_lr1_el2, x9
+0x52 0xcc 0x1c 0xd5
+# CHECK: msr      ich_lr2_el2, x18
+0x7a 0xcc 0x1c 0xd5
+# CHECK: msr      ich_lr3_el2, x26
+0x96 0xcc 0x1c 0xd5
+# CHECK: msr      ich_lr4_el2, x22
+0xba 0xcc 0x1c 0xd5
+# CHECK: msr      ich_lr5_el2, x26
+0xdb 0xcc 0x1c 0xd5
+# CHECK: msr      ich_lr6_el2, x27
+0xe8 0xcc 0x1c 0xd5
+# CHECK: msr      ich_lr7_el2, x8
+0x11 0xcd 0x1c 0xd5
+# CHECK: msr      ich_lr8_el2, x17
+0x33 0xcd 0x1c 0xd5
+# CHECK: msr      ich_lr9_el2, x19
+0x51 0xcd 0x1c 0xd5
+# CHECK: msr      ich_lr10_el2, x17
+0x65 0xcd 0x1c 0xd5
+# CHECK: msr      ich_lr11_el2, x5
+0x9d 0xcd 0x1c 0xd5
+# CHECK: msr      ich_lr12_el2, x29
+0xa2 0xcd 0x1c 0xd5
+# CHECK: msr      ich_lr13_el2, x2
+0xcd 0xcd 0x1c 0xd5
+# CHECK: msr      ich_lr14_el2, x13
+0xfb 0xcd 0x1c 0xd5
+# CHECK: msr      ich_lr15_el2, x27
diff --git a/test/MC/Disassembler/AArch64/trace-regs.txt b/test/MC/Disassembler/AArch64/trace-regs.txt
new file mode 100644
index 000000000000..10c5937f5dea
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/trace-regs.txt
@@ -0,0 +1,736 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -disassemble < %s | FileCheck %s
+
+0x8 0x3 0x31 0xd5
+# CHECK: mrs      x8, trcstatr
+0xc9 0x0 0x31 0xd5
+# CHECK: mrs      x9, trcidr8
+0xcb 0x1 0x31 0xd5
+# CHECK: mrs      x11, trcidr9
+0xd9 0x2 0x31 0xd5
+# CHECK: mrs      x25, trcidr10
+0xc7 0x3 0x31 0xd5
+# CHECK: mrs      x7, trcidr11
+0xc7 0x4 0x31 0xd5
+# CHECK: mrs      x7, trcidr12
+0xc6 0x5 0x31 0xd5
+# CHECK: mrs      x6, trcidr13
+0xfb 0x8 0x31 0xd5
+# CHECK: mrs      x27, trcidr0
+0xfd 0x9 0x31 0xd5
+# CHECK: mrs      x29, trcidr1
+0xe4 0xa 0x31 0xd5
+# CHECK: mrs      x4, trcidr2
+0xe8 0xb 0x31 0xd5
+# CHECK: mrs      x8, trcidr3
+0xef 0xc 0x31 0xd5
+# CHECK: mrs      x15, trcidr4
+0xf4 0xd 0x31 0xd5
+# CHECK: mrs      x20, trcidr5
+0xe6 0xe 0x31 0xd5
+# CHECK: mrs      x6, trcidr6
+0xe6 0xf 0x31 0xd5
+# CHECK: mrs      x6, trcidr7
+0x98 0x11 0x31 0xd5
+# CHECK: mrs      x24, trcoslsr
+0x92 0x15 0x31 0xd5
+# CHECK: mrs      x18, trcpdsr
+0xdc 0x7a 0x31 0xd5
+# CHECK: mrs      x28, trcdevaff0
+0xc5 0x7b 0x31 0xd5
+# CHECK: mrs      x5, trcdevaff1
+0xc5 0x7d 0x31 0xd5
+# CHECK: mrs      x5, trclsr
+0xcb 0x7e 0x31 0xd5
+# CHECK: mrs      x11, trcauthstatus
+0xcd 0x7f 0x31 0xd5
+# CHECK: mrs      x13, trcdevarch
+0xf2 0x72 0x31 0xd5
+# CHECK: mrs      x18, trcdevid
+0xf6 0x73 0x31 0xd5
+# CHECK: mrs      x22, trcdevtype
+0xee 0x74 0x31 0xd5
+# CHECK: mrs      x14, trcpidr4
+0xe5 0x75 0x31 0xd5
+# CHECK: mrs      x5, trcpidr5
+0xe5 0x76 0x31 0xd5
+# CHECK: mrs      x5, trcpidr6
+0xe9 0x77 0x31 0xd5
+# CHECK: mrs      x9, trcpidr7
+0xef 0x78 0x31 0xd5
+# CHECK: mrs      x15, trcpidr0
+0xe6 0x79 0x31 0xd5
+# CHECK: mrs      x6, trcpidr1
+0xeb 0x7a 0x31 0xd5
+# CHECK: mrs      x11, trcpidr2
+0xf4 0x7b 0x31 0xd5
+# CHECK: mrs      x20, trcpidr3
+0xf1 0x7c 0x31 0xd5
+# CHECK: mrs      x17, trccidr0
+0xe2 0x7d 0x31 0xd5
+# CHECK: mrs      x2, trccidr1
+0xf4 0x7e 0x31 0xd5
+# CHECK: mrs      x20, trccidr2
+0xe4 0x7f 0x31 0xd5
+# CHECK: mrs      x4, trccidr3
+0xb 0x1 0x31 0xd5
+# CHECK: mrs      x11, trcprgctlr
+0x17 0x2 0x31 0xd5
+# CHECK: mrs      x23, trcprocselr
+0xd 0x4 0x31 0xd5
+# CHECK: mrs      x13, trcconfigr
+0x17 0x6 0x31 0xd5
+# CHECK: mrs      x23, trcauxctlr
+0x9 0x8 0x31 0xd5
+# CHECK: mrs      x9, trceventctl0r
+0x10 0x9 0x31 0xd5
+# CHECK: mrs      x16, trceventctl1r
+0x4 0xb 0x31 0xd5
+# CHECK: mrs      x4, trcstallctlr
+0xe 0xc 0x31 0xd5
+# CHECK: mrs      x14, trctsctlr
+0x18 0xd 0x31 0xd5
+# CHECK: mrs      x24, trcsyncpr
+0x1c 0xe 0x31 0xd5
+# CHECK: mrs      x28, trcccctlr
+0xf 0xf 0x31 0xd5
+# CHECK: mrs      x15, trcbbctlr
+0x21 0x0 0x31 0xd5
+# CHECK: mrs      x1, trctraceidr
+0x34 0x1 0x31 0xd5
+# CHECK: mrs      x20, trcqctlr
+0x42 0x0 0x31 0xd5
+# CHECK: mrs      x2, trcvictlr
+0x4c 0x1 0x31 0xd5
+# CHECK: mrs      x12, trcviiectlr
+0x50 0x2 0x31 0xd5
+# CHECK: mrs      x16, trcvissctlr
+0x48 0x3 0x31 0xd5
+# CHECK: mrs      x8, trcvipcssctlr
+0x5b 0x8 0x31 0xd5
+# CHECK: mrs      x27, trcvdctlr
+0x49 0x9 0x31 0xd5
+# CHECK: mrs      x9, trcvdsacctlr
+0x40 0xa 0x31 0xd5
+# CHECK: mrs      x0, trcvdarcctlr
+0x8d 0x0 0x31 0xd5
+# CHECK: mrs      x13, trcseqevr0
+0x8b 0x1 0x31 0xd5
+# CHECK: mrs      x11, trcseqevr1
+0x9a 0x2 0x31 0xd5
+# CHECK: mrs      x26, trcseqevr2
+0x8e 0x6 0x31 0xd5
+# CHECK: mrs      x14, trcseqrstevr
+0x84 0x7 0x31 0xd5
+# CHECK: mrs      x4, trcseqstr
+0x91 0x8 0x31 0xd5
+# CHECK: mrs      x17, trcextinselr
+0xb5 0x0 0x31 0xd5
+# CHECK: mrs      x21, trccntrldvr0
+0xaa 0x1 0x31 0xd5
+# CHECK: mrs      x10, trccntrldvr1
+0xb4 0x2 0x31 0xd5
+# CHECK: mrs      x20, trccntrldvr2
+0xa5 0x3 0x31 0xd5
+# CHECK: mrs      x5, trccntrldvr3
+0xb1 0x4 0x31 0xd5
+# CHECK: mrs      x17, trccntctlr0
+0xa1 0x5 0x31 0xd5
+# CHECK: mrs      x1, trccntctlr1
+0xb1 0x6 0x31 0xd5
+# CHECK: mrs      x17, trccntctlr2
+0xa6 0x7 0x31 0xd5
+# CHECK: mrs      x6, trccntctlr3
+0xbc 0x8 0x31 0xd5
+# CHECK: mrs      x28, trccntvr0
+0xb7 0x9 0x31 0xd5
+# CHECK: mrs      x23, trccntvr1
+0xa9 0xa 0x31 0xd5
+# CHECK: mrs      x9, trccntvr2
+0xa6 0xb 0x31 0xd5
+# CHECK: mrs      x6, trccntvr3
+0xf8 0x0 0x31 0xd5
+# CHECK: mrs      x24, trcimspec0
+0xf8 0x1 0x31 0xd5
+# CHECK: mrs      x24, trcimspec1
+0xef 0x2 0x31 0xd5
+# CHECK: mrs      x15, trcimspec2
+0xea 0x3 0x31 0xd5
+# CHECK: mrs      x10, trcimspec3
+0xfd 0x4 0x31 0xd5
+# CHECK: mrs      x29, trcimspec4
+0xf2 0x5 0x31 0xd5
+# CHECK: mrs      x18, trcimspec5
+0xfd 0x6 0x31 0xd5
+# CHECK: mrs      x29, trcimspec6
+0xe2 0x7 0x31 0xd5
+# CHECK: mrs      x2, trcimspec7
+0x8 0x12 0x31 0xd5
+# CHECK: mrs      x8, trcrsctlr2
+0x0 0x13 0x31 0xd5
+# CHECK: mrs      x0, trcrsctlr3
+0xc 0x14 0x31 0xd5
+# CHECK: mrs      x12, trcrsctlr4
+0x1a 0x15 0x31 0xd5
+# CHECK: mrs      x26, trcrsctlr5
+0x1d 0x16 0x31 0xd5
+# CHECK: mrs      x29, trcrsctlr6
+0x11 0x17 0x31 0xd5
+# CHECK: mrs      x17, trcrsctlr7
+0x0 0x18 0x31 0xd5
+# CHECK: mrs      x0, trcrsctlr8
+0x1 0x19 0x31 0xd5
+# CHECK: mrs      x1, trcrsctlr9
+0x11 0x1a 0x31 0xd5
+# CHECK: mrs      x17, trcrsctlr10
+0x15 0x1b 0x31 0xd5
+# CHECK: mrs      x21, trcrsctlr11
+0x1 0x1c 0x31 0xd5
+# CHECK: mrs      x1, trcrsctlr12
+0x8 0x1d 0x31 0xd5
+# CHECK: mrs      x8, trcrsctlr13
+0x18 0x1e 0x31 0xd5
+# CHECK: mrs      x24, trcrsctlr14
+0x0 0x1f 0x31 0xd5
+# CHECK: mrs      x0, trcrsctlr15
+0x22 0x10 0x31 0xd5
+# CHECK: mrs      x2, trcrsctlr16
+0x3d 0x11 0x31 0xd5
+# CHECK: mrs      x29, trcrsctlr17
+0x36 0x12 0x31 0xd5
+# CHECK: mrs      x22, trcrsctlr18
+0x26 0x13 0x31 0xd5
+# CHECK: mrs      x6, trcrsctlr19
+0x3a 0x14 0x31 0xd5
+# CHECK: mrs      x26, trcrsctlr20
+0x3a 0x15 0x31 0xd5
+# CHECK: mrs      x26, trcrsctlr21
+0x24 0x16 0x31 0xd5
+# CHECK: mrs      x4, trcrsctlr22
+0x2c 0x17 0x31 0xd5
+# CHECK: mrs      x12, trcrsctlr23
+0x21 0x18 0x31 0xd5
+# CHECK: mrs      x1, trcrsctlr24
+0x20 0x19 0x31 0xd5
+# CHECK: mrs      x0, trcrsctlr25
+0x31 0x1a 0x31 0xd5
+# CHECK: mrs      x17, trcrsctlr26
+0x28 0x1b 0x31 0xd5
+# CHECK: mrs      x8, trcrsctlr27
+0x2a 0x1c 0x31 0xd5
+# CHECK: mrs      x10, trcrsctlr28
+0x39 0x1d 0x31 0xd5
+# CHECK: mrs      x25, trcrsctlr29
+0x2c 0x1e 0x31 0xd5
+# CHECK: mrs      x12, trcrsctlr30
+0x2b 0x1f 0x31 0xd5
+# CHECK: mrs      x11, trcrsctlr31
+0x52 0x10 0x31 0xd5
+# CHECK: mrs      x18, trcssccr0
+0x4c 0x11 0x31 0xd5
+# CHECK: mrs      x12, trcssccr1
+0x43 0x12 0x31 0xd5
+# CHECK: mrs      x3, trcssccr2
+0x42 0x13 0x31 0xd5
+# CHECK: mrs      x2, trcssccr3
+0x55 0x14 0x31 0xd5
+# CHECK: mrs      x21, trcssccr4
+0x4a 0x15 0x31 0xd5
+# CHECK: mrs      x10, trcssccr5
+0x56 0x16 0x31 0xd5
+# CHECK: mrs      x22, trcssccr6
+0x57 0x17 0x31 0xd5
+# CHECK: mrs      x23, trcssccr7
+0x57 0x18 0x31 0xd5
+# CHECK: mrs      x23, trcsscsr0
+0x53 0x19 0x31 0xd5
+# CHECK: mrs      x19, trcsscsr1
+0x59 0x1a 0x31 0xd5
+# CHECK: mrs      x25, trcsscsr2
+0x51 0x1b 0x31 0xd5
+# CHECK: mrs      x17, trcsscsr3
+0x53 0x1c 0x31 0xd5
+# CHECK: mrs      x19, trcsscsr4
+0x4b 0x1d 0x31 0xd5
+# CHECK: mrs      x11, trcsscsr5
+0x45 0x1e 0x31 0xd5
+# CHECK: mrs      x5, trcsscsr6
+0x49 0x1f 0x31 0xd5
+# CHECK: mrs      x9, trcsscsr7
+0x9a 0x14 0x31 0xd5
+# CHECK: mrs      x26, trcpdcr
+0x8 0x20 0x31 0xd5
+# CHECK: mrs      x8, trcacvr0
+0xf 0x22 0x31 0xd5
+# CHECK: mrs      x15, trcacvr1
+0x13 0x24 0x31 0xd5
+# CHECK: mrs      x19, trcacvr2
+0x8 0x26 0x31 0xd5
+# CHECK: mrs      x8, trcacvr3
+0x1c 0x28 0x31 0xd5
+# CHECK: mrs      x28, trcacvr4
+0x3 0x2a 0x31 0xd5
+# CHECK: mrs      x3, trcacvr5
+0x19 0x2c 0x31 0xd5
+# CHECK: mrs      x25, trcacvr6
+0x18 0x2e 0x31 0xd5
+# CHECK: mrs      x24, trcacvr7
+0x26 0x20 0x31 0xd5
+# CHECK: mrs      x6, trcacvr8
+0x23 0x22 0x31 0xd5
+# CHECK: mrs      x3, trcacvr9
+0x38 0x24 0x31 0xd5
+# CHECK: mrs      x24, trcacvr10
+0x23 0x26 0x31 0xd5
+# CHECK: mrs      x3, trcacvr11
+0x2c 0x28 0x31 0xd5
+# CHECK: mrs      x12, trcacvr12
+0x29 0x2a 0x31 0xd5
+# CHECK: mrs      x9, trcacvr13
+0x2e 0x2c 0x31 0xd5
+# CHECK: mrs      x14, trcacvr14
+0x23 0x2e 0x31 0xd5
+# CHECK: mrs      x3, trcacvr15
+0x55 0x20 0x31 0xd5
+# CHECK: mrs      x21, trcacatr0
+0x5a 0x22 0x31 0xd5
+# CHECK: mrs      x26, trcacatr1
+0x48 0x24 0x31 0xd5
+# CHECK: mrs      x8, trcacatr2
+0x56 0x26 0x31 0xd5
+# CHECK: mrs      x22, trcacatr3
+0x46 0x28 0x31 0xd5
+# CHECK: mrs      x6, trcacatr4
+0x5d 0x2a 0x31 0xd5
+# CHECK: mrs      x29, trcacatr5
+0x45 0x2c 0x31 0xd5
+# CHECK: mrs      x5, trcacatr6
+0x52 0x2e 0x31 0xd5
+# CHECK: mrs      x18, trcacatr7
+0x62 0x20 0x31 0xd5
+# CHECK: mrs      x2, trcacatr8
+0x73 0x22 0x31 0xd5
+# CHECK: mrs      x19, trcacatr9
+0x6d 0x24 0x31 0xd5
+# CHECK: mrs      x13, trcacatr10
+0x79 0x26 0x31 0xd5
+# CHECK: mrs      x25, trcacatr11
+0x72 0x28 0x31 0xd5
+# CHECK: mrs      x18, trcacatr12
+0x7d 0x2a 0x31 0xd5
+# CHECK: mrs      x29, trcacatr13
+0x69 0x2c 0x31 0xd5
+# CHECK: mrs      x9, trcacatr14
+0x72 0x2e 0x31 0xd5
+# CHECK: mrs      x18, trcacatr15
+0x9d 0x20 0x31 0xd5
+# CHECK: mrs      x29, trcdvcvr0
+0x8f 0x24 0x31 0xd5
+# CHECK: mrs      x15, trcdvcvr1
+0x8f 0x28 0x31 0xd5
+# CHECK: mrs      x15, trcdvcvr2
+0x8f 0x2c 0x31 0xd5
+# CHECK: mrs      x15, trcdvcvr3
+0xb3 0x20 0x31 0xd5
+# CHECK: mrs      x19, trcdvcvr4
+0xb6 0x24 0x31 0xd5
+# CHECK: mrs      x22, trcdvcvr5
+0xbb 0x28 0x31 0xd5
+# CHECK: mrs      x27, trcdvcvr6
+0xa1 0x2c 0x31 0xd5
+# CHECK: mrs      x1, trcdvcvr7
+0xdd 0x20 0x31 0xd5
+# CHECK: mrs      x29, trcdvcmr0
+0xc9 0x24 0x31 0xd5
+# CHECK: mrs      x9, trcdvcmr1
+0xc1 0x28 0x31 0xd5
+# CHECK: mrs      x1, trcdvcmr2
+0xc2 0x2c 0x31 0xd5
+# CHECK: mrs      x2, trcdvcmr3
+0xe5 0x20 0x31 0xd5
+# CHECK: mrs      x5, trcdvcmr4
+0xf5 0x24 0x31 0xd5
+# CHECK: mrs      x21, trcdvcmr5
+0xe5 0x28 0x31 0xd5
+# CHECK: mrs      x5, trcdvcmr6
+0xe1 0x2c 0x31 0xd5
+# CHECK: mrs      x1, trcdvcmr7
+0x15 0x30 0x31 0xd5
+# CHECK: mrs      x21, trccidcvr0
+0x18 0x32 0x31 0xd5
+# CHECK: mrs      x24, trccidcvr1
+0x18 0x34 0x31 0xd5
+# CHECK: mrs      x24, trccidcvr2
+0xc 0x36 0x31 0xd5
+# CHECK: mrs      x12, trccidcvr3
+0xa 0x38 0x31 0xd5
+# CHECK: mrs      x10, trccidcvr4
+0x9 0x3a 0x31 0xd5
+# CHECK: mrs      x9, trccidcvr5
+0x6 0x3c 0x31 0xd5
+# CHECK: mrs      x6, trccidcvr6
+0x14 0x3e 0x31 0xd5
+# CHECK: mrs      x20, trccidcvr7
+0x34 0x30 0x31 0xd5
+# CHECK: mrs      x20, trcvmidcvr0
+0x34 0x32 0x31 0xd5
+# CHECK: mrs      x20, trcvmidcvr1
+0x3a 0x34 0x31 0xd5
+# CHECK: mrs      x26, trcvmidcvr2
+0x21 0x36 0x31 0xd5
+# CHECK: mrs      x1, trcvmidcvr3
+0x2e 0x38 0x31 0xd5
+# CHECK: mrs      x14, trcvmidcvr4
+0x3b 0x3a 0x31 0xd5
+# CHECK: mrs      x27, trcvmidcvr5
+0x3d 0x3c 0x31 0xd5
+# CHECK: mrs      x29, trcvmidcvr6
+0x31 0x3e 0x31 0xd5
+# CHECK: mrs      x17, trcvmidcvr7
+0x4a 0x30 0x31 0xd5
+# CHECK: mrs      x10, trccidcctlr0
+0x44 0x31 0x31 0xd5
+# CHECK: mrs      x4, trccidcctlr1
+0x49 0x32 0x31 0xd5
+# CHECK: mrs      x9, trcvmidcctlr0
+0x4b 0x33 0x31 0xd5
+# CHECK: mrs      x11, trcvmidcctlr1
+0x96 0x70 0x31 0xd5
+# CHECK: mrs      x22, trcitctrl
+0xd7 0x78 0x31 0xd5
+# CHECK: mrs      x23, trcclaimset
+0xce 0x79 0x31 0xd5
+# CHECK: mrs      x14, trcclaimclr
+0x9c 0x10 0x11 0xd5
+# CHECK: msr      trcoslar, x28
+0xce 0x7c 0x11 0xd5
+# CHECK: msr      trclar, x14
+0xa 0x1 0x11 0xd5
+# CHECK: msr      trcprgctlr, x10
+0x1b 0x2 0x11 0xd5
+# CHECK: msr      trcprocselr, x27
+0x18 0x4 0x11 0xd5
+# CHECK: msr      trcconfigr, x24
+0x8 0x6 0x11 0xd5
+# CHECK: msr      trcauxctlr, x8
+0x10 0x8 0x11 0xd5
+# CHECK: msr      trceventctl0r, x16
+0x1b 0x9 0x11 0xd5
+# CHECK: msr      trceventctl1r, x27
+0x1a 0xb 0x11 0xd5
+# CHECK: msr      trcstallctlr, x26
+0x0 0xc 0x11 0xd5
+# CHECK: msr      trctsctlr, x0
+0xe 0xd 0x11 0xd5
+# CHECK: msr      trcsyncpr, x14
+0x8 0xe 0x11 0xd5
+# CHECK: msr      trcccctlr, x8
+0x6 0xf 0x11 0xd5
+# CHECK: msr      trcbbctlr, x6
+0x37 0x0 0x11 0xd5
+# CHECK: msr      trctraceidr, x23
+0x25 0x1 0x11 0xd5
+# CHECK: msr      trcqctlr, x5
+0x40 0x0 0x11 0xd5
+# CHECK: msr      trcvictlr, x0
+0x40 0x1 0x11 0xd5
+# CHECK: msr      trcviiectlr, x0
+0x41 0x2 0x11 0xd5
+# CHECK: msr      trcvissctlr, x1
+0x40 0x3 0x11 0xd5
+# CHECK: msr      trcvipcssctlr, x0
+0x47 0x8 0x11 0xd5
+# CHECK: msr      trcvdctlr, x7
+0x52 0x9 0x11 0xd5
+# CHECK: msr      trcvdsacctlr, x18
+0x58 0xa 0x11 0xd5
+# CHECK: msr      trcvdarcctlr, x24
+0x9c 0x0 0x11 0xd5
+# CHECK: msr      trcseqevr0, x28
+0x95 0x1 0x11 0xd5
+# CHECK: msr      trcseqevr1, x21
+0x90 0x2 0x11 0xd5
+# CHECK: msr      trcseqevr2, x16
+0x90 0x6 0x11 0xd5
+# CHECK: msr      trcseqrstevr, x16
+0x99 0x7 0x11 0xd5
+# CHECK: msr      trcseqstr, x25
+0x9d 0x8 0x11 0xd5
+# CHECK: msr      trcextinselr, x29
+0xb4 0x0 0x11 0xd5
+# CHECK: msr      trccntrldvr0, x20
+0xb4 0x1 0x11 0xd5
+# CHECK: msr      trccntrldvr1, x20
+0xb6 0x2 0x11 0xd5
+# CHECK: msr      trccntrldvr2, x22
+0xac 0x3 0x11 0xd5
+# CHECK: msr      trccntrldvr3, x12
+0xb4 0x4 0x11 0xd5
+# CHECK: msr      trccntctlr0, x20
+0xa4 0x5 0x11 0xd5
+# CHECK: msr      trccntctlr1, x4
+0xa8 0x6 0x11 0xd5
+# CHECK: msr      trccntctlr2, x8
+0xb0 0x7 0x11 0xd5
+# CHECK: msr      trccntctlr3, x16
+0xa5 0x8 0x11 0xd5
+# CHECK: msr      trccntvr0, x5
+0xbb 0x9 0x11 0xd5
+# CHECK: msr      trccntvr1, x27
+0xb5 0xa 0x11 0xd5
+# CHECK: msr      trccntvr2, x21
+0xa8 0xb 0x11 0xd5
+# CHECK: msr      trccntvr3, x8
+0xe6 0x0 0x11 0xd5
+# CHECK: msr      trcimspec0, x6
+0xfb 0x1 0x11 0xd5
+# CHECK: msr      trcimspec1, x27
+0xf7 0x2 0x11 0xd5
+# CHECK: msr      trcimspec2, x23
+0xef 0x3 0x11 0xd5
+# CHECK: msr      trcimspec3, x15
+0xed 0x4 0x11 0xd5
+# CHECK: msr      trcimspec4, x13
+0xf9 0x5 0x11 0xd5
+# CHECK: msr      trcimspec5, x25
+0xf3 0x6 0x11 0xd5
+# CHECK: msr      trcimspec6, x19
+0xfb 0x7 0x11 0xd5
+# CHECK: msr      trcimspec7, x27
+0x4 0x12 0x11 0xd5
+# CHECK: msr      trcrsctlr2, x4
+0x0 0x13 0x11 0xd5
+# CHECK: msr      trcrsctlr3, x0
+0x15 0x14 0x11 0xd5
+# CHECK: msr      trcrsctlr4, x21
+0x8 0x15 0x11 0xd5
+# CHECK: msr      trcrsctlr5, x8
+0x14 0x16 0x11 0xd5
+# CHECK: msr      trcrsctlr6, x20
+0xb 0x17 0x11 0xd5
+# CHECK: msr      trcrsctlr7, x11
+0x12 0x18 0x11 0xd5
+# CHECK: msr      trcrsctlr8, x18
+0x18 0x19 0x11 0xd5
+# CHECK: msr      trcrsctlr9, x24
+0xf 0x1a 0x11 0xd5
+# CHECK: msr      trcrsctlr10, x15
+0x15 0x1b 0x11 0xd5
+# CHECK: msr      trcrsctlr11, x21
+0x4 0x1c 0x11 0xd5
+# CHECK: msr      trcrsctlr12, x4
+0x1c 0x1d 0x11 0xd5
+# CHECK: msr      trcrsctlr13, x28
+0x3 0x1e 0x11 0xd5
+# CHECK: msr      trcrsctlr14, x3
+0x14 0x1f 0x11 0xd5
+# CHECK: msr      trcrsctlr15, x20
+0x2c 0x10 0x11 0xd5
+# CHECK: msr      trcrsctlr16, x12
+0x31 0x11 0x11 0xd5
+# CHECK: msr      trcrsctlr17, x17
+0x2a 0x12 0x11 0xd5
+# CHECK: msr      trcrsctlr18, x10
+0x2b 0x13 0x11 0xd5
+# CHECK: msr      trcrsctlr19, x11
+0x23 0x14 0x11 0xd5
+# CHECK: msr      trcrsctlr20, x3
+0x32 0x15 0x11 0xd5
+# CHECK: msr      trcrsctlr21, x18
+0x3a 0x16 0x11 0xd5
+# CHECK: msr      trcrsctlr22, x26
+0x25 0x17 0x11 0xd5
+# CHECK: msr      trcrsctlr23, x5
+0x39 0x18 0x11 0xd5
+# CHECK: msr      trcrsctlr24, x25
+0x25 0x19 0x11 0xd5
+# CHECK: msr      trcrsctlr25, x5
+0x24 0x1a 0x11 0xd5
+# CHECK: msr      trcrsctlr26, x4
+0x34 0x1b 0x11 0xd5
+# CHECK: msr      trcrsctlr27, x20
+0x25 0x1c 0x11 0xd5
+# CHECK: msr      trcrsctlr28, x5
+0x2a 0x1d 0x11 0xd5
+# CHECK: msr      trcrsctlr29, x10
+0x38 0x1e 0x11 0xd5
+# CHECK: msr      trcrsctlr30, x24
+0x34 0x1f 0x11 0xd5
+# CHECK: msr      trcrsctlr31, x20
+0x57 0x10 0x11 0xd5
+# CHECK: msr      trcssccr0, x23
+0x5b 0x11 0x11 0xd5
+# CHECK: msr      trcssccr1, x27
+0x5b 0x12 0x11 0xd5
+# CHECK: msr      trcssccr2, x27
+0x46 0x13 0x11 0xd5
+# CHECK: msr      trcssccr3, x6
+0x43 0x14 0x11 0xd5
+# CHECK: msr      trcssccr4, x3
+0x4c 0x15 0x11 0xd5
+# CHECK: msr      trcssccr5, x12
+0x47 0x16 0x11 0xd5
+# CHECK: msr      trcssccr6, x7
+0x46 0x17 0x11 0xd5
+# CHECK: msr      trcssccr7, x6
+0x54 0x18 0x11 0xd5
+# CHECK: msr      trcsscsr0, x20
+0x51 0x19 0x11 0xd5
+# CHECK: msr      trcsscsr1, x17
+0x4b 0x1a 0x11 0xd5
+# CHECK: msr      trcsscsr2, x11
+0x44 0x1b 0x11 0xd5
+# CHECK: msr      trcsscsr3, x4
+0x4e 0x1c 0x11 0xd5
+# CHECK: msr      trcsscsr4, x14
+0x56 0x1d 0x11 0xd5
+# CHECK: msr      trcsscsr5, x22
+0x43 0x1e 0x11 0xd5
+# CHECK: msr      trcsscsr6, x3
+0x4b 0x1f 0x11 0xd5
+# CHECK: msr      trcsscsr7, x11
+0x83 0x14 0x11 0xd5
+# CHECK: msr      trcpdcr, x3
+0x6 0x20 0x11 0xd5
+# CHECK: msr      trcacvr0, x6
+0x14 0x22 0x11 0xd5
+# CHECK: msr      trcacvr1, x20
+0x19 0x24 0x11 0xd5
+# CHECK: msr      trcacvr2, x25
+0x1 0x26 0x11 0xd5
+# CHECK: msr      trcacvr3, x1
+0x1c 0x28 0x11 0xd5
+# CHECK: msr      trcacvr4, x28
+0xf 0x2a 0x11 0xd5
+# CHECK: msr      trcacvr5, x15
+0x19 0x2c 0x11 0xd5
+# CHECK: msr      trcacvr6, x25
+0xc 0x2e 0x11 0xd5
+# CHECK: msr      trcacvr7, x12
+0x25 0x20 0x11 0xd5
+# CHECK: msr      trcacvr8, x5
+0x39 0x22 0x11 0xd5
+# CHECK: msr      trcacvr9, x25
+0x2d 0x24 0x11 0xd5
+# CHECK: msr      trcacvr10, x13
+0x2a 0x26 0x11 0xd5
+# CHECK: msr      trcacvr11, x10
+0x33 0x28 0x11 0xd5
+# CHECK: msr      trcacvr12, x19
+0x2a 0x2a 0x11 0xd5
+# CHECK: msr      trcacvr13, x10
+0x33 0x2c 0x11 0xd5
+# CHECK: msr      trcacvr14, x19
+0x22 0x2e 0x11 0xd5
+# CHECK: msr      trcacvr15, x2
+0x4f 0x20 0x11 0xd5
+# CHECK: msr      trcacatr0, x15
+0x4d 0x22 0x11 0xd5
+# CHECK: msr      trcacatr1, x13
+0x48 0x24 0x11 0xd5
+# CHECK: msr      trcacatr2, x8
+0x41 0x26 0x11 0xd5
+# CHECK: msr      trcacatr3, x1
+0x4b 0x28 0x11 0xd5
+# CHECK: msr      trcacatr4, x11
+0x48 0x2a 0x11 0xd5
+# CHECK: msr      trcacatr5, x8
+0x58 0x2c 0x11 0xd5
+# CHECK: msr      trcacatr6, x24
+0x46 0x2e 0x11 0xd5
+# CHECK: msr      trcacatr7, x6
+0x77 0x20 0x11 0xd5
+# CHECK: msr      trcacatr8, x23
+0x65 0x22 0x11 0xd5
+# CHECK: msr      trcacatr9, x5
+0x6b 0x24 0x11 0xd5
+# CHECK: msr      trcacatr10, x11
+0x6b 0x26 0x11 0xd5
+# CHECK: msr      trcacatr11, x11
+0x63 0x28 0x11 0xd5
+# CHECK: msr      trcacatr12, x3
+0x7c 0x2a 0x11 0xd5
+# CHECK: msr      trcacatr13, x28
+0x79 0x2c 0x11 0xd5
+# CHECK: msr      trcacatr14, x25
+0x64 0x2e 0x11 0xd5
+# CHECK: msr      trcacatr15, x4
+0x86 0x20 0x11 0xd5
+# CHECK: msr      trcdvcvr0, x6
+0x83 0x24 0x11 0xd5
+# CHECK: msr      trcdvcvr1, x3
+0x85 0x28 0x11 0xd5
+# CHECK: msr      trcdvcvr2, x5
+0x8b 0x2c 0x11 0xd5
+# CHECK: msr      trcdvcvr3, x11
+0xa9 0x20 0x11 0xd5
+# CHECK: msr      trcdvcvr4, x9
+0xae 0x24 0x11 0xd5
+# CHECK: msr      trcdvcvr5, x14
+0xaa 0x28 0x11 0xd5
+# CHECK: msr      trcdvcvr6, x10
+0xac 0x2c 0x11 0xd5
+# CHECK: msr      trcdvcvr7, x12
+0xc8 0x20 0x11 0xd5
+# CHECK: msr      trcdvcmr0, x8
+0xc8 0x24 0x11 0xd5
+# CHECK: msr      trcdvcmr1, x8
+0xd6 0x28 0x11 0xd5
+# CHECK: msr      trcdvcmr2, x22
+0xd6 0x2c 0x11 0xd5
+# CHECK: msr      trcdvcmr3, x22
+0xe5 0x20 0x11 0xd5
+# CHECK: msr      trcdvcmr4, x5
+0xf0 0x24 0x11 0xd5
+# CHECK: msr      trcdvcmr5, x16
+0xfb 0x28 0x11 0xd5
+# CHECK: msr      trcdvcmr6, x27
+0xf5 0x2c 0x11 0xd5
+# CHECK: msr      trcdvcmr7, x21
+0x8 0x30 0x11 0xd5
+# CHECK: msr      trccidcvr0, x8
+0x6 0x32 0x11 0xd5
+# CHECK: msr      trccidcvr1, x6
+0x9 0x34 0x11 0xd5
+# CHECK: msr      trccidcvr2, x9
+0x8 0x36 0x11 0xd5
+# CHECK: msr      trccidcvr3, x8
+0x3 0x38 0x11 0xd5
+# CHECK: msr      trccidcvr4, x3
+0x15 0x3a 0x11 0xd5
+# CHECK: msr      trccidcvr5, x21
+0xc 0x3c 0x11 0xd5
+# CHECK: msr      trccidcvr6, x12
+0x7 0x3e 0x11 0xd5
+# CHECK: msr      trccidcvr7, x7
+0x24 0x30 0x11 0xd5
+# CHECK: msr      trcvmidcvr0, x4
+0x23 0x32 0x11 0xd5
+# CHECK: msr      trcvmidcvr1, x3
+0x29 0x34 0x11 0xd5
+# CHECK: msr      trcvmidcvr2, x9
+0x31 0x36 0x11 0xd5
+# CHECK: msr      trcvmidcvr3, x17
+0x2e 0x38 0x11 0xd5
+# CHECK: msr      trcvmidcvr4, x14
+0x2c 0x3a 0x11 0xd5
+# CHECK: msr      trcvmidcvr5, x12
+0x2a 0x3c 0x11 0xd5
+# CHECK: msr      trcvmidcvr6, x10
+0x23 0x3e 0x11 0xd5
+# CHECK: msr      trcvmidcvr7, x3
+0x4e 0x30 0x11 0xd5
+# CHECK: msr      trccidcctlr0, x14
+0x56 0x31 0x11 0xd5
+# CHECK: msr      trccidcctlr1, x22
+0x48 0x32 0x11 0xd5
+# CHECK: msr      trcvmidcctlr0, x8
+0x4f 0x33 0x11 0xd5
+# CHECK: msr      trcvmidcctlr1, x15
+0x81 0x70 0x11 0xd5
+# CHECK: msr      trcitctrl, x1
+0xc7 0x78 0x11 0xd5
+# CHECK: msr      trcclaimset, x7
+0xdd 0x79 0x11 0xd5
+# CHECK: msr      trcclaimclr, x29
+
+
diff --git a/test/MC/Disassembler/ARM/arm-tests.txt b/test/MC/Disassembler/ARM/arm-tests.txt
index 0c9aaab38c7e..98daaa7649aa 100644
--- a/test/MC/Disassembler/ARM/arm-tests.txt
+++ b/test/MC/Disassembler/ARM/arm-tests.txt
@@ -51,24 +51,48 @@
 # CHECKx:	ldclvc	p5, cr15, [r8], #-0
 #0x00 0xf5 0x78 0x7c
 
+# CHECK:        ldc     p13, c9, [r2, #0]!
+0x00 0x9d 0xb2 0xed
+
+# CHECK:        ldcl    p1, c9, [r3, #0]!
+0x00 0x91 0xf3 0xed
+
 # CHECK:	ldr	r0, [r2], #15
 0x0f 0x00 0x92 0xe4
 
 # CHECK:	ldr	r5, [r7, -r10, lsl #2]
 0x0a 0x51 0x17 0xe7
 
+# CHECK:        ldr     r4, [r5, #0]!
+0x00 0x40 0xb5 0xe5
+
+# CHECK:        ldrb    lr, [r10, #0]!
+0x00 0xe0 0xfa 0xe5
+
+# CHECK:	ldrd	r4, r5, [r0, #0]!
+0xd0 0x40 0xe0 0xe1
+
 # CHECK:	ldrh	r0, [r2], #0
 0xb0 0x00 0xd2 0xe0
 
 # CHECK:	ldrh	r0, [r2]
 0xb0 0x00 0xd2 0xe1
 
+# CHECK:	ldrh    lr, [sp, #0]!
+0xb0 0xe0 0xfd 0xe1
+
 # CHECK:	ldrht	r0, [r2], #15
 0xbf 0x00 0xf2 0xe0
 
+# CHECK:        ldrsb   r1, [lr, #0]!
+0xd0 0x10 0xfe 0xe1
+
 # CHECK:	ldrsbtvs	lr, [r2], -r9
 0xd9 0xe0 0x32 0x60
 
+# CHECK:        ldrsh   r9, [r1, #0]
+0xf0 0x90 0xf1 0xe1
+
 # CHECK:	lsls	r0, r2, #31
 0x82 0x0f 0xb0 0xe1
 
@@ -245,9 +269,27 @@
 # CHECK:	stc	p2, c4, [r9], {157}
 0x9d 0x42 0x89 0xec
 
+# CHECK:        stc     p15, c0, [r3, #0]!
+0x00 0x0f 0xa3 0xed
+
 # CHECK:	stc2	p2, c4, [r9], {157}
 0x9d 0x42 0x89 0xfc
 
+# CHECK:	stcl    p13, c12, [r9, #0]!
+0x00 0xcd 0xe9 0xed
+
+# CHECK:        str     pc, [r11, #0]!
+0x00 0xf0 0xab 0xe5
+
+# CHECK:        strb    r9, [r10, #0]!
+0x00 0x90 0xea 0xe5
+
+# CHECK:        strd    r12, sp, [r6, #0]!
+0xf0 0xc0 0xe6 0xe1
+
+# CHECK:        strh    r7, [r9, #0]!
+0xb0 0x70 0xe9 0xe1
+
 # CHECK:	bne #-24
 0xfa 0xff 0xff 0x1a
 
diff --git a/test/MC/Disassembler/ARM/arm-thumb-trustzone.txt b/test/MC/Disassembler/ARM/arm-thumb-trustzone.txt
new file mode 100644
index 000000000000..d6b7cf1a0bb5
--- /dev/null
+++ b/test/MC/Disassembler/ARM/arm-thumb-trustzone.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -disassemble -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
+# RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -disassemble -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
+
+
+#------------------------------------------------------------------------------
+# SMC
+#------------------------------------------------------------------------------
+
+0xff 0xf7 0x00 0x80
+0x0c 0xbf
+0xf0 0xf7 0x00 0x80
+
+# NOTZ-NOT: smc #15
+# NOTZ-NOT: smceq #0
+# TZ: smc #15
+# TZ: ite eq
+# TZ: smceq #0
diff --git a/test/MC/Disassembler/ARM/arm-trustzone.txt b/test/MC/Disassembler/ARM/arm-trustzone.txt
new file mode 100644
index 000000000000..92d5d6b29072
--- /dev/null
+++ b/test/MC/Disassembler/ARM/arm-trustzone.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -disassemble -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
+# RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -disassemble -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
+
+
+#------------------------------------------------------------------------------
+# SMC
+#------------------------------------------------------------------------------
+
+0x7f 0x00 0x60 0xe1
+0x70 0x00 0x60 0x01
+
+# NOTZ-NOT: smc #15
+# NOTZ-NOT: smceq #0
+# TZ: smc #15
+# TZ: smceq #0
+
diff --git a/test/MC/Disassembler/ARM/basic-arm-instructions.txt b/test/MC/Disassembler/ARM/basic-arm-instructions.txt
index 1100ce64a947..9f63e1e914ff 100644
--- a/test/MC/Disassembler/ARM/basic-arm-instructions.txt
+++ b/test/MC/Disassembler/ARM/basic-arm-instructions.txt
@@ -707,8 +707,10 @@
 # CHECK: mov r3, #7
 # CHECK: mov r4, #4080
 # CHECK: mov r5, #16711680
+# CHECK: mov sp, #35
 # CHECK: movw r6, #65535
 # CHECK: movw r9, #65535
+# CHECK: movw sp, #1193
 # CHECK: movs r3, #7
 # CHECK: moveq r4, #4080
 # CHECK: movseq r5, #16711680
@@ -716,8 +718,10 @@
 0x07 0x30 0xa0 0xe3
 0xff 0x4e 0xa0 0xe3
 0xff 0x58 0xa0 0xe3
+0x23 0xd0 0xa0 0xe3
 0xff 0x6f 0x0f 0xe3
 0xff 0x9f 0x0f 0xe3
+0xa9 0xd4 0x00 0xe3
 0x07 0x30 0xb0 0xe3
 0xff 0x4e 0xa0 0x03
 0xff 0x58 0xb0 0x03
@@ -740,10 +744,12 @@
 #------------------------------------------------------------------------------
 # CHECK: movt r3, #7
 # CHECK: movt r6, #65535
+# CHECK: movt sp, #3397 
 # CHECK: movteq r4, #4080
 
 0x07 0x30 0x40 0xe3
 0xff 0x6f 0x4f 0xe3
+0x45 0xdd 0x40 0xe3
 0xf0 0x4f 0x40 0x03
 
 
@@ -1441,15 +1447,6 @@
 0xf2 0x4f 0x38 0xe6
 0xf2 0x4f 0x38 0xc6
 
-#------------------------------------------------------------------------------
-# SMC
-#------------------------------------------------------------------------------
-# CHECK: smc #15
-# CHECK: smceq #0
-
-0x7f 0x00 0x60 0xe1
-0x70 0x00 0x60 0x01
-
 #------------------------------------------------------------------------------
 # SMLABB/SMLABT/SMLATB/SMLATT
 #------------------------------------------------------------------------------
@@ -1826,12 +1823,13 @@
 # CHECK: strexh  r4, r2, [r5
 # CHECK: strex  r2, r1, [r7
 # CHECK: strexd  r6, r2, r3, [r8
+# CHECK: strexd  sp, r0, r1, [r0]
 
 0x93 0x1f 0xc4 0xe1
 0x92 0x4f 0xe5 0xe1
 0x91 0x2f 0x87 0xe1
 0x92 0x6f 0xa8 0xe1
-
+0x90 0xdf 0xa0 0xe1
 
 #------------------------------------------------------------------------------
 # SUB
diff --git a/test/MC/Disassembler/ARM/thumb2.txt b/test/MC/Disassembler/ARM/thumb2.txt
index 45dace3b09c5..31f75b39fa9c 100644
--- a/test/MC/Disassembler/ARM/thumb2.txt
+++ b/test/MC/Disassembler/ARM/thumb2.txt
@@ -254,9 +254,12 @@
 #------------------------------------------------------------------------------
 # CHECK: cbnz    r7, #6
 # CHECK: cbnz    r7, #12
+# CHECK: cbz     r4, #64
 
 0x1f 0xb9
 0x37 0xb9
+0x04 0xb3
+
 
 #------------------------------------------------------------------------------
 # CDP/CDP2
@@ -554,6 +557,7 @@
 # CHECK: ldr.w r8, [r8, r2, lsl #2]
 # CHECK: ldr.w r7, [sp, r2, lsl #1]
 # CHECK: ldr.w r7, [sp, r2]
+# CHECK: ldr pc, [sp], #12
 # CHECK: ldr r2, [r4, #255]!
 # CHECK: ldr r8, [sp, #4]!
 # CHECK: ldr lr, [sp, #-4]!
@@ -567,6 +571,7 @@
 0x58 0xf8 0x22 0x80
 0x5d 0xf8 0x12 0x70
 0x5d 0xf8 0x02 0x70
+0x5d 0xf8 0x0c 0xfb
 0x54 0xf8 0xff 0x2f
 0x5d 0xf8 0x04 0x8f
 0x5d 0xf8 0x04 0xed
diff --git a/test/MC/Disassembler/Mips/mips-dsp.txt b/test/MC/Disassembler/Mips/mips-dsp.txt
new file mode 100644
index 000000000000..d10e62cd23cd
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips-dsp.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple=mipsel-unknown-linux -mattr=+dsp -disassemble < %s | FileCheck %s
+
+# CHECK: mfhi $21, $ac3
+0x10 0xa8 0x60 0x00
+
+# CHECK: mflo $21, $ac3
+0x12 0xa8 0x60 0x00
+
+# CHECK: mthi $21, $ac3
+0x11 0x18 0xa0 0x02
+
+# CHECK: mtlo $21, $ac3
+0x13 0x18 0xa0 0x02
diff --git a/test/MC/Disassembler/Mips/mips32.txt b/test/MC/Disassembler/Mips/mips32.txt
index 70224860bc71..ef8bf71bd3a6 100644
--- a/test/MC/Disassembler/Mips/mips32.txt
+++ b/test/MC/Disassembler/Mips/mips32.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux | FileCheck %s
-# CHECK: .section        __TEXT,__text,regular,pure_instructions
 # CHECK: abs.d $f12, $f14
 0x46 0x20 0x73 0x05
 
diff --git a/test/MC/Disassembler/Mips/mips32_le.txt b/test/MC/Disassembler/Mips/mips32_le.txt
index 48fa8e2c7fac..a0885a4bfe85 100644
--- a/test/MC/Disassembler/Mips/mips32_le.txt
+++ b/test/MC/Disassembler/Mips/mips32_le.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux | FileCheck %s
-# CHECK: .section        __TEXT,__text,regular,pure_instructions
 # CHECK: abs.d $f12, $f14
 0x05 0x73 0x20 0x46
 
diff --git a/test/MC/Disassembler/Mips/mips32r2.txt b/test/MC/Disassembler/Mips/mips32r2.txt
index 3b70db3bc29d..991eaa6cc97f 100644
--- a/test/MC/Disassembler/Mips/mips32r2.txt
+++ b/test/MC/Disassembler/Mips/mips32r2.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 | FileCheck %s
-# CHECK: .section        __TEXT,__text,regular,pure_instructions
 # CHECK: abs.d $f12, $f14
 0x46 0x20 0x73 0x05
 
diff --git a/test/MC/Disassembler/Mips/mips32r2_le.txt b/test/MC/Disassembler/Mips/mips32r2_le.txt
index ecfde7a39c4f..10c293821c9d 100644
--- a/test/MC/Disassembler/Mips/mips32r2_le.txt
+++ b/test/MC/Disassembler/Mips/mips32r2_le.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r2 | FileCheck %s
-# CHECK: .section        __TEXT,__text,regular,pure_instructions
 # CHECK: abs.d $f12, $f14
 0x05 0x73 0x20 0x46
 
diff --git a/test/MC/Disassembler/Mips/mips64.txt b/test/MC/Disassembler/Mips/mips64.txt
index 38b137766125..b88747370b67 100644
--- a/test/MC/Disassembler/Mips/mips64.txt
+++ b/test/MC/Disassembler/Mips/mips64.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux | FileCheck %s
-# CHECK: .section	 __TEXT,__text,regular,pure_instructions
 # CHECK: daddiu $11, $26, 31949
 0x67 0x4b 0x7c 0xcd
 
diff --git a/test/MC/Disassembler/Mips/mips64_le.txt b/test/MC/Disassembler/Mips/mips64_le.txt
index a7ef0e473bbe..ddc3c2b60be4 100644
--- a/test/MC/Disassembler/Mips/mips64_le.txt
+++ b/test/MC/Disassembler/Mips/mips64_le.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux | FileCheck %s
-# CHECK: .section	 __TEXT,__text,regular,pure_instructions
 # CHECK: daddiu $11, $26, 31949
 0xcd 0x7c 0x4b 0x67
 
diff --git a/test/MC/Disassembler/Mips/mips64r2.txt b/test/MC/Disassembler/Mips/mips64r2.txt
index 0b421fc551e2..cee6f3c21f72 100644
--- a/test/MC/Disassembler/Mips/mips64r2.txt
+++ b/test/MC/Disassembler/Mips/mips64r2.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mattr +mips64r2 | FileCheck %s
-# CHECK: .section	 __TEXT,__text,regular,pure_instructions
 # CHECK: daddiu $11, $26, 31949
 0x67 0x4b 0x7c 0xcd
 
diff --git a/test/MC/Disassembler/Mips/mips64r2_le.txt b/test/MC/Disassembler/Mips/mips64r2_le.txt
index c1d326f6d674..82e4d6ae1ce0 100644
--- a/test/MC/Disassembler/Mips/mips64r2_le.txt
+++ b/test/MC/Disassembler/Mips/mips64r2_le.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux -mattr +mips64r2 | FileCheck %s
-# CHECK: .section	 __TEXT,__text,regular,pure_instructions
 # CHECK: daddiu $11, $26, 31949
 0xcd 0x7c 0x4b 0x67
 
diff --git a/test/MC/Disassembler/X86/intel-syntax.txt b/test/MC/Disassembler/X86/intel-syntax.txt
index 27694cd01446..57e602f1500e 100644
--- a/test/MC/Disassembler/X86/intel-syntax.txt
+++ b/test/MC/Disassembler/X86/intel-syntax.txt
@@ -110,3 +110,12 @@
 
 # CHECK: vpgatherdd XMM10, DWORD PTR [R15 + 2*XMM9], XMM8
 0xc4 0x02 0x39 0x90 0x14 0x4f
+
+# CHECK: xsave64 OPAQUE PTR [RAX]
+0x48 0x0f 0xae 0x20
+
+# CHECK: xrstor64 OPAQUE PTR [RAX]
+0x48 0x0f 0xae 0x28
+
+# CHECK: xsaveopt64 OPAQUE PTR [RAX]
+0x48 0x0f 0xae 0x30
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index 5ea40eb91348..9827a1809f1b 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -753,3 +753,18 @@
 # CHECK: lock
 # CHECK-NEXT: xaddq	%rcx, %rbx
 0xf0 0x48 0x0f 0xc1 0xcb
+
+# rdar://13493622 lldb doesn't print the x86 rep/repne prefix when disassembling
+# CHECK: repne
+# CHECK-NEXT: movsd
+0xf2 0xa5
+# CHECK: repne
+# CHECK-NEXT: movsq
+0xf2 0x48 0xa5
+# CHECK: repne
+# CHECK-NEXT: movb  $0, (%rax)
+0xf2 0xc6 0x0 0x0
+# CHECK: rep
+# CHECK-NEXT: lock
+# CHECK-NEXT: incl   (%rax)
+0xf3 0xf0 0xff 0x00
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index 5de1d5978433..c285af72b358 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -112,3 +112,18 @@
 
 # CHECK: xabort $13
 0xc6 0xf8 0x0d
+
+# CHECK: xsaveq (%rax)
+0x48 0x0f 0xae 0x20
+
+# CHECK: xrstorq (%rax)
+0x48 0x0f 0xae 0x28
+
+# CHECK: xsaveoptq (%rax)
+0x48 0x0f 0xae 0x30
+
+# CHECK: clac
+0x0f 0x01 0xca
+
+# CHECK: stac
+0x0f 0x01 0xcb
diff --git a/test/MC/Disassembler/XCore/xcore.txt b/test/MC/Disassembler/XCore/xcore.txt
index 8ad75884f40f..d509aff7d386 100644
--- a/test/MC/Disassembler/XCore/xcore.txt
+++ b/test/MC/Disassembler/XCore/xcore.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=xcore-xmos-elf | FileCheck %s
-# CHECK: .section        __TEXT,__text,regular,pure_instructions
 
 # 0r instructions
 
@@ -95,6 +94,9 @@
 # CHECK: bla r6
 0xe6 0x27
 
+# CHECK: bru r8
+0xe8 0x2f
+
 # CHECK: syncr res[r7]
 0xf7 0x87
 
@@ -237,8 +239,8 @@
 # CHECK: mkmsk r4, 24
 0x72 0xa7
 
-# CHECK: outct res[r3], r0
-0xcc 0x4e
+# CHECK: outct res[r3], 0
+0xdc 0x4e
 
 # CHECK: sext r8, 16
 0xb1 0x37
@@ -383,8 +385,8 @@
 # CHECK: ldaw r9, r1[r2]
 0x96 0xf8 0xec 0x1f
 
-# CHECK: ldaw r8, r7[r11]
-0xcf 0xfd 0xec 0x1f
+# CHECK: ldaw r8, r7[-r11]
+0xcf 0xfd 0xec 0x27
 
 # CHECK: mul r0, r4, r2
 0xc2 0xf8 0xec 0x3f
@@ -398,6 +400,9 @@
 # CHECK: st16 r5, r3[r8]
 0xdc 0xfc 0xec 0x87
 
+# CHECK: st8 r9, r1[r3]
+0x97 0xf8 0xec 0x8f
+
 # CHECK: stw r7, r10[r1]
 0xf9 0xf9 0xec 0x07
 
@@ -453,36 +458,72 @@
 # CHECK: ldaw r1, dp[456]
 0x07 0xf0 0x48 0x60
 
+# CHECK: ldaw cp, dp[5]
+0x05 0x63
+
+# CHECK: ldaw sp, dp[9929]
+0x9b 0xf0 0x89 0x63
+
 # CHECK: ldaw r3, sp[2]
 0xc2 0x64
 
 # CHECK: ldaw r8, sp[65535]
 0xff 0xf3 0x3f 0x66
 
+# CHECK: ldaw sp, sp[41]
+0xa9 0x67
+
+# CHECK: ldaw sp, sp[13121]
+0xcd 0xf0 0x81 0x67
+
 # CHECK: ldc r3, 30
 0xde 0x68
 
 # CHECK: ldc r11, 1000
 0x0f 0xf0 0xe8 0x6a
 
+# CHECK: ldc sp, 0
+0x80 0x6b
+
+# CHECK: ldc lr, 81
+0x01 0xf0 0xd1 0x6b
+
 # CHECK: ldw r0, cp[4]
 0x04 0x6c
 
 # CHECK: ldw r1, cp[32345]
 0xf9 0xf1 0x59 0x6c
 
+# CHECK: ldw cp, cp[8]
+0x08 0x6f
+
+# CHECK: ldw sp, cp[10222]
+0x9f 0xf0 0xae 0x6f
+
 # CHECK: ldw r10, dp[16]
 0x90 0x5a
 
 # CHECK: ldw r10, dp[76]
 0x01 0xf0 0x8c 0x5a
 
+# CHECK: ldw lr, dp[8]
+0xc8 0x5b
+
+# CHECK: ldw dp, dp[33221]
+0x07 0xf2 0x45 0x5b
+
 # CHECK: ldw r8, sp[51]
 0x33 0x5e
 
 # CHECK: ldw r8, sp[1225]
 0x13 0xf0 0x09 0x5e
 
+# CHECK: ldw cp, sp[31]
+0x1f 0x5f
+
+# CHECK: ldw sp, sp[1000]
+0x0f 0xf0 0xa8 0x5f
+
 # CHECK: setc res[r5], 36
 0x64 0xe9
 
@@ -495,12 +536,24 @@
 # CHECK: stw r9, dp[654]
 0x0a 0xf0 0x4e 0x52
 
+# CHECK: stw lr, dp[23]
+0xd7 0x53
+
+# CHECK: stw sp, dp[44442]
+0xb6 0xf2 0x9a 0x53
+
 # CHECK: stw r1, sp[32]
 0x60 0x54
 
 # CHECK: stw r0, sp[8761]
 0x88 0xf0 0x39 0x54
 
+# CHECK: stw cp, sp[63]
+0x3f 0x57
+
+# CHECK: stw lr, sp[4391]
+0x44 0xf0 0xe7 0x57
+
 # u6 / lu6 instructions
 
 # CHECK: bu -20
@@ -610,8 +663,8 @@
 # CHECK: ldw r11, cp[132]
 0x84 0xe4
 
-# CHECK: ldw r11, cp[3444]
-0x35 0xf0 0xf4 0x6e
+# CHECK: ldw r11, cp[102741]
+0x64 0xf0 0x55 0xe5
 
 # l6r instructions
 
diff --git a/test/MC/ELF/abs.s b/test/MC/ELF/abs.s
index 48dbe3dffcc5..1836f4005973 100644
--- a/test/MC/ELF/abs.s
+++ b/test/MC/ELF/abs.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that zed will be an ABS symbol
 
@@ -6,11 +6,12 @@
 .Lbar:
         zed = .Lfoo - .Lbar
 
-// CHECK:      # Symbol 1
-// CHECK-NEXT: (('st_name', 0x00000001) # 'zed'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0xfff1)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK:        Symbol {
+// CHECK:          Name: zed
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/alias-reloc.s b/test/MC/ELF/alias-reloc.s
index f0db81592e78..c25c25932f09 100644
--- a/test/MC/ELF/alias-reloc.s
+++ b/test/MC/ELF/alias-reloc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
 
 // Test that this produces a R_X86_64_PLT32 with bar.
 
@@ -17,36 +17,30 @@ foo2:
     .set    bar2,foo2
     .quad    bar2
 
-// CHECK:       # Relocation 0
-// CHECK-NEXT:  (('r_offset', 0x0000000000000001)
-// CHECK-NEXT:   ('r_sym', 0x00000001)
-// CHECK-NEXT:   ('r_type', 0x00000004)
-// CHECK-NEXT:   ('r_addend', 0xfffffffffffffffc)
-// CHECK-NEXT:  ),
-
-// CHECK:      # Relocation 1
-// CHECK-NEXT: (('r_offset', 0x0000000000000005)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x00000001)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-
-// CHECK:       # Symbol 1
-// CHECK-NEXT:  (('st_name', 0x00000005) # 'bar'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-
-// CHECK:      # Symbol 6
-// CHECK-NEXT: (('st_name', 0x0000000e) # 'bar2'
-// CHECK-NEXT:  ('st_bind', 0x2)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0004)
-// CHECK-NEXT:  ('st_value', 0x0000000000000005)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[0-9]+}}) zed {
+// CHECK-NEXT:     0x1 R_X86_64_PLT32 bar 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:     0x5 R_X86_64_64 bar2 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+
+// CHECK:      Symbols [
+// CHECK:        Symbol {
+// CHECK-NEXT:     Name: bar
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+
+// CHECK:        Symbol {
+// CHECK:          Name: bar2
+// CHECK-NEXT:     Value: 0x5
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: zed
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/alias.s b/test/MC/ELF/alias.s
index f38262801353..0575f41fc851 100644
--- a/test/MC/ELF/alias.s
+++ b/test/MC/ELF/alias.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 foo:
 bar = foo
@@ -16,70 +16,78 @@ foo4:
 bar4 = foo4
 
         .long foo2
-// CHECK:       # Symbol 1
-// CHECK-NEXT:  (('st_name', 0x00000005) # 'bar'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 2
-// CHECK-NEXT: (('st_name', 0x0000001d) # 'bar4'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x2)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT:  # Symbol 3
-// CHECK-NEXT:  (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT:  # Symbol 4
-// CHECK-NEXT:  (('st_name', 0x0000000e) # 'foo3'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 5
-// CHECK-NEXT: (('st_name', 0x00000018) # 'foo4'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x2)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 6
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 7
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 8
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 9
-// CHECK-NEXT:  (('st_name', 0x00000013) # 'bar3'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK:       # Symbol 10
-// CHECK-NEXT:  (('st_name', 0x00000009) # 'bar2'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+
+// CHECK:      Symbols [
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar4
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Function
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo3
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo4
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Function
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .text (0)
+// CHECK:        }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .data (0)
+// CHECK:        }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .bss (0)
+// CHECK:        }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar3
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar2
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/align-bss.s b/test/MC/ELF/align-bss.s
index a59232b812a4..776eef38467c 100644
--- a/test/MC/ELF/align-bss.s
+++ b/test/MC/ELF/align-bss.s
@@ -1,17 +1,22 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the bss section is correctly aligned
 
 	.local	foo
 	.comm	foo,2048,16
 
-// CHECK:        ('sh_name', 0x00000007) # '.bss'
-// CHECK-NEXT:   ('sh_type', 0x00000008)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000003)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000800)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000010)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
+// CHECK:        Section {
+// CHECK:          Name: .bss
+// CHECK-NEXT:     Type: SHT_NOBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_WRITE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 2048
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 16
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/align-nops.s b/test/MC/ELF/align-nops.s
index 3bf96e956a93..5e3386823f26 100644
--- a/test/MC/ELF/align-nops.s
+++ b/test/MC/ELF/align-nops.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 // Test that we get optimal nops in text
     .text
@@ -15,26 +15,40 @@ f0:
     .long 0
     .align  8
 
-// CHECK: (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr',
-// CHECK-NEXT:  ('sh_offset',
-// CHECK-NEXT:  ('sh_size', 0x0000000000000010)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '00000000 0f1f4000 00000000 0f1f4000')
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 16
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 00000000 0F1F4000 00000000 0F1F4000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-// CHECK: (('sh_name', 0x00000026) # '.data'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000003)
-// CHECK-NEXT:  ('sh_addr',
-// CHECK-NEXT:  ('sh_offset',
-// CHECK-NEXT:  ('sh_size', 0x0000000000000010)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '00000000 90909090 00000000 00000000')
+// CHECK:        Section {
+// CHECK:          Name: .data
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_WRITE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 16
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 00000000 90909090 00000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/align-size.s b/test/MC/ELF/align-size.s
index f6282910db07..84a6e9981488 100644
--- a/test/MC/ELF/align-size.s
+++ b/test/MC/ELF/align-size.s
@@ -1,13 +1,18 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the alignment does contribute to the size of the section.
 
 	.zero 4
 	.align	8
 
-// CHECK:      (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000008)
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 8
+// CHECK:        }
diff --git a/test/MC/ELF/align-text.s b/test/MC/ELF/align-text.s
index 2fd3cba50231..b00af4a34b08 100644
--- a/test/MC/ELF/align-text.s
+++ b/test/MC/ELF/align-text.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the .text directive doesn't cause alignment.
 
@@ -6,14 +6,18 @@
         .text
         .zero 1
 
-// CHECK:      (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 2
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/align.s b/test/MC/ELF/align.s
index 3142ffb55351..46be3df62f2d 100644
--- a/test/MC/ELF/align.s
+++ b/test/MC/ELF/align.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the alignment of rodata doesn't force a alignment of the
 // previous section (.bss)
@@ -7,26 +7,33 @@
 	.section	.rodata,"a",@progbits
 	.align	8
 
-// CHECK: # Section 3
-// CHECK-NEXT:  (('sh_name', 0x00000007) # '.bss'
-// CHECK-NEXT:   ('sh_type', 0x00000008)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000003)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000044)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000026) # '.rodata'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
+// CHECK:        Section {
+// CHECK:          Name: .bss
+// CHECK-NEXT:     Type: SHT_NOBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_WRITE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x44
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 4
+// CHECK-NEXT:     Name: .rodata
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/basic-elf-32.s b/test/MC/ELF/basic-elf-32.s
index 2c6a9841956c..3ddb53981a9f 100644
--- a/test/MC/ELF/basic-elf-32.s
+++ b/test/MC/ELF/basic-elf-32.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -h -s -r -t | FileCheck %s
 
 	.text
 	.globl	main
@@ -30,49 +30,53 @@ main:                                   # @main
 
 	.section	.note.GNU-stack,"",@progbits
 
-// CHECK: ('e_indent[EI_CLASS]', 0x01)
-// CHECK: ('e_indent[EI_DATA]', 0x01)
-// CHECK: ('e_indent[EI_VERSION]', 0x01)
-// CHECK: ('_sections', [
-// CHECK:   # Section 0
-// CHECK:   (('sh_name', 0x00000000) # ''
+// CHECK: ElfHeader {
+// CHECK:   Class: 32-bit
+// CHECK:   DataEncoding: LittleEndian
+// CHECK:   FileVersion: 1
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Index: 0
+// CHECK:     Name: (0)
 
-// CHECK:   # '.text'
+// CHECK:     Name: .text
 
-// CHECK:   # '.rel.text'
+// CHECK:     Name: .rel.text
 
-// CHECK:   ('_relocations', [
-// CHECK:     # Relocation 0
-// CHECK:     (('r_offset', 0x00000006)
-// CHECK:      ('r_type', 0x01)
-// CHECK:     ),
-// CHECK:     # Relocation 1
-// CHECK:     (('r_offset', 0x0000000b)
-// CHECK:      ('r_type', 0x02)
-// CHECK:     ),
-// CHECK:     # Relocation 2
-// CHECK:     (('r_offset', 0x00000012)
-// CHECK:      ('r_type', 0x01)
-// CHECK:     ),
-// CHECK:     # Relocation 3
-// CHECK:     (('r_offset', 0x00000017)
-// CHECK:      ('r_type', 0x02)
-// CHECK:     ),
-// CHECK:   ])
+// CHECK: Relocations [
+// CHECK:   Section (1) .text {
+// CHECK:     0x6  R_386_32   .rodata.str1.1
+// CHECK:     0xB  R_386_PC32 puts
+// CHECK:     0x12 R_386_32   .rodata.str1.1
+// CHECK:     0x17 R_386_PC32 puts
+// CHECK:   }
+// CHECK: ]
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
+// CHECK:   }
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
+// CHECK:   }
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
+// CHECK:   }
 
-// CHECK:   # 'main'
-// CHECK:   ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x2)
+// CHECK:   Symbol {
+// CHECK:     Name: main
+// CHECK:     Binding: Global
+// CHECK:     Type: Function
+// CHECK:   }
 
-// CHECK:   # 'puts'
-// CHECK:   ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x0)
+// CHECK:   Symbol {
+// CHECK:     Name: puts
+// CHECK:     Binding: Global
+// CHECK:     Type: None
+// CHECK:   }
diff --git a/test/MC/ELF/basic-elf-64.s b/test/MC/ELF/basic-elf-64.s
index 38ffaa724acc..f98623ad1e3c 100644
--- a/test/MC/ELF/basic-elf-64.s
+++ b/test/MC/ELF/basic-elf-64.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -h -s -r -t | FileCheck %s
 
         .text
 	.globl	main
@@ -30,53 +30,51 @@ main:                                   # @main
 
 	.section	.note.GNU-stack,"",@progbits
 
-// CHECK: ('e_indent[EI_CLASS]', 0x02)
-// CHECK: ('e_indent[EI_DATA]', 0x01)
-// CHECK: ('e_indent[EI_VERSION]', 0x01)
-// CHECK: ('_sections', [
-// CHECK:   # Section 0
-// CHECK:   (('sh_name', 0x00000000) # ''
+// CHECK: ElfHeader {
+// CHECK:   Class: 64-bit
+// CHECK:   DataEncoding: LittleEndian
+// CHECK:   FileVersion: 1
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Index: 0
+// CHECK:     Name: (0)
 
-// CHECK:   # '.text'
+// CHECK:     Name: .text
 
-// CHECK:   # '.rela.text'
+// CHECK:     Name: .rela.text
 
-// CHECK:   ('_relocations', [
-// CHECK:     # Relocation 0
-// CHECK:     (('r_offset', 0x0000000000000005)
-// CHECK:      ('r_type', 0x0000000a)
-// CHECK:      ('r_addend', 0x0000000000000000)
-// CHECK:     ),
-// CHECK:     # Relocation 1
-// CHECK:     (('r_offset', 0x000000000000000a)
-// CHECK:      ('r_type', 0x00000002)
-// CHECK:      ('r_addend', 0xfffffffffffffffc)
-// CHECK:     ),
-// CHECK:     # Relocation 2
-// CHECK:     (('r_offset', 0x000000000000000f)
-// CHECK:      ('r_type', 0x0000000a)
-// CHECK:      ('r_addend', 0x0000000000000006)
-// CHECK:     ),
-// CHECK:     # Relocation 3
-// CHECK:     (('r_offset', 0x0000000000000014)
-// CHECK:      ('r_type', 0x00000002)
-// CHECK:      ('r_addend', 0xfffffffffffffffc)
-// CHECK:     ),
-// CHECK:   ])
+// CHECK: Relocations [
+// CHECK:   Section (1) .text {
+// CHECK:     0x5  R_X86_64_32   .rodata.str1.1 0x0
+// CHECK:     0xA  R_X86_64_PC32 puts           0xFFFFFFFFFFFFFFFC
+// CHECK:     0xF  R_X86_64_32   .rodata.str1.1 0x6
+// CHECK:     0x14 R_X86_64_PC32 puts           0xFFFFFFFFFFFFFFFC
+// CHECK:   }
+// CHECK: ]
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
+// CHECK:   }
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
+// CHECK:   }
 
-// CHECK:   # 'main'
-// CHECK-NEXT: ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x2)
+// CHECK:   Symbol {
+// CHECK:     Name: main
+// CHECK:     Binding: Global
+// CHECK:     Type: Function
+// CHECK:  }
 
-// CHECK:   # 'puts'
-// CHECK-NEXT: ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x0)
+// CHECK:   Symbol {
+// CHECK:     Name: puts
+// CHECK:     Binding: Global
+// CHECK:     Type: None
+// CHECK:  }
diff --git a/test/MC/ELF/call-abs.s b/test/MC/ELF/call-abs.s
index 795a65993c0d..81265a1b075d 100644
--- a/test/MC/ELF/call-abs.s
+++ b/test/MC/ELF/call-abs.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
 
 	.text
 	.globl	f
@@ -15,10 +15,8 @@ f:                                      # @f
 
 	.section	.note.GNU-stack,"",@progbits
 
-// CHECK:      ('_relocations', [
-// CHECK-NEXT:  # Relocation 0
-// CHECK-NEXT:  (('r_offset', 0x00000004)
-// CHECK-NEXT:   ('r_sym', 0x000000)
-// CHECK-NEXT:   ('r_type', 0x02)
-// CHECK-NEXT:  ),
-// CHECK-NEXT: ])
+// CHECK:      Relocations [
+// CHECK:        Section ({{[^ ]+}}) {{[^ ]+}} {
+// CHECK-NEXT:     0x4 R_386_PC32 -
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-adjust-cfa-offset.s b/test/MC/ELF/cfi-adjust-cfa-offset.s
index f0d9c5fe23ee..137b8b6f8490 100644
--- a/test/MC/ELF/cfi-adjust-cfa-offset.s
+++ b/test/MC/ELF/cfi-adjust-cfa-offset.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -11,36 +11,43 @@ f:
 	ret
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000038)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 1c000000 1c000000 00000000 0a000000 00440e10 410e1444 0e080000 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x00000000000003a0)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 56
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 1C000000 1C000000
+// CHECK-NEXT:       0020: 00000000 0A000000 00440E10 410E1444
+// CHECK-NEXT:       0030: 0E080000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x3A0
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-advance-loc2.s b/test/MC/ELF/cfi-advance-loc2.s
index b3c08e0a0ec0..1cad32507cde 100644
--- a/test/MC/ELF/cfi-advance-loc2.s
+++ b/test/MC/ELF/cfi-advance-loc2.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 // test that this produces a correctly encoded cfi_advance_loc2
 
@@ -10,36 +10,41 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000148)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 01010000 00030001 0e080000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x148
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 01010000 00030001 0E080000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-
-// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000490)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x490
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-def-cfa-offset.s b/test/MC/ELF/cfi-def-cfa-offset.s
index 0ed2be057edd..f1a54a810e0a 100644
--- a/test/MC/ELF/cfi-def-cfa-offset.s
+++ b/test/MC/ELF/cfi-def-cfa-offset.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -10,37 +10,43 @@ f:
 	ret
 	.cfi_endproc
 
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 0a000000 00440e10 450e0800')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 0A000000 00440E10 450E0800
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-// CHECK:       # Section 5
-// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000398)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x398
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-def-cfa-register.s b/test/MC/ELF/cfi-def-cfa-register.s
index e87b4f67d4b6..b1e74ea098c2 100644
--- a/test/MC/ELF/cfi-def-cfa-register.s
+++ b/test/MC/ELF/cfi-def-cfa-register.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,35 +7,41 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410d06 00000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00410D06 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-def-cfa.s b/test/MC/ELF/cfi-def-cfa.s
index e25bf5c8e3c4..abde0de4fad0 100644
--- a/test/MC/ELF/cfi-def-cfa.s
+++ b/test/MC/ELF/cfi-def-cfa.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,41 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410c07 08000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00410C07 08000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-
-// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s
index 3a5af00901cf..a910faba64cc 100644
--- a/test/MC/ELF/cfi-escape.s
+++ b/test/MC/ELF/cfi-escape.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,42 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00411507 7f000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00411507 7F000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s
index 9acb76c24d5b..f7f95fbc5f2d 100644
--- a/test/MC/ELF/cfi-offset.s
+++ b/test/MC/ELF/cfi-offset.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,41 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00418602 00000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00418602 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-
-// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-register.s b/test/MC/ELF/cfi-register.s
index 37723097030f..f7a07e45d8b5 100644
--- a/test/MC/ELF/cfi-register.s
+++ b/test/MC/ELF/cfi-register.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,42 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:        # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410906 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00410906 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-rel-offset.s b/test/MC/ELF/cfi-rel-offset.s
index 82bbd8d6cef5..35a73efb0f4a 100644
--- a/test/MC/ELF/cfi-rel-offset.s
+++ b/test/MC/ELF/cfi-rel-offset.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -14,36 +14,43 @@ f:
         .cfi_rel_offset 6,0
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 24000000 1c000000 00000000 05000000 00410e08 410d0641 11067f41 0e104186 02000000 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x00000000000003a0)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 64
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 24000000 1C000000
+// CHECK-NEXT:       0020: 00000000 05000000 00410E08 410D0641
+// CHECK-NEXT:       0030: 11067F41 0E104186 02000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x3A0
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-rel-offset2.s b/test/MC/ELF/cfi-rel-offset2.s
index 7726adbe38f1..5817d1f4af7f 100644
--- a/test/MC/ELF/cfi-rel-offset2.s
+++ b/test/MC/ELF/cfi-rel-offset2.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -6,36 +6,42 @@ f:
         .cfi_rel_offset 6,16
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 01000000 00411106 7f000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 01000000 00411106 7F000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-remember.s b/test/MC/ELF/cfi-remember.s
index 1717662c1090..932a1828eb29 100644
--- a/test/MC/ELF/cfi-remember.s
+++ b/test/MC/ELF/cfi-remember.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -9,37 +9,42 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 03000000 00410a41 0b000000')
-// CHECK-NEXT: ),
-
-// CHECK:      # Section 5
-// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 03000000 00410A41 0B000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s
index 0fc3129c713c..6c25d5b47195 100644
--- a/test/MC/ELF/cfi-restore.s
+++ b/test/MC/ELF/cfi-restore.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,42 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 0041c600 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 0041C600 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-same-value.s b/test/MC/ELF/cfi-same-value.s
index 4c80a0a06660..075c6b9b8435 100644
--- a/test/MC/ELF/cfi-same-value.s
+++ b/test/MC/ELF/cfi-same-value.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,42 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410806 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00410806 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-sections.s b/test/MC/ELF/cfi-sections.s
index b256bbf4cd44..15a79e5c055e 100644
--- a/test/MC/ELF/cfi-sections.s
+++ b/test/MC/ELF/cfi-sections.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_64 %s
-// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_64 %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_32 %s
 
 .cfi_sections .debug_frame
 
@@ -13,26 +13,43 @@ f2:
         nop
         .cfi_endproc
 
-// ELF_64:      (('sh_name', 0x00000011) # '.debug_frame'
-// ELF_64-NEXT:  ('sh_type', 0x00000001)
-// ELF_64-NEXT:  ('sh_flags', 0x0000000000000000)
-// ELF_64-NEXT:  ('sh_addr', 0x0000000000000000)
-// ELF_64-NEXT:  ('sh_offset', 0x0000000000000048)
-// ELF_64-NEXT:  ('sh_size', 0x0000000000000048)
-// ELF_64-NEXT:  ('sh_link', 0x00000000)
-// ELF_64-NEXT:  ('sh_info', 0x00000000)
-// ELF_64-NEXT:  ('sh_addralign', 0x0000000000000008)
-// ELF_64-NEXT:  ('sh_entsize', 0x0000000000000000)
-// ELF_64-NEXT:  ('_section_data', '14000000 ffffffff 01000178 100c0708 90010000 00000000 14000000 00000000 00000000 00000000 01000000 00000000 14000000 00000000 00000000 00000000 01000000 00000000')
+// ELF_64:        Section {
+// ELF_64:          Name: .debug_frame
+// ELF_64-NEXT:     Type: SHT_PROGBITS
+// ELF_64-NEXT:     Flags [
+// ELF_64-NEXT:     ]
+// ELF_64-NEXT:     Address: 0x0
+// ELF_64-NEXT:     Offset: 0x48
+// ELF_64-NEXT:     Size: 72
+// ELF_64-NEXT:     Link: 0
+// ELF_64-NEXT:     Info: 0
+// ELF_64-NEXT:     AddressAlignment: 8
+// ELF_64-NEXT:     EntrySize: 0
+// ELF_64-NEXT:     SectionData (
+// ELF_64-NEXT:       0000: 14000000 FFFFFFFF 01000178 100C0708
+// ELF_64-NEXT:       0010: 90010000 00000000 14000000 00000000
+// ELF_64-NEXT:       0020: 00000000 00000000 01000000 00000000
+// ELF_64-NEXT:       0030: 14000000 00000000 00000000 00000000
+// ELF_64-NEXT:       0040: 01000000 00000000
+// ELF_64-NEXT:     )
+// ELF_64-NEXT:   }
 
-// ELF_32:      (('sh_name', 0x00000010) # '.debug_frame'
-// ELF_32-NEXT:  ('sh_type', 0x00000001)
-// ELF_32-NEXT:  ('sh_flags', 0x00000000)
-// ELF_32-NEXT:  ('sh_addr', 0x00000000)
-// ELF_32-NEXT:  ('sh_offset', 0x00000038)
-// ELF_32-NEXT:  ('sh_size', 0x00000034)
-// ELF_32-NEXT:  ('sh_link', 0x00000000)
-// ELF_32-NEXT:  ('sh_info', 0x00000000)
-// ELF_32-NEXT:  ('sh_addralign', 0x00000004)
-// ELF_32-NEXT:  ('sh_entsize', 0x00000000)
-// ELF_32-NEXT:  ('_section_data', '10000000 ffffffff 0100017c 080c0404 88010000 0c000000 00000000 00000000 01000000 0c000000 00000000 01000000 01000000')
+// ELF_32:        Section {
+// ELF_32:          Name: .debug_frame
+// ELF_32-NEXT:     Type: SHT_PROGBITS
+// ELF_32-NEXT:     Flags [
+// ELF_32-NEXT:     ]
+// ELF_32-NEXT:     Address: 0x0
+// ELF_32-NEXT:     Offset: 0x38
+// ELF_32-NEXT:     Size: 52
+// ELF_32-NEXT:     Link: 0
+// ELF_32-NEXT:     Info: 0
+// ELF_32-NEXT:     AddressAlignment: 4
+// ELF_32-NEXT:     EntrySize: 0
+// ELF_32-NEXT:     SectionData (
+// ELF_32-NEXT:       0000: 10000000 FFFFFFFF 0100017C 080C0404
+// ELF_32-NEXT:       0010: 88010000 0C000000 00000000 00000000
+// ELF_32-NEXT:       0020: 01000000 0C000000 00000000 01000000
+// ELF_32-NEXT:       0030: 01000000
+// ELF_32-NEXT:     )
+// ELF_32-NEXT:   }
diff --git a/test/MC/ELF/cfi-signal-frame.s b/test/MC/ELF/cfi-signal-frame.s
index cf6d16073abe..023311962189 100644
--- a/test/MC/ELF/cfi-signal-frame.s
+++ b/test/MC/ELF/cfi-signal-frame.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 f:
         .cfi_startproc
@@ -9,15 +9,25 @@ g:
         .cfi_startproc
         .cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000058)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5253 00017810 011b0c07 08900100 10000000 1c000000 00000000 00000000 00000000 14000000 00000000 017a5200 01781001 1b0c0708 90010000 10000000 1c000000 00000000 00000000 00000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 88
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5253 00017810
+// CHECK-NEXT:       0010: 011B0C07 08900100 10000000 1C000000
+// CHECK-NEXT:       0020: 00000000 00000000 00000000 14000000
+// CHECK-NEXT:       0030: 00000000 017A5200 01781001 1B0C0708
+// CHECK-NEXT:       0040: 90010000 10000000 1C000000 00000000
+// CHECK-NEXT:       0050: 00000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/cfi-undefined.s b/test/MC/ELF/cfi-undefined.s
index 28049faec285..c83b47c142b4 100644
--- a/test/MC/ELF/cfi-undefined.s
+++ b/test/MC/ELF/cfi-undefined.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -6,36 +6,43 @@ f:
 	.cfi_undefined %rbp
         nop
 	.cfi_endproc
-// CHECK:  # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410706 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00410706 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-zero-addr-delta.s b/test/MC/ELF/cfi-zero-addr-delta.s
index 9e818e694846..4ac0e34f10c3 100644
--- a/test/MC/ELF/cfi-zero-addr-delta.s
+++ b/test/MC/ELF/cfi-zero-addr-delta.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 // Test that we don't produce a DW_CFA_advance_loc 0
 
@@ -14,35 +14,41 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000038)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 1c000000 1c000000 00000000 04000000 00410e10 410a0e08 410b0000 00000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 56
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 1C000000 1C000000
+// CHECK-NEXT:       0020: 00000000 04000000 00410E10 410A0E08
+// CHECK-NEXT:       0030: 410B0000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000398)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x398
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
diff --git a/test/MC/ELF/cfi.s b/test/MC/ELF/cfi.s
index 9320894226fa..98f4fa9c62e6 100644
--- a/test/MC/ELF/cfi.s
+++ b/test/MC/ELF/cfi.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f1:
         .cfi_startproc
@@ -212,463 +212,220 @@ f36:
         nop
         .cfi_endproc
 
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000068)
-// CHECK-NEXT:  ('sh_size', 0x00000000000006c8)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a4c52 00017810 02031b0c 07089001 14000000 1c000000 00000000 01000000 04000000 00000000 20000000 00000000 017a504c 52000178 100b0000 00000000 00000003 1b0c0708 90010000 14000000 28000000 00000000 01000000 04000000 00000000 14000000 70000000 00000000 01000000 04000000 00000000 20000000 00000000 017a504c 52000178 100b0000 00000000 00000002 1b0c0708 90010000 10000000 28000000 00000000 01000000 02000000 18000000 00000000 017a5052 00017810 04020000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06030000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a040000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 040a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 060b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a0c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a080000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a100000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04120000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06130000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a140000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 041a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 061b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a1c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a180000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a800000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04820000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06830000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a840000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 048a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 068b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a8c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a880000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a900000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04920000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06930000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a940000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 049a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 069b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a9c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a980000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000')
-// CHECK-NEXT: ),
-
-// CHECK:        # Section 5
-// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000e30)
-// CHECK-NEXT:  ('sh_size', 0x00000000000006c0)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 1
-// CHECK-NEXT:   (('r_offset', 0x0000000000000029)
-// CHECK-NEXT:    ('r_sym', 0x00000028)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 2
-// CHECK-NEXT:   (('r_offset', 0x0000000000000043)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 3
-// CHECK-NEXT:   (('r_offset', 0x000000000000005c)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000001)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 4
-// CHECK-NEXT:   (('r_offset', 0x0000000000000065)
-// CHECK-NEXT:    ('r_sym', 0x00000028)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 5
-// CHECK-NEXT:   (('r_offset', 0x0000000000000074)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000002)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 6
-// CHECK-NEXT:   (('r_offset', 0x000000000000007d)
-// CHECK-NEXT:    ('r_sym', 0x00000028)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 7
-// CHECK-NEXT:   (('r_offset', 0x0000000000000097)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 8
-// CHECK-NEXT:   (('r_offset', 0x00000000000000b0)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000003)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 9
-// CHECK-NEXT:   (('r_offset', 0x00000000000000b9)
-// CHECK-NEXT:    ('r_sym', 0x00000028)
-// CHECK-NEXT:    ('r_type', 0x0000000c)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 10
-// CHECK-NEXT:   (('r_offset', 0x00000000000000ce)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000c)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 11
-// CHECK-NEXT:   (('r_offset', 0x00000000000000e0)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000004)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 12
-// CHECK-NEXT:   (('r_offset', 0x00000000000000fe)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 13
-// CHECK-NEXT:   (('r_offset', 0x0000000000000110)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000005)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 14
-// CHECK-NEXT:   (('r_offset', 0x000000000000012e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 15
-// CHECK-NEXT:   (('r_offset', 0x0000000000000144)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000006)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 16
-// CHECK-NEXT:   (('r_offset', 0x0000000000000162)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000c)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 17
-// CHECK-NEXT:   (('r_offset', 0x0000000000000174)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000007)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 18
-// CHECK-NEXT:   (('r_offset', 0x0000000000000192)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 19
-// CHECK-NEXT:   (('r_offset', 0x00000000000001a4)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000008)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 20
-// CHECK-NEXT:   (('r_offset', 0x00000000000001c2)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 21
-// CHECK-NEXT:   (('r_offset', 0x00000000000001d8)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000009)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 22
-// CHECK-NEXT:   (('r_offset', 0x00000000000001f6)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 23
-// CHECK-NEXT:   (('r_offset', 0x000000000000020c)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000a)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 24
-// CHECK-NEXT:   (('r_offset', 0x000000000000022a)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 25
-// CHECK-NEXT:   (('r_offset', 0x0000000000000240)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000b)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 26
-// CHECK-NEXT:   (('r_offset', 0x000000000000025e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000d)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 27
-// CHECK-NEXT:   (('r_offset', 0x0000000000000270)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000c)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 28
-// CHECK-NEXT:   (('r_offset', 0x000000000000028e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 29
-// CHECK-NEXT:   (('r_offset', 0x00000000000002a0)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000d)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 30
-// CHECK-NEXT:   (('r_offset', 0x00000000000002be)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 31
-// CHECK-NEXT:   (('r_offset', 0x00000000000002d4)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000e)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 32
-// CHECK-NEXT:   (('r_offset', 0x00000000000002f2)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000d)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 33
-// CHECK-NEXT:   (('r_offset', 0x0000000000000304)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000f)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 34
-// CHECK-NEXT:   (('r_offset', 0x0000000000000322)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 35
-// CHECK-NEXT:   (('r_offset', 0x0000000000000334)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000010)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 36
-// CHECK-NEXT:   (('r_offset', 0x0000000000000352)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 37
-// CHECK-NEXT:   (('r_offset', 0x0000000000000368)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000011)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 38
-// CHECK-NEXT:   (('r_offset', 0x0000000000000386)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 39
-// CHECK-NEXT:   (('r_offset', 0x000000000000039c)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000012)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 40
-// CHECK-NEXT:   (('r_offset', 0x00000000000003ba)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 41
-// CHECK-NEXT:   (('r_offset', 0x00000000000003d0)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000013)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 42
-// CHECK-NEXT:   (('r_offset', 0x00000000000003ee)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000c)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 43
-// CHECK-NEXT:   (('r_offset', 0x0000000000000400)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000014)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 44
-// CHECK-NEXT:   (('r_offset', 0x000000000000041e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 45
-// CHECK-NEXT:   (('r_offset', 0x0000000000000430)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000015)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 46
-// CHECK-NEXT:   (('r_offset', 0x000000000000044e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 47
-// CHECK-NEXT:   (('r_offset', 0x0000000000000464)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000016)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 48
-// CHECK-NEXT:   (('r_offset', 0x0000000000000482)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000c)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 49
-// CHECK-NEXT:   (('r_offset', 0x0000000000000494)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000017)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 50
-// CHECK-NEXT:   (('r_offset', 0x00000000000004b2)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 51
-// CHECK-NEXT:   (('r_offset', 0x00000000000004c4)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000018)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 52
-// CHECK-NEXT:   (('r_offset', 0x00000000000004e2)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 53
-// CHECK-NEXT:   (('r_offset', 0x00000000000004f8)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000019)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 54
-// CHECK-NEXT:   (('r_offset', 0x0000000000000516)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 55
-// CHECK-NEXT:   (('r_offset', 0x000000000000052c)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001a)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 56
-// CHECK-NEXT:   (('r_offset', 0x000000000000054a)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 57
-// CHECK-NEXT:   (('r_offset', 0x0000000000000560)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001b)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 58
-// CHECK-NEXT:   (('r_offset', 0x000000000000057e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000d)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 59
-// CHECK-NEXT:   (('r_offset', 0x0000000000000590)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001c)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 60
-// CHECK-NEXT:   (('r_offset', 0x00000000000005ae)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 61
-// CHECK-NEXT:   (('r_offset', 0x00000000000005c0)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001d)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 62
-// CHECK-NEXT:   (('r_offset', 0x00000000000005de)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 63
-// CHECK-NEXT:   (('r_offset', 0x00000000000005f4)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001e)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 64
-// CHECK-NEXT:   (('r_offset', 0x0000000000000612)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000d)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 65
-// CHECK-NEXT:   (('r_offset', 0x0000000000000624)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001f)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 66
-// CHECK-NEXT:   (('r_offset', 0x0000000000000642)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 67
-// CHECK-NEXT:   (('r_offset', 0x0000000000000654)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000020)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 68
-// CHECK-NEXT:   (('r_offset', 0x0000000000000672)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 69
-// CHECK-NEXT:   (('r_offset', 0x0000000000000688)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000021)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 70
-// CHECK-NEXT:   (('r_offset', 0x00000000000006a6)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 71
-// CHECK-NEXT:   (('r_offset', 0x00000000000006bc)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000022)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x68
+// CHECK-NEXT:     Size: 1736
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20  R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:       0x29  R_X86_64_32   bar   0x0
+// CHECK-NEXT:       0x43  R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x5C  R_X86_64_PC32 .text 0x1
+// CHECK-NEXT:       0x65  R_X86_64_32   bar   0x0
+// CHECK-NEXT:       0x74  R_X86_64_PC32 .text 0x2
+// CHECK-NEXT:       0x7D  R_X86_64_32   bar   0x0
+// CHECK-NEXT:       0x97  R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0xB0  R_X86_64_PC32 .text 0x3
+// CHECK-NEXT:       0xB9  R_X86_64_16   bar   0x0
+// CHECK-NEXT:       0xCE  R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0xE0  R_X86_64_PC32 .text 0x4
+// CHECK-NEXT:       0xFE  R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x110 R_X86_64_PC32 .text 0x5
+// CHECK-NEXT:       0x12E R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x144 R_X86_64_PC32 .text 0x6
+// CHECK-NEXT:       0x162 R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0x174 R_X86_64_PC32 .text 0x7
+// CHECK-NEXT:       0x192 R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x1A4 R_X86_64_PC32 .text 0x8
+// CHECK-NEXT:       0x1C2 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x1D8 R_X86_64_PC32 .text 0x9
+// CHECK-NEXT:       0x1F6 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x20C R_X86_64_PC32 .text 0xA
+// CHECK-NEXT:       0x22A R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x240 R_X86_64_PC32 .text 0xB
+// CHECK-NEXT:       0x25E R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x270 R_X86_64_PC32 .text 0xC
+// CHECK-NEXT:       0x28E R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x2A0 R_X86_64_PC32 .text 0xD
+// CHECK-NEXT:       0x2BE R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x2D4 R_X86_64_PC32 .text 0xE
+// CHECK-NEXT:       0x2F2 R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x304 R_X86_64_PC32 .text 0xF
+// CHECK-NEXT:       0x322 R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x334 R_X86_64_PC32 .text 0x10
+// CHECK-NEXT:       0x352 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x368 R_X86_64_PC32 .text 0x11
+// CHECK-NEXT:       0x386 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x39C R_X86_64_PC32 .text 0x12
+// CHECK-NEXT:       0x3BA R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x3D0 R_X86_64_PC32 .text 0x13
+// CHECK-NEXT:       0x3EE R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0x400 R_X86_64_PC32 .text 0x14
+// CHECK-NEXT:       0x41E R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x430 R_X86_64_PC32 .text 0x15
+// CHECK-NEXT:       0x44E R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x464 R_X86_64_PC32 .text 0x16
+// CHECK-NEXT:       0x482 R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0x494 R_X86_64_PC32 .text 0x17
+// CHECK-NEXT:       0x4B2 R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x4C4 R_X86_64_PC32 .text 0x18
+// CHECK-NEXT:       0x4E2 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x4F8 R_X86_64_PC32 .text 0x19
+// CHECK-NEXT:       0x516 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x52C R_X86_64_PC32 .text 0x1A
+// CHECK-NEXT:       0x54A R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x560 R_X86_64_PC32 .text 0x1B
+// CHECK-NEXT:       0x57E R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x590 R_X86_64_PC32 .text 0x1C
+// CHECK-NEXT:       0x5AE R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x5C0 R_X86_64_PC32 .text 0x1D
+// CHECK-NEXT:       0x5DE R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x5F4 R_X86_64_PC32 .text 0x1E
+// CHECK-NEXT:       0x612 R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x624 R_X86_64_PC32 .text 0x1F
+// CHECK-NEXT:       0x642 R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x654 R_X86_64_PC32 .text 0x20
+// CHECK-NEXT:       0x672 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x688 R_X86_64_PC32 .text 0x21
+// CHECK-NEXT:       0x6A6 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x6BC R_X86_64_PC32 .text 0x22
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A4C52 00017810
+// CHECK-NEXT:       0010: 02031B0C 07089001 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 01000000 04000000 00000000
+// CHECK-NEXT:       0030: 20000000 00000000 017A504C 52000178
+// CHECK-NEXT:       0040: 100B0000 00000000 00000003 1B0C0708
+// CHECK-NEXT:       0050: 90010000 14000000 28000000 00000000
+// CHECK-NEXT:       0060: 01000000 04000000 00000000 14000000
+// CHECK-NEXT:       0070: 70000000 00000000 01000000 04000000
+// CHECK-NEXT:       0080: 00000000 20000000 00000000 017A504C
+// CHECK-NEXT:       0090: 52000178 100B0000 00000000 00000002
+// CHECK-NEXT:       00A0: 1B0C0708 90010000 10000000 28000000
+// CHECK-NEXT:       00B0: 00000000 01000000 02000000 18000000
+// CHECK-NEXT:       00C0: 00000000 017A5052 00017810 04020000
+// CHECK-NEXT:       00D0: 1B0C0708 90010000 10000000 20000000
+// CHECK-NEXT:       00E0: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       00F0: 00000000 017A5052 00017810 06030000
+// CHECK-NEXT:       0100: 00001B0C 07089001 10000000 20000000
+// CHECK-NEXT:       0110: 00000000 01000000 00000000 1C000000
+// CHECK-NEXT:       0120: 00000000 017A5052 00017810 0A040000
+// CHECK-NEXT:       0130: 00000000 00001B0C 07089001 10000000
+// CHECK-NEXT:       0140: 24000000 00000000 01000000 00000000
+// CHECK-NEXT:       0150: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0160: 040A0000 1B0C0708 90010000 10000000
+// CHECK-NEXT:       0170: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       0180: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0190: 060B0000 00001B0C 07089001 10000000
+// CHECK-NEXT:       01A0: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       01B0: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       01C0: 0A0C0000 00000000 00001B0C 07089001
+// CHECK-NEXT:       01D0: 10000000 24000000 00000000 01000000
+// CHECK-NEXT:       01E0: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       01F0: 00017810 0A080000 00000000 00001B0C
+// CHECK-NEXT:       0200: 07089001 10000000 24000000 00000000
+// CHECK-NEXT:       0210: 01000000 00000000 1C000000 00000000
+// CHECK-NEXT:       0220: 017A5052 00017810 0A100000 00000000
+// CHECK-NEXT:       0230: 00001B0C 07089001 10000000 24000000
+// CHECK-NEXT:       0240: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       0250: 00000000 017A5052 00017810 04120000
+// CHECK-NEXT:       0260: 1B0C0708 90010000 10000000 20000000
+// CHECK-NEXT:       0270: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       0280: 00000000 017A5052 00017810 06130000
+// CHECK-NEXT:       0290: 00001B0C 07089001 10000000 20000000
+// CHECK-NEXT:       02A0: 00000000 01000000 00000000 1C000000
+// CHECK-NEXT:       02B0: 00000000 017A5052 00017810 0A140000
+// CHECK-NEXT:       02C0: 00000000 00001B0C 07089001 10000000
+// CHECK-NEXT:       02D0: 24000000 00000000 01000000 00000000
+// CHECK-NEXT:       02E0: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       02F0: 041A0000 1B0C0708 90010000 10000000
+// CHECK-NEXT:       0300: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       0310: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0320: 061B0000 00001B0C 07089001 10000000
+// CHECK-NEXT:       0330: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       0340: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0350: 0A1C0000 00000000 00001B0C 07089001
+// CHECK-NEXT:       0360: 10000000 24000000 00000000 01000000
+// CHECK-NEXT:       0370: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       0380: 00017810 0A180000 00000000 00001B0C
+// CHECK-NEXT:       0390: 07089001 10000000 24000000 00000000
+// CHECK-NEXT:       03A0: 01000000 00000000 1C000000 00000000
+// CHECK-NEXT:       03B0: 017A5052 00017810 0A800000 00000000
+// CHECK-NEXT:       03C0: 00001B0C 07089001 10000000 24000000
+// CHECK-NEXT:       03D0: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       03E0: 00000000 017A5052 00017810 04820000
+// CHECK-NEXT:       03F0: 1B0C0708 90010000 10000000 20000000
+// CHECK-NEXT:       0400: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       0410: 00000000 017A5052 00017810 06830000
+// CHECK-NEXT:       0420: 00001B0C 07089001 10000000 20000000
+// CHECK-NEXT:       0430: 00000000 01000000 00000000 1C000000
+// CHECK-NEXT:       0440: 00000000 017A5052 00017810 0A840000
+// CHECK-NEXT:       0450: 00000000 00001B0C 07089001 10000000
+// CHECK-NEXT:       0460: 24000000 00000000 01000000 00000000
+// CHECK-NEXT:       0470: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0480: 048A0000 1B0C0708 90010000 10000000
+// CHECK-NEXT:       0490: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       04A0: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       04B0: 068B0000 00001B0C 07089001 10000000
+// CHECK-NEXT:       04C0: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       04D0: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       04E0: 0A8C0000 00000000 00001B0C 07089001
+// CHECK-NEXT:       04F0: 10000000 24000000 00000000 01000000
+// CHECK-NEXT:       0500: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       0510: 00017810 0A880000 00000000 00001B0C
+// CHECK-NEXT:       0520: 07089001 10000000 24000000 00000000
+// CHECK-NEXT:       0530: 01000000 00000000 1C000000 00000000
+// CHECK-NEXT:       0540: 017A5052 00017810 0A900000 00000000
+// CHECK-NEXT:       0550: 00001B0C 07089001 10000000 24000000
+// CHECK-NEXT:       0560: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       0570: 00000000 017A5052 00017810 04920000
+// CHECK-NEXT:       0580: 1B0C0708 90010000 10000000 20000000
+// CHECK-NEXT:       0590: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       05A0: 00000000 017A5052 00017810 06930000
+// CHECK-NEXT:       05B0: 00001B0C 07089001 10000000 20000000
+// CHECK-NEXT:       05C0: 00000000 01000000 00000000 1C000000
+// CHECK-NEXT:       05D0: 00000000 017A5052 00017810 0A940000
+// CHECK-NEXT:       05E0: 00000000 00001B0C 07089001 10000000
+// CHECK-NEXT:       05F0: 24000000 00000000 01000000 00000000
+// CHECK-NEXT:       0600: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0610: 049A0000 1B0C0708 90010000 10000000
+// CHECK-NEXT:       0620: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       0630: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0640: 069B0000 00001B0C 07089001 10000000
+// CHECK-NEXT:       0650: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       0660: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0670: 0A9C0000 00000000 00001B0C 07089001
+// CHECK-NEXT:       0680: 10000000 24000000 00000000 01000000
+// CHECK-NEXT:       0690: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       06A0: 00017810 0A980000 00000000 00001B0C
+// CHECK-NEXT:       06B0: 07089001 10000000 24000000 00000000
+// CHECK-NEXT:       06C0: 01000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+
+// CHECK:        Section {
+// CHECK:          Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0xE30
+// CHECK-NEXT:     Size: 1728
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/comdat.s b/test/MC/ELF/comdat.s
index d7acea6778d4..f9469dfae273 100644
--- a/test/MC/ELF/comdat.s
+++ b/test/MC/ELF/comdat.s
@@ -1,75 +1,81 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump   | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s
 
 // Test that we produce the group sections and that they are a the beginning
 // of the file.
 
-// CHECK:       # Section 1
-// CHECK-NEXT:  (('sh_name', 0x0000001b) # '.group'
-// CHECK-NEXT:   ('sh_type', 0x00000011)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_size', 0x000000000000000c)
-// CHECK-NEXT:   ('sh_link', 0x0000000d)
-// CHECK-NEXT:   ('sh_info', 0x00000001)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000004)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 2
-// CHECK-NEXT:  (('sh_name', 0x0000001b) # '.group'
-// CHECK-NEXT:   ('sh_type', 0x00000011)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x000000000000004c)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_link', 0x0000000d)
-// CHECK-NEXT:   ('sh_info', 0x00000002)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000004)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 3
-// CHECK-NEXT:  (('sh_name', 0x0000001b) # '.group'
-// CHECK-NEXT:   ('sh_type', 0x00000011)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000054)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_link', 0x0000000d)
-// CHECK-NEXT:   ('sh_info', 0x0000000d)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000004)
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 1
+// CHECK-NEXT:     Name: .group
+// CHECK-NEXT:     Type: SHT_GROUP
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 12
+// CHECK-NEXT:     Link: 13
+// CHECK-NEXT:     Info: 1
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 4
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 2
+// CHECK-NEXT:     Name: .group
+// CHECK-NEXT:     Type: SHT_GROUP
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x4C
+// CHECK-NEXT:     Size: 8
+// CHECK-NEXT:     Link: 13
+// CHECK-NEXT:     Info: 2
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 4
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 3
+// CHECK-NEXT:     Name: .group
+// CHECK-NEXT:     Type: SHT_GROUP
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x54
+// CHECK-NEXT:     Size: 8
+// CHECK-NEXT:     Link: 13
+// CHECK-NEXT:     Info: 13
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 4
+// CHECK-NEXT:   }
 
 // Test that g1 and g2 are local, but g3 is an undefined global.
 
-// CHECK:      # Symbol 1
-// CHECK-NEXT: (('st_name', 0x00000001) # 'g1'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0007)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 2
-// CHECK-NEXT: (('st_name', 0x00000004) # 'g2'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0002)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: g1 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .foo (0x7)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: g2 (4)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .group (0x2)
+// CHECK-NEXT:   }
 
-// CHECK:      # Symbol 13
-// CHECK-NEXT: (('st_name', 0x00000007) # 'g3'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: g3 (7)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
 
 
 	.section	.foo,"axG",@progbits,g1,comdat
diff --git a/test/MC/ELF/common.s b/test/MC/ELF/common.s
index 046306e3d37d..4fc2154d850c 100644
--- a/test/MC/ELF/common.s
+++ b/test/MC/ELF/common.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 
 	.text
@@ -8,13 +8,15 @@
 	.local	common1
 	.comm	common1,1,1
 
-// CHECK: ('st_name', 0x00000001) # 'common1'
-// CHECK-NEXT: ('st_bind', 0x0)
-// CHECK-NEXT: ('st_type', 0x1)
-// CHECK-NEXT: ('st_other', 0x00)
-// CHECK-NEXT: ('st_shndx',
-// CHECK-NEXT: ('st_value', 0x0000000000000000)
-// CHECK-NEXT: ('st_size', 0x0000000000000001)
+// CHECK:        Symbol {
+// CHECK:          Name: common1 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 1
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section:
+// CHECK-NEXT:   }
 
 
 // Same as common1, but with directives in a different order.
@@ -22,38 +24,44 @@
 	.type	common2,@object
 	.comm	common2,1,1
 
-// CHECK: ('st_name', 0x00000009) # 'common2'
-// CHECK-NEXT: ('st_bind', 0x0)
-// CHECK-NEXT: ('st_type', 0x1)
-// CHECK-NEXT: ('st_other', 0x00)
-// CHECK-NEXT: ('st_shndx',
-// CHECK-NEXT: ('st_value', 0x0000000000000001)
-// CHECK-NEXT: ('st_size', 0x0000000000000001)
+// CHECK:        Symbol {
+// CHECK:          Name: common2 (9)
+// CHECK-NEXT:     Value: 0x1
+// CHECK-NEXT:     Size: 1
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section:
+// CHECK-NEXT:   }
+
 
         .local	common6
         .comm	common6,8,16
 
-// CHECK:      # Symbol 3
-// CHECK-NEXT: (('st_name', 0x00000011) # 'common6'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x1)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0004)
-// CHECK-NEXT:  ('st_value', 0x0000000000000010)
-// CHECK-NEXT:  ('st_size', 0x0000000000000008)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: common6 (17)
+// CHECK-NEXT:     Value: 0x10
+// CHECK-NEXT:     Size: 8
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x4)
+// CHECK-NEXT:   }
+
 
 // Test that without an explicit .local we produce a global.
 	.type	common3,@object
 	.comm	common3,4,4
 
-// CHECK: ('st_name', 0x00000019) # 'common3'
-// CHECK-NEXT: ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x1)
-// CHECK-NEXT: ('st_other', 0x00)
-// CHECK-NEXT: ('st_shndx', 0xfff2)
-// CHECK-NEXT: ('st_value', 0x0000000000000004)
-// CHECK-NEXT: ('st_size', 0x0000000000000004)
+// CHECK:        Symbol {
+// CHECK:          Name: common3 (25)
+// CHECK-NEXT:     Value: 0x4
+// CHECK-NEXT:     Size: 4
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF2)
+// CHECK-NEXT:   }
 
 
 // Test that without an explicit .local we produce a global, even if the first
@@ -67,22 +75,25 @@ foo:
 	.type	common4,@object
 	.comm	common4,40,16
 
-// CHECK: ('st_name', 0x00000025) # 'common4'
-// CHECK-NEXT: ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x1)
-// CHECK-NEXT: ('st_other', 0x00)
-// CHECK-NEXT: ('st_shndx', 0xfff2)
-// CHECK-NEXT: ('st_value', 0x0000000000000010)
-// CHECK-NEXT: ('st_size', 0x0000000000000028)
+// CHECK:        Symbol {
+// CHECK:          Name: common4 (37)
+// CHECK-NEXT:     Value: 0x10
+// CHECK-NEXT:     Size: 40
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF2)
+// CHECK-NEXT:   }
+
 
         .comm	common5,4,4
 
-// CHECK:      # Symbol 9
-// CHECK-NEXT: (('st_name', 0x0000002d) # 'common5'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x1)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0xfff2)
-// CHECK-NEXT:  ('st_value', 0x0000000000000004)
-// CHECK-NEXT:  ('st_size', 0x0000000000000004)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: common5 (45)
+// CHECK-NEXT:     Value: 0x4
+// CHECK-NEXT:     Size: 4
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF2)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/common2.s b/test/MC/ELF/common2.s
index b13577d4a004..526ebc2a95ff 100644
--- a/test/MC/ELF/common2.s
+++ b/test/MC/ELF/common2.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the common symbols are placed at the end of .bss. In this example
 // it causes .bss to have size 9 instead of 8.
@@ -9,13 +9,16 @@
         .zero 1
 	.align	8
 
-// CHECK:      (('sh_name', 0x00000007) # '.bss'
-// CHECK-NEXT:  ('sh_type',
-// CHECK-NEXT:  ('sh_flags'
-// CHECK-NEXT:  ('sh_addr',
-// CHECK-NEXT:  ('sh_offset',
-// CHECK-NEXT:  ('sh_size', 0x0000000000000009)
-// CHECK-NEXT:  ('sh_link',
-// CHECK-NEXT:  ('sh_info',
-// CHECK-NEXT:  ('sh_addralign',
-// CHECK-NEXT:  ('sh_entsize',
+// CHECK:        Section {
+// CHECK:          Name: .bss (7)
+// CHECK-NEXT:     Type:
+// CHECK-NEXT:     Flags [
+// CHECK:          ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 9
+// CHECK-NEXT:     Link:
+// CHECK-NEXT:     Info:
+// CHECK-NEXT:     AddressAlignment:
+// CHECK-NEXT:     EntrySize:
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/debug-line.s b/test/MC/ELF/debug-line.s
index fed816afccef..75e050e9da1c 100644
--- a/test/MC/ELF/debug-line.s
+++ b/test/MC/ELF/debug-line.s
@@ -1,18 +1,26 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 // Test that .debug_line is populated.
 
-// CHECK:     (('sh_name', 0x00000011) # '.debug_line'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000044)
-// CHECK-NEXT: ('sh_size', 0x0000000000000037)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ('_section_data', '33000000 02001c00 00000101 fb0e0d00 01010101 00000001 00000100 666f6f2e 63000000 00000009 02000000 00000000 00150204 000101')
+// CHECK:        Section {
+// CHECK:          Name: .debug_line
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x44
+// CHECK-NEXT:     Size: 55
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 33000000 02001C00 00000101 FB0E0D00
+// CHECK-NEXT:       0010: 01010101 00000001 00000100 666F6F2E
+// CHECK-NEXT:       0020: 63000000 00000009 02000000 00000000
+// CHECK-NEXT:       0030: 00150204 000101
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
 	.section	.debug_line,"",@progbits
 	.text
diff --git a/test/MC/ELF/debug-loc.s b/test/MC/ELF/debug-loc.s
index 3eb3797f443f..b24fa169deb2 100644
--- a/test/MC/ELF/debug-loc.s
+++ b/test/MC/ELF/debug-loc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that we don't regress on the size of the line info section. We used
 // to handle negative line diffs incorrectly which manifested as very
@@ -7,18 +7,20 @@
 // FIXME: This size is the same as gnu as, but we can probably do a bit better.
 // FIXME2: We need a debug_line dumper so that we can test the actual contents.
 
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x00000011) # '.debug_line'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000044)
-// CHECK-NEXT:  ('sh_size', 0x000000000000003d)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .debug_line
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x44
+// CHECK-NEXT:     Size: 61
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
 
 	.section	.debug_line,"",@progbits
 	.text
diff --git a/test/MC/ELF/diff.s b/test/MC/ELF/diff.s
index 4214fc7c0368..543651066969 100644
--- a/test/MC/ELF/diff.s
+++ b/test/MC/ELF/diff.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
 
         .global zed
 foo:
@@ -8,8 +8,4 @@ bar:
 zed:
         mov zed+(bar-foo), %eax
 
-// CHECK:       # Relocation 0
-// CHECK-NEXT:  (('r_offset', 0x0000000000000005)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend', 0x0000000000000001)
+// CHECK:       0x5 R_X86_64_32S zed 0x1
diff --git a/test/MC/ELF/empty-dwarf-lines.s b/test/MC/ELF/empty-dwarf-lines.s
index 7baedbcfb826..241580bb3b83 100644
--- a/test/MC/ELF/empty-dwarf-lines.s
+++ b/test/MC/ELF/empty-dwarf-lines.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the dwarf debug_line section contains no line directives.
 
@@ -7,15 +7,17 @@
 c:
         .asciz   "hi\n"
 
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x0000000c) # '.debug_line'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000044)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000027)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .debug_line
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x44
+// CHECK-NEXT:     Size: 39
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/empty.s b/test/MC/ELF/empty.s
index b38a621054b4..c421fe844882 100644
--- a/test/MC/ELF/empty.s
+++ b/test/MC/ELF/empty.s
@@ -1,70 +1,89 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump   | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that like gnu as we create text, data and bss by default. Also test
 // that shstrtab, symtab and strtab are listed in that order.
 
-// CHECK:      ('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000000)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-
-// CHECK:      ('sh_name', 0x00000026) # '.data'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000003)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000000)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-
-// CHECK:      ('sh_name', 0x00000007) # '.bss'
-// CHECK-NEXT: ('sh_type', 0x00000008)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000003)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000000)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-
-// CHECK:      ('sh_name', 0x0000000c) # '.shstrtab'
-// CHECK-NEXT: ('sh_type', 0x00000003)
-// CHECK-NEXT:    ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:    ('sh_size', 0x000000000000002c)
-// CHECK-NEXT:    ('sh_link', 0x00000000)
-// CHECK-NEXT:    ('sh_info', 0x00000000)
-// CHECK-NEXT:    ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:    ('sh_entsize', 0x0000000000000000)
-
-// CHECK: ('sh_name', 0x0000001e) # '.symtab'
-// CHECK-NEXT:    ('sh_type', 0x00000002)
-// CHECK-NEXT:    ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_offset',
-// CHECK-NEXT:    ('sh_size', 0x0000000000000060)
-// CHECK-NEXT:    ('sh_link', 0x00000006)
-// CHECK-NEXT:    ('sh_info', 0x00000004)
-// CHECK-NEXT:    ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:    ('sh_entsize', 0x0000000000000018)
-
-// CHECK: ('sh_name', 0x00000016) # '.strtab'
-// CHECK-NEXT:    ('sh_type', 0x00000003)
-// CHECK-NEXT:    ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_offset',
-// CHECK-NEXT:    ('sh_size', 0x0000000000000001)
-// CHECK-NEXT:    ('sh_link', 0x00000000)
-// CHECK-NEXT:    ('sh_info', 0x00000000)
-// CHECK-NEXT:    ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:    ('sh_entsize', 0x0000000000000000)
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .data
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_WRITE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .bss
+// CHECK-NEXT:     Type: SHT_NOBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_WRITE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .shstrtab
+// CHECK-NEXT:     Type: SHT_STRTAB
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 44
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .symtab
+// CHECK-NEXT:     Type: SHT_SYMTAB
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 96
+// CHECK-NEXT:     Link: 6
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .strtab
+// CHECK-NEXT:     Type: SHT_STRTAB
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 1
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/entsize.ll b/test/MC/ELF/entsize.ll
index dce6dbaa2a07..2bf9fa9fb594 100644
--- a/test/MC/ELF/entsize.ll
+++ b/test/MC/ELF/entsize.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=obj -mtriple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck -check-prefix=64 %s
+; RUN: llc -filetype=obj -mtriple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck -check-prefix=64 %s
 
 ; Test that constant mergeable strings have sh_entsize set.
 
@@ -20,25 +20,35 @@ declare void @foo(i64* nocapture) nounwind
 
 ;;;;;
 
-; 64: (('sh_name', 0x0000004e) # '.rodata.str1.1'
-; 64-NEXT:   ('sh_type', 0x00000001)
-; 64-NEXT:   ('sh_flags', 0x0000000000000032)
-; 64-NEXT:   ('sh_addr',
-; 64-NEXT:   ('sh_offset',
-; 64-NEXT:   ('sh_size', 0x000000000000000d)
-; 64-NEXT:   ('sh_link',
-; 64-NEXT:   ('sh_info',
-; 64-NEXT:   ('sh_addralign', 0x0000000000000001)
-; 64-NEXT:   ('sh_entsize', 0x0000000000000001)
-
-; 64: (('sh_name', 0x00000041) # '.rodata.cst8'
-; 64-NEXT:   ('sh_type', 0x00000001)
-; 64-NEXT:   ('sh_flags', 0x0000000000000012)
-; 64-NEXT:   ('sh_addr',
-; 64-NEXT:   ('sh_offset',
-; 64-NEXT:   ('sh_size', 0x0000000000000010)
-; 64-NEXT:   ('sh_link',
-; 64-NEXT:   ('sh_info',
-; 64-NEXT:   ('sh_addralign', 0x0000000000000008)
-; 64-NEXT:   ('sh_entsize', 0x0000000000000008)
-
+; 64:        Section {
+; 64:          Name: .rodata.str1.1
+; 64-NEXT:     Type: SHT_PROGBITS
+; 64-NEXT:     Flags [
+; 64-NEXT:       SHF_ALLOC
+; 64-NEXT:       SHF_MERGE
+; 64-NEXT:       SHF_STRINGS
+; 64-NEXT:     ]
+; 64-NEXT:     Address:
+; 64-NEXT:     Offset:
+; 64-NEXT:     Size: 13
+; 64-NEXT:     Link:
+; 64-NEXT:     Info:
+; 64-NEXT:     AddressAlignment: 1
+; 64-NEXT:     EntrySize: 1
+; 64-NEXT:   }
+
+; 64:        Section {
+; 64:          Name: .rodata.cst8
+; 64-NEXT:     Type: SHT_PROGBITS
+; 64-NEXT:     Flags [
+; 64-NEXT:       SHF_ALLOC
+; 64-NEXT:       SHF_MERGE
+; 64-NEXT:     ]
+; 64-NEXT:     Address:
+; 64-NEXT:     Offset:
+; 64-NEXT:     Size: 16
+; 64-NEXT:     Link:
+; 64-NEXT:     Info:
+; 64-NEXT:     AddressAlignment: 8
+; 64-NEXT:     EntrySize: 8
+; 64-NEXT:   }
diff --git a/test/MC/ELF/entsize.s b/test/MC/ELF/entsize.s
index 4645686b6d4e..8e084e24b255 100644
--- a/test/MC/ELF/entsize.s
+++ b/test/MC/ELF/entsize.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck  %s
 
 // Test that mergeable constants have sh_entsize set.
 
@@ -32,38 +32,53 @@
     .quad 42
     .quad 42
 
-// CHECK: # Section 4
-// CHECK-NEXT:   ('sh_name', 0x00000048) # '.rodata.str1.1'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000032)
-// CHECK-NEXT:   ('sh_addr',
-// CHECK-NEXT:   ('sh_offset',
-// CHECK-NEXT:   ('sh_size', 0x000000000000000d)
-// CHECK-NEXT:   ('sh_link',
-// CHECK-NEXT:   ('sh_info',
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000001)
-
-// CHECK: # Section 5
-// CHECK-NEXT:   ('sh_name', 0x00000039) # '.rodata.str2.1'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000032)
-// CHECK-NEXT:   ('sh_addr',
-// CHECK-NEXT:   ('sh_offset',
-// CHECK-NEXT:   ('sh_size', 0x0000000000000010)
-// CHECK-NEXT:   ('sh_link',
-// CHECK-NEXT:   ('sh_info',
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000002)
-
-// CHECK: # Section 6
-// CHECK-NEXT:   ('sh_name', 0x0000002c) # '.rodata.cst8
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000012)
-// CHECK-NEXT:   ('sh_addr',
-// CHECK-NEXT:   ('sh_offset',
-// CHECK-NEXT:   ('sh_size', 0x0000000000000010)
-// CHECK-NEXT:   ('sh_link',
-// CHECK-NEXT:   ('sh_info',
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000008)
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .rodata.str1.1
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_MERGE
+// CHECK-NEXT:       SHF_STRINGS
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 13
+// CHECK-NEXT:     Link:
+// CHECK-NEXT:     Info:
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 1
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Index: 5
+// CHECK-NEXT:     Name: .rodata.str2.1
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_MERGE
+// CHECK-NEXT:       SHF_STRINGS
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 16
+// CHECK-NEXT:     Link:
+// CHECK-NEXT:     Info:
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 2
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Index: 6
+// CHECK-NEXT:     Name: .rodata.cst8
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_MERGE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 16
+// CHECK-NEXT:     Link:
+// CHECK-NEXT:     Info:
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 8
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/file.s b/test/MC/ELF/file.s
index 434fb6e2b3c6..7e287f7e3fa9 100644
--- a/test/MC/ELF/file.s
+++ b/test/MC/ELF/file.s
@@ -1,23 +1,25 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that the STT_FILE symbol precedes the other local symbols.
 
 .file "foo"
 foa:
-// CHECK:    # Symbol 1
-// CHECK-NEXT:    (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x4)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0xfff1)
-// CHECK-NEXT:     ('st_value', 0x0000000000000000)
-// CHECK-NEXT:     ('st_size', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:    # Symbol 2
-// CHECK-NEXT:    (('st_name', 0x00000005) # 'foa'
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x0)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0001)
-// CHECK-NEXT:     ('st_value', 0x0000000000000000)
-// CHECK-NEXT:     ('st_size', 0x0000000000000000)
+
+// CHECK:        Symbol {
+// CHECK:          Name: foo (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: File
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foa (5)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/gen-dwarf.s b/test/MC/ELF/gen-dwarf.s
index 85e02428fe3f..907bf424f470 100644
--- a/test/MC/ELF/gen-dwarf.s
+++ b/test/MC/ELF/gen-dwarf.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -g -triple  i686-pc-linux-gnu %s -filetype=obj -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -g -triple  i686-pc-linux-gnu %s -filetype=obj -o - | llvm-readobj -r | FileCheck %s
 
 
 // Test that on ELF:
@@ -14,97 +14,13 @@ foo:
     ret
     .size foo, .-foo
 
-// Section 4 is .debug_line
-// CHECK:       # Section 4
-// CHECK-NEXT:  # '.debug_line'
-
-
-
-// The two relocations, one to symbol 6 and one to 4
-// CHECK:         # '.rel.debug_info'
-// CHECK-NEXT:   ('sh_type',
-// CHECK-NEXT:   ('sh_flags'
-// CHECK-NEXT:   ('sh_addr',
-// CHECK-NEXT:   ('sh_offset',
-// CHECK-NEXT:   ('sh_size',
-// CHECK-NEXT:   ('sh_link',
-// CHECK-NEXT:   ('sh_info',
-// CHECK-NEXT:   ('sh_addralign',
-// CHECK-NEXT:   ('sh_entsize',
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x00000006)
-// CHECK-NEXT:     ('r_sym', 0x000006)
-// CHECK-NEXT:     ('r_type', 0x01)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:    # Relocation 1
-// CHECK-NEXT:    (('r_offset', 0x0000000c)
-// CHECK-NEXT:     ('r_sym', 0x000004)
-// CHECK-NEXT:     ('r_type', 0x01)
-// CHECK-NEXT:    ),
-
-
-// Section 8 is .debug_abbrev
-// CHECK:       # Section 8
-// CHECK-NEXT:  (('sh_name', 0x00000001) # '.debug_abbrev'
-
-// Section 9 is .debug_aranges
-// CHECK:       # Section 9
-// CHECK-NEXT:  (('sh_name', 0x0000001e) # '.debug_aranges'
-
-// Two relocations in .debug_aranges, one to text and one to debug_info.
-// CHECK:       # '.rel.debug_aranges'
-// CHECK:       # Relocation 0
-// CHECK-NEXT:  (('r_offset', 0x00000006)
-// CHECK-NEXT:   ('r_sym', 0x000005)
-// CHECK-NEXT:   ('r_type', 0x01)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Relocation 1
-// CHECK-NEXT: (('r_offset', 0x00000010)
-// CHECK-NEXT:  ('r_sym', 0x000001)
-// CHECK-NEXT:  ('r_type', 0x01)
-// CHECK-NEXT: ),
-
-// Symbol 1 is section 1 (.text)
-// CHECK:         # Symbol 1
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_value', 0x00000000)
-// CHECK-NEXT:     ('st_size', 0x00000000)
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0001)
-// CHECK-NEXT:    ),
-
-// Symbol 4 is section 4 (.debug_line)
-// CHECK:         # Symbol 4
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_value', 0x00000000)
-// CHECK-NEXT:     ('st_size', 0x00000000)
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0004)
-// CHECK-NEXT:    ),
-
-// Symbol 5 is section 6 (.debug_info)
-// CHECK:         # Symbol 5
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_value', 0x00000000)
-// CHECK-NEXT:     ('st_size', 0x00000000)
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0006)
-// CHECK-NEXT:    ),
-
-// Symbol 6 is section 8 (.debug_abbrev)
-// CHECK:         # Symbol 6
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_value', 0x00000000)
-// CHECK-NEXT:     ('st_size', 0x00000000)
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0008)
-// CHECK-NEXT:    ),
+// CHECK:      Relocations [
+// CHECK:        Section ({{[^ ]+}}) .debug_info {
+// CHECK-NEXT:     0x6 R_386_32 .debug_abbrev 0x0
+// CHECK-NEXT:     0xC R_386_32 .debug_line 0x0
+// CHECK:        }
+// CHECK-NEXT:   Section ({{[^ ]+}}) .debug_aranges {
+// CHECK-NEXT:     0x6 R_386_32 .debug_info 0x0
+// CHECK-NEXT:     0x10 R_386_32 .text 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/global-offset.s b/test/MC/ELF/global-offset.s
index 81ae5d785df9..c6886734ca40 100644
--- a/test/MC/ELF/global-offset.s
+++ b/test/MC/ELF/global-offset.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck  %s
 
 // We test that _GLOBAL_OFFSET_TABLE_ will account for the two bytes at the
 // start of the addl/leal.
@@ -10,14 +10,20 @@
 foo:
         addl    _GLOBAL_OFFSET_TABLE_-foo,%ebx
 
-// CHECK:      ('sh_name', 0x00000005) # '.text'
-// CHECK-NEXT: ('sh_type',
-// CHECK-NEXT: ('sh_flags',
-// CHECK-NEXT: ('sh_addr',
-// CHECK-NEXT: ('sh_offset',
-// CHECK-NEXT: ('sh_size',
-// CHECK-NEXT: ('sh_link',
-// CHECK-NEXT: ('sh_info',
-// CHECK-NEXT: ('sh_addralign',
-// CHECK-NEXT: ('sh_entsize',
-// CHECK-NEXT: ('_section_data', '81c30200 00008d9b 02000000 031d0200 0000')
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type:
+// CHECK-NEXT:     Flags [
+// CHECK:          ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size:
+// CHECK-NEXT:     Link:
+// CHECK-NEXT:     Info:
+// CHECK-NEXT:     AddressAlignment:
+// CHECK-NEXT:     EntrySize:
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 81C30200 00008D9B 02000000 031D0200
+// CHECK-NEXT:       0010: 0000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/got.s b/test/MC/ELF/got.s
index a84987281818..60dea6d3b172 100644
--- a/test/MC/ELF/got.s
+++ b/test/MC/ELF/got.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
 
 // Test that this produces a R_X86_64_GOT32 and that we have an undefined
 // reference to _GLOBAL_OFFSET_TABLE_.
@@ -6,20 +6,15 @@
         movl	foo@GOT, %eax
         movl	foo@GOTPCREL(%rip), %eax
 
-// CHECK:      ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:    (('r_offset',
-// CHECK-NEXT:     ('r_sym',
-// CHECK-NEXT:     ('r_type', 0x00000003)
-// CHECK-NEXT:     ('r_addend',
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   # Relocation 1
-// CHECK-NEXT:    (('r_offset',
-// CHECK-NEXT:     ('r_sym',
-// CHECK-NEXT:     ('r_type', 0x00000009)
-// CHECK-NEXT:     ('r_addend',
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
+// CHECK:      Relocations [
+// CHECK:        Section ({{[^ ]+}}) .text {
+// CHECK-NEXT:       0x{{[^ ]+}} R_X86_64_GOT32 foo 0x{{[^ ]+}}
+// CHECK-NEXT:       0x{{[^ ]+}} R_X86_64_GOTPCREL foo 0x{{[^ ]+}}
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
-// CHECK:     (('st_name', 0x00000005) # '_GLOBAL_OFFSET_TABLE_'
-// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK:        Symbol {
+// CHECK:          Name: _GLOBAL_OFFSET_TABLE_
+// CHECK-NEXT:     Value:
+// CHECK-NEXT:     Size:
+// CHECK-NEXT:     Binding: Global
diff --git a/test/MC/ELF/ident.s b/test/MC/ELF/ident.s
index 56af19a30752..259220522e3c 100644
--- a/test/MC/ELF/ident.s
+++ b/test/MC/ELF/ident.s
@@ -1,16 +1,23 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
-// CHECK:       (('sh_name', 0x00000007) # '.comment'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_size', 0x000000000000000d)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000001)
-// CHECK-NEXT:   ('_section_data', '00666f6f 00626172 007a6564 00')
+// CHECK:        Section {
+// CHECK:          Name: .comment
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_MERGE
+// CHECK-NEXT:       SHF_STRINGS
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 13
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 1
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 00666F6F 00626172 007A6564 00
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
         .ident "foo"
         .ident "bar"
diff --git a/test/MC/ELF/lcomm.s b/test/MC/ELF/lcomm.s
index ae8d0baa3323..430b79b54b0a 100644
--- a/test/MC/ELF/lcomm.s
+++ b/test/MC/ELF/lcomm.s
@@ -1,21 +1,23 @@
-// RUN: llvm-mc -triple i386-pc-linux-gnu %s -filetype=obj -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-pc-linux-gnu %s -filetype=obj -o - | llvm-readobj -t | FileCheck %s
 
 .lcomm A, 5
 .lcomm B, 32 << 20
 
-// CHECK: (('st_name', 0x00000001) # 'A'
-// CHECK:  ('st_value', 0x00000000)
-// CHECK:  ('st_size', 0x00000005)
-// CHECK:  ('st_bind', 0x0)
-// CHECK:  ('st_type', 0x1)
-// CHECK:  ('st_other', 0x00)
-// CHECK:  ('st_shndx', 0x0003)
-// CHECK: ),
-// CHECK: (('st_name', 0x00000003) # 'B'
-// CHECK:  ('st_value', 0x00000005)
-// CHECK:  ('st_size', 0x02000000)
-// CHECK:  ('st_bind', 0x0)
-// CHECK:  ('st_type', 0x1)
-// CHECK:  ('st_other', 0x00)
-// CHECK:  ('st_shndx', 0x0003)
-// CHECK: ),
+// CHECK:        Symbol {
+// CHECK:          Name: A (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 5
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x3)
+// CHECK-NEXT:   }
+// CHECK:        Symbol {
+// CHECK:          Name: B (3)
+// CHECK-NEXT:     Value: 0x5
+// CHECK-NEXT:     Size: 33554432
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x3)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/leb128.s b/test/MC/ELF/leb128.s
index f6daac8ace5c..84c5b54fa1a5 100644
--- a/test/MC/ELF/leb128.s
+++ b/test/MC/ELF/leb128.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
         .sleb128 .Lfoo - .Lbar
 .Lfoo:
@@ -6,14 +6,29 @@
         .fill 126, 1, 0x90
 .Lbar:
 
-// CHECK:     (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000081)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ('_section_data', '817f7f90 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90')
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 129
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 817F7F90 90909090 90909090 90909090
+// CHECK-NEXT:       0010: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0020: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0030: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0040: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0050: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0060: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0070: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0080: 90
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/local-reloc.s b/test/MC/ELF/local-reloc.s
index b32a9cc16973..4241ba5af410 100644
--- a/test/MC/ELF/local-reloc.s
+++ b/test/MC/ELF/local-reloc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -r -t | FileCheck  %s
 
 // Test that relocations with local symbols are represented as relocations
 // with the section. They should be equivalent, but gas behaves like this.
@@ -6,26 +6,8 @@
 	movl	foo, %r14d
 foo:
 
-// Section number 1 is .text
-// CHECK:        # Section 1
-// CHECK-next:  (('sh_name', 0x00000001) # '.text'
-
-// Relocation refers to symbol number 2
-// CHECK:      ('_relocations', [
-// CHECK-NEXT:  # Relocation 0
-// CHECK-NEXT:   (('r_offset',
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type',
-// CHECK-NEXT:    ('r_addend',
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-
-// Symbol number 2 is section number 1
-// CHECK:    # Symbol 2
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0001)
-// CHECK-NEXT:     ('st_value', 0x0000000000000000)
-// CHECK-NEXT:     ('st_size', 0x0000000000000000)
+// CHECKT:     Relocations [
+// CHECK:        Section (1) .text {
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_32S .text 0x{{[^ ]+}}
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/many-sections-2.s b/test/MC/ELF/many-sections-2.s
index f5a1a0e43762..789ebf378d8e 100644
--- a/test/MC/ELF/many-sections-2.s
+++ b/test/MC/ELF/many-sections-2.s
@@ -1,5 +1,5 @@
 // RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t
-// RUN: llvm-readobj %t | FileCheck %s
+// RUN: llvm-readobj -s %t | FileCheck %s
 
 // CHECK: symtab_shndx
 
diff --git a/test/MC/ELF/merge.s b/test/MC/ELF/merge.s
index 11a80ad0350d..d34635a6710a 100644
--- a/test/MC/ELF/merge.s
+++ b/test/MC/ELF/merge.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck  %s
 
 // Test that PIC relocations with local symbols in a mergeable section are done
 // with a reference to the symbol. Not sure if this is a linker limitation,
@@ -22,76 +22,13 @@ zed:
         .section	bar,"ax",@progbits
 foo:
 
-// Relocation 0 refers to symbol 1
-// CHECK:       ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset',
-// CHECK-NEXT:    ('r_sym', 0x00000001)
-// CHECK-NEXT:    ('r_type', 0x00000002
-// CHECK-NEXT:    ('r_addend',
-// CHECK-NEXT:   ),
-
-// Relocation 1 refers to symbol 6
-// CHECK-NEXT:  # Relocation 1
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend',
-// CHECK-NEXT: ),
-
-// Relocation 2 refers to symbol 1
-// CHECK-NEXT:   # Relocation 2
-// CHECK-NEXT:   (('r_offset',
-// CHECK-NEXT:    ('r_sym', 0x00000001)
-// CHECK-NEXT:    ('r_type', 0x0000000a
-// CHECK-NEXT:    ('r_addend',
-// CHECK-NEXT:   ),
-
-// Relocation 3 refers to symbol 2
-// CHECK-NEXT:   # Relocation 3
-// CHECK-NEXT:   (('r_offset',
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000004
-// CHECK-NEXT:    ('r_addend',
-// CHECK-NEXT:   ),
-
-// Relocation 4 refers to symbol 2
-// CHECK-NEXT:   # Relocation 4
-// CHECK-NEXT:   (('r_offset',
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000009
-// CHECK-NEXT:    ('r_addend',
-// CHECK-NEXT:   ),
-
-// Relocation 5 refers to symbol 8
-// CHECK-NEXT:   # Relocation 5
-// CHECK-NEXT:   (('r_offset', 0x0000000000000023)
-// CHECK-NEXT:    ('r_sym', 0x00000008)
-// CHECK-NEXT:    ('r_type', 0x0000000b)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-
-// Section 5 is "sec1"
-// CHECK: # Section 5
-// CHECK-NEXT:  (('sh_name', 0x00000035) # '.sec1'
-
-// Symbol number 1 is .Lfoo
-// CHECK:      # Symbol 1
-// CHECK-NEXT: (('st_name', 0x00000001) # '.Lfoo'
-
-// Symbol number 2 is foo
-// CHECK:      # Symbol 2
-// CHECK-NEXT: (('st_name', 0x00000007) # 'foo'
-
-// Symbol number 6 is section 5
-// CHECK:        # Symbol 6
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0005)
-
-// Symbol number 8 is zed
-// CHECK:        # Symbol 8
-// CHECK-NEXT:    (('st_name', 0x0000000b) # 'zed'
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section (1) .text {
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_PC32    .Lfoo 0x{{[^ ]+}}
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_32      .sec1 0x{{[^ ]+}}
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_32      .Lfoo 0x{{[^ ]+}}
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_PLT32    foo  0x{{[^ ]+}}
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_GOTPCREL foo  0x{{[^ ]+}}
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_32S      zed  0x{{[^ ]+}}
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/n_bytes.s b/test/MC/ELF/n_bytes.s
index de6632232c06..e658de09a2fa 100644
--- a/test/MC/ELF/n_bytes.s
+++ b/test/MC/ELF/n_bytes.s
@@ -1,20 +1,30 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck  %s
 
         .2byte 42, 1, 2, 3
         .4byte 42, 1, 2, 3
         .8byte 42, 1, 2, 3
         .int 42, 1, 2, 3
 
-// CHECK:      # Section 1
-// CHECK-NEXT: (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '2a000100 02000300 2a000000 01000000 02000000 03000000 2a000000 00000000 01000000 00000000 02000000 00000000 03000000 00000000 2a000000 01000000 02000000 03000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 1
+// CHECK-NEXT:     Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 72
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 2A000100 02000300 2A000000 01000000
+// CHECK-NEXT:       0010: 02000000 03000000 2A000000 00000000
+// CHECK-NEXT:       0020: 01000000 00000000 02000000 00000000
+// CHECK-NEXT:       0030: 03000000 00000000 2A000000 01000000
+// CHECK-NEXT:       0040: 02000000 03000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/noexec.s b/test/MC/ELF/noexec.s
index d8b7b3233864..33cb8ae3452b 100644
--- a/test/MC/ELF/noexec.s
+++ b/test/MC/ELF/noexec.s
@@ -1,24 +1,26 @@
-// RUN: llvm-mc -mc-no-exec-stack -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck  %s
+// RUN: llvm-mc -mc-no-exec-stack -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck  %s
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.note.GNU-stack'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .note.GNU-stack
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
 
-// CHECK:       # Symbol 4
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x3)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0004)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Symbol {
+// CHECK:          Name: .note.GNU-stack (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .note.GNU-stack (0x4)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/norelocation.s b/test/MC/ELF/norelocation.s
index c6394791ad80..137038292802 100644
--- a/test/MC/ELF/norelocation.s
+++ b/test/MC/ELF/norelocation.s
@@ -1,18 +1,26 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd -sr | FileCheck  %s
 
         call bar
 bar:
 
-// CHECK: ('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000005)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ('_section_data', 'e8000000 00')
-// CHECK-NOT: .rela.text
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [ (0x6)
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 5
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: E8000000 00
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 // CHECK: shstrtab
diff --git a/test/MC/ELF/org.s b/test/MC/ELF/org.s
index 3afc364b2102..d878fa1aab45 100644
--- a/test/MC/ELF/org.s
+++ b/test/MC/ELF/org.s
@@ -1,13 +1,15 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump   | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
         .zero 4
 foo:
         .zero 4
         .org foo+16
 
-// CHECK:     (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT: ('sh_type',
-// CHECK-NEXT: ('sh_flags',
-// CHECK-NEXT: ('sh_addr',
-// CHECK-NEXT: ('sh_offset'
-// CHECK-NEXT: ('sh_size', 0x0000000000000014)
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type:
+// CHECK-NEXT:     Flags [
+// CHECK:          ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 20
diff --git a/test/MC/ELF/pic-diff.s b/test/MC/ELF/pic-diff.s
index 2c68f6cc718a..cffa0dd368eb 100644
--- a/test/MC/ELF/pic-diff.s
+++ b/test/MC/ELF/pic-diff.s
@@ -1,23 +1,20 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
 
-// CHECK:       ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x000000000000000c)
-// CHECK-NEXT:     ('r_sym', 0x00000005)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000008)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[^ ]+}}) {{[^ ]+}} {
+// CHECK-NEXT:     0xC R_X86_64_PC32 baz 0x8
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
-// CHECK:         # Symbol 5
-// CHECK-NEXT:    (('st_name', 0x00000005) # 'baz'
-// CHECK-NEXT:     ('st_bind', 0x1)
-// CHECK-NEXT:     ('st_type', 0x0)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0000)
-// CHECK-NEXT:     ('st_value', 0x0000000000000000)
-// CHECK-NEXT:     ('st_size', 0x0000000000000000)
-// CHECK-NEXT:    ),
+// CHECK:        Symbol {
+// CHECK:          Name: baz (5)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
 
 .zero 4
 .data
diff --git a/test/MC/ELF/plt.s b/test/MC/ELF/plt.s
index 7d78e23443e7..604a4bf3a38e 100644
--- a/test/MC/ELF/plt.s
+++ b/test/MC/ELF/plt.s
@@ -1,14 +1,11 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
 
 // Test that this produces a R_X86_64_PLT32.
 
 	jmp	foo@PLT
 
-// CHECK:      ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:    (('r_offset',
-// CHECK-NEXT:     ('r_sym',
-// CHECK-NEXT:     ('r_type', 0x00000004)
-// CHECK-NEXT:     ('r_addend',
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[^ ]+}}) {{[^ ]+}} {
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_PLT32 {{[^ ]+}} 0x{{[^ ]+}}
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/pr9292.s b/test/MC/ELF/pr9292.s
index 05f377faa71c..a6e78dc992a8 100644
--- a/test/MC/ELF/pr9292.s
+++ b/test/MC/ELF/pr9292.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that both foo and bar are undefined.
 
@@ -7,20 +7,21 @@
 mov %eax,bar
 
 
-// CHECK:      (('st_name', 0x00000005) # 'bar'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 5
-// CHECK-NEXT: (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: bar (5)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/relax-arith.s b/test/MC/ELF/relax-arith.s
index 3236b41e532c..b8145564db08 100644
--- a/test/MC/ELF/relax-arith.s
+++ b/test/MC/ELF/relax-arith.s
@@ -1,11 +1,16 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck  %s
 
 // Test that we correctly relax these instructions into versions that use
 // 16 or 32 bit immediate values.
 
 bar:
-// CHECK: 'imul'
-// CHECK: ('_section_data', '6669db00 0066691c 25000000 00000069 db000000 00691c25 00000000 00000000 4869db00 00000048 691c2500 00000000 000000')
+// CHECK:      Name: imul
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6669DB00 0066691C 25000000 00000069
+// CHECK-NEXT:   0010: DB000000 00691C25 00000000 00000000
+// CHECK-NEXT:   0020: 4869DB00 00000048 691C2500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section imul
         imul $foo, %bx,  %bx
         imul $foo, bar,  %bx
@@ -14,8 +19,14 @@ bar:
         imul $foo, %rbx, %rbx
         imul $foo, bar,  %rbx
 
-// CHECK: and'
-// CHECK:('_section_data', '6681e300 00668124 25000000 00000081 e3000000 00812425 00000000 00000000 4881e300 00000048 81242500 00000000 000000')
+
+// CHECK:      Name: and
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6681E300 00668124 25000000 00000081
+// CHECK-NEXT:   0010: E3000000 00812425 00000000 00000000
+// CHECK-NEXT:   0020: 4881E300 00000048 81242500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section and
         and  $foo, %bx
         andw $foo, bar
@@ -24,8 +35,13 @@ bar:
         and  $foo, %rbx
         andq $foo, bar
 
-// CHECK: 'or'
-// CHECK: ('_section_data', '6681cb00 0066810c 25000000 00000081 cb000000 00810c25 00000000 00000000 4881cb00 00000048 810c2500 00000000 000000')
+// CHECK:      Name: or
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6681CB00 0066810C 25000000 00000081
+// CHECK-NEXT:   0010: CB000000 00810C25 00000000 00000000
+// CHECK-NEXT:   0020: 4881CB00 00000048 810C2500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section or
         or  $foo, %bx
         orw $foo, bar
@@ -34,8 +50,13 @@ bar:
         or  $foo, %rbx
         orq $foo, bar
 
-// CHECK: 'xor'
-// CHECK: ('_section_data', '6681f300 00668134 25000000 00000081 f3000000 00813425 00000000 00000000 4881f300 00000048 81342500 00000000 000000')
+// CHECK:      Name: xor
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6681F300 00668134 25000000 00000081
+// CHECK-NEXT:   0010: F3000000 00813425 00000000 00000000
+// CHECK-NEXT:   0020: 4881F300 00000048 81342500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section xor
         xor  $foo, %bx
         xorw $foo, bar
@@ -44,8 +65,13 @@ bar:
         xor  $foo, %rbx
         xorq $foo, bar
 
-// CHECK: 'add'
-// CHECK: ('_section_data', '6681c300 00668104 25000000 00000081 c3000000 00810425 00000000 00000000 4881c300 00000048 81042500 00000000 000000')
+// CHECK:      Name: add
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6681C300 00668104 25000000 00000081
+// CHECK-NEXT:   0010: C3000000 00810425 00000000 00000000
+// CHECK-NEXT:   0020: 4881C300 00000048 81042500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section add
         add  $foo, %bx
         addw $foo, bar
@@ -54,8 +80,13 @@ bar:
         add  $foo, %rbx
         addq $foo, bar
 
-// CHECK: 'sub'
-// CHECK: ('_section_data', '6681eb00 0066812c 25000000 00000081 eb000000 00812c25 00000000 00000000 4881eb00 00000048 812c2500 00000000 000000')
+// CHECK:      Name: sub
+// CHECK:      SectionData (
+// CHECK-NEXT:   000: 6681EB00 0066812C 25000000 00000081
+// CHECK-NEXT:   010: EB000000 00812C25 00000000 00000000
+// CHECK-NEXT:   020: 4881EB00 00000048 812C2500 00000000
+// CHECK-NEXT:   030: 000000
+// CHECK-NEXT: )
         .section sub
         sub  $foo, %bx
         subw $foo, bar
@@ -64,8 +95,13 @@ bar:
         sub  $foo, %rbx
         subq $foo, bar
 
-// CHECK: 'cmp'
-// CHECK: ('_section_data', '6681fb00 0066813c 25000000 00000081 fb000000 00813c25 00000000 00000000 4881fb00 00000048 813c2500 00000000 000000')
+// CHECK:      Name: cmp
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6681FB00 0066813C 25000000 00000081
+// CHECK-NEXT:   0010: FB000000 00813C25 00000000 00000000
+// CHECK-NEXT:   0020: 4881FB00 00000048 813C2500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section cmp
         cmp  $foo, %bx
         cmpw $foo, bar
diff --git a/test/MC/ELF/relax.s b/test/MC/ELF/relax.s
index 0b5d24f0f7ff..49ee8e2eefe0 100644
--- a/test/MC/ELF/relax.s
+++ b/test/MC/ELF/relax.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd -t | FileCheck  %s
 
 // Test that we do not relax these.
 
@@ -11,17 +11,23 @@ foo:
         jmp foo
         jmp zed
 
-// CHECK: ('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000006)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ('_section_data', 'ebfeebfc ebfa')
-
-// CHECK:       # Symbol 6
-// CHECK-NEXT: (('st_name', 0x00000005) # 'foo'
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 6
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: EBFEEBFC EBFA
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK:        Symbol {
+// CHECK:          Name: foo (5)
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index 85da2eb8c949..24d0172e724e 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -1,205 +1,86 @@
-// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck  %s
 
 // Test that we produce the correct relocation types and that the relocations
 // correctly point to the section or the symbol.
 
-// CHECK:      # Relocation 0
-// CHECK-NEXT: (('r_offset', 0x00000002)
-// CHECK-NEXT:  ('r_sym', 0x000001)
-// CHECK-NEXT:  ('r_type', 0x09)
-// CHECK-NEXT: ),
-// CHECK-NEXT:  # Relocation 1
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym',
-// CHECK-NEXT:  ('r_type', 0x04)
-// CHECK-NEXT: ),
-// CHECK-NEXT:  # Relocation 2
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym',
-// CHECK-NEXT:  ('r_type', 0x0a)
-// CHECK-NEXT: ),
-
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section (1) .text {
+// CHECK-NEXT:     0x2          R_386_GOTOFF     .Lfoo 0x0
+// CHECK-NEXT:     0x{{[^ ]+}}  R_386_PLT32      bar2 0x0
+// CHECK-NEXT:     0x{{[^ ]+}}  R_386_GOTPC      _GLOBAL_OFFSET_TABLE_ 0x0
 // Relocation 3 (bar3@GOTOFF) is done with symbol 7 (bss)
-// CHECK-NEXT:  # Relocation 3
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym', 0x000007
-// CHECK-NEXT:  ('r_type',
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x{{[^ ]+}}  R_386_GOTOFF     .bss 0x0
 // Relocation 4 (bar2@GOT) is of type R_386_GOT32
-// CHECK-NEXT:  # Relocation 4
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym',
-// CHECK-NEXT:  ('r_type', 0x03
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x{{[^ ]+}}  R_386_GOT32      bar2j 0x0
 
 // Relocation 5 (foo@TLSGD) is of type R_386_TLS_GD
-// CHECK-NEXT: # Relocation 5
-// CHECK-NEXT: (('r_offset', 0x00000020)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x12)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x20         R_386_TLS_GD     foo 0x0
 // Relocation 6 ($foo@TPOFF) is of type R_386_TLS_LE_32
-// CHECK-NEXT: # Relocation 6
-// CHECK-NEXT: (('r_offset', 0x00000025)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x22)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x25         R_386_TLS_LE_32  foo 0x0
 // Relocation 7 (foo@INDNTPOFF) is of type R_386_TLS_IE
-// CHECK-NEXT: # Relocation 7
-// CHECK-NEXT: (('r_offset', 0x0000002b)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x0f)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x2B         R_386_TLS_IE     foo 0x0
 // Relocation 8 (foo@NTPOFF) is of type R_386_TLS_LE
-// CHECK-NEXT: # Relocation 8
-// CHECK-NEXT: (('r_offset', 0x00000031)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x11)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x31         R_386_TLS_LE     foo 0x0
 // Relocation 9 (foo@GOTNTPOFF) is of type R_386_TLS_GOTIE
-// CHECK-NEXT: # Relocation 9
-// CHECK-NEXT: (('r_offset', 0x00000037)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x10)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x37         R_386_TLS_GOTIE  foo 0x0
 // Relocation 10 (foo@TLSLDM) is of type R_386_TLS_LDM
-// CHECK-NEXT: # Relocation 10
-// CHECK-NEXT: (('r_offset', 0x0000003d)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x13)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x3D         R_386_TLS_LDM    foo 0x0
 // Relocation 11 (foo@DTPOFF) is of type R_386_TLS_LDO_32
-// CHECK-NEXT: # Relocation 11
-// CHECK-NEXT: (('r_offset', 0x00000043)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x20)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x43         R_386_TLS_LDO_32 foo 0x0
 // Relocation 12 (calll 4096) is of type R_386_PC32
-// CHECK-NEXT: # Relocation 12
-// CHECK-NEXT: (('r_offset', 0x00000048)
-// CHECK-NEXT:  ('r_sym', 0x000000)
-// CHECK-NEXT:  ('r_type', 0x02)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x48         R_386_PC32       - 0x0
 // Relocation 13 (zed@GOT) is of type R_386_GOT32 and uses the symbol
-// CHECK-NEXT: # Relocation 13
-// CHECK-NEXT: (('r_offset', 0x0000004e)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x03)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x4E         R_386_GOT32      zed 0x0
 // Relocation 14 (zed@GOTOFF) is of type R_386_GOTOFF and uses the symbol
-// CHECK-NEXT: # Relocation 14
-// CHECK-NEXT: (('r_offset', 0x00000054)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x09)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x54         R_386_GOTOFF     zed 0x0
 // Relocation 15 (zed@INDNTPOFF) is of type R_386_TLS_IE and uses the symbol
-// CHECK-NEXT: # Relocation 15
-// CHECK-NEXT: (('r_offset', 0x0000005a)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x0f)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x5A         R_386_TLS_IE     zed 0x0
 // Relocation 16 (zed@NTPOFF) is of type R_386_TLS_LE and uses the symbol
-// CHECK-NEXT: # Relocation 16
-// CHECK-NEXT: (('r_offset', 0x00000060)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x11)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x60         R_386_TLS_LE     zed 0x0
 // Relocation 17 (zed@GOTNTPOFF) is of type R_386_TLS_GOTIE and uses the symbol
-// CHECK-NEXT: # Relocation 17
-// CHECK-NEXT: (('r_offset', 0x00000066)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x10)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x66         R_386_TLS_GOTIE  zed 0x0
 // Relocation 18 (zed@PLT) is of type R_386_PLT32 and uses the symbol
-// CHECK-NEXT: # Relocation 18
-// CHECK-NEXT: (('r_offset', 0x0000006b)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x04)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x6B         R_386_PLT32      zed 0x0
 // Relocation 19 (zed@TLSGD) is of type R_386_TLS_GD and uses the symbol
-// CHECK-NEXT: # Relocation 19
-// CHECK-NEXT: (('r_offset', 0x00000071)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x12)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x71         R_386_TLS_GD     zed 0x0
 // Relocation 20 (zed@TLSLDM) is of type R_386_TLS_LDM and uses the symbol
-// CHECK-NEXT: # Relocation 20
-// CHECK-NEXT: (('r_offset', 0x00000077)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x13)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x77         R_386_TLS_LDM    zed 0x0
 // Relocation 21 (zed@TPOFF) is of type R_386_TLS_LE_32 and uses the symbol
-// CHECK-NEXT:# Relocation 21
-// CHECK-NEXT: (('r_offset', 0x0000007d)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x22)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x7D         R_386_TLS_LE_32  zed 0x0
 // Relocation 22 (zed@DTPOFF) is of type R_386_TLS_LDO_32 and uses the symbol
-// CHECK-NEXT: Relocation 22
-// CHECK-NEXT: (('r_offset', 0x00000083)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x20)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x83         R_386_TLS_LDO_32 zed 0x0
 // Relocation 23 ($bar) is of type R_386_32 and uses the section
-// CHECK-NEXT: Relocation 23
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym',
-// CHECK-NEXT:  ('r_type', 0x01)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x{{[^ ]+}}  R_386_32         .text 0x0
 // Relocation 24 (foo@GOTTPOFF(%edx)) is of type R_386_TLS_IE_32 and uses the
 // symbol
-// CHECK-NEXT: Relocation 24
-// CHECK-NEXT: (('r_offset', 0x0000008e)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x21)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x8E         R_386_TLS_IE_32  foo 0x0
 // Relocation 25 (_GLOBAL_OFFSET_TABLE_-bar2) is of type R_386_GOTPC.
-// CHECK-NEXT: Relocation 25
-// CHECK-NEXT: (('r_offset', 0x00000094)
-// CHECK-NEXT:  ('r_sym', 0x00000b)
-// CHECK-NEXT:  ('r_type', 0x0a)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x94         R_386_GOTPC      _GLOBAL_OFFSET_TABLE_ 0x0
 // Relocation 26 (und_symbol-bar2) is of type R_386_PC32
-// CHECK-NEXT: Relocation 26
-// CHECK-NEXT: (('r_offset', 0x0000009a)
-// CHECK-NEXT:  ('r_sym', 0x00000e)
-// CHECK-NEXT:  ('r_type', 0x02)
-// CHECK-NEXT: ),
-
-// Section 4 is bss
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x0000000b) # '.bss'
-
-// CHECK:      # Symbol 1
-// CHECK-NEXT: (('st_name', 0x00000005) # '.Lfoo'
+// CHECK-NEXT:     0x9A         R_386_PC32       und_symbol 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
 // Symbol 4 is zed
-// CHECK:      # Symbol 4
-// CHECK-NEXT: (('st_name', 0x00000035) # 'zed'
-// CHECK-NEXT:  ('st_value', 0x00000000)
-// CHECK-NEXT:  ('st_size', 0x00000000)
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x6)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0005)
-
+// CHECK:        Symbol {
+// CHECK:          Name: zed (53)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: zedsec (0x5)
+// CHECK-NEXT:   }
 // Symbol 7 is section 4
-// CHECK:      # Symbol 7
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK-NEXT:  ('st_value', 0x00000000)
-// CHECK-NEXT:  ('st_size', 0x00000000)
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x3)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0004)
-
+// CHECK:        Symbol {
+// CHECK:          Name: .bss (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x4)
+// CHECK-NEXT:   }
 
         .text
 bar:
diff --git a/test/MC/ELF/relocation-pc.s b/test/MC/ELF/relocation-pc.s
index b6279c3e55dd..551f5ff378ef 100644
--- a/test/MC/ELF/relocation-pc.s
+++ b/test/MC/ELF/relocation-pc.s
@@ -1,33 +1,32 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr | FileCheck  %s
 
 // Test that we produce the correct relocation.
 
 	loope	0                 # R_X86_64_PC8
 	jmp	-256              # R_X86_64_PC32
 
-// CHECK:      # Section 2
-// CHECK-NEXT: (('sh_name', 0x00000001) # '.rela.text'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x00000000000002e8)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000006)
-// CHECK-NEXT:  ('sh_info', 0x00000001)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000001)
-// CHECK-NEXT:    ('r_sym', 0x00000000)
-// CHECK-NEXT:    ('r_type', 0x0000000f)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 1
-// CHECK-NEXT:   (('r_offset', 0x0000000000000003)
-// CHECK-NEXT:    ('r_sym', 0x00000000)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 1
+// CHECK-NEXT:     Name: .text
+// CHECK:          Relocations [
+// CHECK-NEXT:       0x1 R_X86_64_PC8 - 0x0
+// CHECK-NEXT:       0x3 R_X86_64_PC32 - 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
+
+// CHECK:        Section {
+// CHECK:          Index: 2
+// CHECK-NEXT:     Name: .rela.text
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x2E8
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 6
+// CHECK-NEXT:     Info: 1
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s
index 5db213bc0497..19bcc18d8f88 100644
--- a/test/MC/ELF/relocation.s
+++ b/test/MC/ELF/relocation.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -t | FileCheck  %s
 
 // Test that we produce the correct relocation.
 
@@ -20,102 +20,33 @@ bar:
         addq	$bar,%rax         # R_X86_64_32S
 
 
-// CHECK:  # Section 1
-// CHECK: (('sh_name', 0x00000006) # '.text'
-
-// CHECK: # Relocation 0
-// CHECK-NEXT:  (('r_offset', 0x0000000000000001)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000a)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 1
-// CHECK-NEXT:  (('r_offset', 0x0000000000000008)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 2
-// CHECK-NEXT:  (('r_offset', 0x0000000000000013)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 3
-// CHECK-NEXT:  (('r_offset', 0x000000000000001a)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 4
-// CHECK-NEXT:  (('r_offset', 0x0000000000000022)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 5
-// CHECK-NEXT:  (('r_offset', 0x0000000000000026)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000a)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 6
-// CHECK-NEXT:  (('r_offset', 0x000000000000002d)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x00000016)
-// CHECK-NEXT:   ('r_addend', 0xfffffffffffffffc)
-
-// CHECK:  # Relocation 7
-// CHECK-NEXT:  (('r_offset', 0x0000000000000034)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x00000013)
-// CHECK-NEXT:   ('r_addend', 0xfffffffffffffffc)
-
-// CHECK:  # Relocation 8
-// CHECK-NEXT:  (('r_offset', 0x000000000000003b)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x00000017)
-// CHECK-NEXT:   ('r_addend', 0x0000000000000000)
-
-// CHECK:  # Relocation 9
-// CHECK-NEXT:  (('r_offset', 0x0000000000000042)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x00000014)
-// CHECK-NEXT:   ('r_addend', 0xfffffffffffffffc)
-
-// CHECK:  # Relocation 10
-// CHECK-NEXT:  (('r_offset', 0x0000000000000049)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x00000015)
-// CHECK-NEXT:   ('r_addend', 0x0000000000000000)
-
-// CHECK: # Relocation 11
-// CHECK-NEXT:  (('r_offset', 0x000000000000004e)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend', 0x0000000000000000)
-
-// CHECK: # Relocation 12
-// CHECK-NEXT: (('r_offset', 0x0000000000000055)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x00000002)
-// CHECK-NEXT:  ('r_addend', 0xfffffffffffffffc)
-
-// CHECK: # Relocation 13
-// CHECK-NEXT: (('r_offset', 0x000000000000005c)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x00000002)
-// CHECK-NEXT:  ('r_addend', 0x000000000000005c)
-
-// CHECK: # Relocation 14
-// CHECK-NEXT: (('r_offset', 0x0000000000000063)
-// CHECK-NEXT:  ('r_sym', 0x00000002)
-// CHECK-NEXT:  ('r_type', 0x0000000b)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-
-// CHECK:   # Symbol 2
-// CHECK: (('st_name', 0x00000000) # ''
-// CHECK:  ('st_bind', 0x0)
-// CHECK:  ('st_type', 0x3)
-// CHECK:  ('st_other', 0x00)
-// CHECK:  ('st_shndx', 0x0001)
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK:          Relocations [
+// CHECK-NEXT:       0x1 R_X86_64_32        .text
+// CHECK-NEXT:       0x8 R_X86_64_32S       .text
+// CHECK-NEXT:       0x13 R_X86_64_32S      .text
+// CHECK-NEXT:       0x1A R_X86_64_32S      .text
+// CHECK-NEXT:       0x22 R_X86_64_32S      .text
+// CHECK-NEXT:       0x26 R_X86_64_32       .text
+// CHECK-NEXT:       0x2D R_X86_64_GOTTPOFF foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:       0x34 R_X86_64_TLSGD    foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:       0x3B R_X86_64_TPOFF32  foo 0x0
+// CHECK-NEXT:       0x42 R_X86_64_TLSLD    foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:       0x49 R_X86_64_DTPOFF32 foo 0x0
+// CHECK-NEXT:       0x4E R_X86_64_32S      .text 0x0
+// CHECK-NEXT:       0x55 R_X86_64_PC32     foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:       0x5C R_X86_64_PC32     foo 0x5C
+// CHECK-NEXT:       0x63 R_X86_64_32S      .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
+
+// CHECK:        Symbol {
+// CHECK:          Name: .text (0)
+// CHECK-NEXT:     Value:
+// CHECK-NEXT:     Size:
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/rename.s b/test/MC/ELF/rename.s
index 241aa05ecbde..c50910b41ec0 100644
--- a/test/MC/ELF/rename.s
+++ b/test/MC/ELF/rename.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -t | FileCheck %s
 
 // When doing a rename, all the checks for where the relocation should go
 // should be performed with the original symbol. Only if we decide to relocate
@@ -16,31 +16,33 @@ defined3:
         .global defined1
 
 // Section 1 is .text
-// CHECK:      # Section 1
-// CHECK-NEXT: (('sh_name', 0x00000006) # '.text'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000004)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-
-// The relocation uses symbol 2
-// CHECK:      # Relocation 0
-// CHECK-NEXT: (('r_offset', 0x0000000000000000)
-// CHECK-NEXT:  ('r_sym', 0x00000002)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
+// CHECK:        Section {
+// CHECK:          Index: 1
+// CHECK-NEXT:     Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 4
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x0 R_X86_64_32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
 
 // Symbol 2 is section 1
-// CHECK:      # Symbol 2
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x3)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK:        Symbol {
+// CHECK:          Name: .text (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/section.s b/test/MC/ELF/section.s
index c71e1a72c471..a6794034c94d 100644
--- a/test/MC/ELF/section.s
+++ b/test/MC/ELF/section.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that these names are accepted.
 
@@ -7,10 +7,10 @@
 .section	.note.GNU-,"",@progbits
 .section	-.note.GNU,"",@progbits
 
-// CHECK: ('sh_name', 0x00000038) # '.note.GNU-stack'
-// CHECK: ('sh_name', 0x0000008f) # '.note.GNU-stack2'
-// CHECK: ('sh_name', 0x000000a0) # '.note.GNU-'
-// CHECK: ('sh_name', 0x00000084) # '-.note.GNU'
+// CHECK: Name: .note.GNU-stack (56)
+// CHECK: Name: .note.GNU-stack2 (143)
+// CHECK: Name: .note.GNU- (160)
+// CHECK: Name: -.note.GNU (132)
 
 // Test that the defaults are used
 
@@ -19,66 +19,81 @@
 .section	.rodata
 .section	zed, ""
 
-// CHECK:      (('sh_name', 0x00000012) # '.init'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Section 11
-// CHECK-NEXT: (('sh_name', 0x00000048) # '.fini'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Section 12
-// CHECK-NEXT: (('sh_name', 0x00000076) # '.rodata'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Section 13
-// CHECK-NEXT: (('sh_name', 0x00000058) # 'zed'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .init
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 11
+// CHECK-NEXT:     Name: .fini
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 12
+// CHECK-NEXT:     Name: .rodata
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 13
+// CHECK-NEXT:     Name: zed
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
 
 .section	.note.test,"",@note
-// CHECK:       (('sh_name', 0x00000007) # '.note.test'
-// CHECK-NEXT:   ('sh_type', 0x00000007)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Name: .note.test
+// CHECK-NEXT:     Type: SHT_NOTE
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
 
 // Test that we can parse these
 foo:
@@ -90,21 +105,26 @@ bar:
 
 .section .eh_frame,"a",@unwind
 
-// CHECK:       (('sh_name', 0x0000004e) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x70000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_X86_64_UNWIND
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
 
 // Test that we handle the strings like gas
 .section bar-"foo"
 .section "foo"
 
-// CHECK: ('sh_name', 0x000000ab) # 'bar-"foo"'
-// CHECK: ('sh_name', 0x00000034) # 'foo'
+// CHECK:        Section {
+// CHECK:          Name: bar-"foo" (171)
+// CHECK:        Section {
+// CHECK:          Name: foo (52)
diff --git a/test/MC/ELF/set.s b/test/MC/ELF/set.s
index 2258b1923678..f6965a583a9f 100644
--- a/test/MC/ELF/set.s
+++ b/test/MC/ELF/set.s
@@ -1,17 +1,18 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck  %s
 
 // Test that we emit the correct value.
 
 .set kernbase,0xffffffff80000000
 
-// CHECK:         (('st_name', 0x00000001) # 'kernbase'
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x0)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0xfff1)
-// CHECK-NEXT:     ('st_value', 0xffffffff80000000)
-// CHECK-NEXT:     ('st_size', 0x0000000000000000)
-// CHECK-NEXT:    ),
+// CHECK:        Symbol {
+// CHECK:          Name: kernbase (1)
+// CHECK-NEXT:     Value: 0xFFFFFFFF80000000
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF1)
+// CHECK-NEXT:   }
 
 // Test that we accept .set of a symbol after it has been used in a statement.
 
@@ -24,11 +25,12 @@
 	.set	foo2,bar2
 
 // Test that there is an undefined reference to bar
-// CHECK:      (('st_name', 0x0000000a) # 'bar'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: bar (10)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/sleb.s b/test/MC/ELF/sleb.s
index 00e5b4bf2821..5cba5829a12b 100644
--- a/test/MC/ELF/sleb.s
+++ b/test/MC/ELF/sleb.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_32 %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_64 %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_64 %s
 // RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_32 %s
 // RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_64 %s
 
@@ -19,10 +19,14 @@ foo:
 
 	.sleb128        8193
 
-// ELF_32: ('sh_name', 0x00000001) # '.text'
-// ELF_32: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
-// ELF_64: ('sh_name', 0x00000001) # '.text'
-// ELF_64: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
+// ELF_32:   Name: .text
+// ELF_32:   SectionData (
+// ELF_32:     0000: 00017F3F 40C000BF 7FFF3F80 4081C000
+// ELF_32:   )
+// ELF_64:   Name: .text
+// ELF_64:   SectionData (
+// ELF_64:     0000: 00017F3F 40C000BF 7FFF3F80 4081C000
+// ELF_64:   )
 // MACHO_32: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // MACHO_32: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
 // MACHO_64: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
diff --git a/test/MC/ELF/subsection.s b/test/MC/ELF/subsection.s
new file mode 100644
index 000000000000..d437cacf6347
--- /dev/null
+++ b/test/MC/ELF/subsection.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -filetype=obj %s -o - -triple x86_64-pc-linux | llvm-objdump -s - | FileCheck %s
+
+// CHECK: Contents of section .text:
+// CHECK-NEXT: 0000 03042502 00000003 04250100 0000ebf7
+.text 1
+add 1, %eax
+jmp label
+.subsection
+add 2, %eax
+label:
+
+// CHECK-NOT: Contents of section .rela.text:
+
+// CHECK: Contents of section .data:
+// CHECK-NEXT: 0000 01030402 74657374
+.data
+l0:
+.byte 1
+.subsection 1+1
+l1:
+.byte 2
+l2:
+.subsection l2-l1
+.byte l1-l0
+.subsection 3
+.ascii "test"
+.previous
+.byte 4
+
+// CHECK: Contents of section test:
+// CHECK-NEXT: 0000 010302
+.section test
+.byte 1
+.pushsection test, 1
+.byte 2
+.popsection
+.byte 3
diff --git a/test/MC/ELF/symref.s b/test/MC/ELF/symref.s
index 2dfa058ab086..9a71a81930ee 100644
--- a/test/MC/ELF/symref.s
+++ b/test/MC/ELF/symref.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
 
 defined1:
 defined2:
@@ -21,145 +21,122 @@ defined3:
         .symver global1, g1@@zed
 global1:
 
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section (1) .text {
+// CHECK-NEXT:     0x0 R_X86_64_32 .text 0x0
+// CHECK-NEXT:     0x4 R_X86_64_32 bar2@zed 0x0
+// CHECK-NEXT:     0x8 R_X86_64_32 .text 0x0
+// CHECK-NEXT:     0xC R_X86_64_32 .text 0x0
+// CHECK-NEXT:     0x10 R_X86_64_32 bar6@zed 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
-// CHECK:      # Relocation 0
-// CHECK-NEXT: (('r_offset', 0x0000000000000000)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 1
-// CHECK-NEXT: (('r_offset', 0x0000000000000004)
-// CHECK-NEXT:  ('r_sym', 0x0000000b)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 2
-// CHECK-NEXT: (('r_offset', 0x0000000000000008)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 3
-// CHECK-NEXT: (('r_offset', 0x000000000000000c)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 4
-// CHECK-NEXT: (('r_offset', 0x0000000000000010)
-// CHECK-NEXT:  ('r_sym', 0x0000000c)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT:])
-
-// CHECK:      # Symbol 1
-// CHECK-NEXT: (('st_name', 0x00000013) # 'bar1@zed'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 2
-// CHECK-NEXT: (('st_name', 0x00000025) # 'bar3@@zed'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 3
-// CHECK-NEXT: (('st_name', 0x0000002f) # 'bar5@@zed'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 4
-// CHECK-NEXT: (('st_name', 0x00000001) # 'defined1'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 5
-// CHECK-NEXT: (('st_name', 0x0000000a) # 'defined2'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 6
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x3)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 7
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x3)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0003)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 8
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x3)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0004)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 9
-// CHECK-NEXT: (('st_name', 0x0000004a) # 'g1@@zed'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000014)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 10
-// CHECK-NEXT: (('st_name', 0x00000042) # 'global1'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000014)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 11
-// CHECK-NEXT: (('st_name', 0x0000001c) # 'bar2@zed'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 12
-// CHECK-NEXT: (('st_name', 0x00000039) # 'bar6@zed'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT:])
+// CHECK:        Symbol {
+// CHECK:          Name: bar1@zed (19)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar3@@zed (37)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar5@@zed (47)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: defined1 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: defined2 (10)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .text (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .data (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .data (0x3)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .bss (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x4)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: g1@@zed (74)
+// CHECK-NEXT:     Value: 0x14
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: global1 (66)
+// CHECK-NEXT:     Value: 0x14
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar2@zed (28)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar6@zed (57)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/tls-i386.s b/test/MC/ELF/tls-i386.s
index 922d4c6e6c2c..267046ef5bfb 100644
--- a/test/MC/ELF/tls-i386.s
+++ b/test/MC/ELF/tls-i386.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that all symbols are of type STT_TLS.
 
@@ -17,129 +17,129 @@
         .long   fooD@DTPOFF
         .long   fooE@INDNTPOFF
 
-// CHECK:       (('st_name', 0x00000001) # 'foo1'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 6
-// CHECK-NEXT:  (('st_name', 0x00000006) # 'foo2'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 7
-// CHECK-NEXT:  (('st_name', 0x0000000b) # 'foo3'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 8
-// CHECK-NEXT:  (('st_name', 0x00000010) # 'foo4'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 9
-// CHECK-NEXT:  (('st_name', 0x00000015) # 'foo5'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 10
-// CHECK-NEXT:  (('st_name', 0x0000001a) # 'foo6'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 11
-// CHECK-NEXT:  (('st_name', 0x0000001f) # 'foo7'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 12
-// CHECK-NEXT:  (('st_name', 0x00000024) # 'foo8'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 13
-// CHECK-NEXT:  (('st_name', 0x00000029) # 'foo9'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 14
-// CHECK-NEXT:  (('st_name', 0x0000002e) # 'fooA'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 15
-// CHECK-NEXT:  (('st_name', 0x00000033) # 'fooB'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 16
-// CHECK-NEXT:  (('st_name', 0x00000038) # 'fooC'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 17
-// CHECK-NEXT:  (('st_name', 0x0000003d) # 'fooD'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 18
-// CHECK-NEXT:  (('st_name', 0x00000042) # 'fooE'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-
+// CHECK:        Symbol {
+// CHECK:          Name: foo1 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo2 (6)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo3 (11)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo4 (16)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo5 (21)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo6 (26)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo7 (31)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo8 (36)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo9 (41)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: fooA (46)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: fooB (51)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: fooC (56)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: fooD (61)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: fooE (66)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/tls.s b/test/MC/ELF/tls.s
index fe2bb4e73307..c71e3962bb49 100644
--- a/test/MC/ELF/tls.s
+++ b/test/MC/ELF/tls.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that all symbols are of type STT_TLS.
 
@@ -12,66 +12,67 @@
 foobar:
 	.long	43
 
-// CHECK:      (('st_name', 0x0000001f) # 'foobar'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x6)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0005)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: foobar (31)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .zed (0x5)
+// CHECK-NEXT:   }
 
-// CHECK:       # Symbol 7
-// CHECK-NEXT:  (('st_name', 0x00000001) # 'foo1'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 8
-// CHECK-NEXT:  (('st_name', 0x00000006) # 'foo2'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 9
-// CHECK-NEXT:  (('st_name', 0x0000000b) # 'foo3'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 10
-// CHECK-NEXT:  (('st_name', 0x00000010) # 'foo4'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 11
-// CHECK-NEXT:  (('st_name', 0x00000015) # 'foo5'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 12
-// CHECK-NEXT:  (('st_name', 0x0000001a) # 'foo6'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Symbol {
+// CHECK:          Name: foo1 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo2 (6)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo3 (11)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo4 (16)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo5 (21)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo6 (26)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/type.s b/test/MC/ELF/type.s
index ec53e4ffa524..a5b98129c94a 100644
--- a/test/MC/ELF/type.s
+++ b/test/MC/ELF/type.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that both % and @ are accepted.
         .global foo
@@ -12,35 +12,76 @@ bar:
 // Test that gnu_unique_object is accepted.
         .type zed,@gnu_unique_object
 
+obj:
+        .global obj
+        .type obj,@object
+        .type obj,@notype
+
+func:
+        .global func
+        .type func,@function
+        .type func,@object
+
 ifunc:
         .global ifunc
         .type ifunc,@gnu_indirect_function
 
-// CHECK:      # Symbol 4
-// CHECK-NEXT: (('st_name', 0x00000005) # 'bar'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x1)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 5
-// CHECK-NEXT: (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x2)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 6
-// CHECK-NEXT: (('st_name', 0x00000009) # 'ifunc'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0xa)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+tls:
+        .global tls
+        .type tls,@tls_object
+        .type tls,@gnu_indirect_function
 
+// CHECK:        Symbol {
+// CHECK:          Name: bar
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Function
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: func
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Function
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: ifunc
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: GNU_IFunc
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: obj
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: tls
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/uleb.s b/test/MC/ELF/uleb.s
index 1e4734bcafc6..d755cc23e392 100644
--- a/test/MC/ELF/uleb.s
+++ b/test/MC/ELF/uleb.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_32 %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_64 %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_64 %s
 // RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_32 %s
 // RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_64 %s
 
@@ -12,10 +12,14 @@ foo:
 	.uleb128	16383
 	.uleb128	16384
 
-// ELF_32: ('sh_name', 0x00000001) # '.text'
-// ELF_32: ('_section_data', '00017f80 01ff7f80 8001')
-// ELF_64: ('sh_name', 0x00000001) # '.text'
-// ELF_64: ('_section_data', '00017f80 01ff7f80 8001')
+// ELF_32:   Name: .text
+// ELF_32:   SectionData (
+// ELF_32:     0000: 00017F80 01FF7F80 8001
+// ELF_32:   )
+// ELF_64:   Name: .text
+// ELF_64:   SectionData (
+// ELF_64:     0000: 00017F80 01FF7F80 8001
+// ELF_64:   )
 // MACHO_32: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // MACHO_32: ('_section_data', '00017f80 01ff7f80 8001')
 // MACHO_64: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
diff --git a/test/MC/ELF/undef.s b/test/MC/ELF/undef.s
index e377c6331747..0d89fb129361 100644
--- a/test/MC/ELF/undef.s
+++ b/test/MC/ELF/undef.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test which symbols should be in the symbol table
 
@@ -19,28 +19,21 @@
         .text
         movsd   .Lsym8(%rip), %xmm1
 
-// CHECK:      ('_symbols', [
-// CHECK-NEXT:  # Symbol 0
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 1
-// CHECK-NEXT:  (('st_name', 0x0000000d) # '.Lsym8'
-// CHECK:       # Symbol 2
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 3
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 4
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 5
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 6
-// CHECK-NEXT:  (('st_name', 0x00000001) # '.Lsym1'
-// CHECK:       # Symbol 7
-// CHECK-NEXT:  (('st_name', 0x00000008) # 'sym6'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x1)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
+// CHECK:      Symbols [
+
+// CHECK:        Symbol {
+// CHECK:          Name: .Lsym8
+
+// CHECK:        Symbol {
+// CHECK:          Name: .Lsym1
+
+// CHECK:        Symbol {
+// CHECK:          Name: sym6
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/undef2.s b/test/MC/ELF/undef2.s
index 6f971c55bf6c..6aa66c05c4bb 100644
--- a/test/MC/ELF/undef2.s
+++ b/test/MC/ELF/undef2.s
@@ -1,10 +1,18 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s
 
 // Test that this produces an undefined reference to .Lfoo
 
         je	.Lfoo
 
-// CHECK: ('_symbols', [
-// CHECK:      (('st_name', 0x00000001) # '.Lfoo'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK: (('sh_name', 0x0000001b) # '.strtab'
+// CHECK:       Section {
+// CHECK:         Name: .strtab
+
+// CHECK:       Symbol {
+// CHECK:         Name: .Lfoo
+// CHECK-NEXT:    Value:
+// CHECK-NEXT:    Size:
+// CHECK-NEXT:    Binding: Global
+// CHECK-NEXT:    Type:
+// CHECK-NEXT:    Other:
+// CHECK-NEXT:    Section:
+// CHECK-NEXT:  }
diff --git a/test/MC/ELF/version.s b/test/MC/ELF/version.s
index 31e952afeb40..0bc9c8b7a782 100644
--- a/test/MC/ELF/version.s
+++ b/test/MC/ELF/version.s
@@ -1,17 +1,23 @@
-// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck  %s
 
 .version "1234"
 .version "123"
 
-// CHECK:       (('sh_name', 0x0000000c) # '.note'
-// CHECK-NEXT:   ('sh_type', 0x00000007)
-// CHECK-NEXT:   ('sh_flags', 0x00000000)
-// CHECK-NEXT:   ('sh_addr', 0x00000000)
-// CHECK-NEXT:   ('sh_offset', 0x00000034)
-// CHECK-NEXT:   ('sh_size', 0x00000024)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x00000004)
-// CHECK-NEXT:   ('sh_entsize', 0x00000000)
-// CHECK-NEXT:   ('_section_data', '05000000 00000000 01000000 31323334 00000000 04000000 00000000 01000000 31323300')
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Name: .note
+// CHECK-NEXT:     Type: SHT_NOTE
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x34
+// CHECK-NEXT:     Size: 36
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 05000000 00000000 01000000 31323334
+// CHECK-NEXT:       0010: 00000000 04000000 00000000 01000000
+// CHECK-NEXT:       0020: 31323300
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/weak-relocation.s b/test/MC/ELF/weak-relocation.s
index 88e841e65bf5..0f5bba2383d7 100644
--- a/test/MC/ELF/weak-relocation.s
+++ b/test/MC/ELF/weak-relocation.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
 
 // Test that weak symbols always produce relocations
 
@@ -7,9 +7,8 @@ foo:
 bar:
         call    foo
 
-//CHECK:        # Relocation 0
-//CHECK-NEXT:   (('r_offset', 0x0000000000000001)
-//CHECK-NEXT:    ('r_sym', 0x00000005)
-//CHECK-NEXT:    ('r_type', 0x00000002)
-//CHECK-NEXT:    ('r_addend', 0xfffffffffffffffc)
-//CHECK-NEXT:   ),
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[0-9]+}}) .text {
+// CHECK-NEXT:     0x1 R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/weak.s b/test/MC/ELF/weak.s
index 07a83913631a..2ed3eb7b2bd4 100644
--- a/test/MC/ELF/weak.s
+++ b/test/MC/ELF/weak.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that this produces a weak undefined symbol.
 
@@ -9,22 +9,22 @@
         .weak bar
 bar:
 
-//CHECK:        # Symbol 4
-//CHECK-NEXT:   (('st_name', 0x00000005) # 'bar'
-//CHECK-NEXT:    ('st_bind', 0x2)
-//CHECK-NEXT:    ('st_type', 0x0)
-//CHECK-NEXT:    ('st_other', 0x00)
-//CHECK-NEXT:    ('st_shndx', 0x0001)
-//CHECK-NEXT:    ('st_value', 0x0000000000000004)
-//CHECK-NEXT:    ('st_size', 0x0000000000000000)
-//CHECK-NEXT:   ),
-//CHECK-NEXT:   # Symbol 5
-//CHECK:       (('st_name', 0x00000001) # 'foo'
-//CHECK-NEXT:   ('st_bind', 0x2)
-//CHECK-NEXT:   ('st_type', 0x0)
-//CHECK-NEXT:   ('st_other', 0x00)
-//CHECK-NEXT:   ('st_shndx', 0x0000)
-//CHECK-NEXT:   ('st_value', 0x0000000000000000)
-//CHECK-NEXT:   ('st_size', 0x0000000000000000)
-//CHECK-NEXT:  ),
-//CHECK-NEXT: ])
+// CHECK:        Symbol {
+// CHECK:          Name: bar
+// CHECK-NEXT:     Value: 0x4
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK:        Symbol {
+// CHECK:          Name: foo
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
diff --git a/test/MC/ELF/weakref-plt.s b/test/MC/ELF/weakref-plt.s
index 2e500935b6db..d6486dcd5283 100644
--- a/test/MC/ELF/weakref-plt.s
+++ b/test/MC/ELF/weakref-plt.s
@@ -1,8 +1,14 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 	.weakref	bar,foo
 	call	bar@PLT
 
-// CHECK:      # Symbol 5
-// CHECK-NEXT: (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:  ('st_bind', 0x2)
+// CHECK:        Symbol {
+// CHECK:          Name: foo
+// CHECK-NEXT:     Value:
+// CHECK-NEXT:     Size:
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type:
+// CHECK-NEXT:     Other:
+// CHECK-NEXT:     Section:
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/weakref-reloc.s b/test/MC/ELF/weakref-reloc.s
index 4bbf2645a8cd..48bda8748fde 100644
--- a/test/MC/ELF/weakref-reloc.s
+++ b/test/MC/ELF/weakref-reloc.s
@@ -1,49 +1,44 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
 
 // Test that the relocations point to the correct symbols. We used to get the
 // symbol index wrong for weakrefs when creating _GLOBAL_OFFSET_TABLE_.
 
-	.weakref	bar,foo
+	.weakref bar,foo
         call    zed@PLT
-	call	bar
+     call	bar
 
-// CHECK:      # Relocation 0
-// CHECK-NEXT: (('r_offset', 0x0000000000000001)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x00000004)
-// CHECK-NEXT:  ('r_addend', 0xfffffffffffffffc)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 1
-// CHECK-NEXT: (('r_offset', 0x0000000000000006)
-// CHECK-NEXT:  ('r_sym', 0x00000005)
-// CHECK-NEXT:  ('r_type', 0x00000002)
-// CHECK-NEXT:  ('r_addend', 0xfffffffffffffffc)
-// CHECK-NEXT: ),
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[0-9]+}}) {{[^ ]+}} {
+// CHECK-NEXT:     0x1 R_X86_64_PLT32 zed 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:     0x6 R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
-// CHECK:      # Symbol 4
-// CHECK-NEXT: (('st_name', 0x00000009) # '_GLOBAL_OFFSET_TABLE_'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 5
-// CHECK-NEXT: (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:  ('st_bind', 0x2)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 6
-// CHECK-NEXT: (('st_name', 0x00000005) # 'zed'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:      Symbols [
+// CHECK:        Symbol {
+// CHECK:          Name: _GLOBAL_OFFSET_TABLE_ (9)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: zed (5)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/weakref.s b/test/MC/ELF/weakref.s
index e12d2c74eb8a..87173641b56f 100644
--- a/test/MC/ELF/weakref.s
+++ b/test/MC/ELF/weakref.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // This is a long test that checks that the aliases created by weakref are
 // never in the symbol table and that the only case it causes a symbol to
@@ -69,166 +69,158 @@ bar15:
         .long bar15
         .long foo15
 
-// CHECK:       # Symbol 0
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 1
-// CHECK-NEXT:  (('st_name', 0x00000015) # 'bar6'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000018)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 2
-// CHECK-NEXT:  (('st_name', 0x0000001a) # 'bar7'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000018)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 3
-// CHECK-NEXT:  (('st_name', 0x0000001f) # 'bar8'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x000000000000001c)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 4
-// CHECK-NEXT:  (('st_name', 0x00000024) # 'bar9'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000020)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 5
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x3)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 6
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x3)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0003)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 7
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x3)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0004)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 8
-// CHECK-NEXT:  (('st_name', 0x00000029) # 'bar10'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000028)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 9
-// CHECK-NEXT:  (('st_name', 0x0000002f) # 'bar11'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000030)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 10
-// CHECK-NEXT:  (('st_name', 0x00000035) # 'bar12'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000030)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 11
-// CHECK-NEXT:  (('st_name', 0x0000003b) # 'bar13'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000034)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 12
-// CHECK-NEXT:  (('st_name', 0x00000041) # 'bar14'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000038)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 13
-// CHECK-NEXT:  (('st_name', 0x00000047) # 'bar15'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000040)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 14
-// CHECK-NEXT:  (('st_name', 0x00000001) # 'bar2'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 15
-// CHECK-NEXT:  (('st_name', 0x00000006) # 'bar3'
-// CHECK-NEXT:   ('st_bind', 0x2)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 16
-// CHECK-NEXT:  (('st_name', 0x0000000b) # 'bar4'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 17
-// CHECK-NEXT:  (('st_name', 0x00000010) # 'bar5'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT: ])
+// CHECK:      Symbols [
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar6 (21)
+// CHECK-NEXT:     Value: 0x18
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar7 (26)
+// CHECK-NEXT:     Value: 0x18
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar8 (31)
+// CHECK-NEXT:     Value: 0x1C
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar9 (36)
+// CHECK-NEXT:     Value: 0x20
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .text (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .data (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .data (0x3)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .bss (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x4)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar10 (41)
+// CHECK-NEXT:     Value: 0x28
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar11 (47)
+// CHECK-NEXT:     Value: 0x30
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar12 (53)
+// CHECK-NEXT:     Value: 0x30
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar13 (59)
+// CHECK-NEXT:     Value: 0x34
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar14 (65)
+// CHECK-NEXT:     Value: 0x38
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar15 (71)
+// CHECK-NEXT:     Value: 0x40
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar2 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar3 (6)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar4 (11)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar5 (16)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/x86_64-reloc-sizetest.s b/test/MC/ELF/x86_64-reloc-sizetest.s
index acca2f5076a5..bd67ee0f9de8 100644
--- a/test/MC/ELF/x86_64-reloc-sizetest.s
+++ b/test/MC/ELF/x86_64-reloc-sizetest.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-linux-gnu -filetype=obj %s | elf-dump | FileCheck %s
+// RUN: llvm-mc -triple x86_64-linux-gnu -filetype=obj %s | llvm-readobj -r | FileCheck %s
 
 // Tests that relocation value fits in the provided size
 // Original bug http://llvm.org/bugs/show_bug.cgi?id=10568
@@ -6,8 +6,8 @@
 L: movq $(L + 2147483648),%rax
 
 
-// CHECK:          Relocation 0
-// CHECK-NEXT:     ('r_offset', 0x0000000000000003)
-// CHECK-NEXT:     ('r_sym'
-// CHECK-NEXT:     ('r_type', 0x0000000b)
-// CHECK-NEXT:     ('r_addend', 0x0000000080000000
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[0-9]+}}) .text {
+// CHECK-NEXT:     0x3 R_X86_64_32S {{[^ ]+}} 0x80000000
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/zero.s b/test/MC/ELF/zero.s
index 46ffe17cfbf7..be92eb839e1c 100644
--- a/test/MC/ELF/zero.s
+++ b/test/MC/ELF/zero.s
@@ -1,16 +1,23 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 .zero 4
 .zero 1,42
 
-// CHECK: ('sh_name', 0x00000001) # '.text'
-// CHECK: ('sh_type', 0x00000001)
-// CHECK: ('sh_flags', 0x0000000000000006)
-// CHECK: ('sh_addr', 0x0000000000000000)
-// CHECK: ('sh_offset', 0x0000000000000040)
-// CHECK: ('sh_size', 0x0000000000000005)
-// CHECK: ('sh_link', 0x00000000)
-// CHECK: ('sh_info', 0x00000000)
-// CHECK: ('sh_addralign', 0x0000000000000004)
-// CHECK: ('sh_entsize', 0x0000000000000000)
-// CHECK: ('_section_data', '00000000 2a')
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 5
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 00000000 2A
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/Mips/ef_frame.ll b/test/MC/Mips/ef_frame.ll
deleted file mode 100644
index 91c8b43d02c5..000000000000
--- a/test/MC/Mips/ef_frame.ll
+++ /dev/null
@@ -1,52 +0,0 @@
-; This tests .eh_frame CIE descriptor for the.
-; Data alignment factor
-
-; RUN: llc -filetype=obj -mcpu=mips64r2 -mattr=n64 -march=mips64el %s -o - \
-; RUN: | llvm-objdump -s - | FileCheck %s
-
-; N64
-; CHECK: Contents of section .eh_frame:
-; CHECK-NEXT:  0000 1c000000 00000000 017a504c 52000178  .........zPLR..x
-; CHECK-NEXT:  0010 1f0b0000 00000000 00000000 000c1d00  ................
-; CHECK-NEXT:  0020 2c000000 24000000 00000000 00000000  ,...$...........
-; CHECK-NEXT:  0030 7c000000 00000000 08000000 00000000  |...............
-; CHECK-NEXT:  0040 00440e10 489f019c 02000000 00000000  .D..H...........
-
-; ModuleID = 'simple_throw.cpp'
-
-@_ZTIi = external constant i8*
-@str = private unnamed_addr constant [7 x i8] c"All ok\00"
-
-define i32 @main() {
-entry:
-  %exception.i = tail call i8* @__cxa_allocate_exception(i64 4) nounwind
-  %0 = bitcast i8* %exception.i to i32*
-  store i32 5, i32* %0, align 4
-  invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn
-          to label %.noexc unwind label %return
-
-.noexc:                                           ; preds = %entry
-  unreachable
-
-return:                                           ; preds = %entry
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          catch i8* null
-  %2 = extractvalue { i8*, i32 } %1, 0
-  %3 = tail call i8* @__cxa_begin_catch(i8* %2) nounwind
-  %puts = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @str, i64 0, i64 0))
-  tail call void @__cxa_end_catch()
-  ret i32 0
-}
-
-declare i32 @__gxx_personality_v0(...)
-
-declare i8* @__cxa_begin_catch(i8*)
-
-declare void @__cxa_end_catch()
-
-declare i8* @__cxa_allocate_exception(i64)
-
-declare void @__cxa_throw(i8*, i8*, i8*)
-
-declare i32 @puts(i8* nocapture) nounwind
-
diff --git a/test/MC/Mips/eh-frame.s b/test/MC/Mips/eh-frame.s
new file mode 100644
index 000000000000..93ff0b8bd277
--- /dev/null
+++ b/test/MC/Mips/eh-frame.s
@@ -0,0 +1,167 @@
+// Test the bits of .eh_frame on mips that are already implemented correctly.
+
+// FIXME: This test would be a lot cleaner if llvm-objdump had the
+// --dwarf=frames option.
+
+// RUN: llvm-mc -filetype=obj %s -o %t.o -arch=mips
+// RUN: llvm-objdump -r -s %t.o | FileCheck --check-prefix=MIPS32 %s
+
+// RUN: llvm-mc -filetype=obj %s -o %t.o -arch=mipsel
+// RUN: llvm-objdump -r -s %t.o | FileCheck --check-prefix=MIPS32EL %s
+
+// RUN: llvm-mc -filetype=obj %s -o %t.o -arch=mips64
+// RUN: llvm-objdump -r -s %t.o | FileCheck --check-prefix=MIPS64 %s
+
+// RUN: llvm-mc -filetype=obj %s -o %t.o -arch=mips64el
+// RUN: llvm-objdump -r -s %t.o | FileCheck --check-prefix=MIPS64EL %s
+
+func:
+	.cfi_startproc
+	.cfi_endproc
+
+// MIPS32: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS32-NEXT: R_MIPS_32
+// MIPS32: Contents of section .eh_frame:
+// MIPS32-NEXT: 0000
+
+// Length
+// MIPS32: 00000010
+
+// CIE ID
+// MIPS32: 00000000
+
+// Version
+// MIPS32: 01
+
+// Augmentation String
+// MIPS32: 7a5200
+
+// Code Alignment Factor
+// MIPS32: 01
+
+// Data Alignment Factor (-4)
+// MIPS32: 7c
+
+// Return Address Register
+// MIPS32: 1f
+
+// Augmentation Size
+// MIPS32: 01
+
+// MIPS32: .........zR..|..
+// MIPS32-NEXT: 0010
+
+// Augmentation (fde pointer encoding: DW_EH_PE_sdata4)
+// MIPS32: 0b
+// FIXME: The instructions are different from the ones produces by gas.
+
+// MIPS32EL: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS32EL-NEXT: R_MIPS_32
+// MIPS32EL: Contents of section .eh_frame:
+// MIPS32EL-NEXT: 0000
+
+// Length
+// MIPS32EL: 10000000
+
+// CIE ID
+// MIPS32EL: 00000000
+
+// Version
+// MIPS32EL: 01
+
+// Augmentation String
+// MIPS32EL: 7a5200
+
+// Code Alignment Factor
+// MIPS32EL: 01
+
+// Data Alignment Factor (-4)
+// MIPS32EL: 7c
+
+// Return Address Register
+// MIPS32EL: 1f
+
+// Augmentation Size
+// MIPS32EL: 01
+
+// MIPS32EL: .........zR..|..
+// MIPS32EL-NEXT: 0010
+
+// Augmentation (fde pointer encoding: DW_EH_PE_sdata4)
+// MIPS32EL: 0b
+// FIXME: The instructions are different from the ones produces by gas.
+
+// MIPS64: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS64-NEXT: R_MIPS_64
+// MIPS64: Contents of section .eh_frame:
+// MIPS64-NEXT: 0000
+
+// Length
+// MIPS64: 00000010
+
+// CIE ID
+// MIPS64: 00000000
+
+// Version
+// MIPS64: 01
+
+// Augmentation String
+// MIPS64: 7a5200
+
+// Code Alignment Factor
+// MIPS64: 01
+
+// Data Alignment Factor (-8). GAS uses -4. Should be ok as long as all
+// offsets we need are a multiple of 8.
+// MIPS64: 78
+
+// Return Address Register
+// MIPS64: 1f
+
+// Augmentation Size
+// MIPS64: 01
+
+// MIPS64: .........zR..x..
+// MIPS64-NEXT: 0010
+
+// Augmentation (fde pointer encoding: DW_EH_PE_sdata8)
+// MIPS64: 0c
+// FIXME: The instructions are different from the ones produces by gas.
+
+
+// MIPS64EL: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS64EL-NEXT: R_MIPS_64
+// MIPS64EL: Contents of section .eh_frame:
+// MIPS64EL-NEXT: 0000
+
+// Length
+// MIPS64EL: 10000000
+
+// CIE ID
+// MIPS64EL: 00000000
+
+// Version
+// MIPS64EL: 01
+
+// Augmentation String
+// MIPS64EL: 7a5200
+
+// Code Alignment Factor
+// MIPS64EL: 01
+
+// Data Alignment Factor (-8). GAS uses -4. Should be ok as long as all
+// offsets we need are a multiple of 8.
+// MIPS64EL: 78
+
+// Return Address Register
+// MIPS64EL: 1f
+
+// Augmentation Size
+// MIPS64EL: 01
+
+// MIPS64EL: .........zR..x..
+// MIPS64EL-NEXT: 0010
+
+// Augmentation (fde pointer encoding: DW_EH_PE_sdata8)
+// MIPS64EL: 0c
+// FIXME: The instructions are different from the ones produces by gas.
diff --git a/test/MC/Mips/elf-N64.ll b/test/MC/Mips/elf-N64.ll
index ae6de78d6552..a1ea34a80a71 100644
--- a/test/MC/Mips/elf-N64.ll
+++ b/test/MC/Mips/elf-N64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 -disable-mips-delay-filler %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 -disable-mips-delay-filler %s -o - | llvm-readobj -r | FileCheck %s
 
 ; Check for N64 relocation production.
 ;
@@ -12,25 +12,12 @@ define i32 @main() nounwind {
 entry:
 ; Check that the appropriate relocations were created.
 
-; R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16
-; CHECK:     ('r_type3', 0x05)
-; CHECK-NEXT:     ('r_type2', 0x18)
-; CHECK-NEXT:     ('r_type', 0x07)
-
-; R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16
-; CHECK:     ('r_type3', 0x06)
-; CHECK-NEXT:     ('r_type2', 0x18)
-; CHECK-NEXT:     ('r_type', 0x07)
-
-; R_MIPS_GOT_OFST/R_MIPS_NONE/R_MIPS_NONE
-; CHECK:     ('r_type3', 0x00)
-; CHECK-NEXT:     ('r_type2', 0x00)
-; CHECK-NEXT:     ('r_type', 0x14)
-
-; R_MIPS_GOT_OFST/R_MIPS_NONE/R_MIPS_NONE
-; CHECK:     ('r_type3', 0x00)
-; CHECK-NEXT:     ('r_type2', 0x00)
-; CHECK-NEXT:     ('r_type', 0x15)
+; CHECK: Relocations [
+; CHECK:   0x{{[0-9,A-F]+}} R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16
+; CHECK:   0x{{[0-9,A-F]+}} R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16
+; CHECK:   0x{{[0-9,A-F]+}} R_MIPS_GOT_PAGE/R_MIPS_NONE/R_MIPS_NONE
+; CHECK:   0x{{[0-9,A-F]+}} R_MIPS_GOT_OFST/R_MIPS_NONE/R_MIPS_NONE
+; CHECK: ]
 
   %puts = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i64 0, i64 0))
   ret i32 0
diff --git a/test/MC/Mips/elf-bigendian.ll b/test/MC/Mips/elf-bigendian.ll
index 7111debd11b1..a92fe339f88b 100644
--- a/test/MC/Mips/elf-bigendian.ll
+++ b/test/MC/Mips/elf-bigendian.ll
@@ -1,24 +1,37 @@
-; DISABLE: llc -filetype=obj -mtriple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; DISABLE: llc -filetype=obj -mtriple mips-unknown-linux %s -o - | llvm-readobj -h -s -sd | FileCheck %s
 ; RUN: false
 ; XFAIL: *
 
 ; Check that this is big endian.
-; CHECK: ('e_indent[EI_DATA]', 0x02)
+; CHECK: ElfHeader {
+; CHECK:   Ident {
+; CHECK:     DataEncoding: BigEndian
+; CHECK:   }
+; CHECK: }
 
 ; Make sure that a section table (text) entry is correct.
-; CHECK:   (('sh_name', 0x{{[0]*}}5) # '.text'
-; CHECK-NEXT:   ('sh_type', 0x{{[0]*}}1)
-; CHECK-NEXT:   ('sh_flags', 0x{{[0]*}}6)
-; CHECK-NEXT:   ('sh_addr', 0x{{[0-9,a-f]+}})
-; CHECK-NEXT:   ('sh_offset', 0x{{[0-9,a-f]+}})
-; CHECK-NEXT:   ('sh_size', 0x{{[0-9,a-f]+}})
-; CHECK-NEXT:   ('sh_link', 0x{{[0]+}})
-; CHECK-NEXT:   ('sh_info', 0x{{[0]+}})
-; CHECK-NEXT:   ('sh_addralign', 0x{{[0]*}}4)
-; CHECK-NEXT:   ('sh_entsize', 0x{{[0]+}})
+; CHECK:      Sections [
+; CHECK:        Section {
+; CHECK:          Index:
+; CHECK:          Name: .text
+; CHECK-NEXT:     Type: SHT_PROGBITS
+; CHECK-NEXT:     Flags [ (0x6)
+; CHECK-NEXT:       SHF_ALLOC
+; CHECK-NEXT:       SHF_EXECINSTR
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     Address: 0x{{[0-9,A-F]+}}
+; CHECK-NEXT:     Offset: 0x{{[0-9,A-F]+}}
+; CHECK-NEXT:     Size: {{[0-9]+}}
+; CHECK-NEXT:     Link: 0
+; CHECK-NEXT:     Info: 0
+; CHECK-NEXT:     AddressAlignment: 4
+; CHECK-NEXT:     EntrySize: 0
 
 ; See that at least first 3 instructions are correct: GP prologue
-; CHECK-NEXT:   ('_section_data', '3c1c0000 279c0000 0399e021 {{[0-9,a-f, ]*}}')
+; CHECK-NEXT:     SectionData (
+; CHECK-NEXT:       0000: 3C1C0000 279C0000 0399E021 {{[0-9,A-F, ]*}}
+; CHECK:          )
+; CHECK:   }
 
 ; ModuleID = '../br1.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"
diff --git a/test/MC/Mips/elf-gprel-32-64.ll b/test/MC/Mips/elf-gprel-32-64.ll
index b94682214df7..47003fa1e752 100644
--- a/test/MC/Mips/elf-gprel-32-64.ll
+++ b/test/MC/Mips/elf-gprel-32-64.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - \
-; RUN: | elf-dump --dump-section-data \
+; RUN: | llvm-readobj -r \
 ; RUN: | FileCheck %s
 
 define i32 @test(i32 %c) nounwind {
@@ -30,8 +30,11 @@ return:
 ; Check that the appropriate relocations were created.
 
 ; R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
-; CHECK: (('sh_name', 0x{{[a-z0-9]+}}) # '.rela.rodata'
-; CHECK:      ('r_type3', 0x00)
-; CHECK-NEXT: ('r_type2', 0x12)
-; CHECK-NEXT: ('r_type', 0x0c)
-
+; CHECK:      Relocations [
+; CHECK:        Section ({{[a-z0-9]+}}) .rodata {
+; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
+; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
+; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
+; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
diff --git a/test/MC/Mips/elf-reginfo.ll b/test/MC/Mips/elf-reginfo.ll
index 1d7a18866c50..a255af931d83 100644
--- a/test/MC/Mips/elf-reginfo.ll
+++ b/test/MC/Mips/elf-reginfo.ll
@@ -1,7 +1,7 @@
  ; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - \
- ; RUN: | elf-dump --dump-section-data  | FileCheck --check-prefix=CHECK_64 %s
+ ; RUN: | llvm-readobj -s | FileCheck --check-prefix=CHECK_64 %s
  ; RUN: llc -filetype=obj -march=mipsel -mcpu=mips32 %s -o - \
- ; RUN: | elf-dump --dump-section-data  | FileCheck --check-prefix=CHECK_32 %s
+ ; RUN: | llvm-readobj -s | FileCheck --check-prefix=CHECK_32 %s
 
 ; Check for register information sections.
 ;
@@ -13,14 +13,18 @@ entry:
 ; Check that the appropriate relocations were created.
 
 ; check for .MIPS.options
-; CHECK_64:      (('sh_name', 0x{{[0-9|a-f]+}}) # '.MIPS.options'
-; CHECK_64-NEXT: ('sh_type', 0x7000000d)
-; CHECK_64-NEXT: ('sh_flags', 0x0000000008000002)
+; CHECK_64:      Sections [
+; CHECK_64:        Section {
+; CHECK_64:          Name: .MIPS.options
+; CHECK_64-NEXT:     Type: SHT_MIPS_OPTIONS
+; CHECK_64-NEXT:     Flags [ (0x8000002)
 
 ; check for .reginfo
-; CHECK_32:      (('sh_name', 0x{{[0-9|a-f]+}}) # '.reginfo'
-; CHECK_32-NEXT: ('sh_type', 0x70000006)
-; CHECK_32-NEXT: ('sh_flags', 0x00000002)
+; CHECK_32:      Sections [
+; CHECK_32:        Section {
+; CHECK_32:          Name: .reginfo
+; CHECK_32-NEXT:     Type:  SHT_MIPS_REGINFO
+; CHECK_32-NEXT:     Flags [ (0x2)
 
 
   %puts = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i64 0, i64 0))
@@ -28,4 +32,3 @@ entry:
 
 }
 declare i32 @puts(i8* nocapture) nounwind
-  
diff --git a/test/MC/Mips/elf-relsym.ll b/test/MC/Mips/elf-relsym.ll
index 0f74437ec5d4..6da926273ab7 100644
--- a/test/MC/Mips/elf-relsym.ll
+++ b/test/MC/Mips/elf-relsym.ll
@@ -1,11 +1,21 @@
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | llvm-readobj -t | FileCheck %s
 
 ; Check that the appropriate symbols were created.
 
-; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$.str'
-; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$.str1'
-; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$CPI0_0'
-; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$CPI0_1'
+; CHECK: Symbols [
+; CHECK:   Symbol {
+; CHECK:     Name: $.str
+; CHECK:   }
+; CHECK:   Symbol {
+; CHECK:     Name: $.str1
+; CHECK:   }
+; CHECK:   Symbol {
+; CHECK:     Name: $CPI0_0
+; CHECK:   }
+; CHECK:   Symbol {
+; CHECK:     Name: $CPI0_1
+; CHECK:   }
+; CHECK: ]
 
 @.str = private unnamed_addr constant [6 x i8] c"abcde\00", align 1
 @gc1 = external global i8*
diff --git a/test/MC/Mips/elf-tls.ll b/test/MC/Mips/elf-tls.ll
index b4183b835779..9f604e00a0a8 100644
--- a/test/MC/Mips/elf-tls.ll
+++ b/test/MC/Mips/elf-tls.ll
@@ -1,10 +1,14 @@
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | llvm-readobj -r | FileCheck %s
 
 ; Check that the appropriate relocations were created.
 
-; CHECK:     ('r_type', 0x2b)
-; CHECK:     ('r_type', 0x2c)
-; CHECK:     ('r_type', 0x2d)
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     R_MIPS_TLS_LDM
+; CHECK:     R_MIPS_TLS_DTPREL_HI16
+; CHECK:     R_MIPS_TLS_DTPREL_LO16
+; CHECK:   }
+; CHECK: ]
 
 @t1 = thread_local global i32 0, align 4
 
diff --git a/test/MC/Mips/elf_basic.s b/test/MC/Mips/elf_basic.s
index ffc3b112e502..6c1e7690126d 100644
--- a/test/MC/Mips/elf_basic.s
+++ b/test/MC/Mips/elf_basic.s
@@ -1,35 +1,41 @@
 // 32 bit big endian
-// RUN: llvm-mc -filetype=obj -triple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32 %s
+// RUN: llvm-mc -filetype=obj -triple mips-unknown-linux %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32 %s
 // 32 bit little endian
-// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-LE32 %s
+// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-LE32 %s
 // 64 bit big endian
-// RUN: llvm-mc -filetype=obj -arch=mips64 -triple mips64-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE64 %s
+// RUN: llvm-mc -filetype=obj -arch=mips64 -triple mips64-unknown-linux %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64 %s
 // 64 bit little endian
-// RUN: llvm-mc -filetype=obj -arch=mips64el -triple mips64el-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-LE64 %s
+// RUN: llvm-mc -filetype=obj -arch=mips64el -triple mips64el-unknown-linux %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-LE64 %s
 
 // Check that we produce 32 bit with each endian.
 
-// This is 32 bit.
-// CHECK-BE32: ('e_indent[EI_CLASS]', 0x01)
-// This is big endian.
-// CHECK-BE32: ('e_indent[EI_DATA]', 0x02)
+// CHECK-BE32: ElfHeader {
+// CHECK-BE32:   Ident {
+// CHECK-BE32:     Class: 32-bit
+// CHECK-BE32:     DataEncoding: BigEndian
+// CHECK-BE32:   }
+// CHECK-BE32: }
 
-// This is 32 bit.
-// CHECK-LE32: ('e_indent[EI_CLASS]', 0x01)
-// This is little endian.
-// CHECK-LE32: ('e_indent[EI_DATA]', 0x01)
+// CHECK-LE32: ElfHeader {
+// CHECK-LE32:   Ident {
+// CHECK-LE32:     Class: 32-bit
+// CHECK-LE32:     DataEncoding: LittleEndian
+// CHECK-LE32:   }
+// CHECK-LE32: }
 
 // Check that we produce 64 bit with each endian.
 
-// This is 64 bit.
-// CHECK-BE64: ('e_indent[EI_CLASS]', 0x02)
-// This is big endian.
-// CHECK-BE64: ('e_indent[EI_DATA]', 0x02)
+// CHECK-BE64: ElfHeader {
+// CHECK-BE64:   Ident {
+// CHECK-BE64:     Class: 64-bit
+// CHECK-BE64:     DataEncoding: BigEndian
+// CHECK-BE64:   }
+// CHECK-BE64: }
 
-// This is 64 bit.
-// CHECK-LE64: ('e_indent[EI_CLASS]', 0x02)
-// This is little endian.
-// CHECK-LE64: ('e_indent[EI_DATA]', 0x01)
-
-// Check that we are setting EI_OSABI to ELFOSABI_LINUX.
-// CHECK-LE64: ('e_indent[EI_OSABI]', 0x03)
+// CHECK-LE64: ElfHeader {
+// CHECK-LE64:   Ident {
+// CHECK-LE64:     Class: 64-bit
+// CHECK-LE64:     DataEncoding: LittleEndian
+// CHECK-LE64:     OS/ABI: GNU/Linux
+// CHECK-LE64:   }
+// CHECK-LE64: }
diff --git a/test/MC/Mips/elf_eflags.ll b/test/MC/Mips/elf_eflags.ll
index 315cb812bf77..6d16a42aca2f 100644
--- a/test/MC/Mips/elf_eflags.ll
+++ b/test/MC/Mips/elf_eflags.ll
@@ -13,52 +13,52 @@
 ; EF_MIPS_ARCH_32R2 (0x70000000)
 ; EF_MIPS_ARCH_64R2 (0x80000000)
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32_PIC %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2_PIC %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS_PIC %s
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64_PIC %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64R2 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64R2_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64R2 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64R2_PIC %s
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+mips16 -relocation-model=pic %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-LE32R2-MIPS16 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+mips16 -relocation-model=pic %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-LE32R2-MIPS16 %s
  
 ; 32(R1) bit with NO_REORDER and static
-; CHECK-BE32: ('e_flags', 0x50001001)
+; CHECK-BE32: Flags [ (0x50001001)
 ;
 ; 32(R1) bit with NO_REORDER and PIC
-; CHECK-BE32_PIC: ('e_flags', 0x50001003)
+; CHECK-BE32_PIC: Flags [ (0x50001003)
 ;
 ; 32R2 bit with NO_REORDER and static
-; CHECK-BE32R2: ('e_flags', 0x70001001)
+; CHECK-BE32R2: Flags [ (0x70001001)
 ;
 ; 32R2 bit with NO_REORDER and PIC
-; CHECK-BE32R2_PIC: ('e_flags', 0x70001003)
+; CHECK-BE32R2_PIC: Flags [ (0x70001003)
 ;
 ; 32R2 bit MICROMIPS with NO_REORDER and static
-; CHECK-BE32R2-MICROMIPS: ('e_flags', 0x72001001)
+; CHECK-BE32R2-MICROMIPS: Flags [ (0x72001001)
 ;
 ; 32R2 bit MICROMIPS with NO_REORDER and PIC
-;CHECK-BE32R2-MICROMIPS_PIC:  ('e_flags', 0x72001003)
+;CHECK-BE32R2-MICROMIPS_PIC: Flags [ (0x72001003)
 ;
 ; 64(R1) bit with NO_REORDER and static
-; CHECK-BE64: ('e_flags', 0x60000001)
+; CHECK-BE64: Flags [ (0x60000001)
 ;
 ; 64(R1) bit with NO_REORDER and PIC
-; CHECK-BE64_PIC: ('e_flags', 0x60000003)
+; CHECK-BE64_PIC: Flags [ (0x60000003)
 ;
 ; 64R2 bit with NO_REORDER and static
-; CHECK-BE64R2: ('e_flags', 0x80000001)
+; CHECK-BE64R2: Flags [ (0x80000001)
 ;
 ; 64R2 bit with NO_REORDER and PIC
-; CHECK-BE64R2_PIC: ('e_flags', 0x80000003)
+; CHECK-BE64R2_PIC: Flags [ (0x80000003)
 ;
 ; 32R2 bit MIPS16 with PIC
-; CHECK-LE32R2-MIPS16: ('e_flags', 0x74001002)
+; CHECK-LE32R2-MIPS16: Flags [ (0x74001002)
  
 define i32 @main() nounwind {
 entry:
diff --git a/test/MC/Mips/elf_st_other.ll b/test/MC/Mips/elf_st_other.ll
index f188ce7ba387..bc56c0033e58 100644
--- a/test/MC/Mips/elf_st_other.ll
+++ b/test/MC/Mips/elf_st_other.ll
@@ -1,13 +1,12 @@
 ; This tests value of ELF st_other field for function symbol table entries.
 ; For microMIPS value should be equal to STO_MIPS_MICROMIPS.
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | llvm-readobj -t | FileCheck %s
 
 define i32 @main() nounwind {
 entry:
   ret i32 0
 }
 
-; CHECK:  'main'
-; CHECK:  ('st_other', 0x80)
-
+; CHECK:     Name: main
+; CHECK:     Other: 128
diff --git a/test/MC/Mips/expr1.s b/test/MC/Mips/expr1.s
new file mode 100644
index 000000000000..67664c1a2ae4
--- /dev/null
+++ b/test/MC/Mips/expr1.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc  %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
+# Check that the assembler can handle the expressions as operands.
+# CHECK:  .text
+# CHECK:  .globl  foo
+# CHECK:  foo:
+# CHECK:  lw   $4, %lo(foo)($4)    # encoding: [A,A,0x84,0x8c]
+# CHECK:                           #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
+# CHECK:  lw   $4, 56($4)          # encoding: [0x38,0x00,0x84,0x8c]
+# CHECK:  lw   $4, %lo(foo+8)($4)  # encoding: [0x08'A',A,0x84,0x8c]
+# CHECK:                           #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
+# CHECK:  lw   $4, %lo(foo+8)($4)  # encoding: [0x08'A',A,0x84,0x8c]
+# CHECK:                           #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
+# CHECK:  lw   $4, %lo(foo+8)($4)  # encoding: [0x08'A',A,0x84,0x8c]
+# CHECK:                           #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
+# CHECK:  .space  64
+
+  .globl  foo
+  .ent  foo
+foo:
+  lw  $4,%lo(foo)($4)
+  lw  $4,((10 + 4) * 4)($4)
+  lw  $4,%lo (2 * 4) + foo($4)
+  lw  $4,%lo((2 * 4) + foo)($4)
+  lw  $4,(((%lo ((2 * 4) + foo))))($4)
+  .space  64
+  .end  foo
diff --git a/test/MC/Mips/higher_highest.ll b/test/MC/Mips/higher_highest.ll
index 0c665220335b..6c3d71f6a4b1 100644
--- a/test/MC/Mips/higher_highest.ll
+++ b/test/MC/Mips/higher_highest.ll
@@ -1,14 +1,16 @@
-; DISABLE: llc -march=mips64el -mcpu=mips64 -mattr=n64  -force-mips-long-branch -filetype=obj < %s -o - | elf-dump --dump-section-data | FileCheck %s
+; DISABLE: llc -march=mips64el -mcpu=mips64 -mattr=n64  -force-mips-long-branch -filetype=obj < %s -o - | llvm-readobj -r | FileCheck %s
 ; RUN: false
 ; XFAIL: *
 ; Disabled because currently we don't have a way to generate these relocations.
 ;
 ; Check that the R_MIPS_HIGHER and R_MIPS_HIGHEST relocations were created.
 
-; CHECK:     ('r_type', 0x1d)
-; CHECK:     ('r_type', 0x1d)
-; CHECK:     ('r_type', 0x1c)
-; CHECK:     ('r_type', 0x1c)
+; CHECK: Relocations [
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_HIGHEST
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_HIGHEST
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_HIGHER
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_HIGHER
+; CHECK: ]
 
 @g0 = external global i32
 
diff --git a/test/MC/Mips/mips-alu-instructions.s b/test/MC/Mips/mips-alu-instructions.s
index 816138ec6544..586e88bc4814 100644
--- a/test/MC/Mips/mips-alu-instructions.s
+++ b/test/MC/Mips/mips-alu-instructions.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for arithmetic and logical instructions.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # Logical instructions
 #------------------------------------------------------------------------------
@@ -13,6 +12,7 @@
 # CHECK:  ins    $19, $9, 6, 7   # encoding: [0x84,0x61,0x33,0x7d]
 # CHECK:  nor    $9, $6, $7      # encoding: [0x27,0x48,0xc7,0x00]
 # CHECK:  or     $3, $3, $5      # encoding: [0x25,0x18,0x65,0x00]
+# CHECK:  ori    $4, $5, 17767   # encoding: [0x67,0x45,0xa4,0x34]
 # CHECK:  ori    $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x34]
 # CHECK:  rotr   $9, $6, 7       # encoding: [0xc2,0x49,0x26,0x00]
 # CHECK:  rotrv  $9, $6, $7      # encoding: [0x46,0x48,0xe6,0x00]
@@ -40,6 +40,7 @@
      ins    $19, $9, 6,7
      nor    $9,  $6, $7
      or     $3,  $3, $5
+     or     $4,  $5, 17767
      ori    $9,  $6, 17767
      rotr   $9,  $6, 7
      rotrv  $9,  $6, $7
diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s
index cfc15e883a95..1622965a4139 100644
--- a/test/MC/Mips/mips-expansions.s
+++ b/test/MC/Mips/mips-expansions.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for macro instructions
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # Load immediate instructions
 #------------------------------------------------------------------------------
@@ -16,6 +15,22 @@
 # CHECK: lui     $7, 1               # encoding: [0x01,0x00,0x07,0x3c]
 # CHECK: ori     $7, $7, 2           # encoding: [0x02,0x00,0xe7,0x34]
 # CHECK: addu    $7, $7, $8          # encoding: [0x21,0x38,0xe8,0x00]
+# CHECK: lui     $10, %hi(symbol)        # encoding: [A,A,0x0a,0x3c]
+# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK: addu    $10, $10, $4            # encoding: [0x21,0x50,0x44,0x01]
+# CHECK: lw      $10, %lo(symbol)($10)   # encoding: [A,A,0x4a,0x8d]
+# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK: lui     $1, %hi(symbol)         # encoding: [A,A,0x01,0x3c]
+# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK: addu    $1, $1, $9              # encoding: [0x21,0x08,0x29,0x00]
+# CHECK: sw      $10, %lo(symbol)($1)    # encoding: [A,A,0x2a,0xac]
+# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK: lui     $10, 10                 # encoding: [0x0a,0x00,0x0a,0x3c]
+# CHECK: addu    $10, $10, $4            # encoding: [0x21,0x50,0x44,0x01]
+# CHECK: lw      $10, 123($10)           # encoding: [0x7b,0x00,0x4a,0x8d]
+# CHECK: lui     $1, 2                   # encoding: [0x02,0x00,0x01,0x3c]
+# CHECK: addu    $1, $1, $9              # encoding: [0x21,0x08,0x29,0x00]
+# CHECK: sw      $10, 57920($1)          # encoding: [0x40,0xe2,0x2a,0xac]
 
     li $5,123
     li $6,-2345
@@ -25,3 +40,9 @@
     la $7,65538
     la $a0, 20($a1)
     la $7,65538($8)
+
+    lw  $t2, symbol($a0)
+    sw  $t2, symbol($t1)
+
+    lw  $t2, 655483($a0)
+    sw  $t2, 123456($t1)
diff --git a/test/MC/Mips/mips-fpu-instructions.s b/test/MC/Mips/mips-fpu-instructions.s
index a126c6f7188c..e515872f260a 100644
--- a/test/MC/Mips/mips-fpu-instructions.s
+++ b/test/MC/Mips/mips-fpu-instructions.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc  %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for FPU instructions.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # FP aritmetic  instructions
 #------------------------------------------------------------------------------
@@ -157,6 +156,8 @@
 # CHECK:  mtc0    $9, $8, 3               # encoding: [0x03,0x40,0x89,0x40]
 # CHECK:  mfc2    $5, $7, 4               # encoding: [0x04,0x38,0x05,0x48]
 # CHECK:  mtc2    $9, $4, 5               # encoding: [0x05,0x20,0x89,0x48]
+# CHECK:  movf    $2, $1, $fcc0           # encoding: [0x01,0x10,0x20,0x00]
+# CHECK:  movt    $2, $1, $fcc0           # encoding: [0x01,0x10,0x21,0x00]
 
    cfc1    $a2,$0
    mfc1    $a2,$f7
@@ -176,3 +177,5 @@
    mtc0    $9, $8, 3
    mfc2    $5, $7, 4
    mtc2    $9, $4, 5
+   movf    $2, $1, $fcc0
+   movt    $2, $1, $fcc0
diff --git a/test/MC/Mips/mips-jump-instructions.s b/test/MC/Mips/mips-jump-instructions.s
index bc2d72039818..3643c27bbbd8 100644
--- a/test/MC/Mips/mips-jump-instructions.s
+++ b/test/MC/Mips/mips-jump-instructions.s
@@ -1,30 +1,33 @@
-# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | \
+# RUN: FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for jumps and branches.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # Branch instructions
 #------------------------------------------------------------------------------
-# CHECK:   b 1332                 # encoding: [0x34,0x05,0x00,0x10]
+# CHECK:   b 1332                 # encoding: [0x4d,0x01,0x00,0x10]
 # CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bc1f 1332              # encoding: [0x34,0x05,0x00,0x45]
+# CHECK:   bc1f 1332              # encoding: [0x4d,0x01,0x00,0x45]
 # CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bc1t 1332              # encoding: [0x34,0x05,0x01,0x45]
+# CHECK:   bc1t 1332              # encoding: [0x4d,0x01,0x01,0x45]
 # CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   beq $9, $6, 1332       # encoding: [0x34,0x05,0x26,0x11]
+# CHECK:   beq $9, $6, 1332       # encoding: [0x4d,0x01,0x26,0x11]
 # CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bgez $6, 1332          # encoding: [0x34,0x05,0xc1,0x04]
+# CHECK:   bgez $6, 1332          # encoding: [0x4d,0x01,0xc1,0x04]
 # CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bgezal $6, 1332        # encoding: [0x34,0x05,0xd1,0x04]
+# CHECK:   bgezal $6, 1332        # encoding: [0x4d,0x01,0xd1,0x04]
 # CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bgtz $6, 1332          # encoding: [0x34,0x05,0xc0,0x1c]
+# CHECK:   bgtz $6, 1332          # encoding: [0x4d,0x01,0xc0,0x1c]
 # CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   blez $6, 1332          # encoding: [0x34,0x05,0xc0,0x18]
+# CHECK:   blez $6, 1332          # encoding: [0x4d,0x01,0xc0,0x18]
 # CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bne $9, $6, 1332       # encoding: [0x34,0x05,0x26,0x15]
+# CHECK:   bne $9, $6, 1332       # encoding: [0x4d,0x01,0x26,0x15]
 # CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bal     1332           # encoding: [0x34,0x05,0x11,0x04]
+# CHECK:   bal     1332           # encoding: [0x4d,0x01,0x11,0x04]
 # CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+
+.set noreorder
+
          b 1332
          nop
          bc1f 1332
@@ -50,9 +53,9 @@ end_of_code:
 #------------------------------------------------------------------------------
 # Jump instructions
 #------------------------------------------------------------------------------
-# CHECK:   j 1328               # encoding: [0x30,0x05,0x00,0x08]
+# CHECK:   j 1328               # encoding: [0x4c,0x01,0x00,0x08]
 # CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   jal 1328             # encoding: [0x30,0x05,0x00,0x0c]
+# CHECK:   jal 1328             # encoding: [0x4c,0x01,0x00,0x0c]
 # CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
 # CHECK:   jalr $6              # encoding: [0x09,0xf8,0xc0,0x00]
 # CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
@@ -63,6 +66,11 @@ end_of_code:
 # CHECK:   jr $7                # encoding: [0x08,0x00,0xe0,0x00]
 # CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
 # CHECK:   jr $7                # encoding: [0x08,0x00,0xe0,0x00]
+# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK:   jalr  $25            # encoding: [0x09,0xf8,0x20,0x03]
+# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK:   jalr  $4, $25        # encoding: [0x09,0x20,0x20,0x03]
+# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
 
 
    j 1328
@@ -78,3 +86,8 @@ end_of_code:
    jr $7
    nop
    j $7
+   nop
+   jal  $25
+   nop
+   jal  $4,$25
+   nop
diff --git a/test/MC/Mips/mips-memory-instructions.s b/test/MC/Mips/mips-memory-instructions.s
index b5f1267ef386..c8b055906ebb 100644
--- a/test/MC/Mips/mips-memory-instructions.s
+++ b/test/MC/Mips/mips-memory-instructions.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for loads and stores.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # Memory store instructions
 #------------------------------------------------------------------------------
diff --git a/test/MC/Mips/mips-relocations.s b/test/MC/Mips/mips-relocations.s
index ff71c7559cd0..6f095d1ecdff 100644
--- a/test/MC/Mips/mips-relocations.s
+++ b/test/MC/Mips/mips-relocations.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for relocations.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 # CHECK:  lui   $2, %hi(_gp_disp)     # encoding: [A,A,0x02,0x3c]
 # CHECK:                              #   fixup A - offset: 0, value: _gp_disp@ABS_HI, kind: fixup_Mips_HI16
 # CHECK:  addiu $2, $2, %lo(_gp_disp) # encoding: [A,A,0x42,0x24]
diff --git a/test/MC/Mips/mips64-alu-instructions.s b/test/MC/Mips/mips64-alu-instructions.s
index 1b4ebdfbbd49..db6c972b3b49 100644
--- a/test/MC/Mips/mips64-alu-instructions.s
+++ b/test/MC/Mips/mips64-alu-instructions.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for arithmetic and logical instructions.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # Logical instructions
 #------------------------------------------------------------------------------
@@ -13,6 +12,7 @@
 # CHECK:  ins    $19, $9, 6, 7   # encoding: [0x84,0x61,0x33,0x7d]
 # CHECK:  nor    $9, $6, $7      # encoding: [0x27,0x48,0xc7,0x00]
 # CHECK:  or     $3, $3, $5      # encoding: [0x25,0x18,0x65,0x00]
+# CHECK:  ori    $4, $5, 17767   # encoding: [0x67,0x45,0xa4,0x34]
 # CHECK:  ori    $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x34]
 # CHECK:  rotr   $9, $6, 7       # encoding: [0xc2,0x49,0x26,0x00]
 # CHECK:  rotrv  $9, $6, $7      # encoding: [0x46,0x48,0xe6,0x00]
@@ -40,6 +40,7 @@
      ins    $19, $9, 6,7
      nor    $9,  $6, $7
      or     $3,  $3, $5
+     or     $4,  $5, 17767
      ori    $9,  $6, 17767
      rotr   $9,  $6, 7
      rotrv  $9,  $6, $7
diff --git a/test/MC/Mips/mips_directives.s b/test/MC/Mips/mips_directives.s
index 65d584dfa8aa..df7e64563371 100644
--- a/test/MC/Mips/mips_directives.s
+++ b/test/MC/Mips/mips_directives.s
@@ -17,3 +17,9 @@ $JTI0_0:
     .set macro
     .set reorder
     .set  at=$a0
+    .set STORE_MASK,$t7
+    .set FPU_MASK,$f7
+#CHECK:    abs.s   $f6, $f7           # encoding: [0x46,0x00,0x39,0x85]
+#CHECK:    and     $3, $15, $15       # encoding: [0x01,0xef,0x18,0x24]
+    abs.s      $f6,FPU_MASK
+    and $3,$t7,STORE_MASK
diff --git a/test/MC/Mips/nabi-regs.s b/test/MC/Mips/nabi-regs.s
index 9371208a2a9e..050fb8134880 100644
--- a/test/MC/Mips/nabi-regs.s
+++ b/test/MC/Mips/nabi-regs.s
@@ -8,7 +8,6 @@
 # RUN: -mcpu=mips64r2 -arch=mips64 | \
 # RUN: FileCheck %s
 
-# CHECK: .section    __TEXT,__text,regular,pure_instructions
     .text
 foo:
 
diff --git a/test/MC/Mips/r-mips-got-disp.ll b/test/MC/Mips/r-mips-got-disp.ll
index 73396ac5139d..7e78a46649d8 100644
--- a/test/MC/Mips/r-mips-got-disp.ll
+++ b/test/MC/Mips/r-mips-got-disp.ll
@@ -1,8 +1,9 @@
-; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 < %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 < %s -o - | llvm-readobj -r | FileCheck %s
 
 ; Check that the R_MIPS_GOT_DISP relocations were created.
 
-; CHECK:     ('r_type', 0x13)
+; CHECK: Relocations [
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT_DISP
 
 @shl = global i64 1, align 8
 @.str = private unnamed_addr constant [8 x i8] c"0x%llx\0A\00", align 1
diff --git a/test/MC/Mips/set-at-directive.s b/test/MC/Mips/set-at-directive.s
index 98a3a35b5428..828175a223a7 100644
--- a/test/MC/Mips/set-at-directive.s
+++ b/test/MC/Mips/set-at-directive.s
@@ -3,7 +3,6 @@
 # Check that the assembler can handle the documented syntax
 # for ".set at" and set the correct value.
 
-# CHECK: .section __TEXT,__text,regular,pure_instructions
     .text
 foo:
 # CHECK:   jr    $1                      # encoding: [0x08,0x00,0x20,0x00]
diff --git a/test/MC/Mips/sym-offset.ll b/test/MC/Mips/sym-offset.ll
index 5162c913ad06..c7450f74ae41 100644
--- a/test/MC/Mips/sym-offset.ll
+++ b/test/MC/Mips/sym-offset.ll
@@ -1,4 +1,4 @@
-; DISABLED: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; DISABLED: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | llvm-readobj -s -sd | FileCheck %s
 ; RUN: false
 ; XFAIL: *
 
@@ -13,7 +13,9 @@ entry:
 ; 8841000e        lwl     at,14(v0)
 ; 9841000b        lwr     at,11(v0)
 
-; CHECK: ('_section_data', '00001c3c 00009c27 21e09903 0000828f 0e004188 0b004198
+; CHECK:          SectionData (
+; CHECK:            0000: 00001C3C 00009C27 21E09903 0000828F
+; CHECK-NEXT:       0010: 0E004188 0B004198
 
   %call = tail call i32 @memcmp(i8* getelementptr inbounds ([11 x i8]* @string1, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8]* @string2, i32 0, i32 0), i32 4) nounwind readonly
   %cmp = icmp eq i32 %call, 0
diff --git a/test/MC/Mips/xgot.ll b/test/MC/Mips/xgot.ll
index bfe9b9ad6604..e2a500ffdeea 100644
--- a/test/MC/Mips/xgot.ll
+++ b/test/MC/Mips/xgot.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mxgot %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mxgot %s -o - | llvm-readobj -r | FileCheck %s
 
 @.str = private unnamed_addr constant [16 x i8] c"ext_1=%d, i=%d\0A\00", align 1
 @ext_1 = external global i32
@@ -9,29 +9,16 @@ entry:
 ; Check that the appropriate relocations were created. 
 ; For the xgot case we want to see R_MIPS_[GOT|CALL]_[HI|LO]16.
 
-; R_MIPS_HI16
-; CHECK:     ('r_type', 0x05)
-
-; R_MIPS_LO16
-; CHECK:     ('r_type', 0x06)
-
-; R_MIPS_GOT_HI16
-; CHECK:     ('r_type', 0x16)
-
-; R_MIPS_GOT_LO16
-; CHECK:     ('r_type', 0x17)
-
-; R_MIPS_GOT
-; CHECK:     ('r_type', 0x09)
-
-; R_MIPS_LO16
-; CHECK:     ('r_type', 0x06)
-
-; R_MIPS_CALL_HI16
-; CHECK:     ('r_type', 0x1e)
-
-; R_MIPS_CALL_LO16
-; CHECK:     ('r_type', 0x1f)
+; CHECK: Relocations [
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_HI16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_LO16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT_HI16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT_LO16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_LO16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_CALL_HI16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_CALL_LO16
+; CHECK: ]
 
   %0 = load i32* @ext_1, align 4
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str, i32 0, i32 0), i32 %0) nounwind
diff --git a/test/MC/PowerPC/ppc64-initial-cfa.ll b/test/MC/PowerPC/ppc64-initial-cfa.ll
index 16236c9c650d..23a77384ecd0 100644
--- a/test/MC/PowerPC/ppc64-initial-cfa.ll
+++ b/test/MC/PowerPC/ppc64-initial-cfa.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj -relocation-model=static %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s -check-prefix=STATIC
+; RUN: llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=STATIC
 ; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj -relocation-model=pic %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s -check-prefix=PIC
+; RUN: llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=PIC
 
 ; FIXME: this file should be in .s form, change when asm parser is available.
 
@@ -10,69 +10,75 @@ entry:
   ret void
 }
 
-; STATIC:      ('sh_name', 0x{{.*}}) # '.eh_frame'
-; STATIC-NEXT: ('sh_type', 0x00000001)
-; STATIC-NEXT: ('sh_flags', 0x0000000000000002)
-; STATIC-NEXT: ('sh_addr', 0x{{.*}})
-; STATIC-NEXT: ('sh_offset', 0x{{.*}})
-; STATIC-NEXT: ('sh_size', 0x0000000000000028)
-; STATIC-NEXT: ('sh_link', 0x00000000)
-; STATIC-NEXT: ('sh_info', 0x00000000)
-; STATIC-NEXT: ('sh_addralign', 0x0000000000000008)
-; STATIC-NEXT: ('sh_entsize', 0x0000000000000000)
-; STATIC-NEXT: ('_section_data', '00000010 00000000 017a5200 01784101 1b0c0100 00000010 00000018 00000000 00000010 00000000')
+; STATIC:      Section {
+; STATIC:        Name: .eh_frame
+; STATIC-NEXT:   Type: SHT_PROGBITS
+; STATIC-NEXT:   Flags [ (0x2)
+; STATIC-NEXT:     SHF_ALLOC
+; STATIC-NEXT:   ]
+; STATIC-NEXT:   Address:
+; STATIC-NEXT:   Offset:
+; STATIC-NEXT:   Size: 40
+; STATIC-NEXT:   Link: 0
+; STATIC-NEXT:   Info: 0
+; STATIC-NEXT:   AddressAlignment: 8
+; STATIC-NEXT:   EntrySize: 
+; STATIC-NEXT:   Relocations [
+; STATIC-NEXT:     0x1C R_PPC64_REL32 .text 0x0
+; STATIC-NEXT:   ]
+; STATIC-NEXT:   SectionData (
+; STATIC-NEXT:     0000: 00000010 00000000 017A5200 01784101
+; STATIC-NEXT:     0010: 1B0C0100 00000010 00000018 00000000
+; STATIC-NEXT:     0020: 00000010 00000000
+; STATIC-NEXT:   )
+; STATIC-NEXT: }
 
-; STATIC:      ('sh_name', 0x{{.*}}) # '.rela.eh_frame'
-; STATIC-NEXT: ('sh_type', 0x00000004)
-; STATIC-NEXT: ('sh_flags', 0x0000000000000000)
-; STATIC-NEXT: ('sh_addr', 0x{{.*}})
-; STATIC-NEXT: ('sh_offset', 0x{{.*}})
-; STATIC-NEXT: ('sh_size', 0x0000000000000018)
-; STATIC-NEXT: ('sh_link', 0x{{.*}})
-; STATIC-NEXT: ('sh_info', 0x{{.*}})
-; STATIC-NEXT: ('sh_addralign', 0x0000000000000008)
-; STATIC-NEXT: ('sh_entsize', 0x0000000000000018)
-; STATIC-NEXT: ('_relocations', [
+; STATIC:      Section {
+; STATIC:        Name: .rela.eh_frame
+; STATIC-NEXT:   Type: SHT_RELA
+; STATIC-NEXT:   Flags [ (0x0)
+; STATIC-NEXT:   ]
+; STATIC-NEXT:   Address:
+; STATIC-NEXT:   Offset:
+; STATIC-NEXT:   Size: 24
+; STATIC-NEXT:   Link:
+; STATIC-NEXT:   Info:
+; STATIC-NEXT:   AddressAlignment: 8
+; STATIC-NEXT:   EntrySize: 24
 
-; Static build should create R_PPC64_REL32 relocations
-; STATIC-NEXT:  # Relocation 0
-; STATIC-NEXT:  (('r_offset', 0x000000000000001c)
-; STATIC-NEXT:   ('r_sym', 0x{{.*}})
-; STATIC-NEXT:   ('r_type', 0x0000001a)
-; STATIC-NEXT:   ('r_addend', 0x0000000000000000)
-; STATIC-NEXT:  ),
-; STATIC-NEXT: ])
 
+; PIC:      Section {
+; PIC:        Name: .eh_frame
+; PIC-NEXT:   Type: SHT_PROGBITS
+; PIC-NEXT:   Flags [ (0x2)
+; PIC-NEXT:     SHF_ALLOC
+; PIC-NEXT:   ]
+; PIC-NEXT:   Address:
+; PIC-NEXT:   Offset:
+; PIC-NEXT:   Size: 40
+; PIC-NEXT:   Link: 0
+; PIC-NEXT:   Info: 0
+; PIC-NEXT:   AddressAlignment: 8
+; PIC-NEXT:   EntrySize: 0
+; PIC-NEXT:   Relocations [
+; PIC-NEXT:     0x1C R_PPC64_REL32 .text 0x0
+; PIC-NEXT:   ]
+; PIC-NEXT:   SectionData (
+; PIC-NEXT:     0000: 00000010 00000000 017A5200 01784101
+; PIC-NEXT:     0010: 1B0C0100 00000010 00000018 00000000
+; PIC-NEXT:     0020: 00000010 00000000
+; PIC-NEXT:   )
+; PIC-NEXT: }
 
-; PIC:      ('sh_name', 0x{{.*}}) # '.eh_frame'
-; PIC-NEXT: ('sh_type', 0x00000001)
-; PIC-NEXT: ('sh_flags', 0x0000000000000002)
-; PIC-NEXT: ('sh_addr', 0x{{.*}})
-; PIC-NEXT: ('sh_offset', 0x{{.*}})
-; PIC-NEXT: ('sh_size', 0x0000000000000028)
-; PIC-NEXT: ('sh_link', 0x00000000)
-; PIC-NEXT: ('sh_info', 0x00000000)
-; PIC-NEXT: ('sh_addralign', 0x0000000000000008)
-; PIC-NEXT: ('sh_entsize', 0x0000000000000000)
-; PIC-NEXT: ('_section_data', '00000010 00000000 017a5200 01784101 1b0c0100 00000010 00000018 00000000 00000010 00000000')
-
-; PIC:      ('sh_name', 0x{{.*}}) # '.rela.eh_frame'
-; PIC-NEXT: ('sh_type', 0x00000004)
-; PIC-NEXT: ('sh_flags', 0x0000000000000000)
-; PIC-NEXT: ('sh_addr', 0x{{.*}})
-; PIC-NEXT: ('sh_offset', 0x{{.*}})
-; PIC-NEXT: ('sh_size', 0x0000000000000018)
-; PIC-NEXT: ('sh_link', 0x{{.*}})
-; PIC-NEXT: ('sh_info', 0x{{.*}})
-; PIC-NEXT: ('sh_addralign', 0x0000000000000008)
-; PIC-NEXT: ('sh_entsize', 0x0000000000000018)
-; PIC-NEXT: ('_relocations', [
-
-; PIC build should create R_PPC64_REL32 relocations
-; PIC-NEXT:  # Relocation 0
-; PIC-NEXT:  (('r_offset', 0x000000000000001c)
-; PIC-NEXT:   ('r_sym', 0x{{.*}})
-; PIC-NEXT:   ('r_type', 0x0000001a)
-; PIC-NEXT:   ('r_addend', 0x0000000000000000)
-; PIC-NEXT:  ),
-; PIC-NEXT: ])
+; PIC:      Section {
+; PIC:        Name: .rela.eh_frame
+; PIC-NEXT:   Type: SHT_RELA
+; PIC-NEXT:   Flags [ (0x0)
+; PIC-NEXT:   ]
+; PIC-NEXT:   Address:
+; PIC-NEXT:   Offset:
+; PIC-NEXT:   Size: 24
+; PIC-NEXT:   Link:
+; PIC-NEXT:   Info:
+; PIC-NEXT:   AddressAlignment: 8
+; PIC-NEXT:   EntrySize: 24
diff --git a/test/MC/PowerPC/ppc64-relocs-01.ll b/test/MC/PowerPC/ppc64-relocs-01.ll
index 4919e91400ba..ac8d303dd4cf 100644
--- a/test/MC/PowerPC/ppc64-relocs-01.ll
+++ b/test/MC/PowerPC/ppc64-relocs-01.ll
@@ -1,6 +1,6 @@
 ;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -O3 -code-model=small  \
 ;; RUN:  -filetype=obj %s -o - | \
-;; RUN: elf-dump --dump-section-data | FileCheck %s
+;; RUN: llvm-readobj -r | FileCheck %s
 
 ;; FIXME: this file need to be in .s form, change when asm parse is done.
 
@@ -22,45 +22,28 @@ entry:
   ret double %add
 }
 
+;; CHECK:      Relocations [
+
 ;; The relocations in .rela.text are the 'number64' load using a
 ;; R_PPC64_TOC16_DS against the .toc and the 'sin' external function
 ;; address using a R_PPC64_REL24
-;; CHECK:       '.rela.text'
-;; CHECK:       Relocation 0
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000006
-;; CHECK-NEXT:  'r_type', 0x0000003f
-;; CHECK:       Relocation 1
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x0000000a
-;; CHECK-NEXT:  'r_type', 0x0000000a
+;; CHECK:        Section ({{[0-9]+}}) .text {
+;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_DS .toc
+;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_REL24    sin
+;; CHECK-NEXT:   }
 
 ;; The .opd entry for the 'access_int64' function creates 2 relocations:
 ;; 1. A R_PPC64_ADDR64 against the .text segment plus addend (the function
 ;    address itself);
 ;; 2. And a R_PPC64_TOC against no symbol (the linker will replace for the
 ;;    module's TOC base).
-;; CHECK:       '.rela.opd'
-;; CHECK:       Relocation 0
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000002
-;; CHECK-NEXT:  'r_type', 0x00000026
-;; CHECK:       Relocation 1
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000000
-;; CHECK-NEXT:  'r_type', 0x00000033
+;; CHECK:        Section ({{[0-9]+}}) .opd {
+;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_ADDR64 .text 0x0
+;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC - 0x0
 
 ;; Finally the TOC creates the relocation for the 'number64'.
-;; CHECK:       '.rela.toc'
-;; CHECK:       Relocation 0
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000008
-;; CHECK-NEXT:  'r_type', 0x00000026
+;; CHECK:        Section ({{[0-9]+}}) .toc {
+;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_ADDR64 number64 0x0
+;; CHECK-NEXT:   }
 
-;; Check if the relocation references are for correct symbols.
-;; CHECK:       Symbol 7
-;; CHECK-NEXT:  'access_int64'
-;; CHECK:       Symbol 8
-;; CHECK-NEXT:  'number64'
-;; CHECK:       Symbol 10
-;; CHECK-NEXT:  'sin'
+;; CHECK-NEXT: ]
diff --git a/test/MC/PowerPC/ppc64-tls-relocs-01.ll b/test/MC/PowerPC/ppc64-tls-relocs-01.ll
index 5e3731107522..4e901e816a68 100644
--- a/test/MC/PowerPC/ppc64-tls-relocs-01.ll
+++ b/test/MC/PowerPC/ppc64-tls-relocs-01.ll
@@ -1,5 +1,5 @@
 ;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj %s -o - | \
-;; RUN: elf-dump --dump-section-data | FileCheck %s
+;; RUN: llvm-readobj -r | FileCheck %s
 
 ;; FIXME: this file should be in .s form, change when asm parser is available.
 
@@ -12,17 +12,8 @@ entry:
 
 ;; Check for a pair of R_PPC64_TPREL16_HA / R_PPC64_TPREL16_LO relocs
 ;; against the thread-local symbol 't'.
-;; CHECK:       '.rela.text'
-;; CHECK:       Relocation 0
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000008
-;; CHECK-NEXT:  'r_type', 0x00000048
-;; CHECK:       Relocation 1
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000008
-;; CHECK-NEXT:  'r_type', 0x00000046
-
-;; Check that we got the correct symbol.
-;; CHECK:       Symbol 8
-;; CHECK-NEXT:  't'
-
+;; CHECK:      Relocations [
+;; CHECK:        Section ({{[0-9]+}}) .text {
+;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TPREL16_HA t
+;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TPREL16_LO t
+;; CHECK-NEXT:   }
diff --git a/test/MC/ELF/fde-reloc.s b/test/MC/X86/fde-reloc.s
similarity index 100%
rename from test/MC/ELF/fde-reloc.s
rename to test/MC/X86/fde-reloc.s
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index 8bfa58a4bed8..c49a17e28c4e 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -247,4 +247,78 @@ _main:
     mov [16][eax][ebx*4], ecx
 // CHECK: movl %ecx, -16(%eax,%ebx,4)
     mov [eax][ebx*4 - 16], ecx
-	ret
+
+// CHECK: prefetchnta 12800(%esi)
+    prefetchnta [esi + (200*64)]
+// CHECK: prefetchnta 32(%esi)
+    prefetchnta [esi + (64/2)]
+// CHECK: prefetchnta 128(%esi)
+    prefetchnta [esi + (64/2*4)]
+// CHECK: prefetchnta 8(%esi)
+    prefetchnta [esi + (64/(2*4))]
+// CHECK: prefetchnta 48(%esi)
+    prefetchnta [esi + (64/(2*4)+40)]
+
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax][ebx*4 - 2*8], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax][4*ebx - 2*8], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax + 4*ebx - 2*8], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [12 + eax + (4*ebx) - 2*14], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax][ebx*4 - 2*2*2*2], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax][ebx*4 - (2*8)], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax][ebx*4 - 2 * 8 + 4 - 4], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax + ebx*4 - 2 * 8 + 4 - 4], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax + ebx*4 - 2 * ((8 + 4) - 4)], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [-2 * ((8 + 4) - 4) + eax + ebx*4], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [((-2) * ((8 + 4) - 4)) + eax + ebx*4], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax + ((-2) * ((8 + 4) - 4)) + ebx*4], ecx
+// CHECK: movl %ecx, 96(%eax,%ebx,4)
+    mov [eax + ((-2) * ((8 + 4) * -4)) + ebx*4], ecx
+// CHECK: movl %ecx, -8(%eax,%ebx,4)
+    mov [eax][-8][ebx*4], ecx
+// CHECK: movl %ecx, -2(%eax,%ebx,4)
+    mov [eax][16/-8][ebx*4], ecx
+// CHECK: movl %ecx, -2(%eax,%ebx,4)
+    mov [eax][(16)/-8][ebx*4], ecx
+
+// CHECK: setb %al
+    setc al
+// CHECK: sete %al
+    setz al
+// CHECK: setbe %al
+    setna al
+// CHECK: setae %al
+    setnb al
+// CHECK: setae %al
+    setnc al
+// CHECK: setle %al
+    setng al
+// CHECK: setge %al
+    setnl al
+// CHECK: setne %al
+    setnz al
+// CHECK: setp %al
+    setpe al
+// CHECK: setnp %al
+    setpo al
+// CHECK: setb %al
+    setnae al
+// CHECK: seta %al
+    setnbe al
+// CHECK: setl %al
+    setnge al
+// CHECK: setg %al
+    setnle al
+
+    ret
diff --git a/test/MC/X86/x86-32-ms-inline-asm.s b/test/MC/X86/x86-32-ms-inline-asm.s
index 5524c706cc5c..d912915c585e 100644
--- a/test/MC/X86/x86-32-ms-inline-asm.s
+++ b/test/MC/X86/x86-32-ms-inline-asm.s
@@ -57,6 +57,26 @@ _t21:                                   ## @t21
 // CHECK: movl 4(%esi,%eax,2), %eax
 // CHECK: # encoding: [0x8b,0x44,0x46,0x04]
 
+	mov eax, 4[esi + 2*eax + 4]
+// CHECK: movl 8(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x08]
+	mov eax, 4[esi][2*eax + 4]
+// CHECK: movl 8(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x08]
+	mov eax, 4[esi + 2*eax][4]
+// CHECK: movl 8(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x08]
+	mov eax, 4[esi][2*eax][4]
+// CHECK: movl 8(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x08]
+	mov eax, 4[esi][2*eax][4][8]
+// CHECK: movl 16(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x10]
+
+    prefetchnta 64[eax]
+// CHECK: prefetchnta 64(%eax)
+// CHECK: # encoding: [0x0f,0x18,0x40,0x40]
+        
     pusha
 // CHECK: pushal
 // CHECK: # encoding: [0x60]
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index c5f1d15f8ff0..521a0776d8af 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -1228,3 +1228,11 @@ sysexitl
 // CHECK: sysexitq
 // CHECK: encoding: [0x48,0x0f,0x35]
 sysexitq
+
+// CHECK: clac
+// CHECK: encoding: [0x0f,0x01,0xca]
+clac
+
+// CHECK: stac
+// CHECK: encoding: [0x0f,0x01,0xcb]
+stac
diff --git a/test/MC/X86/x86_64-rand-encoding.s b/test/MC/X86/x86_64-rand-encoding.s
new file mode 100644
index 000000000000..3a8cb817bc1a
--- /dev/null
+++ b/test/MC/X86/x86_64-rand-encoding.s
@@ -0,0 +1,49 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: rdrandw %ax
+// CHECK: encoding: [0x66,0x0f,0xc7,0xf0]
+          rdrand %ax
+
+// CHECK: rdrandl %eax
+// CHECK: encoding: [0x0f,0xc7,0xf0]
+          rdrand %eax
+
+// CHECK: rdrandq %rax
+// CHECK: encoding: [0x48,0x0f,0xc7,0xf0]
+          rdrand %rax
+
+// CHECK: rdrandw %r11w
+// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xf3]
+          rdrand %r11w
+
+// CHECK: rdrandl %r11d
+// CHECK: encoding: [0x41,0x0f,0xc7,0xf3]
+          rdrand %r11d
+
+// CHECK: rdrandq %r11
+// CHECK: encoding: [0x49,0x0f,0xc7,0xf3]
+          rdrand %r11
+
+// CHECK: rdseedw %ax
+// CHECK: encoding: [0x66,0x0f,0xc7,0xf8]
+          rdseed %ax
+
+// CHECK: rdseedl %eax
+// CHECK: encoding: [0x0f,0xc7,0xf8]
+          rdseed %eax
+
+// CHECK: rdseedq %rax
+// CHECK: encoding: [0x48,0x0f,0xc7,0xf8]
+          rdseed %rax
+
+// CHECK: rdseedw %r11w
+// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xfb]
+          rdseed %r11w
+
+// CHECK: rdseedl %r11d
+// CHECK: encoding: [0x41,0x0f,0xc7,0xfb]
+          rdseed %r11d
+
+// CHECK: rdseedq %r11
+// CHECK: encoding: [0x49,0x0f,0xc7,0xfb]
+          rdseed %r11
diff --git a/test/MC/X86/x86_64-rtm-encoding.s b/test/MC/X86/x86_64-rtm-encoding.s
index 44d6bacb7f32..d9975d67b314 100644
--- a/test/MC/X86/x86_64-rtm-encoding.s
+++ b/test/MC/X86/x86_64-rtm-encoding.s
@@ -8,6 +8,10 @@
 // CHECK: encoding: [0x0f,0x01,0xd5]
 	xend
 
+// CHECK: xtest
+// CHECK: encoding: [0x0f,0x01,0xd6]
+	xtest
+
 // CHECK: xabort
 // CHECK: encoding: [0xc6,0xf8,0x0d]
 	xabort $13
diff --git a/test/Makefile b/test/Makefile
index bd6b672dd3d5..8d05c57f8624 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -60,6 +60,11 @@ endif
 
 ifeq ($(shell test -f $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/Makefile && echo OK), OK)
 LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test
+
+# Force creation of Clang Tools' lit.site.cfg.
+clang-tools-site-cfg: FORCE
+	$(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test lit.site.cfg
+extra-site-cfgs:: clang-tools-site-cfg
 endif
 endif
 endif
@@ -95,13 +100,6 @@ check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-site-cfgs
 clean::
 	$(RM) -rf `find $(LLVM_OBJ_ROOT)/test -name Output -type d -print`
 
-# dsymutil is used on the Darwin to manipulate DWARF debugging information.
-ifeq ($(TARGET_OS),Darwin)
-DSYMUTIL=dsymutil
-else
-DSYMUTIL=true
-endif
-
 ifneq ($(OCAMLOPT),)
 CC_FOR_OCAMLOPT := $(shell $(OCAMLOPT) -config | grep native_c_compiler | sed -e 's/native_c_compiler: //')
 CXX_FOR_OCAMLOPT := $(subst gcc,g++,$(CC_FOR_OCAMLOPT))
diff --git a/test/Makefile.tests b/test/Makefile.tests
index aeb5871e7cdf..c60c90c075d2 100644
--- a/test/Makefile.tests
+++ b/test/Makefile.tests
@@ -38,7 +38,7 @@ LCCFLAGS  += -O2 -Wall
 LCXXFLAGS += -O2 -Wall
 LLCFLAGS =
 TESTRUNR = @echo Running test: $<; \
-             PATH="$(LLVMTOOLCURRENT):$(LLVM_SRC_ROOT)/test/Scripts:$(PATH)" \
+             PATH="$(LLVMTOOLCURRENT):$(PATH)" \
                   $(LLVM_SRC_ROOT)/test/TestRunner.sh
 
 LLCLIBS := $(LLCLIBS) -lm
diff --git a/test/Object/ARM/lit.local.cfg b/test/Object/ARM/lit.local.cfg
new file mode 100644
index 000000000000..5fc35d80541d
--- /dev/null
+++ b/test/Object/ARM/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
diff --git a/test/Object/ARM/objdump-thumb.test b/test/Object/ARM/objdump-thumb.test
new file mode 100644
index 000000000000..9c92a27782d4
--- /dev/null
+++ b/test/Object/ARM/objdump-thumb.test
@@ -0,0 +1,4 @@
+RUN: llvm-objdump -d -macho -triple=thumbv7-apple-ios \
+RUN: %p/../Inputs/macho-text.thumb | FileCheck %s
+
+CHECK: 0:	00 bf                                        	nop
diff --git a/test/Object/ARM/symbol-addr.ll b/test/Object/ARM/symbol-addr.ll
new file mode 100644
index 000000000000..6bcbde9f9f18
--- /dev/null
+++ b/test/Object/ARM/symbol-addr.ll
@@ -0,0 +1,12 @@
+; RUN: llc %s -mtriple=arm-unknown-unknown -filetype=obj -o - \
+; RUN:   | llvm-objdump -t - | FileCheck %s
+; RUN: llc %s -mtriple=thumb-unknown-unknown -filetype=obj -o - \
+; RUN:   | llvm-objdump -t - | FileCheck %s
+
+; Check that the symbol address does not include the ARM/Thumb instruction
+; indicator bit.
+; CHECK: 00000000 g     F .text  {{[0-9]+}} test
+
+define i32 @test() {
+  ret i32 1
+}
diff --git a/test/Object/Inputs/COFF/i386.yaml b/test/Object/Inputs/COFF/i386.yaml
index ca902220c13b..aec7a5813cf9 100644
--- a/test/Object/Inputs/COFF/i386.yaml
+++ b/test/Object/Inputs/COFF/i386.yaml
@@ -1,5 +1,6 @@
 header: !Header
   Machine: IMAGE_FILE_MACHINE_I386 # (0x14c)
+  Characteristics: [ IMAGE_FILE_DEBUG_STRIPPED ]
 
 sections:
   - !Section
diff --git a/test/Object/Inputs/macho-text.thumb b/test/Object/Inputs/macho-text.thumb
new file mode 100644
index 000000000000..b29428a78781
Binary files /dev/null and b/test/Object/Inputs/macho-text.thumb differ
diff --git a/test/Object/Inputs/trivial-object-test.elf-mips64el b/test/Object/Inputs/trivial-object-test.elf-mips64el
new file mode 100644
index 000000000000..5ad9ba3a0bc0
Binary files /dev/null and b/test/Object/Inputs/trivial-object-test.elf-mips64el differ
diff --git a/test/Object/Mips/feature.test b/test/Object/Mips/feature.test
index e8da60974603..340301450a50 100644
--- a/test/Object/Mips/feature.test
+++ b/test/Object/Mips/feature.test
@@ -2,10 +2,12 @@ RUN: llvm-objdump -disassemble -triple mips64el -mattr +mips64r2 %p/../Inputs/de
 RUN: | FileCheck %s
 
 CHECK: Disassembly of section .text:
-CHECK: .text:
+CHECK: dext:
 CHECK:        0:	08 00 e0 03                                  	jr	$ra
 CHECK:        4:	43 49 82 7c                                  	dext $2, $4, 5, 10
+CHECK: dextu:
 CHECK:        8:	08 00 e0 03                                  	jr	$ra
 CHECK:        c:	83 28 82 7c                                  	dext $2, $4, 2, 6
+CHECK: dextm:
 CHECK:       10:	08 00 e0 03                                  	jr	$ra
 CHECK:       14:	43 09 82 7c                                  	dext $2, $4, 5, 2
diff --git a/test/Object/obj2yaml.test b/test/Object/obj2yaml.test
new file mode 100644
index 000000000000..0d96fd2bfd8e
--- /dev/null
+++ b/test/Object/obj2yaml.test
@@ -0,0 +1,170 @@
+RUN: obj2yaml %p/Inputs/trivial-object-test.coff-i386 | FileCheck %s --check-prefix COFF-I386
+RUN: obj2yaml %p/Inputs/trivial-object-test.coff-x86-64 | FileCheck %s --check-prefix COFF-X86-64
+
+
+COFF-I386: header: !Header
+COFF-I386-NEXT:  Machine: IMAGE_FILE_MACHINE_I386 # (0x14c)
+
+COFF-I386: sections:
+COFF-I386-NEXT:   - !Section
+COFF-I386-NEXT:    Name: .text
+COFF-I386-NEXT:    Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
+COFF-I386-NEXT:    SectionData:  !hex "83EC0CC744240800000000C7042400000000E800000000E8000000008B44240883C40CC3" # |....D$.......$...............D$.....|
+
+COFF-I386:    Relocations:
+COFF-I386-NEXT:      - !Relocation
+COFF-I386-NEXT:        VirtualAddress: 0xe
+COFF-I386-NEXT:        SymbolTableIndex: 5
+COFF-I386-NEXT:        Type: IMAGE_REL_I386_DIR32
+
+COFF-I386:      - !Relocation
+COFF-I386-NEXT:        VirtualAddress: 0x13
+COFF-I386-NEXT:        SymbolTableIndex: 6
+COFF-I386-NEXT:        Type: IMAGE_REL_I386_REL32
+
+COFF-I386:      - !Relocation
+COFF-I386-NEXT:        VirtualAddress: 0x18
+COFF-I386-NEXT:        SymbolTableIndex: 7
+COFF-I386-NEXT:        Type: IMAGE_REL_I386_REL32
+
+COFF-I386:  - !Section
+COFF-I386-NEXT:    Name: .data
+COFF-I386-NEXT:    Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
+COFF-I386-NEXT:    SectionData:  !hex "48656C6C6F20576F726C642100" # |Hello World!.|
+
+COFF-I386: symbols:
+COFF-I386-NEXT:  - !Symbol
+COFF-I386-NEXT:    Name: .text
+COFF-I386-NEXT:    Value: 0
+COFF-I386-NEXT:    SectionNumber: 1
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-I386-NEXT:    NumberOfAuxSymbols: 1
+COFF-I386-NEXT:    AuxillaryData:  !hex "240000000300000000000000010000000000" # |$.................|
+
+COFF-I386:  - !Symbol
+COFF-I386-NEXT:    Name: .data
+COFF-I386-NEXT:    Value: 0
+COFF-I386-NEXT:    SectionNumber: 2
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-I386-NEXT:    NumberOfAuxSymbols: 1
+COFF-I386-NEXT:    AuxillaryData:  !hex "0D0000000000000000000000020000000000" # |..................|
+
+COFF-I386:  - !Symbol
+COFF-I386-NEXT:    Name: _main
+COFF-I386-NEXT:    Value: 0
+COFF-I386-NEXT:    SectionNumber: 1
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_FUNCTION # (2)
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+COFF-I386:  - !Symbol
+COFF-I386-NEXT:    Name: L_.str
+COFF-I386-NEXT:    Value: 0
+COFF-I386-NEXT:    SectionNumber: 2
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+
+COFF-I386:  - !Symbol
+COFF-I386-NEXT:    Name: _puts
+COFF-I386-NEXT:    Value: 0
+COFF-I386-NEXT:    SectionNumber: 0
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+COFF-I386:  - !Symbol
+COFF-I386-NEXT:    Name: _SomeOtherFunction
+COFF-I386-NEXT:    Value: 0
+COFF-I386-NEXT:    SectionNumber: 0
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+
+COFF-X86-64: header: !Header
+COFF-X86-64-NEXT:  Machine: IMAGE_FILE_MACHINE_AMD64 # (0x8664)
+
+COFF-X86-64: sections:
+COFF-X86-64-NEXT:   - !Section
+COFF-X86-64-NEXT:     Name: .text
+COFF-X86-64-NEXT:     Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
+COFF-X86-64-NEXT:     SectionData:  !hex "4883EC28C744242400000000488D0D00000000E800000000E8000000008B4424244883C428C3" # |H..(.D$$....H.................D$$H..(.|
+
+COFF-X86-64:     Relocations:
+COFF-X86-64-NEXT:       - !Relocation
+COFF-X86-64-NEXT:         VirtualAddress: 0xf
+COFF-X86-64-NEXT:         SymbolTableIndex: 5
+COFF-X86-64-NEXT:         Type: IMAGE_REL_AMD64_REL32
+
+COFF-X86-64:       - !Relocation
+COFF-X86-64-NEXT:         VirtualAddress: 0x14
+COFF-X86-64-NEXT:         SymbolTableIndex: 6
+COFF-X86-64-NEXT:         Type: IMAGE_REL_AMD64_REL32
+
+COFF-X86-64:       - !Relocation
+COFF-X86-64-NEXT:         VirtualAddress: 0x19
+COFF-X86-64-NEXT:         SymbolTableIndex: 7
+COFF-X86-64-NEXT:         Type: IMAGE_REL_AMD64_REL32
+
+COFF-X86-64:   - !Section
+COFF-X86-64-NEXT:     Name: .data
+COFF-X86-64-NEXT:     Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
+COFF-X86-64-NEXT:     SectionData:  !hex "48656C6C6F20576F726C642100" # |Hello World!.|
+
+COFF-X86-64: symbols:
+COFF-X86-64-NEXT:   - !Symbol
+COFF-X86-64-NEXT:     Name: .text
+COFF-X86-64-NEXT:     Value: 0
+COFF-X86-64-NEXT:     SectionNumber: 1
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-X86-64-NEXT:     NumberOfAuxSymbols: 1
+COFF-X86-64-NEXT:     AuxillaryData:  !hex "260000000300000000000000010000000000" # |&.................|
+
+COFF-X86-64:   - !Symbol
+COFF-X86-64-NEXT:     Name: .data
+COFF-X86-64-NEXT:     Value: 0
+COFF-X86-64-NEXT:     SectionNumber: 2
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-X86-64-NEXT:     NumberOfAuxSymbols: 1
+COFF-X86-64-NEXT:     AuxillaryData:  !hex "0D0000000000000000000000020000000000" # |..................|
+
+COFF-X86-64:   - !Symbol
+COFF-X86-64-NEXT:     Name: main
+COFF-X86-64-NEXT:     Value: 0
+COFF-X86-64-NEXT:     SectionNumber: 1
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+COFF-X86-64:   - !Symbol
+COFF-X86-64-NEXT:     Name: L.str
+COFF-X86-64-NEXT:     Value: 0
+COFF-X86-64-NEXT:     SectionNumber: 2
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+
+COFF-X86-64:   - !Symbol
+COFF-X86-64-NEXT:     Name: puts
+COFF-X86-64-NEXT:     Value: 0
+COFF-X86-64-NEXT:     SectionNumber: 0
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+COFF-X86-64:   - !Symbol
+COFF-X86-64-NEXT:     Name: SomeOtherFunction
+COFF-X86-64-NEXT:     Value: 0
+COFF-X86-64-NEXT:     SectionNumber: 0
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
diff --git a/test/Object/objdump-relocations.test b/test/Object/objdump-relocations.test
index 6d35a2651d7a..95c4c4dcaedf 100644
--- a/test/Object/objdump-relocations.test
+++ b/test/Object/objdump-relocations.test
@@ -8,6 +8,8 @@ RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-x86-64 \
 RUN:              | FileCheck %s -check-prefix ELF-x86-64
 RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-hexagon \
 RUN:              | FileCheck %s -check-prefix ELF-hexagon
+RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-mips64el \
+RUN:              | FileCheck %s -check-prefix ELF-MIPS64EL
 
 RUN: llvm-objdump -r %p/Inputs/relocations.elf-x86-64 \
 RUN:              | FileCheck %s -check-prefix ELF-complex-x86-64
@@ -40,6 +42,11 @@ ELF-hexagon: R_HEX_LO16 puts
 ELF-hexagon: R_HEX_B15_PCREL testf
 ELF-hexagon: R_HEX_B22_PCREL puts
 
+// Note: this file was produced with gas to make sure we don't end up in a
+// situation where LLVM produces and accepts a broken file.
+ELF-MIPS64EL: .data
+ELF-MIPS64EL: R_MIPS_64
+
 ELF-complex-x86-64: .text
 ELF-complex-x86-64-NEXT: R_X86_64_8 .data-4
 ELF-complex-x86-64-NEXT: R_X86_64_16 .data-4
diff --git a/test/Object/objdump-section-content.test b/test/Object/objdump-section-content.test
index f9c4f43f0be0..e0199b329d18 100644
--- a/test/Object/objdump-section-content.test
+++ b/test/Object/objdump-section-content.test
@@ -1,6 +1,8 @@
 RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-objdump -s - | FileCheck %s -check-prefix COFF-i386
 RUN: llvm-objdump -s %p/Inputs/trivial-object-test.elf-i386 \
 RUN:              | FileCheck %s -check-prefix ELF-i386
+RUN: llvm-objdump -s %p/Inputs/shared-object-test.elf-i386 \
+RUN:              | FileCheck %s -check-prefix BSS
 
 COFF-i386: file format
 COFF-i386: Contents of section .text:
@@ -17,3 +19,6 @@ ELF-i386:  0010 0000e8fc ffffffe8 fcffffff 8b442408  .............D$.
 ELF-i386:  0020 83c40cc3                             ....
 ELF-i386: Contents of section .rodata.str1.1:
 ELF-i386:  0024 48656c6c 6f20576f 726c6421 00        Hello World!.
+
+BSS: Contents of section .bss:
+BSS-NEXT: <skipping contents of bss section at [12c8, 12cc)>
diff --git a/test/Object/readobj-elf-versioning.test b/test/Object/readobj-elf-versioning.test
index 0906f344e2c5..1f09ef32a11a 100644
--- a/test/Object/readobj-elf-versioning.test
+++ b/test/Object/readobj-elf-versioning.test
@@ -1,15 +1,46 @@
-RUN: llvm-readobj %p/Inputs/elf-versioning-test.i386 \
+RUN: llvm-readobj -dt %p/Inputs/elf-versioning-test.i386 \
 RUN:         | FileCheck %s -check-prefix ELF
-RUN: llvm-readobj %p/Inputs/elf-versioning-test.i386 \
+RUN: llvm-readobj -dt %p/Inputs/elf-versioning-test.i386 \
 RUN:         | FileCheck %s -check-prefix ELF32
-RUN: llvm-readobj %p/Inputs/elf-versioning-test.x86_64 \
+RUN: llvm-readobj -dt %p/Inputs/elf-versioning-test.x86_64 \
 RUN:         | FileCheck %s -check-prefix ELF
-RUN: llvm-readobj %p/Inputs/elf-versioning-test.x86_64 \
+RUN: llvm-readobj -dt %p/Inputs/elf-versioning-test.x86_64 \
 RUN:         | FileCheck %s -check-prefix ELF64
 
-ELF: foo@@VER2          FUNC  {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
-ELF: foo@VER1           FUNC  {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
-ELF: unversioned_define FUNC  {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: DynamicSymbols [
+ELF:   Symbol {
+ELF:     Name: foo@@VER2
+ELF:     Binding: Global
+ELF:     Type: Function
+ELF:     Section: .text
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: foo@VER1
+ELF:     Binding: Global
+ELF:     Type: Function
+ELF:     Section: .text
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: unversioned_define
+ELF:     Binding: Global
+ELF:     Type: Function
+ELF:     Section: .text
+ELF:   }
+ELF: ]
 
-ELF32: puts@GLIBC_2.0   FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global
-ELF64: puts@GLIBC_2.2.5 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global
+ELF32: DynamicSymbols [
+ELF32:   Symbol {
+ELF32:     Name: puts@GLIBC_2.0
+ELF32:     Binding: Global
+ELF32:     Type: Function
+ELF32:     Section:  (0x0)
+ELF32:   }
+ELF32: ]
+ELF64: DynamicSymbols [
+ELF64:   Symbol {
+ELF64:     Name: puts@GLIBC_2.2.5
+ELF64:     Binding: Global
+ELF64:     Type: Function
+ELF64:     Section:  (0x0)
+ELF64:   }
+ELF64: ]
diff --git a/test/Object/readobj-shared-object.test b/test/Object/readobj-shared-object.test
index 2c0b54dca4a5..72dbd32ea9d5 100644
--- a/test/Object/readobj-shared-object.test
+++ b/test/Object/readobj-shared-object.test
@@ -1,92 +1,319 @@
-RUN: llvm-readobj %p/Inputs/shared-object-test.elf-i386 \
+RUN: llvm-readobj -s -t -dt -dynamic-table -needed-libs \
+RUN:              %p/Inputs/shared-object-test.elf-i386 \
 RUN:         | FileCheck %s -check-prefix ELF
-RUN: llvm-readobj %p/Inputs/shared-object-test.elf-i386 \
+RUN: llvm-readobj -s -t -dt -dynamic-table -needed-libs \
+RUN:              %p/Inputs/shared-object-test.elf-i386 \
 RUN:         | FileCheck %s -check-prefix ELF32
 
-RUN: llvm-readobj %p/Inputs/shared-object-test.elf-x86-64 \
+RUN: llvm-readobj -s -t -dt -dynamic-table -needed-libs \
+RUN:            %p/Inputs/shared-object-test.elf-x86-64 \
 RUN:         | FileCheck %s -check-prefix ELF
-RUN: llvm-readobj %p/Inputs/shared-object-test.elf-x86-64 \
+RUN: llvm-readobj -s -t -dt -dynamic-table -needed-libs \
+RUN:            %p/Inputs/shared-object-test.elf-x86-64 \
 RUN:         | FileCheck %s -check-prefix ELF64
 
-ELF64:File Format : ELF64-x86-64
-ELF64:Arch        : x86_64
-ELF64:Address Size: 64 bits
-ELF64:Load Name   : libfoo.so
+ELF64: Format:      ELF64-x86-64
+ELF64: Arch:        x86_64
+ELF64: AddressSize: 64bit
+ELF64: LoadName:    libfoo.so
 
-ELF32:File Format : ELF32-i386
-ELF32:Arch        : i386
-ELF32:Address Size: 32 bits
-ELF32:Load Name   : libfoo.so
+ELF32: Format:      ELF32-i386
+ELF32: Arch:        i386
+ELF32: AddressSize: 32bit
+ELF32: LoadName:    libfoo.so
 
-ELF:Symbols:
-ELF:  Name                   Type            Address        Size           FileOffset     Flags
-ELF:  .dynsym                DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
-ELF:  .dynstr                DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
-ELF:  .text                  DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
-ELF:  .eh_frame              DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
-ELF:  .tdata                 DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
-ELF:  .dynamic               DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
-ELF:  .got.plt               DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
-ELF:  .data                  DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
-ELF:  .bss                   DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
-ELF:  shared.ll              FILE            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  absolute,formatspecific
-ELF:  local_func             FUNC            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}
-ELF:  _GLOBAL_OFFSET_TABLE_  DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  absolute
-ELF:  _DYNAMIC               DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  absolute
-ELF:  common_sym             DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
-ELF:  tls_sym                DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,threadlocal
-ELF:  defined_sym            DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
-ELF:  __bss_start            ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
-ELF:  _end                   ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
-ELF:  global_func            FUNC            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
-ELF:  _edata                 ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
-ELF:  Total: 21
+ELF: Sections [
+ELF:   Section {
+ELF:     Name:  (0)
+ELF:     Type: SHT_NULL
+ELF:     Flags [ (0x0)
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .hash
+ELF:     Type: SHT_HASH
+ELF:     Flags [ (0x2)
+ELF:       SHF_ALLOC
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .dynsym
+ELF:     Type: SHT_DYNSYM
+ELF:     Flags [ (0x2)
+ELF:       SHF_ALLOC
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .dynstr
+ELF:     Type: SHT_STRTAB
+ELF:     Flags [ (0x2)
+ELF:       SHF_ALLOC
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .text
+ELF:     Type: SHT_PROGBITS
+ELF:     Flags [ (0x6)
+ELF:       SHF_ALLOC
+ELF:       SHF_EXECINSTR
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .eh_frame
+ELF:     Type: SHT_PROGBITS
+ELF:     Flags [ (0x2)
+ELF:       SHF_ALLOC
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .tdata
+ELF:     Type: SHT_PROGBITS
+ELF:     Flags [ (0x403)
+ELF:       SHF_ALLOC
+ELF:       SHF_TLS
+ELF:       SHF_WRITE
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .dynamic
+ELF:     Type: SHT_DYNAMIC
+ELF:     Flags [ (0x3)
+ELF:       SHF_ALLOC
+ELF:       SHF_WRITE
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .got.plt
+ELF:     Type: SHT_PROGBITS
+ELF:     Flags [ (0x3)
+ELF:       SHF_ALLOC
+ELF:       SHF_WRITE
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .data
+ELF:     Type: SHT_PROGBITS
+ELF:     Flags [ (0x3)
+ELF:       SHF_ALLOC
+ELF:       SHF_WRITE
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .bss
+ELF:     Type: SHT_NOBITS
+ELF:     Flags [ (0x3)
+ELF:       SHF_ALLOC
+ELF:       SHF_WRITE
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .shstrtab
+ELF:     Type: SHT_STRTAB
+ELF:     Flags [ (0x0)
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .symtab
+ELF:     Type: SHT_SYMTAB
+ELF:     Flags [ (0x0)
+ELF:     ]
+ELF:   }
+ELF:   Section {
+ELF:     Name: .strtab
+ELF:     Type: SHT_STRTAB
+ELF:     Flags [ (0x0)
+ELF:     ]
+ELF:   }
+ELF: ]
 
-ELF:Dynamic Symbols:
-ELF:  Name                   Type            Address        Size           FileOffset     Flags
-ELF:  common_sym             DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
-ELF:  tls_sym                DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,threadlocal
-ELF:  defined_sym            DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
-ELF:  __bss_start            ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
-ELF:  _end                   ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
-ELF:  global_func            FUNC            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
-ELF:  _edata                 ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
-ELF:  Total: {{[0-9a-f]+}}
+ELF: Symbols [
+ELF:   Symbol {
+ELF:     Name: .hash
+ELF:     Binding: Local
+ELF:     Type: Section
+ELF:     Section: .hash
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: .dynsym
+ELF:     Binding: Local
+ELF:     Type: Section
+ELF:     Section: .dynsym
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: .dynstr
+ELF:     Binding: Local
+ELF:     Type: Section
+ELF:     Section: .dynstr
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: .text
+ELF:     Binding: Local
+ELF:     Type: Section
+ELF:     Section: .text
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: .eh_frame
+ELF:     Binding: Local
+ELF:     Type: Section
+ELF:     Section: .eh_frame
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: .tdata
+ELF:     Binding: Local
+ELF:     Type: Section
+ELF:     Section: .tdata
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: .dynamic
+ELF:     Binding: Local
+ELF:     Type: Section
+ELF:     Section: .dynamic
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: .got.plt
+ELF:     Binding: Local
+ELF:     Type: Section
+ELF:     Section: .got.plt
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: .data
+ELF:     Binding: Local
+ELF:     Type: Section
+ELF:     Section: .data
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: .bss
+ELF:     Binding: Local
+ELF:     Type: Section
+ELF:     Section: .bss
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: shared.ll
+ELF:     Binding: Local
+ELF:     Type: File
+ELF:     Section: (0xFFF1)
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: local_func
+ELF:     Binding: Local
+ELF:     Type: Function
+ELF:     Section: .text
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: _GLOBAL_OFFSET_TABLE_
+ELF:     Binding: Local
+ELF:     Type: Object
+ELF:     Section: (0xFFF1)
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: _DYNAMIC
+ELF:     Binding: Local
+ELF:     Type: Object
+ELF:     Section: (0xFFF1)
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: common_sym
+ELF:     Binding: Global
+ELF:     Type: Object
+ELF:     Section: .bss
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: tls_sym
+ELF:     Binding: Global
+ELF:     Type: TLS
+ELF:     Section: .tdata
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: defined_sym
+ELF:     Binding: Global
+ELF:     Type: Object
+ELF:     Section: .data
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: __bss_start
+ELF:     Binding: Global
+ELF:     Type: None
+ELF:     Section: (0xFFF1)
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: _end
+ELF:     Binding: Global
+ELF:     Type: None
+ELF:     Section: (0xFFF1)
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: global_func
+ELF:     Binding: Global
+ELF:     Type: Function
+ELF:     Section: .text
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: _edata
+ELF:     Binding: Global
+ELF:     Type: None
+ELF:     Section: (0xFFF1)
+ELF:   }
+ELF: ]
 
-ELF:Sections:
-ELF:  Name                        Address        Size           Align          Flags
-ELF:                              {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  rodata
-ELF:  .hash                       {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  required,rodata
-ELF:  .dynsym                     {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  required,rodata
-ELF:  .dynstr                     {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  required,rodata
-ELF:  .text                       {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  text,{{(data,)?}}required
-ELF:  .eh_frame                   {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  data,required,rodata
-ELF:  .tdata                      {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  data,required
-ELF:  .dynamic                    {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  required
-ELF:  .got.plt                    {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  data,required
-ELF:  .data                       {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  data,required
-ELF:  .bss                        {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  bss,required,virtual,zeroinit
-ELF:  .shstrtab                   {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  rodata
-ELF:  .symtab                     {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  rodata
-ELF:  .strtab                     {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  rodata
-ELF:  Total: 14
-
-ELF:Dynamic section contains 9 entries
-ELF:  Tag        Type                 Name/Value
-ELF: 00000001 (NEEDED)             Shared library: [libc.so.6]
-ELF: 00000001 (NEEDED)             Shared library: [libm.so.6]
-ELF: 0000000e (SONAME)             Library soname: [libfoo.so]
-ELF: 00000004 (HASH)               {{[0-9a-f]+}}
-ELF: 00000005 (STRTAB)             {{[0-9a-f]+}}
-ELF: 00000006 (SYMTAB)             {{[0-9a-f]+}}
-ELF: 0000000a (STRSZ)              {{[0-9]+}} (bytes)
-ELF: 0000000b (SYMENT)             {{[0-9]+}} (bytes)
-ELF: 00000000 (NULL)               0x0
-ELF:  Total: 9
-
-ELF:Libraries needed:
-ELF:  libc.so.6
-ELF:  libm.so.6
-ELF:  Total: 2
+ELF: DynamicSymbols [
+ELF:   Symbol {
+ELF:     Name: common_sym
+ELF:     Binding: Global
+ELF:     Type: Object
+ELF:     Section: .bss
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: tls_sym
+ELF:     Binding: Global
+ELF:     Type: TLS
+ELF:     Section: .tdata
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: defined_sym
+ELF:     Binding: Global
+ELF:     Type: Object
+ELF:     Section: .data
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: __bss_start
+ELF:     Binding: Global
+ELF:     Type: None
+ELF:     Section: (0xFFF1)
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: _end
+ELF:     Binding: Global
+ELF:     Type: None
+ELF:     Section: (0xFFF1)
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: global_func
+ELF:     Binding: Global
+ELF:     Type: Function
+ELF:     Section: .text
+ELF:   }
+ELF:   Symbol {
+ELF:     Name: _edata
+ELF:     Binding: Global
+ELF:     Type: None
+ELF:     Section: (0xFFF1)
+ELF:   }
+ELF: ]
 
+ELF: DynamicSection [ (9 entries)
+ELF:   Tag        Type                 Name/Value
+ELF:   00000001 NEEDED               SharedLibrary (libc.so.6)
+ELF:   00000001 NEEDED               SharedLibrary (libm.so.6)
+ELF:   0000000E SONAME               LibrarySoname (libfoo.so)
+ELF:   00000004 HASH                 {{[0-9a-f]+}}
+ELF:   00000005 STRTAB               {{[0-9a-f]+}}
+ELF:   00000006 SYMTAB               {{[0-9a-f]+}}
+ELF:   0000000A STRSZ                {{[0-9]+}} (bytes)
+ELF:   0000000B SYMENT               {{[0-9]+}} (bytes)
+ELF:   00000000 NULL                 0x0
+ELF: ]
 
+ELF:      NeededLibraries [
+ELF-NEXT:  libc.so.6
+ELF-NEXT:  libm.so.6
+ELF-NEXT: ]
diff --git a/test/Object/yaml2obj-readobj.test b/test/Object/yaml2obj-readobj.test
new file mode 100644
index 000000000000..545ccc48aa4c
--- /dev/null
+++ b/test/Object/yaml2obj-readobj.test
@@ -0,0 +1,5 @@
+RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-readobj -file-headers - | FileCheck %s --check-prefix COFF-I386
+
+// COFF-I386:  Characteristics [ (0x200)
+// COFF-I386-NEXT:    IMAGE_FILE_DEBUG_STRIPPED (0x200)
+// COFF-I386-NEXT:  ]
diff --git a/test/Other/attribute-comment.ll b/test/Other/attribute-comment.ll
new file mode 100644
index 000000000000..7354e7f765fd
--- /dev/null
+++ b/test/Other/attribute-comment.ll
@@ -0,0 +1,9 @@
+; RUN: opt -S < %s | FileCheck %s -strict-whitespace
+
+; CHECK: {{^}}; Function Attrs: nounwind readnone ssp uwtable{{$}}
+; CHECK-NEXT: define void @test1() #0
+define void @test1() #0 {
+  ret void
+}
+
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" uwtable "no-frame-pointer-elim"="true" readnone "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Scripts/README.txt b/test/Scripts/README.txt
deleted file mode 100644
index b0b11050375f..000000000000
--- a/test/Scripts/README.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-This directory contains scripts which are used by the TestRunner style
-tests, which allows them to be simpler and more direct.
diff --git a/test/Scripts/coff-dump.py b/test/Scripts/coff-dump.py
deleted file mode 100755
index 36ec53932c63..000000000000
--- a/test/Scripts/coff-dump.py
+++ /dev/null
@@ -1,590 +0,0 @@
-#!/usr/bin/env python
-#===-- coff-dump.py - COFF object file dump utility-------------------------===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-#
-# COFF File Definition
-#
-
-def string_table_entry (offset):
-  return ('ptr', '+ + PointerToSymbolTable * NumberOfSymbols 18 %s' % offset, ('scalar', 'cstr', '%s'))
-
-def secname(value):
-  if value[0] == '/':
-    return string_table_entry(value[1:].rstrip('\0'))
-  else:
-    return '%s'
-
-def symname(value):
-  parts = struct.unpack("<2L", value)
-  if parts[0] == 0:
-    return string_table_entry(parts[1])
-  else:
-    return '%s'
-
-file = ('struct', [
-  ('MachineType', ('enum', '<H', '0x%X', {
-    0x0:    'IMAGE_FILE_MACHINE_UNKNOWN',
-    0x1d3:  'IMAGE_FILE_MACHINE_AM33',
-    0x8664: 'IMAGE_FILE_MACHINE_AMD64',
-    0x1c0:  'IMAGE_FILE_MACHINE_ARM',
-    0xebc:  'IMAGE_FILE_MACHINE_EBC',
-    0x14c:  'IMAGE_FILE_MACHINE_I386',
-    0x200:  'IMAGE_FILE_MACHINE_IA64',
-    0x904:  'IMAGE_FILE_MACHINE_M32R',
-    0x266:  'IMAGE_FILE_MACHINE_MIPS16',
-    0x366:  'IMAGE_FILE_MACHINE_MIPSFPU',
-    0x466:  'IMAGE_FILE_MACHINE_MIPSFPU16',
-    0x1f0:  'IMAGE_FILE_MACHINE_POWERPC',
-    0x1f1:  'IMAGE_FILE_MACHINE_POWERPCFP',
-    0x166:  'IMAGE_FILE_MACHINE_R4000',
-    0x1a2:  'IMAGE_FILE_MACHINE_SH3',
-    0x1a3:  'IMAGE_FILE_MACHINE_SH3DSP',
-    0x1a6:  'IMAGE_FILE_MACHINE_SH4',
-    0x1a8:  'IMAGE_FILE_MACHINE_SH5',
-    0x1c2:  'IMAGE_FILE_MACHINE_THUMB',
-    0x169:  'IMAGE_FILE_MACHINE_WCEMIPSV2',
-  })),
-  ('NumberOfSections',     ('scalar',  '<H', '%d')),
-  ('TimeDateStamp',        ('scalar',  '<L', '%d')),
-  ('PointerToSymbolTable', ('scalar',  '<L', '0x%0X')),
-  ('NumberOfSymbols',      ('scalar',  '<L', '%d')),
-  ('SizeOfOptionalHeader', ('scalar',  '<H', '%d')),
-  ('Characteristics',      ('flags',   '<H', '0x%x', [
-    (0x0001,      'IMAGE_FILE_RELOCS_STRIPPED',         ),
-    (0x0002,      'IMAGE_FILE_EXECUTABLE_IMAGE',        ),
-    (0x0004,      'IMAGE_FILE_LINE_NUMS_STRIPPED',      ),
-    (0x0008,      'IMAGE_FILE_LOCAL_SYMS_STRIPPED',     ),
-    (0x0010,      'IMAGE_FILE_AGGRESSIVE_WS_TRIM',      ),
-    (0x0020,      'IMAGE_FILE_LARGE_ADDRESS_AWARE',     ),
-    (0x0080,      'IMAGE_FILE_BYTES_REVERSED_LO',       ),
-    (0x0100,      'IMAGE_FILE_32BIT_MACHINE',           ),
-    (0x0200,      'IMAGE_FILE_DEBUG_STRIPPED',          ),
-    (0x0400,      'IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', ),
-    (0x0800,      'IMAGE_FILE_NET_RUN_FROM_SWAP',       ),
-    (0x1000,      'IMAGE_FILE_SYSTEM',                  ),
-    (0x2000,      'IMAGE_FILE_DLL',                     ),
-    (0x4000,      'IMAGE_FILE_UP_SYSTEM_ONLY',          ),
-    (0x8000,      'IMAGE_FILE_BYTES_REVERSED_HI',       ),
-  ])),
-  ('Sections', ('array', '1', 'NumberOfSections', ('struct', [
-    ('Name',                 ('scalar',  '<8s', secname)),
-    ('VirtualSize',          ('scalar',  '<L',  '%d'   )),
-    ('VirtualAddress',       ('scalar',  '<L',  '%d'   )),
-    ('SizeOfRawData',        ('scalar',  '<L',  '%d'   )),
-    ('PointerToRawData',     ('scalar',  '<L',  '0x%X' )),
-    ('PointerToRelocations', ('scalar',  '<L',  '0x%X' )),
-    ('PointerToLineNumbers', ('scalar',  '<L',  '0x%X' )),
-    ('NumberOfRelocations',  ('scalar',  '<H',  '%d'   )),
-    ('NumberOfLineNumbers',  ('scalar',  '<H',  '%d'   )),
-    ('Charateristics',       ('flags',   '<L',  '0x%X', [
-      (0x00000008, 'IMAGE_SCN_TYPE_NO_PAD'),
-      (0x00000020, 'IMAGE_SCN_CNT_CODE'),
-      (0x00000040, 'IMAGE_SCN_CNT_INITIALIZED_DATA'),
-      (0x00000080, 'IMAGE_SCN_CNT_UNINITIALIZED_DATA'),
-      (0x00000100, 'IMAGE_SCN_LNK_OTHER'),
-      (0x00000200, 'IMAGE_SCN_LNK_INFO'),
-      (0x00000800, 'IMAGE_SCN_LNK_REMOVE'),
-      (0x00001000, 'IMAGE_SCN_LNK_COMDAT'),
-      (0x00008000, 'IMAGE_SCN_GPREL'),
-      (0x00020000, 'IMAGE_SCN_MEM_PURGEABLE'),
-      (0x00020000, 'IMAGE_SCN_MEM_16BIT'),
-      (0x00040000, 'IMAGE_SCN_MEM_LOCKED'),
-      (0x00080000, 'IMAGE_SCN_MEM_PRELOAD'),
-      (0x00F00000, 'IMAGE_SCN_ALIGN', {
-        0x00100000: 'IMAGE_SCN_ALIGN_1BYTES',
-        0x00200000: 'IMAGE_SCN_ALIGN_2BYTES',
-        0x00300000: 'IMAGE_SCN_ALIGN_4BYTES',
-        0x00400000: 'IMAGE_SCN_ALIGN_8BYTES',
-        0x00500000: 'IMAGE_SCN_ALIGN_16BYTES',
-        0x00600000: 'IMAGE_SCN_ALIGN_32BYTES',
-        0x00700000: 'IMAGE_SCN_ALIGN_64BYTES',
-        0x00800000: 'IMAGE_SCN_ALIGN_128BYTES',
-        0x00900000: 'IMAGE_SCN_ALIGN_256BYTES',
-        0x00A00000: 'IMAGE_SCN_ALIGN_512BYTES',
-        0x00B00000: 'IMAGE_SCN_ALIGN_1024BYTES',
-        0x00C00000: 'IMAGE_SCN_ALIGN_2048BYTES',
-        0x00D00000: 'IMAGE_SCN_ALIGN_4096BYTES',
-        0x00E00000: 'IMAGE_SCN_ALIGN_8192BYTES',
-      }),
-      (0x01000000, 'IMAGE_SCN_LNK_NRELOC_OVFL'),
-      (0x02000000, 'IMAGE_SCN_MEM_DISCARDABLE'),
-      (0x04000000, 'IMAGE_SCN_MEM_NOT_CACHED'),
-      (0x08000000, 'IMAGE_SCN_MEM_NOT_PAGED'),
-      (0x10000000, 'IMAGE_SCN_MEM_SHARED'),
-      (0x20000000, 'IMAGE_SCN_MEM_EXECUTE'),
-      (0x40000000, 'IMAGE_SCN_MEM_READ'),
-      (0x80000000, 'IMAGE_SCN_MEM_WRITE'),
-    ])),
-    ('SectionData', ('ptr', 'PointerToRawData', ('blob', 'SizeOfRawData'))),
-    ('Relocations', ('ptr', 'PointerToRelocations', ('array', '0', 'NumberOfRelocations', ('struct', [
-      ('VirtualAddress',   ('scalar', '<L', '0x%X')),
-      ('SymbolTableIndex', ('scalar', '<L', '%d'  )),
-      ('Type',             ('enum', '<H', '%d', ('MachineType', {
-        0x14c: {
-          0x0000: 'IMAGE_REL_I386_ABSOLUTE',
-          0x0001: 'IMAGE_REL_I386_DIR16',
-          0x0002: 'IMAGE_REL_I386_REL16',
-          0x0006: 'IMAGE_REL_I386_DIR32',
-          0x0007: 'IMAGE_REL_I386_DIR32NB',
-          0x0009: 'IMAGE_REL_I386_SEG12',
-          0x000A: 'IMAGE_REL_I386_SECTION',
-          0x000B: 'IMAGE_REL_I386_SECREL',
-          0x000C: 'IMAGE_REL_I386_TOKEN',
-          0x000D: 'IMAGE_REL_I386_SECREL7',
-          0x0014: 'IMAGE_REL_I386_REL32',
-        },
-        0x8664: {
-          0x0000: 'IMAGE_REL_AMD64_ABSOLUTE',
-          0x0001: 'IMAGE_REL_AMD64_ADDR64',
-          0x0002: 'IMAGE_REL_AMD64_ADDR32',
-          0x0003: 'IMAGE_REL_AMD64_ADDR32NB',
-          0x0004: 'IMAGE_REL_AMD64_REL32',
-          0x0005: 'IMAGE_REL_AMD64_REL32_1',
-          0x0006: 'IMAGE_REL_AMD64_REL32_2',
-          0x0007: 'IMAGE_REL_AMD64_REL32_3',
-          0x0008: 'IMAGE_REL_AMD64_REL32_4',
-          0x0009: 'IMAGE_REL_AMD64_REL32_5',
-          0x000A: 'IMAGE_REL_AMD64_SECTION',
-          0x000B: 'IMAGE_REL_AMD64_SECREL',
-          0x000C: 'IMAGE_REL_AMD64_SECREL7',
-          0x000D: 'IMAGE_REL_AMD64_TOKEN',
-          0x000E: 'IMAGE_REL_AMD64_SREL32',
-          0x000F: 'IMAGE_REL_AMD64_PAIR',
-          0x0010: 'IMAGE_REL_AMD64_SSPAN32',
-        },
-      }))),
-      ('SymbolName',       ('ptr', '+ PointerToSymbolTable * SymbolTableIndex 18', ('scalar',  '<8s', symname)))
-    ])))),
-  ]))),
-  ('Symbols', ('ptr', 'PointerToSymbolTable', ('byte-array', '18', '* NumberOfSymbols 18',  ('struct', [
-    ('Name',                ('scalar',  '<8s', symname)),
-    ('Value',               ('scalar',  '<L',  '%d'   )),
-    ('SectionNumber',       ('scalar',  '<H',  '%d'   )),
-    ('_Type',               ('scalar',  '<H',  None   )),
-    ('SimpleType',          ('enum',    '& _Type 15',  '%d', {
-      0: 'IMAGE_SYM_TYPE_NULL',
-      1: 'IMAGE_SYM_TYPE_VOID',
-      2: 'IMAGE_SYM_TYPE_CHAR',
-      3: 'IMAGE_SYM_TYPE_SHORT',
-      4: 'IMAGE_SYM_TYPE_INT',
-      5: 'IMAGE_SYM_TYPE_LONG',
-      6: 'IMAGE_SYM_TYPE_FLOAT',
-      7: 'IMAGE_SYM_TYPE_DOUBLE',
-      8: 'IMAGE_SYM_TYPE_STRUCT',
-      9: 'IMAGE_SYM_TYPE_UNION',
-      10: 'IMAGE_SYM_TYPE_ENUM',
-      11: 'IMAGE_SYM_TYPE_MOE',
-      12: 'IMAGE_SYM_TYPE_BYTE',
-      13: 'IMAGE_SYM_TYPE_WORD',
-      14: 'IMAGE_SYM_TYPE_UINT',
-      15: 'IMAGE_SYM_TYPE_DWORD',
-    })),                                # (Type & 0xF0) >> 4
-    ('ComplexType',         ('enum',    '>> & _Type 240 4',  '%d', {
-      0: 'IMAGE_SYM_DTYPE_NULL',
-      1: 'IMAGE_SYM_DTYPE_POINTER',
-      2: 'IMAGE_SYM_DTYPE_FUNCTION',
-      3: 'IMAGE_SYM_DTYPE_ARRAY',
-    })),
-    ('StorageClass',        ('enum',    '<B',  '%d', {
-      -1:  'IMAGE_SYM_CLASS_END_OF_FUNCTION',
-      0: 'IMAGE_SYM_CLASS_NULL',
-      1: 'IMAGE_SYM_CLASS_AUTOMATIC',
-      2: 'IMAGE_SYM_CLASS_EXTERNAL',
-      3: 'IMAGE_SYM_CLASS_STATIC',
-      4: 'IMAGE_SYM_CLASS_REGISTER',
-      5: 'IMAGE_SYM_CLASS_EXTERNAL_DEF',
-      6: 'IMAGE_SYM_CLASS_LABEL',
-      7: 'IMAGE_SYM_CLASS_UNDEFINED_LABEL',
-      8: 'IMAGE_SYM_CLASS_MEMBER_OF_STRUCT',
-      9: 'IMAGE_SYM_CLASS_ARGUMENT',
-      10: 'IMAGE_SYM_CLASS_STRUCT_TAG',
-      11: 'IMAGE_SYM_CLASS_MEMBER_OF_UNION',
-      12: 'IMAGE_SYM_CLASS_UNION_TAG',
-      13: 'IMAGE_SYM_CLASS_TYPE_DEFINITION',
-      14: 'IMAGE_SYM_CLASS_UNDEFINED_STATIC',
-      15: 'IMAGE_SYM_CLASS_ENUM_TAG',
-      16: 'IMAGE_SYM_CLASS_MEMBER_OF_ENUM',
-      17: 'IMAGE_SYM_CLASS_REGISTER_PARAM',
-      18: 'IMAGE_SYM_CLASS_BIT_FIELD',
-      100: 'IMAGE_SYM_CLASS_BLOCK',
-      101: 'IMAGE_SYM_CLASS_FUNCTION',
-      102: 'IMAGE_SYM_CLASS_END_OF_STRUCT',
-      103: 'IMAGE_SYM_CLASS_FILE',
-      104: 'IMAGE_SYM_CLASS_SECTION',
-      105: 'IMAGE_SYM_CLASS_WEAK_EXTERNAL',
-      107: 'IMAGE_SYM_CLASS_CLR_TOKEN',
-    })),
-    ('NumberOfAuxSymbols',  ('scalar',  '<B',  '%d'  )),
-    ('AuxillaryData', ('blob', '* NumberOfAuxSymbols 18')),
-  ])))),
-])
-
-#
-# Definition Interpreter
-#
-
-import sys, types, struct, re
-
-Input = None
-Stack = []
-Fields = {}
-
-Indent = 0
-NewLine = True
-
-def indent():
-  global Indent
-  Indent += 1
-
-def dedent():
-  global Indent
-  Indent -= 1
-
-def write(input):
-  global NewLine
-  output = ""
-
-  for char in input:
-
-    if NewLine:
-      output += Indent * '  '
-      NewLine = False
-
-    output += char
-
-    if char == '\n':
-      NewLine = True
-
-  sys.stdout.write(output)
-
-def read(format):
-  return struct.unpack(format, Input.read(struct.calcsize(format)))
-
-def read_cstr():
-  output = ""
-  while True:
-    char = Input.read(1)
-    if len(char) == 0:
-      raise RuntimeError ("EOF while reading cstr")
-    if char == '\0':
-      break
-    output += char
-  return output
-
-def push_pos(seek_to = None):
-  Stack [0:0] = [Input.tell()]
-  if seek_to:
-    Input.seek(seek_to)
-
-def pop_pos():
-  assert(len(Stack) > 0)
-  Input.seek(Stack[0])
-  del Stack[0]
-
-def print_binary_data(size):
-  value = ""
-  while size > 0:
-    if size >= 16:
-      data = Input.read(16)
-      size -= 16
-    else:
-      data = Input.read(size)
-      size = 0
-    value += data
-    bytes = ""
-    text = ""
-    for index in xrange(16):
-      if index < len(data):
-        if index == 8:
-          bytes += "- "
-        ch = ord(data[index])
-        bytes += "%02X " % ch
-        if ch >= 0x20 and ch <= 0x7F:
-          text += data[index]
-        else:
-          text += "."
-      else:
-        if index == 8:
-          bytes += "  "
-        bytes += "   "
-
-    write("%s|%s|\n" % (bytes, text))
-  return value
-
-idlit = re.compile("[a-zA-Z_][a-zA-Z0-9_-]*")
-numlit = re.compile("[0-9]+")
-
-def read_value(expr):
-
-  input = iter(expr.split())
-
-  def eval():
-
-    token = input.next()
-
-    if expr == 'cstr':
-      return read_cstr()
-    if expr == 'true':
-      return True
-    if expr == 'false':
-      return False
-
-    if token == '+':
-      return eval() + eval()
-    if token == '-':
-      return eval() - eval()
-    if token == '*':
-      return eval() * eval()
-    if token == '/':
-      return eval() / eval()
-    if token == '&':
-      return eval() & eval()
-    if token == '|':
-      return eval() | eval()
-    if token == '>>':
-      return eval() >> eval()
-    if token == '<<':
-      return eval() << eval()
-
-    if len(token) > 1 and token[0] in ('=', '@', '<', '!', '>'):
-      val = read(expr)
-      assert(len(val) == 1)
-      return val[0]
-
-    if idlit.match(token):
-      return Fields[token]
-    if numlit.match(token):
-      return int(token)
-
-    raise RuntimeError("unexpected token %s" % repr(token))
-
-  value = eval()
-
-  try:
-    input.next()
-  except StopIteration:
-    return value
-  raise RuntimeError("unexpected input at end of expression")
-
-def write_value(format,value):
-  format_type = type(format)
-  if format_type is types.StringType:
-    write(format % value)
-  elif format_type is types.FunctionType:
-    write_value(format(value), value)
-  elif format_type is types.TupleType:
-    Fields['this'] = value
-    handle_element(format)
-  elif format_type is types.NoneType:
-    pass
-  else:
-    raise RuntimeError("unexpected type: %s" % repr(format_type))
-
-def handle_scalar(entry):
-  iformat = entry[1]
-  oformat = entry[2]
-
-  value = read_value(iformat)
-
-  write_value(oformat, value)
-
-  return value
-
-def handle_enum(entry):
-  iformat = entry[1]
-  oformat = entry[2]
-  definitions = entry[3]
-
-  value = read_value(iformat)
-
-  if type(definitions) is types.TupleType:
-    selector = read_value(definitions[0])
-    definitions = definitions[1][selector]
-
-  if value in definitions:
-    description = definitions[value]
-  else:
-    description = "unknown"
-
-  write("%s (" % description)
-  write_value(oformat, value)
-  write(")")
-
-  return value
-
-def handle_flags(entry):
-  iformat = entry[1]
-  oformat = entry[2]
-  definitions = entry[3]
-
-  value = read_value(iformat)
-
-  write_value(oformat, value)
-
-  indent()
-  for entry in definitions:
-    mask = entry[0]
-    name = entry[1]
-    if len (entry) == 3:
-      map = entry[2]
-      selection = value & mask
-      if selection in map:
-        write("\n%s" % map[selection])
-      else:
-        write("\n%s <%d>" % (name, selection))
-    elif len(entry) == 2:
-      if value & mask != 0:
-        write("\n%s" % name)
-  dedent()
-
-  return value
-
-def handle_struct(entry):
-  global Fields
-  members = entry[1]
-
-  newFields = {}
-
-  write("{\n");
-  indent()
-
-  for member in members:
-    name = member[0]
-    type = member[1]
-
-    if name[0] != "_":
-      write("%s = " % name.ljust(24))
-
-    value = handle_element(type)
-
-    if name[0] != "_":
-      write("\n")
-
-    Fields[name] = value
-    newFields[name] = value
-
-  dedent()
-  write("}")
-
-  return newFields
-
-def handle_array(entry):
-  start_index = entry[1]
-  length = entry[2]
-  element = entry[3]
-
-  newItems = []
-
-  write("[\n")
-  indent()
-
-  start_index = read_value(start_index)
-  value = read_value(length)
-
-  for index in xrange(value):
-    write("%d = " % (index + start_index))
-    value = handle_element(element)
-    write("\n")
-    newItems.append(value)
-
-  dedent()
-  write("]")
-
-  return newItems
-
-def handle_byte_array(entry):
-  ent_size = entry[1]
-  length = entry[2]
-  element = entry[3]
-
-  newItems = []
-
-  write("[\n")
-  indent()
-
-  item_size = read_value(ent_size)
-  value = read_value(length)
-  end_of_array = Input.tell() + value
-
-  prev_loc = Input.tell()
-  index = 0
-  while Input.tell() < end_of_array:
-    write("%d = " % index)
-    value = handle_element(element)
-    write("\n")
-    newItems.append(value)
-    index += (Input.tell() - prev_loc) / item_size
-    prev_loc = Input.tell()
-
-  dedent()
-  write("]")
-
-  return newItems
-
-def handle_ptr(entry):
-  offset = entry[1]
-  element = entry[2]
-
-  value = None
-  offset = read_value(offset)
-
-  if offset != 0:
-
-    push_pos(offset)
-
-    value = handle_element(element)
-
-    pop_pos()
-
-  else:
-    write("None")
-
-  return value
-
-def handle_blob(entry):
-  length = entry[1]
-
-  write("\n")
-  indent()
-
-  value = print_binary_data(read_value(length))
-
-  dedent()
-
-  return value
-
-def handle_element(entry):
-  handlers = {
-    'struct':      handle_struct,
-    'scalar':      handle_scalar,
-    'enum':        handle_enum,
-    'flags':       handle_flags,
-    'ptr':         handle_ptr,
-    'blob':        handle_blob,
-    'array':       handle_array,
-    'byte-array':  handle_byte_array,
-  }
-
-  if not entry[0] in handlers:
-    raise RuntimeError ("unexpected type '%s'" % str (entry[0]))
-
-  return handlers[entry[0]](entry)
-
-if len(sys.argv) <= 1 or sys.argv[1] == '-':
-  import StringIO
-  Input = StringIO.StringIO(sys.stdin.read())
-else:
-  Input = open (sys.argv[1], "rb")
-
-try:
-  handle_element(file)
-finally:
-  Input.close()
-  Input = None
diff --git a/test/Scripts/coff-dump.py.bat b/test/Scripts/coff-dump.py.bat
deleted file mode 100755
index 56428e1a605c..000000000000
--- a/test/Scripts/coff-dump.py.bat
+++ /dev/null
@@ -1,7 +0,0 @@
-@echo off
-
-@rem We need to set -u to treat stdin as binary. Python 3 has support for doing
-@rem this in code, but I haven't found a way to do this in 2.6 yet.
-
-%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\coff-dump.py %1 %2 %3 %4 %5 %6 %7 %8 %9
-
diff --git a/test/Scripts/common_dump.py b/test/Scripts/common_dump.py
deleted file mode 100644
index fd58993c0584..000000000000
--- a/test/Scripts/common_dump.py
+++ /dev/null
@@ -1,48 +0,0 @@
-def dataToHex(d):
-    """ Convert the raw data in 'd' to an hex string with a space every 4 bytes.
-    """
-    bytes = []
-    for i,c in enumerate(d):
-        byte = ord(c)
-        hex_byte = hex(byte)[2:]
-        if byte <= 0xf:
-            hex_byte = '0' + hex_byte
-        if i % 4 == 3:
-            hex_byte += ' '
-        bytes.append(hex_byte)
-    return ''.join(bytes).strip()
-
-def dataToHexUnified(d):
-    """ Convert the raw data in 'd' to an hex string with a space every 4 bytes.
-    Each 4byte number is prefixed with 0x for easy sed/rx
-    Fixme: convert all MC tests to use this routine instead of the above
-    """
-    bytes = []
-    for i,c in enumerate(d):
-        byte = ord(c)
-        hex_byte = hex(byte)[2:]
-        if byte <= 0xf:
-            hex_byte = '0' + hex_byte
-        if i % 4 == 0:
-            hex_byte = '0x' + hex_byte
-        if i % 4 == 3:
-            hex_byte += ' '
-        bytes.append(hex_byte)
-    return ''.join(bytes).strip()
-
-
-def HexDump(valPair):
-    """
-    1. do not print 'L'
-    2. Handle negatives and large numbers by mod (2^numBits)
-    3. print fixed length, prepend with zeros.
-       Length is exactly 2+(numBits/4)
-    4. Do print 0x Why?
-       so that they can be easily distinguished using sed/rx
-    """
-    val, numBits = valPair
-    assert 0 <= val < (1 << numBits)
-
-    val = val & (( 1 << numBits) - 1)
-    newFmt = "0x%0" + "%d" % (numBits / 4) + "x"
-    return newFmt % val
diff --git a/test/Scripts/elf-dump b/test/Scripts/elf-dump
deleted file mode 100755
index 61342d8f98e5..000000000000
--- a/test/Scripts/elf-dump
+++ /dev/null
@@ -1,285 +0,0 @@
-#!/usr/bin/env python
-
-import struct
-import sys
-import StringIO
-
-import common_dump
-
-class Reader:
-    def __init__(self, path):
-        if path == "-":
-            # Snarf all the data so we can seek.
-            self.file = StringIO.StringIO(sys.stdin.read())
-        else:
-            self.file = open(path, "rb")
-        self.isLSB = None
-        self.is64Bit = None
-        self.isN64 = False
-
-    def seek(self, pos):
-        self.file.seek(pos)
-
-    def read(self, N):
-        data = self.file.read(N)
-        if len(data) != N:
-            raise ValueError, "Out of data!"
-        return data
-
-    def read8(self):
-        return (ord(self.read(1)), 8)
-
-    def read16(self):
-        return (struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0], 16)
-
-    def read32(self):
-        return (struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0], 32)
-
-    def read64(self):
-        return (struct.unpack('><'[self.isLSB] + 'Q', self.read(8))[0], 64)
-
-    def readWord(self):
-        if self.is64Bit:
-            return self.read64()
-        else:
-            return self.read32()
-
-class StringTable:
-    def __init__(self, strings):
-       self.string_table = strings
-
-    def __getitem__(self, index):
-       end = self.string_table.index('\x00', index)
-       return self.string_table[index:end]
-
-class ProgramHeader:
-    def __init__(self, f):
-        self.p_type = f.read32()
-        if f.is64Bit:
-            self.p_flags = f.read32()
-        self.p_offset = f.readWord()
-        self.p_vaddr = f.readWord()
-        self.p_paddr = f.readWord()
-        self.p_filesz = f.readWord()
-        self.p_memsz = f.readWord()
-        if not f.is64Bit:
-            self.p_flags = f.read32()
-        self.p_align = f.readWord()
-
-    def dump(self):
-        print "  (('p_type', %s)" % common_dump.HexDump(self.p_type) 
-        print "   ('p_flags', %s)" % common_dump.HexDump(self.p_flags) 
-        print "   ('p_offset', %s)" % common_dump.HexDump(self.p_offset) 
-        print "   ('p_vaddr', %s)" % common_dump.HexDump(self.p_vaddr) 
-        print "   ('p_paddr', %s)" % common_dump.HexDump(self.p_paddr) 
-        print "   ('p_filesz', %s)" % common_dump.HexDump(self.p_filesz) 
-        print "   ('p_memsz', %s)" % common_dump.HexDump(self.p_memsz) 
-        print "   ('p_align', %s)" % common_dump.HexDump(self.p_align) 
-        print "  ),"
-
-class Section:
-    def __init__(self, f):
-        self.sh_name = f.read32()
-        self.sh_type = f.read32()
-        self.sh_flags = f.readWord()
-        self.sh_addr = f.readWord()
-        self.sh_offset = f.readWord()
-        self.sh_size = f.readWord()
-        self.sh_link = f.read32()
-        self.sh_info = f.read32()
-        self.sh_addralign = f.readWord()
-        self.sh_entsize = f.readWord()
-
-    def dump(self, shstrtab, f, strtab, dumpdata):
-        print "  (('sh_name', %s)" % common_dump.HexDump(self.sh_name), "# %r" % shstrtab[self.sh_name[0]]
-        print "   ('sh_type', %s)" % common_dump.HexDump(self.sh_type)
-        print "   ('sh_flags', %s)" % common_dump.HexDump(self.sh_flags)
-        print "   ('sh_addr', %s)" % common_dump.HexDump(self.sh_addr)
-        print "   ('sh_offset', %s)" % common_dump.HexDump(self.sh_offset)
-        print "   ('sh_size', %s)" % common_dump.HexDump(self.sh_size)
-        print "   ('sh_link', %s)" % common_dump.HexDump(self.sh_link)
-        print "   ('sh_info', %s)" % common_dump.HexDump(self.sh_info)
-        print "   ('sh_addralign', %s)" % common_dump.HexDump(self.sh_addralign)
-        print "   ('sh_entsize', %s)" % common_dump.HexDump(self.sh_entsize)
-        if self.sh_type[0] == 2: # SHT_SYMTAB
-            print "   ('_symbols', ["
-            dumpSymtab(f, self, strtab)
-            print "   ])"
-        elif self.sh_type[0] == 4 or self.sh_type[0] == 9: # SHT_RELA / SHT_REL
-            print "   ('_relocations', ["
-            dumpRel(f, self, self.sh_type[0] == 4)
-            print "   ])"
-        elif dumpdata:
-            f.seek(self.sh_offset[0])
-            if self.sh_type != 8: # != SHT_NOBITS
-                data = f.read(self.sh_size[0])
-                print "   ('_section_data', '%s')" % common_dump.dataToHex(data)
-            else:
-                print "   ('_section_data', '')" 
-        print "  ),"
-
-def dumpSymtab(f, section, strtab):
-    entries = section.sh_size[0] // section.sh_entsize[0]
-
-    for index in range(entries):
-        f.seek(section.sh_offset[0] + index * section.sh_entsize[0])
-        print "    # Symbol %s" % index
-        name = f.read32()
-        print "    (('st_name', %s)" % common_dump.HexDump(name), "# %r" % strtab[name[0]]
-        if not f.is64Bit:
-            print "     ('st_value', %s)" % common_dump.HexDump(f.read32())
-            print "     ('st_size', %s)" % common_dump.HexDump(f.read32())
-        st_info = f.read8()[0]
-        st_bind = (st_info >> 4, 4)
-        st_type = (st_info & 0xf, 4)
-        print "     ('st_bind', %s)" % common_dump.HexDump(st_bind)
-        print "     ('st_type', %s)" % common_dump.HexDump(st_type)
-        print "     ('st_other', %s)" % common_dump.HexDump(f.read8())
-        print "     ('st_shndx', %s)" % common_dump.HexDump(f.read16())
-        if f.is64Bit:
-            print "     ('st_value', %s)" % common_dump.HexDump(f.read64())
-            print "     ('st_size', %s)" % common_dump.HexDump(f.read64())
-        print "    ),"
-
-def dumpRel(f, section, dumprela = False):
-    entries = section.sh_size[0] // section.sh_entsize[0]
-
-    for index in range(entries):
-        f.seek(section.sh_offset[0] + index * section.sh_entsize[0])
-        print "    # Relocation %s" % index
-        print "    (('r_offset', %s)" % common_dump.HexDump(f.readWord())
-
-        if f.isN64:
-            r_sym =   f.read32()
-            r_ssym =  f.read8()
-            r_type3 = f.read8()
-            r_type2 = f.read8()
-            r_type =  f.read8()
-            print "     ('r_sym', %s)" % common_dump.HexDump(r_sym)
-            print "     ('r_ssym', %s)" % common_dump.HexDump(r_ssym)
-            print "     ('r_type3', %s)" % common_dump.HexDump(r_type3)
-            print "     ('r_type2', %s)" % common_dump.HexDump(r_type2)
-            print "     ('r_type', %s)" % common_dump.HexDump(r_type)
-        else:
-            r_info = f.readWord()[0]
-            if f.is64Bit:
-                r_sym = (r_info >> 32, 32)
-                r_type = (r_info & 0xffffffff, 32)
-            else:
-                r_sym = (r_info >> 8, 24)
-                r_type = (r_info & 0xff, 8)
-            print "     ('r_sym', %s)" % common_dump.HexDump(r_sym)
-            print "     ('r_type', %s)" % common_dump.HexDump(r_type)
-        if dumprela:
-            print "     ('r_addend', %s)" % common_dump.HexDump(f.readWord())
-        print "    ),"
-
-def dumpELF(path, opts):
-    f = Reader(path)
-
-    magic = f.read(4)
-    assert magic == '\x7FELF'
-
-    fileclass = f.read8()
-    if fileclass[0] == 1: # ELFCLASS32
-        f.is64Bit = False
-    elif fileclass[0] == 2: # ELFCLASS64
-        f.is64Bit = True
-    else:
-        raise ValueError, "Unknown file class %s" % common_dump.HexDump(fileclass)
-    print "('e_indent[EI_CLASS]', %s)" % common_dump.HexDump(fileclass)
-
-    byteordering = f.read8()
-    if byteordering[0] == 1: # ELFDATA2LSB
-        f.isLSB = True
-    elif byteordering[0] == 2: # ELFDATA2MSB
-        f.isLSB = False
-    else:
-        raise ValueError, "Unknown byte ordering %s" % common_dump.HexDump(byteordering)
-    print "('e_indent[EI_DATA]', %s)" % common_dump.HexDump(byteordering)
-
-    print "('e_indent[EI_VERSION]', %s)" % common_dump.HexDump(f.read8())
-    print "('e_indent[EI_OSABI]', %s)" % common_dump.HexDump(f.read8())
-    print "('e_indent[EI_ABIVERSION]', %s)" % common_dump.HexDump(f.read8())
-
-    f.seek(16) # Seek to end of e_ident.
-
-    print "('e_type', %s)" % common_dump.HexDump(f.read16())
-
-    # Does any other architecture use N64?
-    e_machine = f.read16()
-    if e_machine[0] == 0x0008 and f.is64Bit: # EM_MIPS && 64 bit
-        f.isN64 = True 
-    
-    print "('e_machine', %s)" % common_dump.HexDump(e_machine)
-    print "('e_version', %s)" % common_dump.HexDump(f.read32())
-    print "('e_entry', %s)" % common_dump.HexDump(f.readWord())
-    e_phoff = f.readWord()
-    print "('e_phoff', %s)" % common_dump.HexDump(e_phoff)
-    e_shoff = f.readWord()
-    print "('e_shoff', %s)" % common_dump.HexDump(e_shoff)
-    print "('e_flags', %s)" % common_dump.HexDump(f.read32())
-    print "('e_ehsize', %s)" % common_dump.HexDump(f.read16())
-    e_phentsize = f.read16()
-    print "('e_phentsize', %s)" % common_dump.HexDump(e_phentsize)
-    e_phnum = f.read16()
-    print "('e_phnum', %s)" % common_dump.HexDump(e_phnum)
-    e_shentsize = f.read16()
-    print "('e_shentsize', %s)" % common_dump.HexDump(e_shentsize)
-    e_shnum = f.read16()
-    print "('e_shnum', %s)" % common_dump.HexDump(e_shnum)
-    e_shstrndx = f.read16()
-    print "('e_shstrndx', %s)" % common_dump.HexDump(e_shstrndx)
-    
-
-    # Read all section headers
-    sections = []
-    for index in range(e_shnum[0]):
-        f.seek(e_shoff[0] + index * e_shentsize[0])
-        s = Section(f)
-        sections.append(s)
-
-    # Read .shstrtab so we can resolve section names
-    f.seek(sections[e_shstrndx[0]].sh_offset[0])
-    shstrtab = StringTable(f.read(sections[e_shstrndx[0]].sh_size[0]))
-
-    # Get the symbol string table
-    strtab = None
-    for section in sections:
-        if shstrtab[section.sh_name[0]] == ".strtab":
-            f.seek(section.sh_offset[0])
-            strtab = StringTable(f.read(section.sh_size[0]))
-            break
-
-    print "('_sections', ["
-    for index in range(e_shnum[0]):
-        print "  # Section %s" % index
-        sections[index].dump(shstrtab, f, strtab, opts.dumpSectionData)
-    print "])"
-
-    # Read all  program headers
-    headers = []
-    for index in range(e_phnum[0]):
-        f.seek(e_phoff[0] + index * e_phentsize[0])
-        h = ProgramHeader(f)
-        headers.append(h)
-
-    print "('_ProgramHeaders', ["
-    for index in range(e_phnum[0]):
-        print "  # Program Header %s" % index
-        headers[index].dump()
-    print "])"
-
-if __name__ == "__main__":
-    from optparse import OptionParser, OptionGroup
-    parser = OptionParser("usage: %prog [options] {files}")
-    parser.add_option("", "--dump-section-data", dest="dumpSectionData",
-                      help="Dump the contents of sections",
-                      action="store_true", default=False)
-    (opts, args) = parser.parse_args()
-
-    if not args:
-        args.append('-')
-
-    for arg in args:
-        dumpELF(arg, opts)
diff --git a/test/Scripts/elf-dump.bat b/test/Scripts/elf-dump.bat
deleted file mode 100755
index 9c708083b302..000000000000
--- a/test/Scripts/elf-dump.bat
+++ /dev/null
@@ -1,7 +0,0 @@
-@echo off
-
-@rem We need to set -u to treat stdin as binary. Python 3 has support for doing
-@rem this in code, but I haven't found a way to do this in 2.6 yet.
-
-%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\elf-dump %1 %2 %3 %4 %5 %6 %7 %8 %9
-
diff --git a/test/Scripts/ignore b/test/Scripts/ignore
deleted file mode 100755
index 865ae4df1bd4..000000000000
--- a/test/Scripts/ignore
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-#
-# Program: ignore
-#
-# Synopsis: Ignore the result code of the command and always return 0
-#
-# Syntax:   ignore command <arguments>
-
-"$@" || exit 0 && exit 0
-exit 0
diff --git a/test/Scripts/macho-dumpx b/test/Scripts/macho-dumpx
deleted file mode 100755
index 71e06d837b90..000000000000
--- a/test/Scripts/macho-dumpx
+++ /dev/null
@@ -1,294 +0,0 @@
-#!/usr/bin/env python
-
-import struct
-import sys
-import StringIO
-
-import common_dump
-
-class Reader:
-   def __init__(self, path):
-      if path == '-':
-         # Snarf all the data so we can seek.
-         self.file = StringIO.StringIO(sys.stdin.read())
-      else:
-         self.file = open(path,'rb')
-      self.isLSB = None
-      self.is64Bit = None
-
-      self.string_table = None
-
-   def tell(self):
-      return self.file.tell()
-
-   def seek(self, pos):
-      self.file.seek(pos)
-
-   def read(self, N):
-      data = self.file.read(N)
-      if len(data) != N:
-         raise ValueError,"Out of data!"
-      return data
-
-   def read8(self):
-      return ord(self.read(1))
-
-   def read16(self):
-      return struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0]
-
-   def read32(self):
-      # Force to 32-bit, if possible; otherwise these might be long ints on a
-      # big-endian platform. FIXME: Why???
-      Value = struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0]
-      return int(Value)
-
-   def read64(self):
-      Value = struct.unpack('><'[self.isLSB] + 'Q', self.read(8))[0]
-      if Value == int(Value):
-         Value = int(Value)
-      return Value
-
-   def registerStringTable(self, strings):
-      if self.string_table is not None:
-         raise ValueError,"%s: warning: multiple string tables" % sys.argv[0]
-
-      self.string_table = strings
-
-   def getString(self, index):
-      if self.string_table is None:
-         raise ValueError,"%s: warning: no string table registered" % sys.argv[0]
-      
-      end = self.string_table.index('\x00', index)
-      return self.string_table[index:end]
-
-def dumpmacho(path, opts):
-   f = Reader(path)
-
-   magic = f.read(4)
-   if magic == '\xFE\xED\xFA\xCE':
-      f.isLSB, f.is64Bit = False, False
-   elif magic == '\xCE\xFA\xED\xFE':
-      f.isLSB, f.is64Bit = True, False
-   elif magic == '\xFE\xED\xFA\xCF':
-      f.isLSB, f.is64Bit = False, True
-   elif magic == '\xCF\xFA\xED\xFE':
-      f.isLSB, f.is64Bit = True, True
-   else:
-      raise ValueError,"Not a Mach-O object file: %r (bad magic)" % path
-
-   print "('cputype', %r)" % f.read32()
-   print "('cpusubtype', %r)" % f.read32()
-   filetype = f.read32()
-   print "('filetype', %r)" % filetype
-   
-   numLoadCommands = f.read32()
-   print "('num_load_commands', %r)" % numLoadCommands
-
-   loadCommandsSize = f.read32()
-   print "('load_commands_size', %r)" % loadCommandsSize
-
-   print "('flag', %r)" % f.read32()
-
-   if f.is64Bit:
-      print "('reserved', %r)" % f.read32()
-
-   start = f.tell()
-
-   print "('load_commands', ["
-   for i in range(numLoadCommands):
-      dumpLoadCommand(f, i, opts)
-   print "])"
-
-   if f.tell() - start != loadCommandsSize:
-      raise ValueError,"%s: warning: invalid load commands size: %r" % (
-         sys.argv[0], loadCommandsSize)
-
-def dumpLoadCommand(f, i, opts):
-   start = f.tell()
-
-   print "  # Load Command %r" % i
-   cmd = f.read32()
-   print " (('command', %r)" % cmd
-   cmdSize = f.read32()
-   print "  ('size', %r)" % cmdSize
-
-   if cmd == 1:
-      dumpSegmentLoadCommand(f, opts, False)
-   elif cmd == 2:
-      dumpSymtabCommand(f, opts)
-   elif cmd == 11:
-      dumpDysymtabCommand(f, opts)
-   elif cmd == 25:
-      dumpSegmentLoadCommand(f, opts, True)
-   elif cmd == 27:
-      import uuid
-      print "  ('uuid', %s)" % uuid.UUID(bytes=f.read(16))
-   else:
-      print >>sys.stderr,"%s: warning: unknown load command: %r" % (
-         sys.argv[0], cmd)
-      f.read(cmdSize - 8)
-   print " ),"
-
-   if f.tell() - start != cmdSize:
-      raise ValueError,"%s: warning: invalid load command size: %r" % (
-         sys.argv[0], cmdSize)
-
-def dumpSegmentLoadCommand(f, opts, is64Bit):
-   print "  ('segment_name', %r)" % f.read(16) 
-   if is64Bit:
-      print "  ('vm_addr', %r)" % f.read64()
-      print "  ('vm_size', %r)" % f.read64()
-      print "  ('file_offset', %r)" % f.read64()
-      print "  ('file_size', %r)" % f.read64()
-   else:
-      print "  ('vm_addr', %r)" % f.read32()
-      print "  ('vm_size', %r)" % f.read32()
-      print "  ('file_offset', %r)" % f.read32()
-      print "  ('file_size', %r)" % f.read32()
-   print "  ('maxprot', %r)" % f.read32()
-   print "  ('initprot', %r)" % f.read32()
-   numSections = f.read32()
-   print "  ('num_sections', %r)" % numSections
-   print "  ('flags', %r)" % f.read32()
-
-   print "  ('sections', ["
-   for i in range(numSections):
-      dumpSection(f, i, opts, is64Bit)
-   print "  ])"
-
-def dumpSymtabCommand(f, opts):
-   symoff = f.read32()
-   print "  ('symoff', %r)" % symoff
-   nsyms = f.read32()
-   print "  ('nsyms', %r)" % nsyms
-   stroff = f.read32()
-   print "  ('stroff', %r)" % stroff
-   strsize = f.read32()
-   print "  ('strsize', %r)" % strsize
-
-   prev_pos = f.tell()
-
-   f.seek(stroff)
-   string_data = f.read(strsize)
-   print "  ('_string_data', %r)" % string_data
-
-   f.registerStringTable(string_data)
-
-   f.seek(symoff)
-   print "  ('_symbols', ["
-   for i in range(nsyms):
-      dumpNlist32(f, i, opts)
-   print "  ])"
-      
-   f.seek(prev_pos)
-
-def dumpNlist32(f, i, opts):
-   print "    # Symbol %r" % i
-   n_strx = f.read32()
-   print "   (('n_strx', %r)" % n_strx
-   n_type = f.read8()
-   print "    ('n_type', %#x)" % n_type
-   n_sect = f.read8()
-   print "    ('n_sect', %r)" % n_sect
-   n_desc = f.read16()
-   print "    ('n_desc', %r)" % n_desc
-   if f.is64Bit:
-      n_value = f.read64()
-      print "    ('n_value', %r)" % n_value
-   else:
-      n_value = f.read32()
-      print "    ('n_value', %r)" % n_value
-   print "    ('_string', %r)" % f.getString(n_strx)
-   print "   ),"
-
-def dumpDysymtabCommand(f, opts):   
-   print "  ('ilocalsym', %r)" % f.read32()
-   print "  ('nlocalsym', %r)" % f.read32()
-   print "  ('iextdefsym', %r)" % f.read32()
-   print "  ('nextdefsym', %r)" % f.read32()
-   print "  ('iundefsym', %r)" % f.read32()
-   print "  ('nundefsym', %r)" % f.read32()
-   print "  ('tocoff', %r)" % f.read32()
-   print "  ('ntoc', %r)" % f.read32()
-   print "  ('modtaboff', %r)" % f.read32()
-   print "  ('nmodtab', %r)" % f.read32()
-   print "  ('extrefsymoff', %r)" % f.read32()
-   print "  ('nextrefsyms', %r)" % f.read32()
-   indirectsymoff = f.read32()
-   print "  ('indirectsymoff', %r)" % indirectsymoff
-   nindirectsyms = f.read32()
-   print "  ('nindirectsyms', %r)" % nindirectsyms
-   print "  ('extreloff', %r)" % f.read32()
-   print "  ('nextrel', %r)" % f.read32()
-   print "  ('locreloff', %r)" % f.read32()
-   print "  ('nlocrel', %r)" % f.read32()
-
-   prev_pos = f.tell()
-
-   f.seek(indirectsymoff)
-   print "  ('_indirect_symbols', ["
-   for i in range(nindirectsyms):
-      print "    # Indirect Symbol %r" % i
-      print "    (('symbol_index', %#x),)," % f.read32()
-   print "  ])"
-      
-   f.seek(prev_pos)
-
-def dumpSection(f, i, opts, is64Bit):
-   print "    # Section %r" % i
-   print "   (('section_name', %r)" % f.read(16)
-   print "    ('segment_name', %r)" % f.read(16)
-   if is64Bit:
-      print "    ('address', %r)" % f.read64()
-      size = f.read64()
-      print "    ('size', %r)" % size
-   else:
-      print "    ('address', %r)" % f.read32()
-      size = f.read32()
-      print "    ('size', %r)" % size
-   offset = f.read32()
-   print "    ('offset', %r)" % offset
-   print "    ('alignment', %r)" % f.read32()   
-   reloc_offset = f.read32()
-   print "    ('reloc_offset', %r)" % reloc_offset
-   num_reloc = f.read32()
-   print "    ('num_reloc', %r)" % num_reloc
-   print "    ('flags', %#x)" % f.read32()
-   print "    ('reserved1', %r)" % f.read32()
-   print "    ('reserved2', %r)" % f.read32()
-   if is64Bit:
-      print "    ('reserved3', %r)" % f.read32()
-   print "   ),"
-
-   prev_pos = f.tell()
-
-   f.seek(reloc_offset)
-   print "  ('_relocations', ["
-   for i in range(num_reloc):
-      print "    # Relocation %r" % i
-      print "    (('word-0', %#x)," % f.read32()
-      print "     ('word-1', %#x))," % f.read32()
-   print "  ])"
-
-   if opts.dumpSectionData:
-      f.seek(offset)
-      print "  ('_section_data', '%s')" % common_dump.dataToHex(f.read(size))
-      
-   f.seek(prev_pos)
-   
-def main():
-    from optparse import OptionParser, OptionGroup
-    parser = OptionParser("usage: %prog [options] {files}")
-    parser.add_option("", "--dump-section-data", dest="dumpSectionData",
-                      help="Dump the contents of sections",
-                      action="store_true", default=False)    
-    (opts, args) = parser.parse_args()
-
-    if not args:
-       args.append('-')
-
-    for arg in args:
-       dumpmacho(arg, opts)
-
-if __name__ == '__main__':
-   main()
diff --git a/test/Scripts/macho-dumpx.bat b/test/Scripts/macho-dumpx.bat
deleted file mode 100644
index 81484f67d702..000000000000
--- a/test/Scripts/macho-dumpx.bat
+++ /dev/null
@@ -1,7 +0,0 @@
-@echo off
-
-@rem We need to set -u to treat stdin as binary. Python 3 has support for doing
-@rem this in code, but I haven't found a way to do this in 2.6 yet.
-
-%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\macho-dump %1 %2 %3 %4 %5 %6 %7 %8 %9
-
diff --git a/test/TableGen/Dag.td b/test/TableGen/Dag.td
index 40399a48ee2f..14d616b52173 100644
--- a/test/TableGen/Dag.td
+++ b/test/TableGen/Dag.td
@@ -70,3 +70,15 @@ def VAL4 : bar<foo2, somedef2>;
 // CHECK-NEXT:  dag Dag3 = (somedef2 2);
 // CHECK-NEXT:  NAME = ?
 // CHECK-NEXT: }
+
+def VAL5 : bar<foo2, somedef2> {
+  // Named operands.
+  let Dag1 = (somedef1 1:$name1);
+
+  // Name, no node.
+  let Dag2 = (somedef2 $name2, $name3);
+}
+
+// CHECK:      def VAL5 {
+// CHECK-NEXT:  dag Dag1 = (somedef1 1:$name1);
+// CHECK-NEXT:  dag Dag2 = (somedef2 ?:$name2, ?:$name3);
diff --git a/test/Transforms/ArgumentPromotion/crash.ll b/test/Transforms/ArgumentPromotion/crash.ll
index f70d8de60ee3..5e1a0370dbb1 100644
--- a/test/Transforms/ArgumentPromotion/crash.ll
+++ b/test/Transforms/ArgumentPromotion/crash.ll
@@ -1,7 +1,5 @@
-; rdar://7879828
 ; RUN: opt -inline -argpromotion < %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
+; rdar://7879828
 
 define void @foo() {
   invoke void @foo2()
@@ -11,6 +9,8 @@ if.end432:
   unreachable
 
 for.end520: 
+  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+           cleanup
   unreachable
 }
 
@@ -57,3 +57,5 @@ init:
   %4 = call i32 @"clay_assign(Chain, Chain)"(%0* %3, %0* %1)
   ret i32 0
 }
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/BBVectorize/X86/loop1.ll b/test/Transforms/BBVectorize/X86/loop1.ll
index 493f23b09853..bbf565d1cc7f 100644
--- a/test/Transforms/BBVectorize/X86/loop1.ll
+++ b/test/Transforms/BBVectorize/X86/loop1.ll
@@ -34,7 +34,15 @@ for.body:                                         ; preds = %for.body, %entry
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 10
   br i1 %exitcond, label %for.end, label %for.body
-; CHECK-NOT: <2 x double>
+; CHECK: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: fadd <2 x double>
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: fadd <2 x double>
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: fmul <2 x double>
+
 ; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
 ; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
 ; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
diff --git a/test/Transforms/BBVectorize/X86/simple.ll b/test/Transforms/BBVectorize/X86/simple.ll
index 0113e38bb1c9..8abfa5f8bd29 100644
--- a/test/Transforms/BBVectorize/X86/simple.ll
+++ b/test/Transforms/BBVectorize/X86/simple.ll
@@ -12,7 +12,11 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) {
 	%R  = fmul double %Z1, %Z2
 	ret double %R
 ; CHECK: @test1
-; CHECK-NOT: fmul <2 x double>
+; CHECK: fsub <2 x double>
+; CHECK: fmul <2 x double>
+; CHECK: fadd <2 x double>
+; CHECK: extract
+; CHECK: extract
 ; CHECK: ret double %R
 }
 
@@ -63,7 +67,12 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
 	%R  = fmul double %Z1, %Z2
 	ret double %R
 ; CHECK: @test2
-; CHECK-NOT: fmul <2 x double>
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: fsub <2 x double>
+; CHECK: fmul <2 x double>
 ; CHECK: ret double %R
 }
 
@@ -80,7 +89,15 @@ define double @test4(double %A1, double %A2, double %B1, double %B2) {
 	%R  = fmul double %Z1, %Z2
 	ret double %R
 ; CHECK: @test4
-; CHECK-NOT: fmul <2 x double>
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: fsub <2 x double>
+; CHECK: fmul <2 x double>
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: fadd <2 x double>
 ; CHECK: ret double %R
 }
 
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index 351925b9e443..24448b7009ed 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -36,17 +36,17 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !8, metadata !9}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
+!5 = metadata !{i32 786478, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
 !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
+!8 = metadata !{i32 786478, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
 
 ; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz
 
-!9 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", metadata !6, i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
+!9 = metadata !{i32 786478, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", metadata !6, i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
 
 ; CHECK: metadata !"dead_arg"{{.*}}void ()* @_ZN12_GLOBAL__N_18dead_argEPv
 
diff --git a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
similarity index 91%
rename from test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
rename to test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
index 16791e20f4a8..d414b73524fd 100644
--- a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
+++ b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+; RUN: opt < %s -functionattrs -S | FileCheck %s
 
 ; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) #0
 declare i8* @fopen(i8*, i8*)
diff --git a/test/Transforms/FunctionAttrs/annotate-1.ll b/test/Transforms/FunctionAttrs/annotate-1.ll
new file mode 100644
index 000000000000..ae77380acc4a
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/annotate-1.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+
+declare i8* @fopen(i8*, i8*)
+; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) [[G0:#[0-9]]] 
+
+declare i8 @strlen(i8*)
+; CHECK: declare i8 @strlen(i8* nocapture) [[G1:#[0-9]]]
+
+declare i32* @realloc(i32*, i32)
+; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) [[G0]]
+
+; Test deliberately wrong declaration
+
+declare i32 @strcpy(...)
+; CHECK: declare i32 @strcpy(...)
+
+; CHECK: attributes [[G0]] = { nounwind }
+; CHECK: attributes [[G1]] = { nounwind readonly }
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
new file mode 100644
index 000000000000..d1bce728e08c
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -0,0 +1,27 @@
+; RUN: echo '!9 = metadata !{metadata !"%T/linkagename.ll", metadata !0}' > %t1
+; RUN: cat %s %t1 > %t2
+; RUN: opt -insert-gcov-profiling -disable-output < %t2
+; RUN: grep _Z3foov %T/linkagename.gcno
+; RUN: rm %T/linkagename.gcno
+
+; REQUIRES: shell
+
+define void @_Z3foov() {
+entry:
+  ret void, !dbg !8
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.gcov = !{!9}
+
+!0 = metadata !{i32 786449, i32 4, metadata !1, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 786473, metadata !2}          ; [ DW_TAG_file_type ] [/home/nlewycky/hello.cc]
+!2 = metadata !{metadata !"hello.cc", metadata !"/home/nlewycky"}
+!3 = metadata !{i32 0}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!6 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 1, i32 0, metadata !5, null}
+
+
diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll
index efd633af6d26..d6d0f3314c26 100644
--- a/test/Transforms/GCOVProfiling/version.ll
+++ b/test/Transforms/GCOVProfiling/version.ll
@@ -1,17 +1,19 @@
-; RUN: echo '!9 = metadata !{metadata !"%s", metadata !0}' > %t1
+; RUN: echo '!9 = metadata !{metadata !"%T/version.ll", metadata !0}' > %t1
 ; RUN: cat %s %t1 > %t2
 ; RUN: opt -insert-gcov-profiling -disable-output < %t2
-; RUN: head -c12 %S/version.gcno | grep '^oncg\*204MVLL$'
-; RUN: rm %S/version.gcno
+; RUN: head -c12 %T/version.gcno | grep '^oncg\*204MVLL$'
+; RUN: rm %T/version.gcno
 ; RUN: not opt -insert-gcov-profiling -default-gcov-version=asdfasdf -disable-output < %t2
 ; RUN: opt -insert-gcov-profiling -default-gcov-version=407* -disable-output < %t2
-; RUN: head -c12 %S/version.gcno | grep '^oncg\*704MVLL$'
-; RUN: rm %S/version.gcno
+; RUN: head -c12 %T/version.gcno | grep '^oncg\*704MVLL$'
+; RUN: rm %T/version.gcno
 
 define void @test() {
   ret void, !dbg !8
 }
 
+; REQUIRES: shell
+
 !llvm.gcov = !{!9}
 !llvm.dbg.cu = !{!0}
 
diff --git a/test/Transforms/GlobalDCE/complex-constantexpr.ll b/test/Transforms/GlobalDCE/complex-constantexpr.ll
new file mode 100644
index 000000000000..4bf1aeee709b
--- /dev/null
+++ b/test/Transforms/GlobalDCE/complex-constantexpr.ll
@@ -0,0 +1,97 @@
+; RUN: opt -O2 -disable-output < %s
+; PR15714
+
+%struct.ham = type { i32 }
+
+@global5 = common global i32 0, align 4
+@global6 = common global i32 0, align 4
+@global7 = common global i32 0, align 4
+@global = common global i32 0, align 4
+@global8 = common global %struct.ham zeroinitializer, align 4
+@global9 = common global i32 0, align 4
+@global10 = common global i32 0, align 4
+@global11 = common global i32 0, align 4
+
+define void @zot12() {
+bb:
+  store i32 0, i32* @global5, align 4
+  store i32 0, i32* @global6, align 4
+  br label %bb2
+
+bb1:                                              ; preds = %bb11
+  %tmp = load i32* @global5, align 4
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp3 = phi i32 [ %tmp, %bb1 ], [ 0, %bb ]
+  %tmp4 = xor i32 %tmp3, zext (i1 icmp ne (i64 ptrtoint (i32* @global5 to i64), i64 1) to i32)
+  store i32 %tmp4, i32* @global5, align 4
+  %tmp5 = icmp eq i32 %tmp3, zext (i1 icmp ne (i64 ptrtoint (i32* @global5 to i64), i64 1) to i32)
+  br i1 %tmp5, label %bb8, label %bb6
+
+bb6:                                              ; preds = %bb2
+  %tmp7 = tail call i32 @quux13()
+  br label %bb8
+
+bb8:                                              ; preds = %bb6, %bb2
+  %tmp9 = load i32* @global7, align 4
+  %tmp10 = icmp eq i32 %tmp9, 0
+  br i1 %tmp10, label %bb11, label %bb15
+
+bb11:                                             ; preds = %bb8
+  %tmp12 = load i32* @global6, align 4
+  %tmp13 = add nsw i32 %tmp12, 1
+  store i32 %tmp13, i32* @global6, align 4
+  %tmp14 = icmp slt i32 %tmp13, 42
+  br i1 %tmp14, label %bb1, label %bb15
+
+bb15:                                             ; preds = %bb11, %bb8
+  ret void
+}
+
+define i32 @quux13() {
+bb:
+  store i32 1, i32* @global5, align 4
+  ret i32 1
+}
+
+define void @wombat() {
+bb:
+  tail call void @zot12()
+  ret void
+}
+
+define void @wombat14() {
+bb:
+  tail call void @blam()
+  ret void
+}
+
+define void @blam() {
+bb:
+  store i32 ptrtoint (i32* @global to i32), i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
+  store i32 0, i32* @global9, align 4
+  %tmp = load i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp2 = phi i32 [ 0, %bb ], [ %tmp11, %bb1 ]
+  %tmp3 = phi i32 [ %tmp, %bb ], [ %tmp10, %bb1 ]
+  %tmp4 = icmp sgt i32 %tmp3, 0
+  %tmp5 = zext i1 %tmp4 to i32
+  %tmp6 = urem i32 %tmp5, 5
+  %tmp7 = mul i32 %tmp3, -80
+  %tmp8 = or i32 %tmp7, %tmp6
+  %tmp9 = icmp eq i32 %tmp8, 0
+  %tmp10 = zext i1 %tmp9 to i32
+  %tmp11 = add nsw i32 %tmp2, 1
+  %tmp12 = icmp eq i32 %tmp11, 20
+  br i1 %tmp12, label %bb13, label %bb1
+
+bb13:                                             ; preds = %bb1
+  store i32 %tmp10, i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
+  store i32 0, i32* @global10, align 4
+  store i32 %tmp6, i32* @global11, align 4
+  store i32 20, i32* @global9, align 4
+  ret void
+}
diff --git a/test/Transforms/GlobalDCE/indirectbr.ll b/test/Transforms/GlobalDCE/indirectbr.ll
new file mode 100644
index 000000000000..90f1ae44b1ac
--- /dev/null
+++ b/test/Transforms/GlobalDCE/indirectbr.ll
@@ -0,0 +1,18 @@
+; RUN: opt -S -globaldce < %s | FileCheck %s
+
+@L = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@test1, %L1), i8* blockaddress(@test1, %L2), i8* null], align 16
+
+; CHECK: @L = internal unnamed_addr constant
+
+define void @test1(i32 %idx) {
+entry:
+  br label %L1
+
+L1:
+  %arrayidx = getelementptr inbounds [3 x i8*]* @L, i32 0, i32 %idx
+  %l = load i8** %arrayidx
+  indirectbr i8* %l, [label %L1, label %L2]
+
+L2:
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/crash-2.ll b/test/Transforms/GlobalOpt/crash-2.ll
new file mode 100644
index 000000000000..684f6cee180b
--- /dev/null
+++ b/test/Transforms/GlobalOpt/crash-2.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | opt -globalopt -disable-output
+; NOTE: This needs to run through 'llvm-as' first to reproduce the error!
+; PR15440
+
+%union.U5.0.6.12 = type { i32 }
+%struct.S0.1.7.13 = type { i8, i8, i8, i8, i16, [2 x i8] }
+%struct.S1.2.8.14 = type { i32, i16, i8, i8 }
+
+@.str = external unnamed_addr constant [2 x i8], align 1
+@g_25 = external global i8, align 1
+@g_71 = internal global %struct.S0.1.7.13 { i8 1, i8 -93, i8 58, i8 -1, i16 -5, [2 x i8] undef }, align 4
+@g_114 = external global i8, align 1
+@g_30 = external global { i32, i8, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }, align 4
+@g_271 = internal global [7 x [6 x [5 x i8*]]] [[6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* null, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* @g_114, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* @g_25, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* null, i8* @g_25, i8* @g_25, i8* @g_25, i8* null], [5 x i8*] [i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* null, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* @g_25], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25], [5 x i8*] [i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_114, i8* @g_25, i8* @g_25, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_114], [5 x i8*] [i8* @g_25, i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* null], [5 x i8*] [i8* @g_114, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_25]], [6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_114, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_25, i8* @g_25, i8* @g_114], [5 x i8*] [i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25]]], align 4
+
+define i32 @func() {
+  %tmp = load i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), align 1
+  ret i32 0
+}
diff --git a/test/Transforms/IndVarSimplify/dont-recompute.ll b/test/Transforms/IndVarSimplify/dont-recompute.ll
new file mode 100644
index 000000000000..d37b0e21f826
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/dont-recompute.ll
@@ -0,0 +1,69 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+; This tests that the IV is not recomputed outside of the loop when it is known
+; to be computed by the loop and used in the loop any way. In the example below
+; although a's value can be computed outside of the loop, there is no benefit
+; in doing so as it has to be computed by the loop anyway.
+;
+; extern void func(unsigned val);
+;
+; void test(unsigned m)
+; {
+;   unsigned a = 0;
+;
+;   for (int i=0; i<186; i++) {
+;     a += m;
+;     func(a);
+;   }
+;
+;   func(a);
+; }
+
+declare void @func(i32)
+
+; CHECK: @test
+define void @test(i32 %m) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %add = add i32 %a.05, %m
+; CHECK: tail call void @func(i32 %add)
+  tail call void @func(i32 %add)
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 186
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+; CHECK: for.end:
+; CHECK-NOT: mul i32 %m, 186
+; CHECK:%add.lcssa = phi i32 [ %add, %for.body ]
+; CHECK-NEXT: tail call void @func(i32 %add.lcssa)
+  tail call void @func(i32 %add)
+  ret void
+}
+
+; CHECK: @test2
+define i32 @test2(i32 %m) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %add = add i32 %a.05, %m
+; CHECK: tail call void @func(i32 %add)
+  tail call void @func(i32 %add)
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 186
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+; CHECK: for.end:
+; CHECK-NOT: mul i32 %m, 186
+; CHECK:%add.lcssa = phi i32 [ %add, %for.body ]
+; CHECK-NEXT: ret i32 %add.lcssa
+  ret i32 %add
+}
diff --git a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
index 5ced3b8e8da9..b8ca56050dca 100644
--- a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
+++ b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
@@ -3,10 +3,15 @@
 define i32 @main() {
 entry:
         invoke void @__main( )
-                        to label %LongJmpBlkPre unwind label %LongJmpBlkPre
+                        to label %LongJmpBlkPost unwind label %LongJmpBlkPre
 
-LongJmpBlkPre:          ; preds = %entry, %entry
+LongJmpBlkPost:
+        ret i32 0
+
+LongJmpBlkPre:
         %i.3 = phi i32 [ 0, %entry ], [ 0, %entry ]             ; <i32> [#uses=0]
+        %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+                 cleanup
         ret i32 0
 }
 
@@ -14,3 +19,4 @@ define void @__main() {
         ret void
 }
 
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll b/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
index 1bd55299a901..43bdd309c987 100644
--- a/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
+++ b/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
@@ -13,6 +13,8 @@ LJDecisionBB:           ; preds = %else
         br label %else
 
 RethrowExcept:          ; preds = %entry
+        %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+                 cleanup
         ret i32 0
 }
 
@@ -20,4 +22,4 @@ define void @__main() {
         ret void
 }
 
-
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/Inline/inline_invoke.ll b/test/Transforms/Inline/inline_invoke.ll
index c53bb5aa17be..c3941388f937 100644
--- a/test/Transforms/Inline/inline_invoke.ll
+++ b/test/Transforms/Inline/inline_invoke.ll
@@ -96,6 +96,7 @@ eh.resume:
 ; CHECK:      landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A]])
 ; CHECK-NEXT:   to label %[[LBL:[^\s]+]] unwind
 ; CHECK: [[LBL]]:
@@ -166,6 +167,7 @@ eh.resume:
 ; CHECK-NEXT: [[LPADVAL1:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A1]])
 ; CHECK-NEXT:   to label %[[RESUME1:[^\s]+]] unwind
 ; CHECK: [[RESUME1]]:
@@ -185,6 +187,7 @@ eh.resume:
 ; CHECK-NEXT: [[LPADVAL2:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:   cleanup
 ; CHECK-NEXT:   catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT:   catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A2]])
 ; CHECK-NEXT:   to label %[[RESUME2:[^\s]+]] unwind
 ; CHECK: [[RESUME2]]:
@@ -272,6 +275,7 @@ lpad.cont:
 ; CHECK:      landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(
 ; CHECK-NEXT:   to label %[[L:[^\s]+]] unwind
 ; CHECK:    [[L]]:
@@ -318,6 +322,7 @@ terminate:
 ; CHECK:      landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(
 ; CHECK-NEXT:   to label %[[L:[^\s]+]] unwind
 ; CHECK:    [[L]]:
diff --git a/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll b/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll
new file mode 100644
index 000000000000..b66495d9cbaa
--- /dev/null
+++ b/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -inline -instcombine -functionattrs | llvm-dis
+;
+; Check that nocapture attributes are added when run after an SCC pass.
+; PR3520
+
+define i32 @use(i8* %x) nounwind readonly {
+; CHECK: @use(i8* nocapture %x)
+  %1 = tail call i64 @strlen(i8* %x) nounwind readonly
+  %2 = trunc i64 %1 to i32
+  ret i32 %2
+}
+
+declare i64 @strlen(i8*) nounwind readonly
+; CHECK: declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/test/Transforms/InstCombine/and-fcmp.ll b/test/Transforms/InstCombine/and-fcmp.ll
index 40c44c09a8c0..a398307f869e 100644
--- a/test/Transforms/InstCombine/and-fcmp.ll
+++ b/test/Transforms/InstCombine/and-fcmp.ll
@@ -77,3 +77,24 @@ define zeroext i8 @t7(float %x, float %y) nounwind {
 ; CHECK: fcmp uno
 ; CHECK-NOT: fcmp ult
 }
+
+; PR15737
+define i1 @t8(float %a, double %b) {
+  %cmp = fcmp ord float %a, 0.000000e+00
+  %cmp1 = fcmp ord double %b, 0.000000e+00
+  %and = and i1 %cmp, %cmp1
+  ret i1 %and
+; CHECK: t8
+; CHECK: fcmp ord
+; CHECK: fcmp ord
+}
+
+define <2 x i1> @t9(<2 x float> %a, <2 x double> %b) {
+  %cmp = fcmp ord <2 x float> %a, zeroinitializer
+  %cmp1 = fcmp ord <2 x double> %b, zeroinitializer
+  %and = and <2 x i1> %cmp, %cmp1
+  ret <2 x i1> %and
+; CHECK: t9
+; CHECK: fcmp ord
+; CHECK: fcmp ord
+}
diff --git a/test/Transforms/InstCombine/apint-shift-simplify.ll b/test/Transforms/InstCombine/apint-shift-simplify.ll
index 818ae6659b26..14e895ad4bf6 100644
--- a/test/Transforms/InstCombine/apint-shift-simplify.ll
+++ b/test/Transforms/InstCombine/apint-shift-simplify.ll
@@ -1,11 +1,14 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:    egrep "shl|lshr|ashr" | count 3
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i41 @test0(i41 %A, i41 %B, i41 %C) {
 	%X = shl i41 %A, %C
 	%Y = shl i41 %B, %C
 	%Z = and i41 %X, %Y
 	ret i41 %Z
+; CHECK: @test0
+; CHECK-NEXT: and i41 %A, %B
+; CHECK-NEXT: shl i41
+; CHECK-NEXT: ret
 }
 
 define i57 @test1(i57 %A, i57 %B, i57 %C) {
@@ -13,6 +16,10 @@ define i57 @test1(i57 %A, i57 %B, i57 %C) {
 	%Y = lshr i57 %B, %C
 	%Z = or i57 %X, %Y
 	ret i57 %Z
+; CHECK: @test1
+; CHECK-NEXT: or i57 %A, %B
+; CHECK-NEXT: lshr i57
+; CHECK-NEXT: ret
 }
 
 define i49 @test2(i49 %A, i49 %B, i49 %C) {
@@ -20,4 +27,8 @@ define i49 @test2(i49 %A, i49 %B, i49 %C) {
 	%Y = ashr i49 %B, %C
 	%Z = xor i49 %X, %Y
 	ret i49 %Z
+; CHECK: @test2
+; CHECK-NEXT: xor i49 %A, %B
+; CHECK-NEXT: ashr i49
+; CHECK-NEXT: ret
 }
diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll
new file mode 100644
index 000000000000..4ded581a14c6
--- /dev/null
+++ b/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; These tests are extracted from bitcast.ll.
+; Verify that they also work correctly on big-endian targets.
+
+define float @test2(<2 x float> %A, <2 x i32> %B) {
+  %tmp28 = bitcast <2 x float> %A to i64  ; <i64> [#uses=2]
+  %tmp23 = trunc i64 %tmp28 to i32                ; <i32> [#uses=1]
+  %tmp24 = bitcast i32 %tmp23 to float            ; <float> [#uses=1]
+
+  %tmp = bitcast <2 x i32> %B to i64
+  %tmp2 = trunc i64 %tmp to i32                ; <i32> [#uses=1]
+  %tmp4 = bitcast i32 %tmp2 to float            ; <float> [#uses=1]
+
+  %add = fadd float %tmp24, %tmp4
+  ret float %add
+
+; CHECK: @test2
+; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 1
+; CHECK-NEXT:  bitcast <2 x i32> %B to <2 x float>
+; CHECK-NEXT:  %tmp4 = extractelement <2 x float> {{.*}}, i32 1
+; CHECK-NEXT:  %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT:  ret float %add
+}
+
+define float @test3(<2 x float> %A, <2 x i64> %B) {
+  %tmp28 = bitcast <2 x float> %A to i64
+  %tmp29 = lshr i64 %tmp28, 32
+  %tmp23 = trunc i64 %tmp29 to i32
+  %tmp24 = bitcast i32 %tmp23 to float
+
+  %tmp = bitcast <2 x i64> %B to i128
+  %tmp1 = lshr i128 %tmp, 64
+  %tmp2 = trunc i128 %tmp1 to i32
+  %tmp4 = bitcast i32 %tmp2 to float
+
+  %add = fadd float %tmp24, %tmp4
+  ret float %add
+
+; CHECK: @test3
+; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 0
+; CHECK-NEXT:  bitcast <2 x i64> %B to <4 x float>
+; CHECK-NEXT:  %tmp4 = extractelement <4 x float> {{.*}}, i32 1
+; CHECK-NEXT:  %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT:  ret float %add
+}
+
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index e7fe71de3e4f..cdbcd865117c 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -31,7 +31,7 @@ entry:
 !llvm.dbg.cu = !{!3}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786449, i32 0, i32 12, metadata !26, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index 47f1ec480468..edcbcc71dfb4 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -130,6 +130,16 @@ define double @fail2(double %f1, double %f2) {
 ; CHECK: ret
 }
 
+; c1 * x - x => (c1 - 1.0) * x
+define float @fold13(float %x) {
+  %mul = fmul fast float %x, 7.000000e+00
+  %sub = fsub fast float %mul, %x
+  ret float %sub
+; CHECK: fold13
+; CHECK: fmul fast float %x, 6.000000e+00
+; CHECK: ret
+}
+
 ; =========================================================================
 ;
 ;   Testing-cases about fmul begin
diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll
index 39d86b4588cc..e1dc191bd700 100644
--- a/test/Transforms/InstCombine/fprintf-1.ll
+++ b/test/Transforms/InstCombine/fprintf-1.ll
@@ -78,3 +78,12 @@ define void @test_no_simplify2(%FILE* %fp, double %d) {
   ret void
 ; CHECK-NEXT: ret void
 }
+
+define i32 @test_no_simplify3(%FILE* %fp) {
+; CHECK: @test_no_simplify3
+  %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  %1 = call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt)
+; CHECK-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0))
+  ret i32 %1
+; CHECK-NEXT: ret i32 %1
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 331eb3f21a9b..c912a576c3d2 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -744,3 +744,235 @@ define i1 @icmp_shl24(i32 %x) {
   %cmp = icmp slt i32 %shl, 603979776
   ret i1 %cmp
 }
+
+; If the (shl x, C) preserved the sign and this is a sign test,
+; compare the LHS operand instead
+; CHECK: @icmp_shl_nsw_sgt
+; CHECK-NEXT: icmp sgt i32 %x, 0
+define i1 @icmp_shl_nsw_sgt(i32 %x) {
+  %shl = shl nsw i32 %x, 21
+  %cmp = icmp sgt i32 %shl, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_nsw_sge0
+; CHECK-NEXT: icmp sgt i32 %x, -1
+define i1 @icmp_shl_nsw_sge0(i32 %x) {
+  %shl = shl nsw i32 %x, 21
+  %cmp = icmp sge i32 %shl, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_nsw_sge1
+; CHECK-NEXT: icmp sgt i32 %x, 0
+define i1 @icmp_shl_nsw_sge1(i32 %x) {
+  %shl = shl nsw i32 %x, 21
+  %cmp = icmp sge i32 %shl, 1
+  ret i1 %cmp
+}
+
+; Checks for icmp (eq|ne) (shl x, C), 0
+; CHECK: @icmp_shl_nsw_eq
+; CHECK-NEXT: icmp eq i32 %x, 0
+define i1 @icmp_shl_nsw_eq(i32 %x) {
+  %mul = shl nsw i32 %x, 5
+  %cmp = icmp eq i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_eq
+; CHECK-NOT: icmp eq i32 %mul, 0
+define i1 @icmp_shl_eq(i32 %x) {
+  %mul = shl i32 %x, 5
+  %cmp = icmp eq i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_nsw_ne
+; CHECK-NEXT: icmp ne i32 %x, 0
+define i1 @icmp_shl_nsw_ne(i32 %x) {
+  %mul = shl nsw i32 %x, 7
+  %cmp = icmp ne i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_ne
+; CHECK-NOT: icmp ne i32 %x, 0
+define i1 @icmp_shl_ne(i32 %x) {
+  %mul = shl i32 %x, 7
+  %cmp = icmp ne i32 %mul, 0
+  ret i1 %cmp
+}
+
+; If the (mul x, C) preserved the sign and this is sign test,
+; compare the LHS operand instead
+; CHECK: @icmp_mul_nsw
+; CHECK-NEXT: icmp sgt i32 %x, 0
+define i1 @icmp_mul_nsw(i32 %x) {
+  %mul = mul nsw i32 %x, 12
+  %cmp = icmp sgt i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw1
+; CHECK-NEXT: icmp slt i32 %x, 0
+define i1 @icmp_mul_nsw1(i32 %x) {
+  %mul = mul nsw i32 %x, 12
+  %cmp = icmp sle i32 %mul, -1
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw_neg
+; CHECK-NEXT: icmp slt i32 %x, 1
+define i1 @icmp_mul_nsw_neg(i32 %x) {
+  %mul = mul nsw i32 %x, -12
+  %cmp = icmp sge i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw_neg1
+; CHECK-NEXT: icmp slt i32 %x, 0
+define i1 @icmp_mul_nsw_neg1(i32 %x) {
+  %mul = mul nsw i32 %x, -12
+  %cmp = icmp sge i32 %mul, 1
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw_0
+; CHECK-NOT: icmp sgt i32 %x, 0
+define i1 @icmp_mul_nsw_0(i32 %x) {
+  %mul = mul nsw i32 %x, 0
+  %cmp = icmp sgt i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul
+; CHECK-NEXT: %mul = mul i32 %x, -12
+define i1 @icmp_mul(i32 %x) {
+  %mul = mul i32 %x, -12
+  %cmp = icmp sge i32 %mul, 0
+  ret i1 %cmp
+}
+
+; Checks for icmp (eq|ne) (mul x, C), 0
+; CHECK: @icmp_mul_neq0
+; CHECK-NEXT: icmp ne i32 %x, 0
+define i1 @icmp_mul_neq0(i32 %x) {
+  %mul = mul nsw i32 %x, -12
+  %cmp = icmp ne i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_eq0
+; CHECK-NEXT: icmp eq i32 %x, 0
+define i1 @icmp_mul_eq0(i32 %x) {
+  %mul = mul nsw i32 %x, 12
+  %cmp = icmp eq i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul0_eq0
+; CHECK-NEXT: ret i1 true
+define i1 @icmp_mul0_eq0(i32 %x) {
+  %mul = mul i32 %x, 0
+  %cmp = icmp eq i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul0_ne0
+; CHECK-NEXT: ret i1 false
+define i1 @icmp_mul0_ne0(i32 %x) {
+  %mul = mul i32 %x, 0
+  %cmp = icmp ne i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_sub1_sge
+; CHECK-NEXT: icmp sgt i32 %x, %y
+define i1 @icmp_sub1_sge(i32 %x, i32 %y) {
+  %sub = add nsw i32 %x, -1
+  %cmp = icmp sge i32 %sub, %y
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_add1_sgt
+; CHECK-NEXT: icmp sge i32 %x, %y
+define i1 @icmp_add1_sgt(i32 %x, i32 %y) {
+  %add = add nsw i32 %x, 1
+  %cmp = icmp sgt i32 %add, %y
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_sub1_slt
+; CHECK-NEXT: icmp sle i32 %x, %y
+define i1 @icmp_sub1_slt(i32 %x, i32 %y) {
+  %sub = add nsw i32 %x, -1
+  %cmp = icmp slt i32 %sub, %y
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_add1_sle
+; CHECK-NEXT: icmp slt i32 %x, %y
+define i1 @icmp_add1_sle(i32 %x, i32 %y) {
+  %add = add nsw i32 %x, 1
+  %cmp = icmp sle i32 %add, %y
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_add20_sge_add57
+; CHECK-NEXT: [[ADD:%[a-z0-9]+]] = add nsw i32 %y, 37
+; CHECK-NEXT: icmp sle i32 [[ADD]], %x
+define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) {
+  %1 = add nsw i32 %x, 20
+  %2 = add nsw i32 %y, 57
+  %cmp = icmp sge i32 %1, %2
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_sub57_sge_sub20
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = add nsw i32 %x, -37
+; CHECK-NEXT: icmp sge i32 [[SUB]], %y
+define i1 @icmp_sub57_sge_sub20(i32 %x, i32 %y) {
+  %1 = add nsw i32 %x, -57
+  %2 = add nsw i32 %y, -20
+  %cmp = icmp sge i32 %1, %2
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_and_shl_neg_ne_0
+; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 1, %B
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], %A
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_shl_neg_ne_0(i32 %A, i32 %B) {
+  %neg = xor i32 %A, -1
+  %shl = shl i32 1, %B
+  %and = and i32 %shl, %neg
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_and_shl_neg_eq_0
+; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 1, %B
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], %A
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_shl_neg_eq_0(i32 %A, i32 %B) {
+  %neg = xor i32 %A, -1
+  %shl = shl i32 1, %B
+  %and = and i32 %shl, %neg
+  %cmp = icmp eq i32 %and, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_add_and_shr_ne_0
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %X, 240
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 224
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_add_and_shr_ne_0(i32 %X) {
+  %shr = lshr i32 %X, 4
+  %and = and i32 %shr, 15
+  %add = add i32 %and, -14
+  %tobool = icmp ne i32 %add, 0
+  ret i1 %tobool
+}
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index 5cafb7787e36..869215cb58d4 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -47,6 +47,18 @@ define i1 @test4(i32 %X) {
 ; CHECK-NEXT: ret i1 %R
 }
 
+define i1 @test4_i16(i16 %X) {
+  %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i16 %X
+  %Q = load i16* %P
+  %R = icmp sle i16 %Q, 73
+  ret i1 %R
+; CHECK: @test4_i16
+; CHECK-NEXT: lshr i16 933, %X
+; CHECK-NEXT: and i16 {{.*}}, 1
+; CHECK-NEXT: %R = icmp ne i16 {{.*}}, 0
+; CHECK-NEXT: ret i1 %R
+}
+
 define i1 @test5(i32 %X) {
   %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
   %Q = load i16* %P
@@ -88,8 +100,8 @@ define i1 @test8(i32 %X) {
   %S = icmp eq i16 %R, 0
   ret i1 %S
 ; CHECK: @test8
-; CHECK-NEXT: add i32 %X, -8
-; CHECK-NEXT: icmp ult i32 {{.*}}, 2
+; CHECK-NEXT: and i32 %X, -2
+; CHECK-NEXT: icmp eq i32 {{.*}}, 8
 ; CHECK-NEXT: ret i1
 }
 
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 0ead9d123749..31a3cb46e459 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -256,131 +256,3 @@ xpto:
 return:
   ret i32 7
 }
-
-declare noalias i8* @valloc(i32) nounwind
-
-; CHECK: @test14
-; CHECK: ret i32 6
-define i32 @test14(i32 %a) nounwind {
-  switch i32 %a, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  %call = tail call noalias i8* @malloc(i32 6) nounwind
-  br label %sw.epilog
-
-sw.bb1:
-  %call2 = tail call noalias i8* @calloc(i32 3, i32 2) nounwind
-  br label %sw.epilog
-
-sw.default:
-  %call3 = tail call noalias i8* @valloc(i32 6) nounwind
-  br label %sw.epilog
-
-sw.epilog:
-  %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
-  ret i32 %1
-}
-
-; CHECK: @test15
-; CHECK: llvm.objectsize
-define i32 @test15(i32 %a) nounwind {
-  switch i32 %a, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  %call = tail call noalias i8* @malloc(i32 3) nounwind
-  br label %sw.epilog
-
-sw.bb1:
-  %call2 = tail call noalias i8* @calloc(i32 2, i32 1) nounwind
-  br label %sw.epilog
-
-sw.default:
-  %call3 = tail call noalias i8* @valloc(i32 3) nounwind
-  br label %sw.epilog
-
-sw.epilog:
-  %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
-  ret i32 %1
-}
-
-; CHECK: @test16
-; CHECK: llvm.objectsize
-define i32 @test16(i8* %a, i32 %n) nounwind {
-  %b = alloca [5 x i8], align 1
-  %c = alloca [5 x i8], align 1
-  switch i32 %n, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  %bp = bitcast [5 x i8]* %b to i8*
-  br label %sw.epilog
-
-sw.bb1:
-  %cp = bitcast [5 x i8]* %c to i8*
-  br label %sw.epilog
-
-sw.default:
-  br label %sw.epilog
-
-sw.epilog:
-  %phi = phi i8* [ %a, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
-  %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
-  ret i32 %sz
-}
-
-; CHECK: @test17
-; CHECK: ret i32 5
-define i32 @test17(i32 %n) nounwind {
-  %b = alloca [5 x i8], align 1
-  %c = alloca [5 x i8], align 1
-  %bp = bitcast [5 x i8]* %b to i8*
-  switch i32 %n, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  br label %sw.epilog
-
-sw.bb1:
-  %cp = bitcast [5 x i8]* %c to i8*
-  br label %sw.epilog
-
-sw.default:
-  br label %sw.epilog
-
-sw.epilog:
-  %phi = phi i8* [ %bp, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
-  %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
-  ret i32 %sz
-}
-
-@globalalias = alias internal [60 x i8]* @a
-
-; CHECK: @test18
-; CHECK-NEXT: ret i32 60
-define i32 @test18() {
-  %bc = bitcast [60 x i8]* @globalalias to i8*
-  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
-  ret i32 %1
-}
-
-@globalalias2 = alias weak [60 x i8]* @a
-
-; CHECK: @test19
-; CHECK: llvm.objectsize
-define i32 @test19() {
-  %bc = bitcast [60 x i8]* @globalalias2 to i8*
-  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
-  ret i32 %1
-}
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index bde2a54048ad..7226bd93996f 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -178,12 +178,12 @@ define i1 @test18(i32 %A) {
 define i1 @test19(i32 %A) {
         %B = icmp eq i32 %A, 50
         %C = icmp eq i32 %A, 51
-        ;; (A-50) < 2
+        ;; (A&-2) == 50
         %D = or i1 %B, %C
         ret i1 %D
 ; CHECK: @test19
-; CHECK: add i32
-; CHECK: icmp ult 
+; CHECK: and i32
+; CHECK: icmp eq 
 ; CHECK: ret i1
 }
 
diff --git a/test/Transforms/InstCombine/strto-1.ll b/test/Transforms/InstCombine/strto-1.ll
index 16c0c67970db..7139972fe043 100644
--- a/test/Transforms/InstCombine/strto-1.ll
+++ b/test/Transforms/InstCombine/strto-1.ll
@@ -1,29 +1,29 @@
 ; Test that the strto* library call simplifiers works correctly.
 ;
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -functionattrs -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
 declare i64 @strtol(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtol(i8*, i8**, i32)
+; CHECK: declare i64 @strtol(i8*, i8** nocapture, i32)
 
 declare double @strtod(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare double @strtod(i8*, i8**, i32)
+; CHECK: declare double @strtod(i8*, i8** nocapture, i32)
 
 declare float @strtof(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare float @strtof(i8*, i8**, i32)
+; CHECK: declare float @strtof(i8*, i8** nocapture, i32)
 
 declare i64 @strtoul(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoul(i8*, i8**, i32)
+; CHECK: declare i64 @strtoul(i8*, i8** nocapture, i32)
 
 declare i64 @strtoll(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoll(i8*, i8**, i32)
+; CHECK: declare i64 @strtoll(i8*, i8** nocapture, i32)
 
 declare double @strtold(i8* %s, i8** %endptr)
-; CHECK: declare double @strtold(i8*, i8**)
+; CHECK: declare double @strtold(i8*, i8** nocapture)
 
 declare i64 @strtoull(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoull(i8*, i8**, i32)
+; CHECK: declare i64 @strtoull(i8*, i8** nocapture, i32)
 
 define void @test_simplify1(i8* %x, i8** %endptr) {
 ; CHECK: @test_simplify1
diff --git a/test/Transforms/InstCombine/vec_extract_2elts.ll b/test/Transforms/InstCombine/vec_extract_2elts.ll
new file mode 100644
index 000000000000..5972340d60a9
--- /dev/null
+++ b/test/Transforms/InstCombine/vec_extract_2elts.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @test(<4 x i32> %v, i64 *%r1, i64 *%r2) {
+;CHECK: %1 = extractelement <4 x i32> %v, i32 0
+;CHECK: %2 = zext i32 %1 to i64
+        %1 = zext <4 x i32> %v to <4 x i64>
+        %2 = extractelement <4 x i64> %1, i32 0
+        store i64 %2, i64 *%r1
+        store i64 %2, i64 *%r2
+        ret void
+}
+
diff --git a/test/Transforms/InstCombine/vec_phi_extract.ll b/test/Transforms/InstCombine/vec_phi_extract.ll
new file mode 100644
index 000000000000..2f10fc2c1ed2
--- /dev/null
+++ b/test/Transforms/InstCombine/vec_phi_extract.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @f(i64 %val, i32  %limit, i32 *%ptr) {
+;CHECK: %0 = trunc i64
+;CHECK: %1 = phi i32
+entry:
+  %tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
+  %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
+  %0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+  %1 = trunc <16 x i64> %0 to <16 x i32>
+  br label %loop
+
+loop:
+  %2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+  %elt = extractelement <16 x i32> %2, i32 0
+  %end = icmp ult i32 %elt, %limit
+  %3 = add i32 10, %elt
+  %4 = sext i32 %elt to i64
+  %5 = getelementptr i32* %ptr, i64 %4
+  store i32 %3, i32* %5
+  %inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  br i1 %end, label %loop, label %ret
+
+ret:
+  ret void
+}
+
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 14f532195d7c..37d4d56e9131 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -196,3 +196,30 @@ define <4 x i16> @test13e(<4 x i16> %lhs, <4 x i16> %rhs) {
            <4 x i16> %lhs, <4 x i16> %rhs
   ret <4 x i16> %A
 }
+
+; Check that sequences of insert/extract element are
+; collapsed into shuffle instruction with correct shuffle indexes.
+
+define <4 x float> @test14a(<4 x float> %LHS, <4 x float> %RHS) {
+; CHECK: @test14a
+; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 0, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> %tmp4
+        %tmp1 = extractelement <4 x float> %LHS, i32 0
+        %tmp2 = insertelement <4 x float> %RHS, float %tmp1, i32 1
+        %tmp3 = extractelement <4 x float> %RHS, i32 2
+        %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 3
+        ret <4 x float> %tmp4
+}
+
+define <4 x float> @test14b(<4 x float> %LHS, <4 x float> %RHS) {
+; CHECK: @test14b
+; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 3, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> %tmp5
+        %tmp0 = extractelement <4 x float> %LHS, i32 3
+        %tmp1 = insertelement <4 x float> %RHS, float %tmp0, i32 0
+        %tmp2 = extractelement <4 x float> %tmp1, i32 0
+        %tmp3 = insertelement <4 x float> %RHS, float %tmp2, i32 1
+        %tmp4 = extractelement <4 x float> %RHS, i32 2
+        %tmp5 = insertelement <4 x float> %tmp3, float %tmp4, i32 3
+        ret <4 x float> %tmp5
+}
diff --git a/test/Transforms/InstCombine/vector-type.ll b/test/Transforms/InstCombine/vector-type.ll
new file mode 100644
index 000000000000..59a4bdd19e70
--- /dev/null
+++ b/test/Transforms/InstCombine/vector-type.ll
@@ -0,0 +1,15 @@
+; The code in InstCombiner::FoldSelectOpOp was calling
+; Type::getVectorNumElements without checking first if the type was a vector.
+
+; RUN: opt < %s -instcombine -S
+
+define i32 @vselect1(i32 %a.coerce, i32 %b.coerce, i32 %c.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>
+  %1 = bitcast i32 %b.coerce to <2 x i16>
+  %2 = bitcast i32 %c.coerce to <2 x i16>
+  %cmp = icmp sge <2 x i16> %2, zeroinitializer
+  %or = select <2 x i1> %cmp, <2 x i16> %0, <2 x i16> %1
+  %3 = bitcast <2 x i16> %or to i32
+  ret i32 %3
+}
diff --git a/test/Transforms/InstSimplify/2013-04-19-ConstantFoldingCrash.ll b/test/Transforms/InstSimplify/2013-04-19-ConstantFoldingCrash.ll
new file mode 100644
index 000000000000..164751784a65
--- /dev/null
+++ b/test/Transforms/InstSimplify/2013-04-19-ConstantFoldingCrash.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instsimplify
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; PR15791
+define <2 x i64> @test1() {
+  %a = and <2 x i64> undef, bitcast (<4 x i32> <i32 undef, i32 undef, i32 undef, i32 2147483647> to <2 x i64>)
+  ret <2 x i64> %a
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 0ecfb1f8d232..b764c761cfb2 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -682,3 +682,14 @@ define zeroext i1 @external_compare(i32* noalias %x) {
   ; CHECK: external_compare
   ; CHECK: ret i1 %cmp
 }
+
+define i1 @alloca_gep(i64 %a, i64 %b) {
+; CHECK: @alloca_gep
+; We can prove this GEP is non-null because it is inbounds and the pointer
+; is non-null.
+  %strs = alloca [1000 x [1001 x i8]], align 16
+  %x = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %a, i64 %b
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index f9c364cade36..91ce26324b81 100644
--- a/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -14,7 +14,7 @@ define float @fsub_0_0_x(float %a) {
 ; CHECK: @fsub_x_0
 define float @fsub_x_0(float %a) {
   %ret = fsub float %a, 0.0
-; CHECK ret float %a
+; CHECK: ret float %a
   ret float %ret
 }
 
@@ -22,7 +22,7 @@ define float @fsub_x_0(float %a) {
 ; CHECK: @fadd_x_n0
 define float @fadd_x_n0(float %a) {
   %ret = fadd float %a, -0.0
-; CHECK ret float %a
+; CHECK: ret float %a
   ret float %ret
 }
 
diff --git a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
new file mode 100644
index 000000000000..6c924409af37
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
@@ -0,0 +1,28 @@
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -loop-vectorize -dce -instcombine -S < %s | FileCheck %s
+
+@B = common global [1024 x i32] zeroinitializer, align 16
+@A = common global [1024 x i32] zeroinitializer, align 16
+
+; We use to not vectorize this loop because the shift was deemed to expensive.
+; Now that we differentiate shift cost base on the operand value kind, we will
+; vectorize this loop.
+; CHECK: ashr <4 x i32>
+define void @f() {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %shl = ashr i32 %0, 3
+  %arrayidx2 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  store i32 %shl, i32* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
index 23d923354460..760d28deaf27 100644
--- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -33,11 +33,10 @@ define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) noun
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
-  %2 = add nsw i64 %indvars.iv, 3
-  %3 = trunc i64 %2 to i32
-  %4 = sitofp i32 %3 to float
-  %5 = getelementptr inbounds float* %B, i64 %indvars.iv
-  store float %4, float* %5, align 4
+  %add = add nsw i64 %indvars.iv, 3
+  %tofp = sitofp i64 %add to float
+  %gep = getelementptr inbounds float* %B, i64 %indvars.iv
+  store float %tofp, float* %gep, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll
new file mode 100644
index 000000000000..90c45bbc6696
--- /dev/null
+++ b/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -0,0 +1,399 @@
+; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
+@A = common global [1024 x i32] zeroinitializer, align 16
+
+; Signed tests.
+
+; Turn this into a max reduction.
+; CHECK: @max_red
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @max_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp sgt i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a max reduction. The select has its inputs reversed therefore
+; this is a max reduction.
+; CHECK: @max_red_inverse_select
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @max_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp slt i32 %max.red.08, %0
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction.
+; CHECK: @min_red
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp slt i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction. The select has its inputs reversed therefore
+; this is a min reduction.
+; CHECK: @min_red_inverse_select
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @min_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp sgt i32 %max.red.08, %0
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Unsigned tests.
+
+; Turn this into a max reduction.
+; CHECK: @umax_red
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @umax_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp ugt i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a max reduction. The select has its inputs reversed therefore
+; this is a max reduction.
+; CHECK: @umax_red_inverse_select
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @umax_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp ult i32 %max.red.08, %0
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction.
+; CHECK: @umin_red
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @umin_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp ult i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction. The select has its inputs reversed therefore
+; this is a min reduction.
+; CHECK: @umin_red_inverse_select
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @umin_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp ugt i32 %max.red.08, %0
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; SGE -> SLT
+; Turn this into a min reduction (select inputs are reversed).
+; CHECK: @sge_min_red
+; CHECK: icmp sge <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @sge_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp sge i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; SLE -> SGT
+; Turn this into a max reduction (select inputs are reversed).
+; CHECK: @sle_min_red
+; CHECK: icmp sle <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @sle_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp sle i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; UGE -> ULT
+; Turn this into a min reduction (select inputs are reversed).
+; CHECK: @uge_min_red
+; CHECK: icmp uge <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @uge_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp uge i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; ULE -> UGT
+; Turn this into a max reduction (select inputs are reversed).
+; CHECK: @ule_min_red
+; CHECK: icmp ule <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @ule_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp ule i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; No reduction.
+; CHECK: @no_red_1
+; CHECK-NOT: icmp <2 x i32>
+define i32 @no_red_1(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %arrayidx1 = getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %1 = load i32* %arrayidx1, align 4
+  %cmp3 = icmp sgt i32 %0, %1
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; CHECK: @no_red_2
+; CHECK-NOT: icmp <2 x i32>
+define i32 @no_red_2(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %arrayidx1 = getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %1 = load i32* %arrayidx1, align 4
+  %cmp3 = icmp sgt i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
diff --git a/test/Transforms/LoopVectorize/no_idiv_reduction.ll b/test/Transforms/LoopVectorize/no_idiv_reduction.ll
new file mode 100644
index 000000000000..cdfb3fd66f05
--- /dev/null
+++ b/test/Transforms/LoopVectorize/no_idiv_reduction.ll
@@ -0,0 +1,24 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+@a = common global [128 x i32] zeroinitializer, align 16
+
+;; Must not vectorize division reduction. Division is lossy.
+define i32 @g() {
+entry:
+  br label %for.body
+
+for.body:
+  ; CHECK: @g
+  ; CHECK-NOT: sdiv <2 x i32>
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.05 = phi i32 [ 80, %entry ], [ %div, %for.body ]
+  %arrayidx = getelementptr inbounds [128 x i32]* @a, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %div = sdiv i32 %r.05, %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %div
+}
diff --git a/test/Transforms/LoopVectorize/phi-hang.ll b/test/Transforms/LoopVectorize/phi-hang.ll
index b80d45995dc3..bbce239afa71 100644
--- a/test/Transforms/LoopVectorize/phi-hang.ll
+++ b/test/Transforms/LoopVectorize/phi-hang.ll
@@ -27,3 +27,21 @@ bb5:                                              ; preds = %bb4, %bb1
 bb11:                                             ; preds = %bb5
   ret void
 }
+
+; PR15748
+define void @test2() {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp = phi i32 [ 0, %bb ], [ %tmp5, %bb1 ]
+  %tmp2 = phi i32 [ 0, %bb ], [ 1, %bb1 ]
+  %tmp3 = phi i32 [ 0, %bb ], [ %tmp4, %bb1 ]
+  %tmp4 = or i32 %tmp2, %tmp3
+  %tmp5 = add nsw i32 %tmp, 1
+  %tmp6 = icmp eq i32 %tmp5, 0
+  br i1 %tmp6, label %bb7, label %bb1
+
+bb7:                                              ; preds = %bb1
+  ret void
+}
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index 8abd851eb04a..c0eaaa40154b 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -35,7 +35,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!3}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786449, i32 0, i32 1, metadata !"testfunc.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index f60153229cf8..f6119f8bbd85 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -31,7 +31,7 @@ return:                                           ; preds = %entry
 }
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786449, i32 0, i32 1, metadata !"bar.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
@@ -40,7 +40,7 @@ return:                                           ; preds = %entry
 !7 = metadata !{i32 8, i32 0, metadata !1, null}
 !8 = metadata !{i32 9, i32 0, metadata !1, null}
 !9 = metadata !{i32 786689, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 786478, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786478, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
 !11 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{null, metadata !6, metadata !13, metadata !14}
 !13 = metadata !{i32 786468, metadata !2, metadata !"long int", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/Transforms/MergeFunc/crash.ll b/test/Transforms/MergeFunc/crash.ll
new file mode 100644
index 000000000000..0897ba289337
--- /dev/null
+++ b/test/Transforms/MergeFunc/crash.ll
@@ -0,0 +1,46 @@
+; RUN: opt -mergefunc -disable-output < %s
+; PR15185
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+%.qux.2496 = type { i32, %.qux.2497 }
+%.qux.2497 = type { i8, i32 }
+%.qux.2585 = type { i32, i32, i8* }
+
+@g2 = external unnamed_addr constant [9 x i8], align 1
+@g3 = internal hidden unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585*)* @func35 to i8*)]
+
+define internal hidden i32 @func1(i32* %ptr, { i32, i32 }* nocapture %method) align 2 {
+  br label %1
+
+; <label>:1
+  br label %2
+
+; <label>:2
+  ret i32 undef
+}
+
+define internal hidden i32 @func10(%.qux.2496* nocapture %this) align 2 {
+  %1 = getelementptr inbounds %.qux.2496* %this, i32 0, i32 1, i32 1
+  %2 = load i32* %1, align 4
+  ret i32 %2
+}
+
+define internal hidden i8* @func29(i32* nocapture %this) align 2 {
+  ret i8* getelementptr inbounds ([9 x i8]* @g2, i32 0, i32 0)
+}
+
+define internal hidden i32* @func33(%.qux.2585* nocapture %this) align 2 {
+  ret i32* undef
+}
+
+define internal hidden i32* @func34(%.qux.2585* nocapture %this) align 2 {
+  %1 = getelementptr inbounds %.qux.2585* %this, i32 0
+  ret i32* undef
+}
+
+define internal hidden i8* @func35(%.qux.2585* nocapture %this) align 2 {
+  %1 = getelementptr inbounds %.qux.2585* %this, i32 0, i32 2
+  %2 = load i8** %1, align 4
+  ret i8* %2
+}
diff --git a/test/Transforms/MergeFunc/inttoptr.ll b/test/Transforms/MergeFunc/inttoptr.ll
new file mode 100644
index 000000000000..93250fa8ed1a
--- /dev/null
+++ b/test/Transforms/MergeFunc/inttoptr.ll
@@ -0,0 +1,55 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; PR15185
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+%.qux.2496 = type { i32, %.qux.2497 }
+%.qux.2497 = type { i8, i32 }
+%.qux.2585 = type { i32, i32, i8* }
+
+@g2 = external unnamed_addr constant [9 x i8], align 1
+@g3 = internal hidden unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585*)* @func35 to i8*)]
+
+define internal hidden i32 @func1(i32* %ptr, { i32, i32 }* nocapture %method) align 2 {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb1
+  ret i32 undef
+}
+
+define internal hidden i32 @func10(%.qux.2496* nocapture %this) align 2 {
+bb:
+  %tmp = getelementptr inbounds %.qux.2496* %this, i32 0, i32 1, i32 1
+  %tmp1 = load i32* %tmp, align 4
+  ret i32 %tmp1
+}
+
+define internal hidden i8* @func29(i32* nocapture %this) align 2 {
+bb:
+  ret i8* getelementptr inbounds ([9 x i8]* @g2, i32 0, i32 0)
+}
+
+define internal hidden i32* @func33(%.qux.2585* nocapture %this) align 2 {
+bb:
+  ret i32* undef
+}
+
+define internal hidden i32* @func34(%.qux.2585* nocapture %this) align 2 {
+bb:
+  %tmp = getelementptr inbounds %.qux.2585* %this, i32 0
+  ret i32* undef
+}
+
+define internal hidden i8* @func35(%.qux.2585* nocapture %this) align 2 {
+bb:
+; CHECK: %[[V2:.+]] = bitcast %.qux.2585* %{{.*}} to %.qux.2496*
+; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496* %[[V2]])
+; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
+  %tmp = getelementptr inbounds %.qux.2585* %this, i32 0, i32 2
+  %tmp1 = load i8** %tmp, align 4
+  ret i8* %tmp1
+}
diff --git a/test/Transforms/ObjCARC/arc-annotations.ll b/test/Transforms/ObjCARC/arc-annotations.ll
new file mode 100644
index 000000000000..c0dea4b1b6a0
--- /dev/null
+++ b/test/Transforms/ObjCARC/arc-annotations.ll
@@ -0,0 +1,83 @@
+; This file consists of various tests which ensure that the objc-arc-annotations
+; are working correctly. In the future, I will use this in other lit tests to
+; check the data flow analysis of ARC.
+
+; REQUIRES: asserts
+; RUN: opt -S -objc-arc -enable-objc-arc-annotations < %s | FileCheck %s
+
+declare i8* @objc_retain(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_autorelease(i8*)
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare void @objc_autoreleasePoolPop(i8*)
+declare i8* @objc_autoreleasePoolPush()
+declare i8* @objc_retainBlock(i8*)
+
+declare i8* @objc_retainedObject(i8*)
+declare i8* @objc_unretainedObject(i8*)
+declare i8* @objc_unretainedPointer(i8*)
+
+declare void @use_pointer(i8*)
+declare void @callee()
+declare void @callee_fnptr(void ()*)
+declare void @invokee()
+declare i8* @returner()
+
+; Simple retain+release pair deletion, with some intervening control
+; flow and harmless instructions.
+
+; CHECK: define void @test0(
+; CHECK: entry:
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK:   %0 = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup ![[ANN0:[0-9]+]], !llvm.arc.annotation.topdown ![[ANN1:[0-9]+]]
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: t:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK:   store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup ![[ANN2:[0-9]+]]
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: f:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK:   store i32 7, i32* %x, !llvm.arc.annotation.bottomup ![[ANN2]]
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: return:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
+; CHECK:   call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup ![[ANN3:[0-9]+]], !llvm.arc.annotation.topdown ![[ANN4:[0-9]+]]
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: }
+define void @test0(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  br label %return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind
+  ret void
+}
+
+!0 = metadata !{}
+
+; CHECK: ![[ANN0]] = metadata !{metadata !"(test0,%x)", metadata !"S_Use", metadata !"S_None"}
+; CHECK: ![[ANN1]] = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: ![[ANN2]] = metadata !{metadata !"(test0,%x)", metadata !"S_Release", metadata !"S_Use"}
+; CHECK: ![[ANN3]] = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Release"}
+; CHECK: ![[ANN4]] = metadata !{metadata !"(test0,%x)", metadata !"S_Retain", metadata !"S_None"}
+
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 4c24ebf8652b..670e697e5220 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -20,6 +20,7 @@ declare void @callee()
 declare void @callee_fnptr(void ()*)
 declare void @invokee()
 declare i8* @returner()
+declare void @bar(i32 ()*)
 
 declare void @llvm.dbg.value(metadata, i64, metadata)
 
@@ -28,10 +29,11 @@ declare i8* @objc_msgSend(i8*, i8*, ...)
 ; Simple retain+release pair deletion, with some intervening control
 ; flow and harmless instructions.
 
-; CHECK: define void @test0(
-; CHECK-NOT: @objc_
+; CHECK: define void @test0_precise(
+; CHECK: @objc_retain
+; CHECK: @objc_release
 ; CHECK: }
-define void @test0(i32* %x, i1 %p) nounwind {
+define void @test0_precise(i32* %x, i1 %p) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -53,16 +55,41 @@ return:
   ret void
 }
 
+; CHECK: define void @test0_imprecise(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test0_imprecise(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  br label %return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
 ; Like test0 but the release isn't always executed when the retain is,
 ; so the optimization is not safe.
 
 ; TODO: Make the objc_release's argument be %0.
 
-; CHECK: define void @test1(
+; CHECK: define void @test1_precise(
 ; CHECK: @objc_retain(i8* %a)
 ; CHECK: @objc_release
 ; CHECK: }
-define void @test1(i32* %x, i1 %p, i1 %q) nounwind {
+define void @test1_precise(i32* %x, i1 %p, i1 %q) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -88,9 +115,69 @@ alt_return:
   ret void
 }
 
+; CHECK: define void @test1_imprecise(
+; CHECK: @objc_retain(i8* %a)
+; CHECK: @objc_release
+; CHECK: }
+define void @test1_imprecise(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  call void @callee()
+  br i1 %q, label %return, label %alt_return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+
+alt_return:
+  ret void
+}
+
+
 ; Don't do partial elimination into two different CFG diamonds.
 
-; CHECK: define void @test1b(
+; CHECK: define void @test1b_precise(
+; CHECK: entry:
+; CHECK:   tail call i8* @objc_retain(i8* %x) [[NUW:#[0-9]+]]
+; CHECK-NOT: @objc_
+; CHECK: if.end5:
+; CHECK:   tail call void @objc_release(i8* %x) [[NUW]]
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test1b_precise(i8* %x, i1 %p, i1 %q) {
+entry:
+  tail call i8* @objc_retain(i8* %x) nounwind
+  br i1 %p, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @callee()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  br i1 %q, label %if.then3, label %if.end5
+
+if.then3:                                         ; preds = %if.end
+  tail call void @use_pointer(i8* %x)
+  br label %if.end5
+
+if.end5:                                          ; preds = %if.then3, %if.end
+  tail call void @objc_release(i8* %x) nounwind
+  ret void
+}
+
+; CHECK: define void @test1b_imprecise(
 ; CHECK: entry:
 ; CHECK:   tail call i8* @objc_retain(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK-NOT: @objc_
@@ -98,7 +185,7 @@ alt_return:
 ; CHECK:   tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test1b(i8* %x, i1 %p, i1 %q) {
+define void @test1b_imprecise(i8* %x, i1 %p, i1 %q) {
 entry:
   tail call i8* @objc_retain(i8* %x) nounwind
   br i1 %p, label %if.then, label %if.end
@@ -119,14 +206,15 @@ if.end5:                                          ; preds = %if.then3, %if.end
   ret void
 }
 
+
 ; Like test0 but the pointer is passed to an intervening call,
 ; so the optimization is not safe.
 
-; CHECK: define void @test2(
+; CHECK: define void @test2_precise(
 ; CHECK: @objc_retain(i8* %a)
 ; CHECK: @objc_release
 ; CHECK: }
-define void @test2(i32* %x, i1 %p) nounwind {
+define void @test2_precise(i32* %x, i1 %p) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -151,16 +239,45 @@ return:
   ret void
 }
 
+; CHECK: define void @test2_imprecise(
+; CHECK: @objc_retain(i8* %a)
+; CHECK: @objc_release
+; CHECK: }
+define void @test2_imprecise(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  call void @use_pointer(i8* %0)
+  %d = bitcast i32* %x to float*
+  store float 3.0, float* %d
+  br label %return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
 ; Like test0 but the release is in a loop,
 ; so the optimization is not safe.
 
 ; TODO: For now, assume this can't happen.
 
-; CHECK: define void @test3(
+; CHECK: define void @test3_precise(
 ; TODO: @objc_retain(i8* %a)
 ; TODO: @objc_release
 ; CHECK: }
-define void @test3(i32* %x, i1* %q) nounwind {
+define void @test3_precise(i32* %x, i1* %q) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -176,16 +293,37 @@ return:
   ret void
 }
 
+; CHECK: define void @test3_imprecise(
+; TODO: @objc_retain(i8* %a)
+; TODO: @objc_release
+; CHECK: }
+define void @test3_imprecise(i32* %x, i1* %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br label %loop
+
+loop:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  %j = load volatile i1* %q
+  br i1 %j, label %loop, label %return
+
+return:
+  ret void
+}
+
+
 ; TODO: For now, assume this can't happen.
 
 ; Like test0 but the retain is in a loop,
 ; so the optimization is not safe.
 
-; CHECK: define void @test4(
+; CHECK: define void @test4_precise(
 ; TODO: @objc_retain(i8* %a)
 ; TODO: @objc_release
 ; CHECK: }
-define void @test4(i32* %x, i1* %q) nounwind {
+define void @test4_precise(i32* %x, i1* %q) nounwind {
 entry:
   br label %loop
 
@@ -201,14 +339,35 @@ return:
   ret void
 }
 
+; CHECK: define void @test4_imprecise(
+; TODO: @objc_retain(i8* %a)
+; TODO: @objc_release
+; CHECK: }
+define void @test4_imprecise(i32* %x, i1* %q) nounwind {
+entry:
+  br label %loop
+
+loop:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  %j = load volatile i1* %q
+  br i1 %j, label %loop, label %return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+
 ; Like test0 but the pointer is conditionally passed to an intervening call,
 ; so the optimization is not safe.
 
-; CHECK: define void @test5(
+; CHECK: define void @test5a(
 ; CHECK: @objc_retain(i8*
 ; CHECK: @objc_release
 ; CHECK: }
-define void @test5(i32* %x, i1 %q, i8* %y) nounwind {
+define void @test5a(i32* %x, i1 %q, i8* %y) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -220,13 +379,98 @@ entry:
   ret void
 }
 
+; CHECK: define void @test5b(
+; CHECK: @objc_retain(i8*
+; CHECK: @objc_release
+; CHECK: }
+define void @test5b(i32* %x, i1 %q, i8* %y) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  %s = select i1 %q, i8* %y, i8* %0
+  call void @use_pointer(i8* %s)
+  store i32 7, i32* %x
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+
 ; retain+release pair deletion, where the release happens on two different
 ; flow paths.
 
-; CHECK: define void @test6(
+; CHECK: define void @test6a(
+; CHECK: entry:
+; CHECK:   tail call i8* @objc_retain(
+; CHECK: t:
+; CHECK:   call void @objc_release(
+; CHECK: f:
+; CHECK:   call void @objc_release(
+; CHECK: return:
+; CHECK: }
+define void @test6a(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  %ct = bitcast i32* %x to i8*
+  call void @objc_release(i8* %ct) nounwind
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  call void @callee()
+  %cf = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cf) nounwind
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test6b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test6(i32* %x, i1 %p) nounwind {
+define void @test6b(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  %ct = bitcast i32* %x to i8*
+  call void @objc_release(i8* %ct) nounwind, !clang.imprecise_release !0
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  call void @callee()
+  %cf = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cf) nounwind, !clang.imprecise_release !0
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test6c(
+; CHECK: entry:
+; CHECK:   tail call i8* @objc_retain(
+; CHECK: t:
+; CHECK:   call void @objc_release(
+; CHECK: f:
+; CHECK:   call void @objc_release(
+; CHECK: return:
+; CHECK: }
+define void @test6c(i32* %x, i1 %p) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -240,6 +484,40 @@ t:
   call void @objc_release(i8* %ct) nounwind
   br label %return
 
+f:
+  store i32 7, i32* %x
+  call void @callee()
+  %cf = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cf) nounwind, !clang.imprecise_release !0
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test6d(
+; CHECK: entry:
+; CHECK:   tail call i8* @objc_retain(
+; CHECK: t:
+; CHECK:   call void @objc_release(
+; CHECK: f:
+; CHECK:   call void @objc_release(
+; CHECK: return:
+; CHECK: }
+define void @test6d(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  %ct = bitcast i32* %x to i8*
+  call void @objc_release(i8* %ct) nounwind, !clang.imprecise_release !0
+  br label %return
+
 f:
   store i32 7, i32* %x
   call void @callee()
@@ -251,11 +529,19 @@ return:
   ret void
 }
 
+
 ; retain+release pair deletion, where the retain happens on two different
 ; flow paths.
 
-; CHECK: define void @test7(
-; CHECK-NOT: @objc_
+; CHECK:     define void @test7(
+; CHECK:     entry:
+; CHECK-NOT:   objc_
+; CHECK:     t:
+; CHECK:       call i8* @objc_retain
+; CHECK:     f:
+; CHECK:       call i8* @objc_retain
+; CHECK:     return:
+; CHECK:       call void @objc_release
 ; CHECK: }
 define void @test7(i32* %x, i1 %p) nounwind {
 entry:
@@ -281,17 +567,44 @@ return:
   ret void
 }
 
+; CHECK: define void @test7b(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test7b(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  br i1 %p, label %t, label %f
+
+t:
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  %1 = call i8* @objc_retain(i8* %a) nounwind
+  store i32 7, i32* %x
+  call void @callee()
+  br label %return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
 ; Like test7, but there's a retain/retainBlock mismatch. Don't delete!
 
-; CHECK: define void @test7b
+; CHECK: define void @test7c
 ; CHECK: t:
-; CHECK: call i8* @objc_retainBlock
+; CHECK:   call i8* @objc_retainBlock
 ; CHECK: f:
-; CHECK: call i8* @objc_retain
+; CHECK:   call i8* @objc_retain
 ; CHECK: return:
-; CHECK: call void @objc_release
+; CHECK:   call void @objc_release
 ; CHECK: }
-define void @test7b(i32* %x, i1 %p) nounwind {
+define void @test7c(i32* %x, i1 %p) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   br i1 %p, label %t, label %f
@@ -318,10 +631,106 @@ return:
 ; retain+release pair deletion, where the retain and release both happen on
 ; different flow paths. Wild!
 
-; CHECK: define void @test8(
+; CHECK: define void @test8a(
+; CHECK: entry:
+; CHECK: t:
+; CHECK:   @objc_retain
+; CHECK: f:
+; CHECK:   @objc_retain
+; CHECK: mid:
+; CHECK: u:
+; CHECK:   @objc_release
+; CHECK: g:
+; CHECK:   @objc_release
+; CHECK: return:
+; CHECK: }
+define void @test8a(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  br i1 %p, label %t, label %f
+
+t:
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %mid
+
+f:
+  %1 = call i8* @objc_retain(i8* %a) nounwind
+  store i32 7, i32* %x
+  br label %mid
+
+mid:
+  br i1 %q, label %u, label %g
+
+u:
+  call void @callee()
+  %cu = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cu) nounwind
+  br label %return
+
+g:
+  %cg = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cg) nounwind
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test8b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test8(i32* %x, i1 %p, i1 %q) nounwind {
+define void @test8b(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  br i1 %p, label %t, label %f
+
+t:
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %mid
+
+f:
+  %1 = call i8* @objc_retain(i8* %a) nounwind
+  store i32 7, i32* %x
+  br label %mid
+
+mid:
+  br i1 %q, label %u, label %g
+
+u:
+  call void @callee()
+  %cu = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cu) nounwind, !clang.imprecise_release !0
+  br label %return
+
+g:
+  %cg = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cg) nounwind, !clang.imprecise_release !0
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test8c(
+; CHECK: entry:
+; CHECK: t:
+; CHECK:   @objc_retain
+; CHECK: f:
+; CHECK:   @objc_retain
+; CHECK: mid:
+; CHECK: u:
+; CHECK:   @objc_release
+; CHECK: g:
+; CHECK:   @objc_release
+; CHECK: return:
+; CHECK: }
+define void @test8c(i32* %x, i1 %p, i1 %q) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   br i1 %p, label %t, label %f
@@ -347,6 +756,54 @@ u:
   call void @objc_release(i8* %cu) nounwind
   br label %return
 
+g:
+  %cg = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cg) nounwind, !clang.imprecise_release !0
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test8d(
+; CHECK: entry:
+; CHECK: t:
+; CHECK:   @objc_retain
+; CHECK: f:
+; CHECK:   @objc_retain
+; CHECK: mid:
+; CHECK: u:
+; CHECK:   @objc_release
+; CHECK: g:
+; CHECK:   @objc_release
+; CHECK: return:
+; CHECK: }
+define void @test8d(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  br i1 %p, label %t, label %f
+
+t:
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %mid
+
+f:
+  %1 = call i8* @objc_retain(i8* %a) nounwind
+  store i32 7, i32* %x
+  br label %mid
+
+mid:
+  br i1 %q, label %u, label %g
+
+u:
+  call void @callee()
+  %cu = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cu) nounwind, !clang.imprecise_release !0
+  br label %return
+
 g:
   %cg = bitcast i32* %x to i8*
   call void @objc_release(i8* %cg) nounwind
@@ -428,11 +885,13 @@ entry:
   ret void
 }
 
-; Same as test11 but the value is returned. Do an RV optimization.
+; Same as test11 but the value is returned. Do not perform an RV optimization
+; since if the frontend emitted code for an __autoreleasing variable, we may
+; want it to be in the autorelease pool.
 
 ; CHECK: define i8* @test11b(
 ; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
-; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %0) [[NUW]]
+; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
 ; CHECK: }
 define i8* @test11b(i8* %x) nounwind {
 entry:
@@ -552,53 +1011,170 @@ entry:
   call void @use_pointer(i8* %x)
   call void @use_pointer(i8* %x)
   call void @objc_release(i8* %x) nounwind
-  call void @objc_release(i8* %x) nounwind
+  call void @objc_release(i8* %x) nounwind
+  ret void
+}
+
+; Trivial retain,autorelease pair with intervening call, but it's post-dominated
+; by another release. Don't delete anything.
+
+; CHECK: define void @test15(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: @objc_retain(i8* %x)
+; CHECK-NEXT: @use_pointer
+; CHECK-NEXT: @objc_autorelease(i8* %x)
+; CHECK-NEXT: @objc_release
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test15(i8* %x, i64 %n) {
+entry:
+  call i8* @objc_retain(i8* %x) nounwind
+  call void @use_pointer(i8* %x)
+  call i8* @objc_autorelease(i8* %x) nounwind
+  call void @objc_release(i8* %x) nounwind
+  ret void
+}
+
+; Trivial retain,autorelease pair, post-dominated
+; by another release. Delete the retain and release.
+
+; CHECK: define void @test15b
+; CHECK-NEXT: entry:
+; CHECK-NEXT: @objc_retain
+; CHECK-NEXT: @objc_autorelease
+; CHECK-NEXT: @objc_release
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test15b(i8* %x, i64 %n) {
+entry:
+  call i8* @objc_retain(i8* %x) nounwind
+  call i8* @objc_autorelease(i8* %x) nounwind
+  call void @objc_release(i8* %x) nounwind
+  ret void
+}
+
+; CHECK: define void @test15c
+; CHECK-NEXT: entry:
+; CHECK-NEXT: @objc_autorelease
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test15c(i8* %x, i64 %n) {
+entry:
+  call i8* @objc_retain(i8* %x) nounwind
+  call i8* @objc_autorelease(i8* %x) nounwind
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Retain+release pairs in diamonds, all dominated by a retain.
+
+; CHECK: define void @test16a(
+; CHECK: @objc_retain(i8* %x)
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test16a(i1 %a, i1 %b, i8* %x) {
+entry:
+  call i8* @objc_retain(i8* %x) nounwind
+  br i1 %a, label %red, label %orange
+
+red:
+  call i8* @objc_retain(i8* %x) nounwind
+  br label %yellow
+
+orange:
+  call i8* @objc_retain(i8* %x) nounwind
+  br label %yellow
+
+yellow:
+  call void @use_pointer(i8* %x)
+  call void @use_pointer(i8* %x)
+  br i1 %b, label %green, label %blue
+
+green:
+  call void @objc_release(i8* %x) nounwind
+  br label %purple
+
+blue:
+  call void @objc_release(i8* %x) nounwind
+  br label %purple
+
+purple:
+  ret void
+}
+
+; CHECK: define void @test16b(
+; CHECK: @objc_retain(i8* %x)
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test16b(i1 %a, i1 %b, i8* %x) {
+entry:
+  call i8* @objc_retain(i8* %x) nounwind
+  br i1 %a, label %red, label %orange
+
+red:
+  call i8* @objc_retain(i8* %x) nounwind
+  br label %yellow
+
+orange:
+  call i8* @objc_retain(i8* %x) nounwind
+  br label %yellow
+
+yellow:
+  call void @use_pointer(i8* %x)
+  call void @use_pointer(i8* %x)
+  br i1 %b, label %green, label %blue
+
+green:
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  br label %purple
+
+blue:
+  call void @objc_release(i8* %x) nounwind
+  br label %purple
+
+purple:
   ret void
 }
 
-; Trivial retain,autorelease pair with intervening call, but it's post-dominated
-; by another release. Don't delete anything.
-
-; CHECK: define void @test15(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: @objc_retain(i8* %x)
-; CHECK-NEXT: @use_pointer
-; CHECK-NEXT: @objc_autorelease(i8* %x)
-; CHECK-NEXT: @objc_release
-; CHECK-NEXT: ret void
-; CHECK-NEXT: }
-define void @test15(i8* %x, i64 %n) {
+; CHECK: define void @test16c(
+; CHECK: @objc_retain(i8* %x)
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test16c(i1 %a, i1 %b, i8* %x) {
 entry:
   call i8* @objc_retain(i8* %x) nounwind
-  call void @use_pointer(i8* %x)
-  call i8* @objc_autorelease(i8* %x) nounwind
-  call void @objc_release(i8* %x) nounwind
-  ret void
-}
+  br i1 %a, label %red, label %orange
 
-; Trivial retain,autorelease pair, post-dominated
-; by another release. Delete the retain and release.
+red:
+  call i8* @objc_retain(i8* %x) nounwind
+  br label %yellow
 
-; CHECK: define void @test15b
-; CHECK-NEXT: entry:
-; CHECK-NEXT: @objc_autorelease
-; CHECK-NEXT: ret void
-; CHECK-NEXT: }
-define void @test15b(i8* %x, i64 %n) {
-entry:
+orange:
   call i8* @objc_retain(i8* %x) nounwind
-  call i8* @objc_autorelease(i8* %x) nounwind
-  call void @objc_release(i8* %x) nounwind
+  br label %yellow
+
+yellow:
+  call void @use_pointer(i8* %x)
+  call void @use_pointer(i8* %x)
+  br i1 %b, label %green, label %blue
+
+green:
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  br label %purple
+
+blue:
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  br label %purple
+
+purple:
   ret void
 }
 
-; Retain+release pairs in diamonds, all dominated by a retain.
-
-; CHECK: define void @test16(
+; CHECK: define void @test16d(
 ; CHECK: @objc_retain(i8* %x)
 ; CHECK-NOT: @objc
 ; CHECK: }
-define void @test16(i1 %a, i1 %b, i8* %x) {
+define void @test16d(i1 %a, i1 %b, i8* %x) {
 entry:
   call i8* @objc_retain(i8* %x) nounwind
   br i1 %a, label %red, label %orange
@@ -621,13 +1197,14 @@ green:
   br label %purple
 
 blue:
-  call void @objc_release(i8* %x) nounwind
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
   br label %purple
 
 purple:
   ret void
 }
 
+
 ; Retain+release pairs in diamonds, all post-dominated by a release.
 
 ; CHECK: define void @test17(
@@ -795,10 +1372,10 @@ entry:
   ret void
 }
 
-; Don't optimize objc_retainBlock.
+; Don't optimize objc_retainBlock, but do strength reduce it.
 
 ; CHECK: define void @test23b
-; CHECK: @objc_retainBlock
+; CHECK: @objc_retain
 ; CHECK: @objc_release
 ; CHECK: }
 define void @test23b(i8* %p) {
@@ -1161,12 +1738,16 @@ done:
   ret void
 }
 
-; Delete retain,release if there's just a possible dec.
+; Delete retain,release if there's just a possible dec and we have imprecise
+; releases.
 
-; CHECK: define void @test34(
-; CHECK-NOT: @objc_
+; CHECK: define void @test34a(
+; CHECK:   call i8* @objc_retain
+; CHECK: true:
+; CHECK: done:
+; CHECK: call void @objc_release
 ; CHECK: }
-define void @test34(i8* %p, i1 %x, i8* %y) {
+define void @test34a(i8* %p, i1 %x, i8* %y) {
 entry:
   %f0 = call i8* @objc_retain(i8* %p)
   br i1 %x, label %true, label %done
@@ -1182,12 +1763,38 @@ done:
   ret void
 }
 
-; Delete retain,release if there's just a use.
-
-; CHECK: define void @test35(
+; CHECK: define void @test34b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test35(i8* %p, i1 %x, i8* %y) {
+define void @test34b(i8* %p, i1 %x, i8* %y) {
+entry:
+  %f0 = call i8* @objc_retain(i8* %p)
+  br i1 %x, label %true, label %done
+
+true:
+  call void @callee()
+  br label %done
+
+done:
+  %g = bitcast i8* %p to i8*
+  %h = getelementptr i8* %g, i64 0
+  call void @objc_release(i8* %g), !clang.imprecise_release !0
+  ret void
+}
+
+
+; Delete retain,release if there's just a use and we do not have a precise
+; release.
+
+; Precise.
+; CHECK: define void @test35a(
+; CHECK: entry:
+; CHECK:   call i8* @objc_retain
+; CHECK: true:
+; CHECK: done:
+; CHECK:   call void @objc_release
+; CHECK: }
+define void @test35a(i8* %p, i1 %x, i8* %y) {
 entry:
   %f0 = call i8* @objc_retain(i8* %p)
   br i1 %x, label %true, label %done
@@ -1203,16 +1810,36 @@ done:
   ret void
 }
 
-; Delete a retain,release if there's no actual use.
-
-; CHECK: define void @test36(
+; Imprecise.
+; CHECK: define void @test35b(
 ; CHECK-NOT: @objc_
+; CHECK: }
+define void @test35b(i8* %p, i1 %x, i8* %y) {
+entry:
+  %f0 = call i8* @objc_retain(i8* %p)
+  br i1 %x, label %true, label %done
+
+true:
+  %v = icmp eq i8* %p, %y
+  br label %done
+
+done:
+  %g = bitcast i8* %p to i8*
+  %h = getelementptr i8* %g, i64 0
+  call void @objc_release(i8* %g), !clang.imprecise_release !0
+  ret void
+}
+
+; Delete a retain,release if there's no actual use and we have precise release.
+
+; CHECK: define void @test36a(
+; CHECK: @objc_retain
 ; CHECK: call void @callee()
 ; CHECK-NOT: @objc_
 ; CHECK: call void @callee()
-; CHECK-NOT: @objc_
+; CHECK: @objc_release
 ; CHECK: }
-define void @test36(i8* %p) {
+define void @test36a(i8* %p) {
 entry:
   call i8* @objc_retain(i8* %p)
   call void @callee()
@@ -1223,10 +1850,10 @@ entry:
 
 ; Like test36, but with metadata.
 
-; CHECK: define void @test37(
+; CHECK: define void @test36b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test37(i8* %p) {
+define void @test36b(i8* %p) {
 entry:
   call i8* @objc_retain(i8* %p)
   call void @callee()
@@ -1497,27 +2124,45 @@ define i8* @test49(i8* %p) nounwind {
   ret i8* %x
 }
 
-; Do delete retain+release with intervening stores of the
-; address value.
+; Do delete retain+release with intervening stores of the address value if we
+; have imprecise release attached to objc_release.
+
+; CHECK:      define void @test50a(
+; CHECK-NEXT:   call i8* @objc_retain
+; CHECK-NEXT:   call void @callee
+; CHECK-NEXT:   store
+; CHECK-NEXT:   call void @objc_release
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+define void @test50a(i8* %p, i8** %pp) {
+  call i8* @objc_retain(i8* %p)
+  call void @callee()
+  store i8* %p, i8** %pp
+  call void @objc_release(i8* %p)
+  ret void
+}
 
-; CHECK: define void @test50(
+; CHECK: define void @test50b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test50(i8* %p, i8** %pp) {
+define void @test50b(i8* %p, i8** %pp) {
   call i8* @objc_retain(i8* %p)
   call void @callee()
   store i8* %p, i8** %pp
-  call void @objc_release(i8* %p)
+  call void @objc_release(i8* %p), !clang.imprecise_release !0
   ret void
 }
 
+
 ; Don't delete retain+release with intervening stores through the
 ; address value.
 
-; CHECK: define void @test51(
+; CHECK: define void @test51a(
 ; CHECK: call i8* @objc_retain(i8* %p)
 ; CHECK: call void @objc_release(i8* %p)
-define void @test51(i8* %p) {
+; CHECK: ret void
+; CHECK: }
+define void @test51a(i8* %p) {
   call i8* @objc_retain(i8* %p)
   call void @callee()
   store i8 0, i8* %p
@@ -1525,15 +2170,30 @@ define void @test51(i8* %p) {
   ret void
 }
 
+; CHECK: define void @test51b(
+; CHECK: call i8* @objc_retain(i8* %p)
+; CHECK: call void @objc_release(i8* %p)
+; CHECK: ret void
+; CHECK: }
+define void @test51b(i8* %p) {
+  call i8* @objc_retain(i8* %p)
+  call void @callee()
+  store i8 0, i8* %p
+  call void @objc_release(i8* %p), !clang.imprecise_release !0
+  ret void
+}
+
 ; Don't delete retain+release with intervening use of a pointer of
 ; unknown provenance.
 
-; CHECK: define void @test52(
+; CHECK: define void @test52a(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call void @callee()
 ; CHECK: call void @use_pointer(i8* %z)
 ; CHECK: call void @objc_release
-define void @test52(i8** %zz, i8** %pp) {
+; CHECK: ret void
+; CHECK: }
+define void @test52a(i8** %zz, i8** %pp) {
   %p = load i8** %pp
   %1 = call i8* @objc_retain(i8* %p)
   call void @callee()
@@ -1543,6 +2203,23 @@ define void @test52(i8** %zz, i8** %pp) {
   ret void
 }
 
+; CHECK: define void @test52b(
+; CHECK: call i8* @objc_retain
+; CHECK: call void @callee()
+; CHECK: call void @use_pointer(i8* %z)
+; CHECK: call void @objc_release
+; CHECK: ret void
+; CHECK: }
+define void @test52b(i8** %zz, i8** %pp) {
+  %p = load i8** %pp
+  %1 = call i8* @objc_retain(i8* %p)
+  call void @callee()
+  %z = load i8** %zz
+  call void @use_pointer(i8* %z)
+  call void @objc_release(i8* %p), !clang.imprecise_release !0
+  ret void
+}
+
 ; Like test52, but the pointer has function type, so it's assumed to
 ; be not reference counted.
 ; Oops. That's wrong. Clang sometimes uses function types gratuitously.
@@ -1695,19 +2372,78 @@ entry:
 @constptr = external constant i8*
 @something = external global i8*
 
-; CHECK: define void @test60(
-; CHECK-NOT: @objc_
+; We have a precise lifetime retain/release here. We can not remove them since
+; @something is not constant.
+
+; CHECK: define void @test60a(
+; CHECK: call i8* @objc_retain
+; CHECK: call void @objc_release
+; CHECK: }
+define void @test60a() {
+  %t = load i8** @constptr
+  %s = load i8** @something
+  call i8* @objc_retain(i8* %s)
+  call void @callee()
+  call void @use_pointer(i8* %t)
+  call void @objc_release(i8* %s)
+  ret void
+}
+
+; CHECK: define void @test60b(
+; CHECK: call i8* @objc_retain
+; CHECK-NOT: call i8* @objc_retain
+; CHECK-NOT: call i8* @objc_rrelease
 ; CHECK: }
-define void @test60() {
+define void @test60b() {
   %t = load i8** @constptr
   %s = load i8** @something
   call i8* @objc_retain(i8* %s)
+  call i8* @objc_retain(i8* %s)
   call void @callee()
   call void @use_pointer(i8* %t)
   call void @objc_release(i8* %s)
   ret void
 }
 
+; CHECK: define void @test60c(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test60c() {
+  %t = load i8** @constptr
+  %s = load i8** @something
+  call i8* @objc_retain(i8* %s)
+  call void @callee()
+  call void @use_pointer(i8* %t)
+  call void @objc_release(i8* %s), !clang.imprecise_release !0
+  ret void
+}
+
+; CHECK: define void @test60d(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test60d() {
+  %t = load i8** @constptr
+  %s = load i8** @something
+  call i8* @objc_retain(i8* %t)
+  call void @callee()
+  call void @use_pointer(i8* %s)
+  call void @objc_release(i8* %t)
+  ret void
+}
+
+; CHECK: define void @test60e(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test60e() {
+  %t = load i8** @constptr
+  %s = load i8** @something
+  call i8* @objc_retain(i8* %t)
+  call void @callee()
+  call void @use_pointer(i8* %s)
+  call void @objc_release(i8* %t), !clang.imprecise_release !0
+  ret void
+}
+
 ; Constant pointers to objects don't need to be considered related to other
 ; pointers.
 
@@ -1874,11 +2610,13 @@ return:                                           ; preds = %if.then, %entry
 ; An objc_retain can serve as a may-use for a different pointer.
 ; rdar://11931823
 
-; CHECK: define void @test66(
-; CHECK:   %tmp7 = tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK: define void @test66a(
+; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
+; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
 ; CHECK:   tail call void @objc_release(i8* %cond) [[NUW]]
 ; CHECK: }
-define void @test66(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
+define void @test66a(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
 entry:
   br i1 %tobool, label %cond.true, label %cond.end
 
@@ -1895,7 +2633,74 @@ cond.end:                                         ; preds = %cond.true, %entry
   ret void
 }
 
-declare void @bar(i32 ()*)
+; CHECK: define void @test66b(
+; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
+; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %cond) [[NUW]]
+; CHECK: }
+define void @test66b(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
+entry:
+  br i1 %tobool, label %cond.true, label %cond.end
+
+cond.true:
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %entry
+  %cond = phi i8* [ %tmp5, %cond.true ], [ %call, %entry ]
+  %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
+  tail call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+  %tmp8 = select i1 %tobool1, i8* %cond, i8* %bar
+  %tmp9 = tail call i8* @objc_retain(i8* %tmp8) nounwind
+  tail call void @objc_release(i8* %cond) nounwind
+  ret void
+}
+
+; CHECK: define void @test66c(
+; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
+; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %cond) [[NUW]]
+; CHECK: }
+define void @test66c(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
+entry:
+  br i1 %tobool, label %cond.true, label %cond.end
+
+cond.true:
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %entry
+  %cond = phi i8* [ %tmp5, %cond.true ], [ %call, %entry ]
+  %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
+  tail call void @objc_release(i8* %call) nounwind
+  %tmp8 = select i1 %tobool1, i8* %cond, i8* %bar
+  %tmp9 = tail call i8* @objc_retain(i8* %tmp8) nounwind, !clang.imprecise_release !0
+  tail call void @objc_release(i8* %cond) nounwind
+  ret void
+}
+
+; CHECK: define void @test66d(
+; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
+; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %cond) [[NUW]]
+; CHECK: }
+define void @test66d(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
+entry:
+  br i1 %tobool, label %cond.true, label %cond.end
+
+cond.true:
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %entry
+  %cond = phi i8* [ %tmp5, %cond.true ], [ %call, %entry ]
+  %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
+  tail call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+  %tmp8 = select i1 %tobool1, i8* %cond, i8* %bar
+  %tmp9 = tail call i8* @objc_retain(i8* %tmp8) nounwind
+  tail call void @objc_release(i8* %cond) nounwind, !clang.imprecise_release !0
+  ret void
+}
 
 ; A few real-world testcases.
 
diff --git a/test/Transforms/ObjCARC/cfg-hazards.ll b/test/Transforms/ObjCARC/cfg-hazards.ll
index 899298b5967e..0156d5bfb464 100644
--- a/test/Transforms/ObjCARC/cfg-hazards.ll
+++ b/test/Transforms/ObjCARC/cfg-hazards.ll
@@ -8,6 +8,7 @@ declare void @use_pointer(i8*)
 declare i8* @objc_retain(i8*)
 declare void @objc_release(i8*)
 declare void @callee()
+declare void @block_callee(void ()*)
 
 ; CHECK: define void @test0(
 ; CHECK:   call i8* @objc_retain(
@@ -394,6 +395,41 @@ exit:
   ret void
 }
 
+; Do not improperly pair retains in a for loop with releases outside of a for
+; loop when the proper pairing is disguised by a separate provenance represented
+; by an alloca.
+; rdar://12969722
+
+; CHECK: define void @test13(i8* %a) [[NUW]] {
+; CHECK: entry:
+; CHECK:   tail call i8* @objc_retain(i8* %a) [[NUW]]
+; CHECK: loop:
+; CHECK:   tail call i8* @objc_retain(i8* %a) [[NUW]]
+; CHECK:   call void @block_callee
+; CHECK:   call void @objc_release(i8* %reloaded_a) [[NUW]]
+; CHECK: exit:
+; CHECK:   call void @objc_release(i8* %a) [[NUW]]
+; CHECK: }
+define void @test13(i8* %a) nounwind {
+entry:
+  %block = alloca i8*
+  %a1 = tail call i8* @objc_retain(i8* %a) nounwind
+  br label %loop
+
+loop:
+  %a2 = tail call i8* @objc_retain(i8* %a) nounwind
+  store i8* %a, i8** %block, align 8
+  %casted_block = bitcast i8** %block to void ()*
+  call void @block_callee(void ()* %casted_block)
+  %reloaded_a = load i8** %block, align 8
+  call void @objc_release(i8* %reloaded_a) nounwind, !clang.imprecise_release !0
+  br i1 undef, label %loop, label %exit
+  
+exit:
+  call void @objc_release(i8* %a) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
 ; CHECK: attributes [[NUW]] = { nounwind }
 
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll b/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll
new file mode 100644
index 000000000000..4215b5c36465
--- /dev/null
+++ b/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll
@@ -0,0 +1,16 @@
+; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
+
+; This file makes sure that clang.arc.used is removed even if no other ARC
+; interesting calls are in the module.
+
+declare void @clang.arc.use(...) nounwind
+
+; Kill calls to @clang.arc.use(...)
+; CHECK: define void @test0(
+; CHECK-NOT: clang.arc.use
+; CHECK: }
+define void @test0(i8* %a, i8* %b) {
+  call void (...)* @clang.arc.use(i8* %a, i8* %b) nounwind
+  ret void
+}
+
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll
index b6fba5960345..0b60683d9995 100644
--- a/test/Transforms/ObjCARC/contract.ll
+++ b/test/Transforms/ObjCARC/contract.ll
@@ -162,4 +162,15 @@ return:                                           ; preds = %if.then, %entry
   ret i8* %retval
 }
 
+; Kill calls to @clang.arc.use(...)
+; CHECK: define void @test9(
+; CHECK-NOT: clang.arc.use
+; CHECK: }
+define void @test9(i8* %a, i8* %b) {
+  call void (...)* @clang.arc.use(i8* %a, i8* %b) nounwind
+  ret void
+}
+
+declare void @clang.arc.use(...) nounwind
+
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/intrinsic-use.ll b/test/Transforms/ObjCARC/intrinsic-use.ll
new file mode 100644
index 000000000000..2f7c7c8bf351
--- /dev/null
+++ b/test/Transforms/ObjCARC/intrinsic-use.ll
@@ -0,0 +1,114 @@
+; RUN: opt -basicaa -objc-arc -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+declare i8* @objc_retain(i8*)
+declare i8* @objc_retainAutorelease(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_autorelease(i8*)
+
+declare void @clang.arc.use(...)
+
+declare void @test0_helper(i8*, i8**)
+
+; Ensure that we honor clang.arc.use as a use and don't miscompile
+; the reduced test case from <rdar://13195034>.
+;
+; FIXME: the fact that we re-order retains w.r.t. @clang.arc.use could
+; be problematic if we get run twice, e.g. under LTO.
+;
+; CHECK:      define void @test0(
+; CHECK:        @objc_retain(i8* %x)
+; CHECK-NEXT:   store i8* %y, i8** %temp0
+; CHECK-NEXT:   @objc_retain(i8* %y)
+; CHECK-NEXT:   call void @test0_helper
+; CHECK-NEXT:   [[VAL1:%.*]] = load i8** %temp0
+; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* %y)
+; CHECK-NEXT:   @objc_retain(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_release(i8* %y)
+; CHECK-NEXT:   store i8* [[VAL1]], i8** %temp1
+; CHECK-NEXT:   call void @test0_helper
+; CHECK-NEXT:   [[VAL2:%.*]] = load i8** %temp1
+; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_retain(i8* [[VAL2]])
+; CHECK-NEXT:   @objc_release(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_autorelease(i8* %x)
+; CHECK-NEXT:   store i8* %x, i8** %out
+; CHECK-NEXT:   @objc_retain(i8* %x)
+; CHECK-NEXT:   @objc_release(i8* [[VAL2]])
+; CHECK-NEXT:   @objc_release(i8* %x)
+; CHECK-NEXT:   ret void
+define void @test0(i8** %out, i8* %x, i8* %y) {
+entry:
+  %temp0 = alloca i8*, align 8
+  %temp1 = alloca i8*, align 8
+  %0 = call i8* @objc_retain(i8* %x) nounwind
+  %1 = call i8* @objc_retain(i8* %y) nounwind
+  store i8* %y, i8** %temp0
+  call void @test0_helper(i8* %x, i8** %temp0)
+  %val1 = load i8** %temp0
+  %2 = call i8* @objc_retain(i8* %val1) nounwind
+  call void (...)* @clang.arc.use(i8* %y) nounwind
+  call void @objc_release(i8* %y) nounwind
+  store i8* %val1, i8** %temp1
+  call void @test0_helper(i8* %x, i8** %temp1)
+  %val2 = load i8** %temp1
+  %3 = call i8* @objc_retain(i8* %val2) nounwind
+  call void (...)* @clang.arc.use(i8* %val1) nounwind
+  call void @objc_release(i8* %val1) nounwind
+  %4 = call i8* @objc_retain(i8* %x) nounwind
+  %5 = call i8* @objc_autorelease(i8* %x) nounwind
+  store i8* %x, i8** %out
+  call void @objc_release(i8* %val2) nounwind
+  call void @objc_release(i8* %x) nounwind
+  ret void
+}
+
+; CHECK:      define void @test0a(
+; CHECK:        @objc_retain(i8* %x)
+; CHECK-NEXT:   store i8* %y, i8** %temp0
+; CHECK-NEXT:   @objc_retain(i8* %y)
+; CHECK-NEXT:   call void @test0_helper
+; CHECK-NEXT:   [[VAL1:%.*]] = load i8** %temp0
+; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* %y)
+; CHECK-NEXT:   @objc_retain(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_release(i8* %y)
+; CHECK-NEXT:   store i8* [[VAL1]], i8** %temp1
+; CHECK-NEXT:   call void @test0_helper
+; CHECK-NEXT:   [[VAL2:%.*]] = load i8** %temp1
+; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_retain(i8* [[VAL2]])
+; CHECK-NEXT:   @objc_release(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_autorelease(i8* %x)
+; CHECK-NEXT:   @objc_release(i8* [[VAL2]])
+; CHECK-NEXT:   store i8* %x, i8** %out
+; CHECK-NEXT:   ret void
+define void @test0a(i8** %out, i8* %x, i8* %y) {
+entry:
+  %temp0 = alloca i8*, align 8
+  %temp1 = alloca i8*, align 8
+  %0 = call i8* @objc_retain(i8* %x) nounwind
+  %1 = call i8* @objc_retain(i8* %y) nounwind
+  store i8* %y, i8** %temp0
+  call void @test0_helper(i8* %x, i8** %temp0)
+  %val1 = load i8** %temp0
+  %2 = call i8* @objc_retain(i8* %val1) nounwind
+  call void (...)* @clang.arc.use(i8* %y) nounwind
+  call void @objc_release(i8* %y) nounwind, !clang.imprecise_release !0
+  store i8* %val1, i8** %temp1
+  call void @test0_helper(i8* %x, i8** %temp1)
+  %val2 = load i8** %temp1
+  %3 = call i8* @objc_retain(i8* %val2) nounwind
+  call void (...)* @clang.arc.use(i8* %val1) nounwind
+  call void @objc_release(i8* %val1) nounwind, !clang.imprecise_release !0
+  %4 = call i8* @objc_retain(i8* %x) nounwind
+  %5 = call i8* @objc_autorelease(i8* %x) nounwind
+  store i8* %x, i8** %out
+  call void @objc_release(i8* %val2) nounwind, !clang.imprecise_release !0
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+
+!0 = metadata !{}
+
diff --git a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
index 9728f6e0d94f..58b5bbe9c7e9 100644
--- a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
+++ b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
@@ -59,11 +59,12 @@ lpad:                                             ; preds = %entry
   resume { i8*, i32 } %t8
 }
 
-; There is no !clang.arc.no_objc_arc_exceptions
-; metadata here, so the optimizer shouldn't eliminate anything.
+; There is no !clang.arc.no_objc_arc_exceptions metadata here, so the optimizer
+; shouldn't eliminate anything, but *CAN* strength reduce the objc_retainBlock
+; to an objc_retain.
 
 ; CHECK: define void @test0_no_metadata(
-; CHECK: call i8* @objc_retainBlock(
+; CHECK: call i8* @objc_retain(
 ; CHECK: invoke
 ; CHECK: call void @objc_release(
 ; CHECK: }
diff --git a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
index 2c1ddce32836..8df05ad22666 100644
--- a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
+++ b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
@@ -21,6 +21,23 @@ declare i8* @objc_retainBlock(i8*)
 
 define void @bitcasttest(i8* %storage, void (...)* %block)  {
 ; CHECK: define void @bitcasttest
+entry:
+  %t1 = bitcast void (...)* %block to i8*
+; CHECK: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %storage to void (...)**
+  %t5 = bitcast i8* %t3 to void (...)*
+  store void (...)* %t5, void (...)** %t4, align 8
+; CHECK: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+; CHECK: }
+}
+
+define void @bitcasttest_a(i8* %storage, void (...)* %block)  {
+; CHECK: define void @bitcasttest_a
 entry:
   %t1 = bitcast void (...)* %block to i8*
 ; CHECK-NOT: tail call i8* @objc_retain
@@ -31,12 +48,32 @@ entry:
   %t5 = bitcast i8* %t3 to void (...)*
   store void (...)* %t5, void (...)** %t4, align 8
 ; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1)
+  call void @objc_release(i8* %t1), !clang.imprecise_release !0
   ret void
+; CHECK: }
 }
 
 define void @geptest(void (...)** %storage_array, void (...)* %block)  {
 ; CHECK: define void @geptest
+entry:
+  %t1 = bitcast void (...)* %block to i8*
+; CHECK: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %t3 to void (...)*
+  
+  %storage = getelementptr inbounds void (...)** %storage_array, i64 0
+  
+  store void (...)* %t4, void (...)** %storage, align 8
+; CHECK: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+; CHECK: }
+}
+
+define void @geptest_a(void (...)** %storage_array, void (...)* %block)  {
+; CHECK: define void @geptest_a
 entry:
   %t1 = bitcast void (...)* %block to i8*
 ; CHECK-NOT: tail call i8* @objc_retain
@@ -49,13 +86,32 @@ entry:
   
   store void (...)* %t4, void (...)** %storage, align 8
 ; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1)
+  call void @objc_release(i8* %t1), !clang.imprecise_release !0
   ret void
+; CHECK: }
 }
 
 define void @selecttest(void (...)** %store1, void (...)** %store2,
                         void (...)* %block) {
 ; CHECK: define void @selecttest
+entry:
+  %t1 = bitcast void (...)* %block to i8*
+; CHECK: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %t3 to void (...)*
+  %store = select i1 undef, void (...)** %store1, void (...)** %store2
+  store void (...)* %t4, void (...)** %store, align 8
+; CHECK: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+; CHECK: }
+}
+
+define void @selecttest_a(void (...)** %store1, void (...)** %store2,
+                          void (...)* %block) {
+; CHECK: define void @selecttest_a
 entry:
   %t1 = bitcast void (...)* %block to i8*
 ; CHECK-NOT: tail call i8* @objc_retain
@@ -66,14 +122,45 @@ entry:
   %store = select i1 undef, void (...)** %store1, void (...)** %store2
   store void (...)* %t4, void (...)** %store, align 8
 ; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1)
+  call void @objc_release(i8* %t1), !clang.imprecise_release !0
   ret void
+; CHECK: }
 }
 
 define void @phinodetest(void (...)** %storage1,
                          void (...)** %storage2,
                          void (...)* %block) {
 ; CHECK: define void @phinodetest
+entry:
+  %t1 = bitcast void (...)* %block to i8*
+; CHECK: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %t3 to void (...)*
+  br i1 undef, label %store1_set, label %store2_set
+; CHECK: store1_set:
+
+store1_set:
+  br label %end
+
+store2_set:
+  br label %end
+
+end:
+; CHECK: end:
+  %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set]
+  store void (...)* %t4, void (...)** %storage, align 8
+; CHECK: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+; CHECK: }
+}
+
+define void @phinodetest_a(void (...)** %storage1,
+                           void (...)** %storage2,
+                           void (...)* %block) {
+; CHECK: define void @phinodetest_a
 entry:
   %t1 = bitcast void (...)* %block to i8*
 ; CHECK-NOT: tail call i8* @objc_retain
@@ -93,10 +180,11 @@ end:
   %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set]
   store void (...)* %t4, void (...)** %storage, align 8
 ; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1)
+  call void @objc_release(i8* %t1), !clang.imprecise_release !0
   ret void
 }
 
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This test makes sure that we do not hang clang when visiting a use ;
 ; cycle caused by phi nodes during objc-arc analysis. *NOTE* This    ;
diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll
index ee57049d8aab..1bb3f0276adf 100644
--- a/test/Transforms/ObjCARC/retain-block.ll
+++ b/test/Transforms/ObjCARC/retain-block.ll
@@ -98,7 +98,7 @@ entry:
 ; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]]
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test1_no_metadata(i8* %tmp) {
@@ -122,7 +122,7 @@ entry:
 ; CHECK-NEXT: store i8* %tmp2, i8** %z
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test1_escape(i8* %tmp, i8** %z) {
diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll
index e834179bb712..165829f7c01f 100644
--- a/test/Transforms/ObjCARC/retain-not-declared.ll
+++ b/test/Transforms/ObjCARC/retain-not-declared.ll
@@ -21,8 +21,8 @@ define i8* @test0(i8* %p) {
 entry:
   %call = tail call i8* @objc_unretainedObject(i8* %p)
   %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
-  %1 = tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
-  ret i8* %1
+  %1 = tail call i8* @objc_autoreleaseReturnValue(i8* %call) nounwind
+  ret i8* %call
 }
 
 ; Properly create the @objc_retain declaration when it doesn't already exist.
diff --git a/test/Transforms/ObjCARC/rv.ll b/test/Transforms/ObjCARC/rv.ll
index a2fef967495b..589c60f9f3aa 100644
--- a/test/Transforms/ObjCARC/rv.ll
+++ b/test/Transforms/ObjCARC/rv.ll
@@ -121,7 +121,7 @@ define i8* @test7() {
   %p = call i8* @returner()
   call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
   %t = call i8* @objc_autoreleaseReturnValue(i8* %p)
-  call void @use_pointer(i8* %t)
+  call void @use_pointer(i8* %p)
   ret i8* %t
 }
 
@@ -133,7 +133,7 @@ define i8* @test7b() {
   call void @use_pointer(i8* %p)
   call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
   %t = call i8* @objc_autoreleaseReturnValue(i8* %p)
-  ret i8* %t
+  ret i8* %p
 }
 
 ; Turn objc_retain into objc_retainAutoreleasedReturnValue if its operand
@@ -156,11 +156,11 @@ define i8* @test9(i8* %p) {
   ret i8* %p
 }
 
-; Apply the RV optimization.
+; Do not apply the RV optimization.
 
 ; CHECK: define i8* @test10(i8* %p)
 ; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
-; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %p) [[NUW]]
+; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret i8* %p
 define i8* @test10(i8* %p) {
   %1 = call i8* @objc_retain(i8* %p)
diff --git a/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
index 74ac97c7b307..26cd67727e6a 100644
--- a/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
+++ b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
@@ -72,13 +72,3 @@ entry:
   ret i8* %tmp0
 }
 
-; If we convert a called @objc_autorelease to an @objc_autoreleaseReturnValue,
-; ensure that the tail call is added.
-define i8* @test6(i8* %x) {
-entry:
-  ; CHECK: %tmp0 = tail call i8* @objc_retain(i8* %x)
-  %tmp0 = tail call i8* @objc_retain(i8* %x)
-  ; CHECK: %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
-  %tmp1 = call i8* @objc_autorelease(i8* %x)
-  ret i8* %x
-}
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
new file mode 100644
index 000000000000..d371a9b5b68f
--- /dev/null
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -0,0 +1,166 @@
+;RUN: opt -S -reassociate < %s | FileCheck %s
+
+; ==========================================================================
+;
+;   Xor reassociation general cases
+;  
+; ==========================================================================
+
+; (x | c1) ^ (x | c2) => (x & c3) ^ c3, where c3 = c1^c2
+;   
+define i32 @xor1(i32 %x) {
+  %or = or i32 %x, 123
+  %or1 = or i32 %x, 456
+  %xor = xor i32 %or, %or1
+  ret i32 %xor
+
+;CHECK: @xor1
+;CHECK: %and.ra = and i32 %x, 435
+;CHECK: %xor = xor i32 %and.ra, 435
+}
+
+; Test rule : (x & c1) ^ (x & c2) = (x & (c1^c2))
+; Real testing case : (x & 123) ^ y ^ (x & 345) => (x & 435) ^ y
+define i32 @xor2(i32 %x, i32 %y) {
+  %and = and i32 %x, 123
+  %xor = xor i32 %and, %y
+  %and1 = and i32 %x, 456
+  %xor2 = xor i32 %xor, %and1
+  ret i32 %xor2
+
+;CHECK: @xor2
+;CHECK: %and.ra = and i32 %x, 435
+;CHECK: %xor2 = xor i32 %and.ra, %y
+}
+
+; Test rule: (x | c1) ^ (x & c2) = (x & c3) ^ c1, where c3 = ~c1 ^ c2
+;  c3 = ~c1 ^ c2
+define i32 @xor3(i32 %x, i32 %y) {
+  %or = or i32 %x, 123
+  %xor = xor i32 %or, %y
+  %and = and i32 %x, 456
+  %xor1 = xor i32 %xor, %and
+  ret i32 %xor1
+
+;CHECK: @xor3
+;CHECK: %and.ra = and i32 %x, -436
+;CHECK: %xor = xor i32 %y, 123
+;CHECK: %xor1 = xor i32 %xor, %and.ra
+}
+
+; Test rule: (x | c1) ^ c2 = (x & ~c1) ^ (c1 ^ c2)
+define i32 @xor4(i32 %x, i32 %y) {
+  %and = and i32 %x, -124
+  %xor = xor i32 %y, 435
+  %xor1 = xor i32 %xor, %and
+  ret i32 %xor1
+; CHECK: @xor4
+; CHECK: %and = and i32 %x, -124
+; CHECK: %xor = xor i32 %y, 435
+; CHECK: %xor1 = xor i32 %xor, %and
+}
+
+; ==========================================================================
+;
+;  Xor reassociation special cases
+;  
+; ==========================================================================
+
+; Special case1: 
+;  (x | c1) ^ (x & ~c1) = c1
+define i32 @xor_special1(i32 %x, i32 %y) {
+  %or = or i32 %x, 123
+  %xor = xor i32 %or, %y
+  %and = and i32 %x, -124
+  %xor1 = xor i32 %xor, %and
+  ret i32 %xor1
+; CHECK: @xor_special1
+; CHECK: %xor1 = xor i32 %y, 123
+; CHECK: ret i32 %xor1
+}
+
+; Special case1: 
+;  (x | c1) ^ (x & c1) = x ^ c1
+define i32 @xor_special2(i32 %x, i32 %y) {
+  %or = or i32 %x, 123
+  %xor = xor i32 %or, %y
+  %and = and i32 %x, 123
+  %xor1 = xor i32 %xor, %and
+  ret i32 %xor1
+; CHECK: @xor_special2
+; CHECK: %xor = xor i32 %y, 123
+; CHECK: %xor1 = xor i32 %xor, %x
+; CHECK: ret i32 %xor1
+}
+
+; (x | c1) ^ (x | c1) => 0
+define i32 @xor_special3(i32 %x) {
+  %or = or i32 %x, 123
+  %or1 = or i32 %x, 123
+  %xor = xor i32 %or, %or1
+  ret i32 %xor
+;CHECK: @xor_special3
+;CHECK: ret i32 0
+}
+
+; (x & c1) ^ (x & c1) => 0
+define i32 @xor_special4(i32 %x) {
+  %or = and i32 %x, 123
+  %or1 = and i32 123, %x
+  %xor = xor i32 %or, %or1
+  ret i32 %xor
+;CHECK: @xor_special4
+;CHECK: ret i32 0
+}
+
+; ==========================================================================
+;
+;  Xor reassociation curtail code size
+;  
+; ==========================================================================
+
+; (x | c1) ^ (x | c2) => (x & c3) ^ c3
+; is enabled if one of operands has multiple uses
+;   
+define i32 @xor_ra_size1(i32 %x) {
+  %or = or i32 %x, 123
+  %or1 = or i32 %x, 456
+  %xor = xor i32 %or, %or1
+
+  %add = add i32 %xor, %or
+  ret i32 %add
+;CHECK: @xor_ra_size1
+;CHECK: %xor = xor i32 %and.ra, 435
+}
+
+; (x | c1) ^ (x | c2) => (x & c3) ^ c3
+; is disenabled if bothf operands has multiple uses.
+;   
+define i32 @xor_ra_size2(i32 %x) {
+  %or = or i32 %x, 123
+  %or1 = or i32 %x, 456
+  %xor = xor i32 %or, %or1
+
+  %add = add i32 %xor, %or
+  %add2 = add i32 %add, %or1
+  ret i32 %add2
+
+;CHECK: @xor_ra_size2
+;CHECK: %or1 = or i32 %x, 456
+;CHECK: %xor = xor i32 %or, %or1
+}
+
+
+; ==========================================================================
+;
+;  Xor reassociation bugs
+;  
+; ==========================================================================
+
+@xor_bug1_data = external global <{}>, align 4
+define void @xor_bug1() {
+  %1 = ptrtoint i32* undef to i64
+  %2 = xor i64 %1, ptrtoint (<{}>* @xor_bug1_data to i64)
+  %3 = and i64 undef, %2
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/barriercall.ll b/test/Transforms/SLPVectorizer/X86/barriercall.ll
new file mode 100644
index 000000000000..04eb8f919bc7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @foo
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @foo(i32* nocapture %A, i32 %n) {
+entry:
+  %call = tail call i32 (...)* @bar() #2
+  %mul = mul nsw i32 %n, 5
+  %add = add nsw i32 %mul, 9
+  store i32 %add, i32* %A, align 4
+  %mul1 = mul nsw i32 %n, 9
+  %add2 = add nsw i32 %mul1, 9
+  %arrayidx3 = getelementptr inbounds i32* %A, i64 1
+  store i32 %add2, i32* %arrayidx3, align 4
+  %mul4 = shl i32 %n, 3
+  %add5 = add nsw i32 %mul4, 9
+  %arrayidx6 = getelementptr inbounds i32* %A, i64 2
+  store i32 %add5, i32* %arrayidx6, align 4
+  %mul7 = mul nsw i32 %n, 10
+  %add8 = add nsw i32 %mul7, 9
+  %arrayidx9 = getelementptr inbounds i32* %A, i64 3
+  store i32 %add8, i32* %arrayidx9, align 4
+  ret i32 undef
+}
+
+  ; We can still vectorize the stores below.
+
+declare i32 @bar(...)
diff --git a/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
new file mode 100644
index 000000000000..05f8e616bb8e
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@.str = private unnamed_addr constant [6 x i8] c"bingo\00", align 1
+
+;CHECK: @reduce_compare
+;CHECK: load <2 x double>
+;CHECK: fmul <2 x double>
+;CHECK: fmul <2 x double>
+;CHECK: fadd <2 x double>
+;CHECK: extractelement
+;CHECK: extractelement
+;CHECK: ret
+define void @reduce_compare(double* nocapture %A, i32 %n) {
+entry:
+  %conv = sitofp i32 %n to double
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %0 = shl nsw i64 %indvars.iv, 1
+  %arrayidx = getelementptr inbounds double* %A, i64 %0
+  %1 = load double* %arrayidx, align 8
+  %mul1 = fmul double %conv, %1
+  %mul2 = fmul double %mul1, 7.000000e+00
+  %add = fadd double %mul2, 5.000000e+00
+  %2 = or i64 %0, 1
+  %arrayidx6 = getelementptr inbounds double* %A, i64 %2
+  %3 = load double* %arrayidx6, align 8
+  %mul8 = fmul double %conv, %3
+  %mul9 = fmul double %mul8, 4.000000e+00
+  %add10 = fadd double %mul9, 9.000000e+00
+  %cmp11 = fcmp ogt double %add, %add10
+  br i1 %cmp11, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0))
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc
+  ret void
+}
+
+declare i32 @printf(i8* nocapture, ...)
+
diff --git a/test/Transforms/SLPVectorizer/X86/diamond.ll b/test/Transforms/SLPVectorizer/X86/diamond.ll
new file mode 100644
index 000000000000..8e85cb6c9b8f
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/diamond.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; int foo(int * restrict B,  int * restrict A, int n, int m) {
+;   B[0] = n * A[0] + m * A[0];
+;   B[1] = n * A[1] + m * A[1];
+;   B[2] = n * A[2] + m * A[2];
+;   B[3] = n * A[3] + m * A[3];
+;   return 0;
+; }
+
+; CHECK: @foo
+; CHECK: load <4 x i32>
+; CHECK: mul <4 x i32>
+; CHECK: store <4 x i32>
+; CHECK: ret
+define i32 @foo(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) #0 {
+entry:
+  %0 = load i32* %A, align 4
+  %mul238 = add i32 %m, %n
+  %add = mul i32 %0, %mul238
+  store i32 %add, i32* %B, align 4
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 1
+  %1 = load i32* %arrayidx4, align 4
+  %add8 = mul i32 %1, %mul238
+  %arrayidx9 = getelementptr inbounds i32* %B, i64 1
+  store i32 %add8, i32* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32* %A, i64 2
+  %2 = load i32* %arrayidx10, align 4
+  %add14 = mul i32 %2, %mul238
+  %arrayidx15 = getelementptr inbounds i32* %B, i64 2
+  store i32 %add14, i32* %arrayidx15, align 4
+  %arrayidx16 = getelementptr inbounds i32* %A, i64 3
+  %3 = load i32* %arrayidx16, align 4
+  %add20 = mul i32 %3, %mul238
+  %arrayidx21 = getelementptr inbounds i32* %B, i64 3
+  store i32 %add20, i32* %arrayidx21, align 4
+  ret i32 0
+}
+
+
+; int foo_fail(int * restrict B,  int * restrict A, int n, int m) {
+;   B[0] = n * A[0] + m * A[0];
+;   B[1] = n * A[1] + m * A[1];
+;   B[2] = n * A[2] + m * A[2];
+;   B[3] = n * A[3] + m * A[3];
+;   return A[0];
+; }
+
+; CHECK: @foo_fail
+; CHECK-NOT: load <4 x i32>
+; CHECK: ret
+define i32 @foo_fail(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
+entry:
+  %0 = load i32* %A, align 4
+  %mul238 = add i32 %m, %n
+  %add = mul i32 %0, %mul238
+  store i32 %add, i32* %B, align 4
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 1
+  %1 = load i32* %arrayidx4, align 4
+  %add8 = mul i32 %1, %mul238
+  %arrayidx9 = getelementptr inbounds i32* %B, i64 1
+  store i32 %add8, i32* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32* %A, i64 2
+  %2 = load i32* %arrayidx10, align 4
+  %add14 = mul i32 %2, %mul238
+  %arrayidx15 = getelementptr inbounds i32* %B, i64 2
+  store i32 %add14, i32* %arrayidx15, align 4
+  %arrayidx16 = getelementptr inbounds i32* %A, i64 3
+  %3 = load i32* %arrayidx16, align 4
+  %add20 = mul i32 %3, %mul238
+  %arrayidx21 = getelementptr inbounds i32* %B, i64 3
+  store i32 %add20, i32* %arrayidx21, align 4
+  ret i32 %0  ;<--------- This value has multiple users and can't be vectorized.
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/flag.ll b/test/Transforms/SLPVectorizer/X86/flag.ll
new file mode 100644
index 000000000000..3ca540706744
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/flag.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=1000 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Check that the command line flag works.
+;CHECK:rollable
+;CHECK-NOT:load <4 x i32>
+;CHECK: ret
+
+define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i64 %n) {
+  %1 = icmp eq i64 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
+  %2 = shl i64 %i.019, 2
+  %3 = getelementptr inbounds i32* %in, i64 %2
+  %4 = load i32* %3, align 4
+  %5 = or i64 %2, 1
+  %6 = getelementptr inbounds i32* %in, i64 %5
+  %7 = load i32* %6, align 4
+  %8 = or i64 %2, 2
+  %9 = getelementptr inbounds i32* %in, i64 %8
+  %10 = load i32* %9, align 4
+  %11 = or i64 %2, 3
+  %12 = getelementptr inbounds i32* %in, i64 %11
+  %13 = load i32* %12, align 4
+  %14 = mul i32 %4, 7
+  %15 = add i32 %14, 7
+  %16 = mul i32 %7, 7
+  %17 = add i32 %16, 14
+  %18 = mul i32 %10, 7
+  %19 = add i32 %18, 21
+  %20 = mul i32 %13, 7
+  %21 = add i32 %20, 28
+  %22 = getelementptr inbounds i32* %out, i64 %2
+  store i32 %15, i32* %22, align 4
+  %23 = getelementptr inbounds i32* %out, i64 %5
+  store i32 %17, i32* %23, align 4
+  %24 = getelementptr inbounds i32* %out, i64 %8
+  store i32 %19, i32* %24, align 4
+  %25 = getelementptr inbounds i32* %out, i64 %11
+  store i32 %21, i32* %25, align 4
+  %26 = add i64 %i.019, 1
+  %exitcond = icmp eq i64 %26, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}
diff --git a/test/Transforms/SLPVectorizer/X86/hoist.ll b/test/Transforms/SLPVectorizer/X86/hoist.ll
new file mode 100644
index 000000000000..5074ceaaabd7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/hoist.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.9.0"
+
+;int foo(int *A, int n, int k) {
+;  for (int i=0; i < 10000; i+=4) {
+;    A[i]   += n;
+;    A[i+1] += k;
+;    A[i+2] += n;
+;    A[i+3] += k;
+;  }
+;}
+
+; preheader:
+;CHECK: entry
+;CHECK-NEXT: insertelement
+;CHECK-NEXT: insertelement
+;CHECK-NEXT: insertelement
+;CHECK-NEXT: insertelement
+; loop body:
+;CHECK: phi
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.024 = phi i32 [ 0, %entry ], [ %add10, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %A, i32 %i.024
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %n
+  store i32 %add, i32* %arrayidx, align 4
+  %add121 = or i32 %i.024, 1
+  %arrayidx2 = getelementptr inbounds i32* %A, i32 %add121
+  %1 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %1, %k
+  store i32 %add3, i32* %arrayidx2, align 4
+  %add422 = or i32 %i.024, 2
+  %arrayidx5 = getelementptr inbounds i32* %A, i32 %add422
+  %2 = load i32* %arrayidx5, align 4
+  %add6 = add nsw i32 %2, %n
+  store i32 %add6, i32* %arrayidx5, align 4
+  %add723 = or i32 %i.024, 3
+  %arrayidx8 = getelementptr inbounds i32* %A, i32 %add723
+  %3 = load i32* %arrayidx8, align 4
+  %add9 = add nsw i32 %3, %k
+  store i32 %add9, i32* %arrayidx8, align 4
+  %add10 = add nsw i32 %i.024, 4
+  %cmp = icmp slt i32 %add10, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret i32 undef
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/lit.local.cfg b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
new file mode 100644
index 000000000000..a8ad0f1a28b2
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/SLPVectorizer/X86/multi_user.ll b/test/Transforms/SLPVectorizer/X86/multi_user.ll
new file mode 100644
index 000000000000..aaa6063fdeda
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/multi_user.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+;int foo (int *A, int n) {
+;  A[0] += n * 5 + 7;
+;  A[1] += n * 5 + 8;
+;  A[2] += n * 5 + 9;
+;  A[3] += n * 5 + 10;
+;  A[4] += n * 5 + 11;
+;}
+
+;CHECK: @foo
+;CHECK: insertelement <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @foo(i32* nocapture %A, i32 %n) {
+  %1 = mul nsw i32 %n, 5
+  %2 = add nsw i32 %1, 7
+  %3 = load i32* %A, align 4
+  %4 = add nsw i32 %2, %3
+  store i32 %4, i32* %A, align 4
+  %5 = add nsw i32 %1, 8
+  %6 = getelementptr inbounds i32* %A, i64 1
+  %7 = load i32* %6, align 4
+  %8 = add nsw i32 %5, %7
+  store i32 %8, i32* %6, align 4
+  %9 = add nsw i32 %1, 9
+  %10 = getelementptr inbounds i32* %A, i64 2
+  %11 = load i32* %10, align 4
+  %12 = add nsw i32 %9, %11
+  store i32 %12, i32* %10, align 4
+  %13 = add nsw i32 %1, 10
+  %14 = getelementptr inbounds i32* %A, i64 3
+  %15 = load i32* %14, align 4
+  %16 = add nsw i32 %13, %15
+  store i32 %16, i32* %14, align 4
+  %17 = add nsw i32 %1, 11
+  %18 = getelementptr inbounds i32* %A, i64 4
+  %19 = load i32* %18, align 4
+  %20 = add nsw i32 %17, %19
+  store i32 %20, i32* %18, align 4
+  ret i32 undef
+}
diff --git a/test/Transforms/SLPVectorizer/X86/reduction.ll b/test/Transforms/SLPVectorizer/X86/reduction.ll
new file mode 100644
index 000000000000..70b7c3a0b998
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/reduction.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.8.0"
+
+; int foo(double *A, int n, int m) {
+;   double sum = 0, v1 = 2, v0 = 3;
+;   for (int i=0; i < n; ++i)
+;     sum += 7*A[i*2] + 7*A[i*2+1];
+;   return sum;
+; }
+
+;CHECK: reduce
+;CHECK: load <2 x double>
+;CHECK: fmul <2 x double>
+;CHECK: ret
+define i32 @reduce(double* nocapture %A, i32 %n, i32 %m) {
+entry:
+  %cmp13 = icmp sgt i32 %n, 0
+  br i1 %cmp13, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.015 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %sum.014 = phi double [ %add6, %for.body ], [ 0.000000e+00, %entry ]
+  %mul = shl nsw i32 %i.015, 1
+  %arrayidx = getelementptr inbounds double* %A, i32 %mul
+  %0 = load double* %arrayidx, align 4
+  %mul1 = fmul double %0, 7.000000e+00
+  %add12 = or i32 %mul, 1
+  %arrayidx3 = getelementptr inbounds double* %A, i32 %add12
+  %1 = load double* %arrayidx3, align 4
+  %mul4 = fmul double %1, 7.000000e+00
+  %add5 = fadd double %mul1, %mul4
+  %add6 = fadd double %sum.014, %add5
+  %inc = add nsw i32 %i.015, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  %phitmp = fptosi double %add6 to i32
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/reduction2.ll b/test/Transforms/SLPVectorizer/X86/reduction2.ll
new file mode 100644
index 000000000000..7aa7d7e243d0
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.8.0"
+
+;CHECK: @foo
+;CHECK: load <2 x double>
+;CHECK: ret
+define double @foo(double* nocapture %D) {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %i.02 = phi i32 [ 0, %0 ], [ %10, %1 ]
+  %sum.01 = phi double [ 0.000000e+00, %0 ], [ %9, %1 ]
+  %2 = shl nsw i32 %i.02, 1
+  %3 = getelementptr inbounds double* %D, i32 %2
+  %4 = load double* %3, align 4
+  %A4 = fmul double %4, %4
+  %5 = or i32 %2, 1
+  %6 = getelementptr inbounds double* %D, i32 %5
+  %7 = load double* %6, align 4
+  %A7 = fmul double %7, %7
+  %8 = fadd double %A4, %A7
+  %9 = fadd double %sum.01, %8
+  %10 = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %10, 100
+  br i1 %exitcond, label %11, label %1
+
+; <label>:11                                      ; preds = %1
+  ret double %9
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/saxpy.ll b/test/Transforms/SLPVectorizer/X86/saxpy.ll
new file mode 100644
index 000000000000..b520913a398d
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/saxpy.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; SLP vectorization example from http://cs.stanford.edu/people/eschkufz/research/asplos291-schkufza.pdf
+;CHECK: SAXPY
+;CHECK: mul <4 x i32>
+;CHECK: ret
+
+define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a, i64 %i) {
+  %1 = getelementptr inbounds i32* %x, i64 %i
+  %2 = load i32* %1, align 4
+  %3 = mul nsw i32 %2, %a
+  %4 = getelementptr inbounds i32* %y, i64 %i
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %3, %5
+  store i32 %6, i32* %1, align 4
+  %7 = add i64 %i, 1
+  %8 = getelementptr inbounds i32* %x, i64 %7
+  %9 = load i32* %8, align 4
+  %10 = mul nsw i32 %9, %a
+  %11 = getelementptr inbounds i32* %y, i64 %7
+  %12 = load i32* %11, align 4
+  %13 = add nsw i32 %10, %12
+  store i32 %13, i32* %8, align 4
+  %14 = add i64 %i, 2
+  %15 = getelementptr inbounds i32* %x, i64 %14
+  %16 = load i32* %15, align 4
+  %17 = mul nsw i32 %16, %a
+  %18 = getelementptr inbounds i32* %y, i64 %14
+  %19 = load i32* %18, align 4
+  %20 = add nsw i32 %17, %19
+  store i32 %20, i32* %15, align 4
+  %21 = add i64 %i, 3
+  %22 = getelementptr inbounds i32* %x, i64 %21
+  %23 = load i32* %22, align 4
+  %24 = mul nsw i32 %23, %a
+  %25 = getelementptr inbounds i32* %y, i64 %21
+  %26 = load i32* %25, align 4
+  %27 = add nsw i32 %24, %26
+  store i32 %27, i32* %22, align 4
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/simple-loop.ll b/test/Transforms/SLPVectorizer/X86/simple-loop.ll
new file mode 100644
index 000000000000..0111b946d496
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/simple-loop.ll
@@ -0,0 +1,100 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK:rollable
+define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i64 %n) {
+  %1 = icmp eq i64 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
+  %2 = shl i64 %i.019, 2
+  %3 = getelementptr inbounds i32* %in, i64 %2
+;CHECK:load <4 x i32>
+  %4 = load i32* %3, align 4
+  %5 = or i64 %2, 1
+  %6 = getelementptr inbounds i32* %in, i64 %5
+  %7 = load i32* %6, align 4
+  %8 = or i64 %2, 2
+  %9 = getelementptr inbounds i32* %in, i64 %8
+  %10 = load i32* %9, align 4
+  %11 = or i64 %2, 3
+  %12 = getelementptr inbounds i32* %in, i64 %11
+  %13 = load i32* %12, align 4
+;CHECK:mul <4 x i32>
+  %14 = mul i32 %4, 7
+;CHECK:add <4 x i32>
+  %15 = add i32 %14, 7
+  %16 = mul i32 %7, 7
+  %17 = add i32 %16, 14
+  %18 = mul i32 %10, 7
+  %19 = add i32 %18, 21
+  %20 = mul i32 %13, 7
+  %21 = add i32 %20, 28
+  %22 = getelementptr inbounds i32* %out, i64 %2
+;CHECK:store <4 x i32>
+  store i32 %15, i32* %22, align 4
+  %23 = getelementptr inbounds i32* %out, i64 %5
+  store i32 %17, i32* %23, align 4
+  %24 = getelementptr inbounds i32* %out, i64 %8
+  store i32 %19, i32* %24, align 4
+  %25 = getelementptr inbounds i32* %out, i64 %11
+  store i32 %21, i32* %25, align 4
+  %26 = add i64 %i.019, 1
+  %exitcond = icmp eq i64 %26, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+;CHECK: ret
+  ret i32 undef
+}
+
+;CHECK:unrollable
+;CHECK-NOT: <4 x i32>
+;CHECK: ret
+define i32 @unrollable(i32* %in, i32* %out, i64 %n) nounwind ssp uwtable {
+  %1 = icmp eq i64 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
+  %2 = shl i64 %i.019, 2
+  %3 = getelementptr inbounds i32* %in, i64 %2
+  %4 = load i32* %3, align 4
+  %5 = or i64 %2, 1
+  %6 = getelementptr inbounds i32* %in, i64 %5
+  %7 = load i32* %6, align 4
+  %8 = or i64 %2, 2
+  %9 = getelementptr inbounds i32* %in, i64 %8
+  %10 = load i32* %9, align 4
+  %11 = or i64 %2, 3
+  %12 = getelementptr inbounds i32* %in, i64 %11
+  %13 = load i32* %12, align 4
+  %14 = mul i32 %4, 7
+  %15 = add i32 %14, 7
+  %16 = mul i32 %7, 7
+  %17 = add i32 %16, 14
+  %18 = mul i32 %10, 7
+  %19 = add i32 %18, 21
+  %20 = mul i32 %13, 7
+  %21 = add i32 %20, 28
+  %22 = getelementptr inbounds i32* %out, i64 %2
+  store i32 %15, i32* %22, align 4
+  %23 = getelementptr inbounds i32* %out, i64 %5
+  store i32 %17, i32* %23, align 4
+  %barrier = call i32 @goo(i32 0)                      ; <---------------- memory barrier.
+  %24 = getelementptr inbounds i32* %out, i64 %8
+  store i32 %19, i32* %24, align 4
+  %25 = getelementptr inbounds i32* %out, i64 %11
+  store i32 %21, i32* %25, align 4
+  %26 = add i64 %i.019, 1
+  %exitcond = icmp eq i64 %26, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}
+
+declare i32 @goo(i32)
diff --git a/test/Transforms/SLPVectorizer/X86/simplebb.ll b/test/Transforms/SLPVectorizer/X86/simplebb.ll
new file mode 100644
index 000000000000..cd0b99e64677
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/simplebb.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Simple 3-pair chain with loads and stores
+; CHECK: test1
+; CHECK: store <2 x double>
+; CHECK: ret
+define void @test1(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/lit.local.cfg b/test/Transforms/SLPVectorizer/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 30dd21774343..6078adead036 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1243,3 +1243,66 @@ entry:
   %v = load i32* %a
   ret i32 %v
 }
+
+define void @PR15674(i8* %data, i8* %src, i32 %size) {
+; Arrange (via control flow) to have unmerged stores of a particular width to
+; an alloca where we incrementally store from the end of the array toward the
+; beginning of the array. Ensure that the final integer store, despite being
+; convertable to the integer type that we end up promoting this alloca toward,
+; doesn't get widened to a full alloca store.
+; CHECK: @PR15674
+
+entry:
+  %tmp = alloca [4 x i8], align 1
+; CHECK: alloca i32
+
+  switch i32 %size, label %end [
+    i32 4, label %bb4
+    i32 3, label %bb3
+    i32 2, label %bb2
+    i32 1, label %bb1
+  ]
+
+bb4:
+  %src.gep3 = getelementptr inbounds i8* %src, i32 3
+  %src.3 = load i8* %src.gep3
+  %tmp.gep3 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 3
+  store i8 %src.3, i8* %tmp.gep3
+; CHECK: store i8
+
+  br label %bb3
+
+bb3:
+  %src.gep2 = getelementptr inbounds i8* %src, i32 2
+  %src.2 = load i8* %src.gep2
+  %tmp.gep2 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 2
+  store i8 %src.2, i8* %tmp.gep2
+; CHECK: store i8
+
+  br label %bb2
+
+bb2:
+  %src.gep1 = getelementptr inbounds i8* %src, i32 1
+  %src.1 = load i8* %src.gep1
+  %tmp.gep1 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 1
+  store i8 %src.1, i8* %tmp.gep1
+; CHECK: store i8
+
+  br label %bb1
+
+bb1:
+  %src.gep0 = getelementptr inbounds i8* %src, i32 0
+  %src.0 = load i8* %src.gep0
+  %tmp.gep0 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 0
+  store i8 %src.0, i8* %tmp.gep0
+; CHECK: store i8
+
+  br label %end
+
+end:
+  %tmp.raw = bitcast [4 x i8]* %tmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %data, i8* %tmp.raw, i32 %size, i32 1, i1 false)
+  ret void
+; CHECK: ret void
+}
+
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index d1dd01d64d49..7d3bcea8b857 100644
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -42,7 +42,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131941)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
diff --git a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
deleted file mode 100644
index 565cd7616422..000000000000
--- a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
+++ /dev/null
@@ -1,167 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-; CHECK: @test1
-; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
-; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
-; CHECK: memset
-; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
-
-; Split the array but don't replace the memset with an insert
-; element as its not a constant offset.
-; The load, however, can be replaced with an extract element.
-define float @test1(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
-  %cast = bitcast float* %ptr1 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false)
-  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: @test2
-; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2
-
-; Do SROA on the array when it has dynamic vector reads and writes.
-define float @test2(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test3
-; CHECK: %0 = alloca [4 x <4 x float>]
-; CHECK-NOT: alloca
-
-; Don't do SROA on a dynamically indexed vector when it spans
-; more than one array element of the alloca array it is within.
-define float @test3(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
-  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test4
-; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <16 x float> %0, i32 %idx2
-
-; Don't do SROA on a dynamically indexed vector when it spans
-; more than one array element of the alloca array it is within.
-; However, unlike test3, the store is on the vector type
-; so SROA will convert the large alloca into the large vector
-; type and do all accesses with insert/extract element
-define float @test4(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
-  store <16 x float> zeroinitializer, <16 x float>* %bigvec
-  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: @test5
-; CHECK: %0 = alloca [4 x <4 x float>]
-; CHECK-NOT: alloca
-
-; Don't do SROA as the is a second dynamically indexed array
-; which may span multiple elements of the alloca.
-define float @test5(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
-  %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]*
-  %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr4
-  ret float %ret
-}
-
-; CHECK: test6
-; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
-
-%vector.pair = type { %vector.anon, %vector.anon }
-%vector.anon = type { %vector }
-%vector = type { <4 x float> }
-
-; Dynamic GEPs on vectors were crashing when the vector was inside a struct
-; as the new GEP for the new alloca might not include all the indices from
-; the original GEP, just the indices it needs to get to the correct offset of
-; some type, not necessarily the dynamic vector.
-; This test makes sure we don't have this crash.
-define float @test6(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca %vector.pair
-  store %vector.pair zeroinitializer, %vector.pair* %0
-  %ptr1 = getelementptr %vector.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr %vector.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test7
-; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
-
-%array.pair = type { [2 x %array.anon], %array.anon }
-%array.anon = type { [2 x %vector] }
-
-; This is the same as test6 and tests the same crash, but on arrays.
-define float @test7(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca %array.pair
-  store %array.pair zeroinitializer, %array.pair* %0
-  %ptr1 = getelementptr %array.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr %array.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test8
-; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
-; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
-; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
-; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]
-
-; Do SROA on the vector when it has dynamic vector reads and writes
-; from a non-zero offset.
-define float @test8(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca <4 x float>
-  store <4 x float> zeroinitializer, <4 x float>* %0
-  %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1
-  %ptr2 = bitcast float* %ptr1 to <3 x float>*
-  %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1
-  store float 1.0, float* %ptr3
-  %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2
-  %ptr5 = bitcast float* %ptr4 to <2 x float>*
-  %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2
-  %ret = load float* %ptr6
-  ret float %ret
-}
-
-declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll b/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
index fc89b165f85b..f6b068fd79c3 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
@@ -1,11 +1,11 @@
 ; This test checks to make sure that 'br X, Dest, Dest' is folded into 
 ; 'br Dest'
 
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep "br i1 %c2"
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 declare void @noop()
 
+; CHECK-NOT: br i1 %c2
 define i32 @test(i1 %c1, i1 %c2) {
 	call void @noop( )
 	br i1 %c1, label %A, label %Y
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll b/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
index c1b032fb8b39..78049080a64e 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
@@ -3,8 +3,9 @@
 ; due to the fact that the SimplifyCFG function does not use 
 ; the ConstantFoldTerminator function.
 
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep "br i1 %c2"
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: br i1 %c2
 
 declare void @noop()
 
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
index af59ba04f441..fbfb100274f6 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
@@ -1,7 +1,6 @@
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep switch
-
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
+; CHECK-NOT: switch
         %llvm.dbg.anchor.type = type { i32, i32 }
         %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
 
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
index 93f851c6f9ea..806659635e4b 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep switch
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: switch
 
 ; Test normal folding
 define i32 @test1() {
diff --git a/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll b/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
index 760aa139bf7b..907261bd438a 100644
--- a/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
+++ b/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
@@ -1,9 +1,7 @@
 ; Make sure this doesn't turn into an infinite loop
 
-; RUN: opt < %s -simplifycfg -constprop -simplifycfg |\
-; RUN:   llvm-dis | grep bb86
-; END.
-	
+; RUN: opt < %s -simplifycfg -constprop -simplifycfg | llvm-dis | FileCheck %s
+
 %struct.anon = type { i32, i32, i32, i32, [1024 x i8] }
 @_zero_ = external global %struct.anon*		; <%struct.anon**> [#uses=2]
 @_one_ = external global %struct.anon*		; <%struct.anon**> [#uses=4]
@@ -112,6 +110,7 @@ cond_true83:		; preds = %bb80
 	%tmp71 = call i32 @_do_compare( %struct.anon* null, %struct.anon* null, i32 0, i32 1 )		; <i32> [#uses=1]
 	%tmp76 = icmp eq i32 %tmp71, 0		; <i1> [#uses=1]
 	br i1 %tmp76, label %bb80.outer, label %bb80
+; CHECK: bb86
 bb86:		; preds = %bb80
 	call void @free_num( %struct.anon** %num )
 	%tmp88 = load %struct.anon** %guess		; <%struct.anon*> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll b/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
index 009d1c8cc4da..8f21b9bab9b6 100644
--- a/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
+++ b/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
@@ -1,6 +1,7 @@
 ; PR957
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep select
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: select
 
 @G = extern_weak global i32
 
diff --git a/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll b/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll
index a20c46e1ad1f..a90e07276253 100644
--- a/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll
+++ b/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -simplifycfg -S | not grep invoke
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: invoke
 
 declare i32 @func(i8*) nounwind
 
diff --git a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
index 14baeea4b0b9..cf29b7159793 100644
--- a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
+++ b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
@@ -1,5 +1,5 @@
 ; The phi should not be eliminated in this case, because the fp op could trap.
-; RUN: opt < %s -simplifycfg -S | grep "= phi double"
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
@@ -19,6 +19,7 @@ cond_true:		; preds = %entry
 	br label %cond_next
 
 cond_next:		; preds = %cond_true, %entry
+; CHECK: = phi double
 	%F.0 = phi double [ %tmp, %entry ], [ %tmp7, %cond_true ]		; <double> [#uses=1]
 	store double %F.0, double* @G, align 8
 	ret void
diff --git a/test/Transforms/SimplifyCFG/switch-to-icmp.ll b/test/Transforms/SimplifyCFG/switch-to-icmp.ll
index 414f8475bc23..e9a6db45cb00 100644
--- a/test/Transforms/SimplifyCFG/switch-to-icmp.ll
+++ b/test/Transforms/SimplifyCFG/switch-to-icmp.ll
@@ -37,3 +37,21 @@ lor.end:
 ; CHECK: @test2
 ; CHECK: %switch = icmp ult i32 %x, 2
 }
+
+define i32 @test3(i1 %flag) {
+entry:
+ switch i1 %flag, label %bad [
+   i1 true, label %good
+   i1 false, label %good
+ ]
+
+good:
+ ret i32 0
+
+bad:
+ ret i32 1
+
+; CHECK: @test3
+; CHECK: entry:
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll b/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll
deleted file mode 100644
index ac89199b0ec1..000000000000
--- a/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -inline -simplify-libcalls -functionattrs | \
-; RUN:   llvm-dis | grep nocapture | count 2
-; Check that nocapture attributes are added when run after an SCC pass.
-; PR3520
-
-define i32 @use(i8* %x) nounwind readonly {
-entry:
-	%0 = tail call i64 @strlen(i8* %x) nounwind readonly		; <i64> [#uses=1]
-	%1 = trunc i64 %0 to i32		; <i32> [#uses=1]
-	ret i32 %1
-}
-
-declare i64 @strlen(i8*) nounwind readonly
diff --git a/test/Unit/lit.cfg b/test/Unit/lit.cfg
index 8dc7853d9130..15cf626c72bf 100644
--- a/test/Unit/lit.cfg
+++ b/test/Unit/lit.cfg
@@ -28,6 +28,11 @@ if 'TMP' in os.environ:
 if 'TEMP' in os.environ:
     config.environment['TEMP'] = os.environ['TEMP']
 
+# Propagate path to symbolizer for ASan/MSan.
+for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']:
+    if symbolizer in os.environ:
+        config.environment[symbolizer] = os.environ[symbolizer]
+
 ###
 
 # Check that the object root is known.
@@ -81,10 +86,3 @@ if config.enable_shared:
         shlibpath = os.pathsep + shlibpath
     shlibpath = config.shlibdir + shlibpath
     config.environment[config.shlibpath_var] = shlibpath
-
-# Setup paths to llvm-symbolizer for Sanitizer tools.
-llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
-if llvm_tools_dir:
-    llvm_symbolizer_path = os.path.join(llvm_tools_dir, 'llvm-symbolizer')
-    config.environment['ASAN_SYMBOLIZER_PATH'] = llvm_symbolizer_path
-    config.environment['MSAN_SYMBOLIZER_PATH'] = llvm_symbolizer_path
diff --git a/test/Verifier/2002-04-13-RetTypes.ll b/test/Verifier/2002-04-13-RetTypes.ll
index af468395be38..9385ebe5ff1a 100644
--- a/test/Verifier/2002-04-13-RetTypes.ll
+++ b/test/Verifier/2002-04-13-RetTypes.ll
@@ -1,7 +1,8 @@
-; RUN: not llvm-as < %s 2>&1 | grep "value doesn't match function result type 'i32'"
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
 
 ; Verify the operand type of the ret instructions in a function match the
-; delcared return type of the function they live in.
+; declared return type of the function they live in.
+; CHECK: value doesn't match function result type 'i32'
 ;
 
 define i32 @testfunc() {
diff --git a/test/Verifier/2002-11-05-GetelementptrPointers.ll b/test/Verifier/2002-11-05-GetelementptrPointers.ll
index 108ae5f76579..66b233ec63ae 100644
--- a/test/Verifier/2002-11-05-GetelementptrPointers.ll
+++ b/test/Verifier/2002-11-05-GetelementptrPointers.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as < %s 2>&1 | grep "invalid getelementptr indices"
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: invalid getelementptr indices
 
 ; This testcase is invalid because we are indexing into a pointer that is 
 ; contained WITHIN a structure.
diff --git a/test/Verifier/2006-07-11-StoreStruct.ll b/test/Verifier/2006-07-11-StoreStruct.ll
index 65b229d1ca9d..70aea8779bf1 100644
--- a/test/Verifier/2006-07-11-StoreStruct.ll
+++ b/test/Verifier/2006-07-11-StoreStruct.ll
@@ -1,4 +1,6 @@
-; RUN: llvm-as < %s 2>&1 | not grep "Instruction operands must be first-class"
+; RUN: llvm-as < %s 2>&1 | FileCheck %s 
+
+; CHECK-NOT: Instruction operands must be first-class
 
 ; This previously was for PR826, but structs are now first-class so
 ; the following is now valid.
diff --git a/test/Verifier/2006-10-15-AddrLabel.ll b/test/Verifier/2006-10-15-AddrLabel.ll
index c8fedb5f195d..decbf5ba42bb 100644
--- a/test/Verifier/2006-10-15-AddrLabel.ll
+++ b/test/Verifier/2006-10-15-AddrLabel.ll
@@ -1,5 +1,6 @@
 ; RUN: not llvm-as < %s > /dev/null 2> %t
-; RUN: grep "basic block pointers are invalid" %t
+; RUN: FileCheck %s --input-file=%t
+; CHECK: basic block pointers are invalid
 
 define i32 @main() {
          %foo  = call i8* %llvm.stacksave()
diff --git a/test/Verifier/2006-12-12-IntrinsicDefine.ll b/test/Verifier/2006-12-12-IntrinsicDefine.ll
index 6e7468c1d9eb..8cc3d24b5c80 100644
--- a/test/Verifier/2006-12-12-IntrinsicDefine.ll
+++ b/test/Verifier/2006-12-12-IntrinsicDefine.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as < %s 2>&1 | grep "llvm intrinsics cannot be defined"
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: llvm intrinsics cannot be defined
 ; PR1047
 
 define void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) {
diff --git a/test/Verifier/2008-03-01-AllocaSized.ll b/test/Verifier/2008-03-01-AllocaSized.ll
index 51258bef76e5..fc12a96e4f9f 100644
--- a/test/Verifier/2008-03-01-AllocaSized.ll
+++ b/test/Verifier/2008-03-01-AllocaSized.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "Cannot allocate unsized type"
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; CHECK: Cannot allocate unsized type
 ; PR2113
 
 define void @test() {
diff --git a/test/Verifier/2008-08-22-MemCpyAlignment.ll b/test/Verifier/2008-08-22-MemCpyAlignment.ll
index c6d5afd51c95..3f7cb5234305 100644
--- a/test/Verifier/2008-08-22-MemCpyAlignment.ll
+++ b/test/Verifier/2008-08-22-MemCpyAlignment.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "alignment argument of memory intrinsics must be a constant int"
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; CHECK: alignment argument of memory intrinsics must be a constant int
 ; PR2318
 
 define void @x(i8* %a, i8* %src, i64 %len, i32 %align) nounwind  {
diff --git a/test/Verifier/2008-11-15-RetVoid.ll b/test/Verifier/2008-11-15-RetVoid.ll
index 42503fabbec9..62f6da113139 100644
--- a/test/Verifier/2008-11-15-RetVoid.ll
+++ b/test/Verifier/2008-11-15-RetVoid.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as < %s 2>&1 | grep "value doesn't match function result type 'void'"
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: value doesn't match function result type 'void'
 
 define void @foo() {
   ret i32 0
diff --git a/test/Verifier/2010-08-07-PointerIntrinsic.ll b/test/Verifier/2010-08-07-PointerIntrinsic.ll
index 3136c61514ba..a668d04213fa 100644
--- a/test/Verifier/2010-08-07-PointerIntrinsic.ll
+++ b/test/Verifier/2010-08-07-PointerIntrinsic.ll
@@ -1,5 +1,6 @@
 ; RUN: not llvm-as < %s 2> %t
-; RUN: grep "Broken module" %t
+; RUN: FileCheck %s --input-file=%t
+; CHECK: Broken module
 ; PR7316
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
diff --git a/test/Verifier/AmbiguousPhi.ll b/test/Verifier/AmbiguousPhi.ll
index f31bc107accf..cb05a7282629 100644
--- a/test/Verifier/AmbiguousPhi.ll
+++ b/test/Verifier/AmbiguousPhi.ll
@@ -1,6 +1,5 @@
-; RUN: not llvm-as < %s 2>&1 | grep "multiple entries for the same basic block"
-
-
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: multiple entries for the same basic block
 
 define i32 @test(i32 %i, i32 %j, i1 %c) {
 	br i1 %c, label %A, label %A
diff --git a/test/Verifier/PhiGrouping.ll b/test/Verifier/PhiGrouping.ll
index 7b42fd28e3d8..291f0848cb2b 100644
--- a/test/Verifier/PhiGrouping.ll
+++ b/test/Verifier/PhiGrouping.ll
@@ -1,6 +1,5 @@
-; RUN: not llvm-as < %s 2>&1 | grep "PHI nodes not grouped at top"
-
-
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: PHI nodes not grouped at top
 
 define i32 @test(i32 %i, i32 %j, i1 %c) {
 	br i1 %c, label %A, label %B
diff --git a/test/Verifier/SelfReferential.ll b/test/Verifier/SelfReferential.ll
index c24c0ebba3d1..7f0166a158d5 100644
--- a/test/Verifier/SelfReferential.ll
+++ b/test/Verifier/SelfReferential.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "Only PHI nodes may reference their own value"
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; CHECK: Only PHI nodes may reference their own value
 
 ; Test that self referential instructions are not allowed
 
diff --git a/test/Verifier/aliasing-chain.ll b/test/Verifier/aliasing-chain.ll
index a52e796b2b86..ae0b77fdc395 100644
--- a/test/Verifier/aliasing-chain.ll
+++ b/test/Verifier/aliasing-chain.ll
@@ -1,5 +1,5 @@
-; RUN:  not llvm-as %s -o /dev/null 2>&1 | grep "Aliasing chain should end with function or global variable"
-
+; RUN:  not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; CHECK: Aliasing chain should end with function or global variable
 ; Test that alising chain does not create a cycle
 
 @b1 = alias i32* @c1
diff --git a/test/lit.cfg b/test/lit.cfg
index 8ee2078b696e..ea91f45754d6 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -22,9 +22,18 @@ if sys.platform in ['win32']:
                                      config.environment['PATH']))
         config.environment['PATH'] = path
 
+# Choose between lit's internal shell pipeline runner and a real shell.  If
+# LIT_USE_INTERNAL_SHELL is in the environment, we use that as an override.
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+if use_lit_shell:
+    # 0 is external, "" is default, and everything else is internal.
+    execute_external = (use_lit_shell == "0")
+else:
+    # Otherwise we default to internal on Windows and external elsewhere, as
+    # bash on Windows is usually very slow.
+    execute_external = (not sys.platform in ['win32'])
+
 # testFormat: The test format to use to interpret tests.
-execute_external = (not sys.platform in ['win32']
-                    or lit.getBashPath() not in [None, ""])
 config.test_format = lit.formats.ShTest(execute_external)
 
 # To ignore test output on stderr so it doesn't trigger failures uncomment this:
@@ -47,26 +56,13 @@ llvm_obj_root = getattr(config, 'llvm_obj_root', None)
 if llvm_obj_root is not None:
     config.test_exec_root = os.path.join(llvm_obj_root, 'test')
 
-# Tweak the PATH to include the scripts dir, the tools dir, and the llvm-gcc bin
-# dir (if available).
+# Tweak the PATH to include the tools dir.
 if llvm_obj_root is not None:
-    llvm_src_root = getattr(config, 'llvm_src_root', None)
-    if not llvm_src_root:
-        lit.fatal('No LLVM source root set!')
-    path = os.path.pathsep.join((os.path.join(llvm_src_root, 'test',
-                                              'Scripts'),
-                                 config.environment['PATH']))
-    config.environment['PATH'] = path
-
     llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
     if not llvm_tools_dir:
         lit.fatal('No LLVM tools dir set!')
     path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH']))
     config.environment['PATH'] = path
-    # Setup paths to llvm-symbolizer for Sanitizer tools.
-    llvm_symbolizer_path = os.path.join(llvm_tools_dir, 'llvm-symbolizer')
-    config.environment['ASAN_SYMBOLIZER_PATH'] = llvm_symbolizer_path
-    config.environment['MSAN_SYMBOLIZER_PATH'] = llvm_symbolizer_path
 
 # Propagate 'HOME' through the environment.
 if 'HOME' in os.environ:
@@ -94,6 +90,11 @@ config.environment['LLVM_SRC_ROOT'] = getattr(config, 'llvm_src_root', '')
 config.environment['PYTHON_EXECUTABLE'] = getattr(config, 'python_executable',
                                                   '')
 
+# Propagate path to symbolizer for ASan/MSan.
+for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']:
+    if symbolizer in os.environ:
+        config.environment[symbolizer] = os.environ[symbolizer]
+
 ###
 
 import os
@@ -239,7 +240,7 @@ for pattern in [r"\bbugpoint\b(?!-)",   r"(?<!/|-)\bclang\b(?!-)",
 ### Features
 
 # Shell execution
-if sys.platform not in ['win32'] or lit.getBashPath() != '':
+if execute_external:
     config.available_features.add('shell')
 
 # Loadable module
@@ -256,6 +257,17 @@ if loadable_module:
 if config.lto_is_enabled == "1" and platform.system() == "Darwin":
     config.available_features.add('lto_on_osx')
 
+# Sanitizers.
+if config.llvm_use_sanitizer == "Address":
+    config.available_features.add("asan")
+if (config.llvm_use_sanitizer == "Memory" or
+        config.llvm_use_sanitizer == "MemoryWithOrigins"):
+    config.available_features.add("msan")
+
+# Direct object generation
+if not 'hexagon' in config.target_triple:
+    config.available_features.add("object-emission")
+
 # llc knows whether he is compiled with -DNDEBUG.
 import subprocess
 try:
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index bfd901a0797b..8024b24fcde7 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -18,6 +18,7 @@ config.llvm_bindings = "@LLVM_BINDINGS@"
 config.host_os = "@HOST_OS@"
 config.host_arch = "@HOST_ARCH@"
 config.llvm_use_intel_jitevents = "@LLVM_USE_INTEL_JITEVENTS@"
+config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 
 # Support substitution of the tools_dir with user parameters. This is
 # used when we can't determine the tool dir at configuration time.
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386 b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386
new file mode 100644
index 000000000000..15e43ef54fef
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386 differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64 b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64
new file mode 100644
index 000000000000..cd63173b0e3a
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64 differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64 b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64
new file mode 100644
index 000000000000..d39e60c8eec3
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64 differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm
new file mode 100644
index 000000000000..908507d20ab9
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-i386 b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-i386
new file mode 100644
index 000000000000..7860df6de763
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-i386 differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips
new file mode 100644
index 000000000000..e387942b450b
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips64el b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips64el
new file mode 100644
index 000000000000..a9779645c312
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips64el differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-ppc64 b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-ppc64
new file mode 100644
index 000000000000..c46e4c04285b
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-ppc64 differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-x86_64 b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-x86_64
new file mode 100644
index 000000000000..3ca9d8c6939f
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-x86_64 differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.macho-arm b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-arm
new file mode 100644
index 000000000000..992ae17ffb15
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-arm differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.macho-i386 b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-i386
new file mode 100644
index 000000000000..5305fe8663a2
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-i386 differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.macho-x86_64 b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-x86_64
new file mode 100644
index 000000000000..42b80dd9d2f0
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-x86_64 differ
diff --git a/test/tools/llvm-readobj/Inputs/relocs.py b/test/tools/llvm-readobj/Inputs/relocs.py
new file mode 100644
index 000000000000..232d080d7d78
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.py
@@ -0,0 +1,1086 @@
+#!/usr/bin/env python
+
+# Generates ELF, COFF and MachO object files for different architectures
+# containing all relocations:
+#
+# ELF:   i386, x86_64, ppc64, aarch64, arm, mips, mips64el
+# COFF:  i386, x86_64
+# MachO: i386, x86_64, arm
+# (see end of file for triples)
+#
+# To simplify generation, object files are generated with just the proper
+# number of relocations through repeated instructions. Afterwards, the
+# relocations in the object file are patched to their proper value.
+
+import operator
+import shutil
+import StringIO
+import struct
+import subprocess
+import sys
+
+class EnumType(type):
+  def __init__(self, name, bases = (), attributes = {}):
+    super(EnumType, self).__init__(name, bases, attributes)
+
+    type.__setattr__(self, '_map', {})
+    type.__setattr__(self, '_nameMap', {})
+
+    for symbol in attributes:
+      if symbol.startswith('__') or symbol.endswith('__'):
+        continue
+
+      value = attributes[symbol]
+
+      # MyEnum.symbol == value
+      type.__setattr__(self, symbol, value)
+      self._nameMap[symbol] = value
+
+      # The first symbol with the given value is authoritative.
+      if not (value in self._map):
+        # MyEnum[value] == symbol
+        self._map[value] = symbol
+
+  # Not supported (Enums are immutable).
+  def __setattr__(self, name, value):
+    raise NotSupportedException, self.__setattr__
+
+  # Not supported (Enums are immutable).
+  def __delattr__(self, name):
+    raise NotSupportedException, self.__delattr__
+
+  # Gets the enum symbol for the specified value.
+  def __getitem__(self, value):
+    symbol = self._map.get(value)
+    if symbol is None:
+      raise KeyError, value
+    return symbol
+
+  # Gets the enum symbol for the specified value or none.
+  def lookup(self, value):
+    symbol = self._map.get(value)
+    return symbol
+
+  # Not supported (Enums are immutable).
+  def __setitem__(self, value, symbol):
+    raise NotSupportedException, self.__setitem__
+
+  # Not supported (Enums are immutable).
+  def __delitem__(self, value):
+    raise NotSupportedException, self.__delitem__
+
+  def entries(self):
+    # sort by (value, name)
+    def makeKey(item):
+      return (item[1], item[0])
+    e = []
+    for pair in sorted(self._nameMap.iteritems(), key=makeKey):
+      e.append(pair)
+    return e
+
+  def __iter__(self):
+    for e in self.entries():
+      yield e
+
+Enum = EnumType('Enum', (), {})
+
+class BinaryReader:
+  def __init__(self, path):
+    self.file = open(path, "r+b", 0)
+    self.isLSB = None
+    self.is64Bit = None
+    self.isN64 = False
+
+  def tell(self):
+    return self.file.tell()
+
+  def seek(self, pos):
+    self.file.seek(pos)
+
+  def read(self, N):
+    data = self.file.read(N)
+    if len(data) != N:
+      raise ValueError, "Out of data!"
+    return data
+
+  def int8(self):
+    return ord(self.read(1))
+
+  def uint8(self):
+    return ord(self.read(1))
+
+  def int16(self):
+    return struct.unpack('><'[self.isLSB] + 'h', self.read(2))[0]
+
+  def uint16(self):
+    return struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0]
+
+  def int32(self):
+    return struct.unpack('><'[self.isLSB] + 'i', self.read(4))[0]
+
+  def uint32(self):
+    return struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0]
+
+  def int64(self):
+    return struct.unpack('><'[self.isLSB] + 'q', self.read(8))[0]
+
+  def uint64(self):
+    return struct.unpack('><'[self.isLSB] + 'Q', self.read(8))[0]
+
+  def writeUInt8(self, value):
+    self.file.write(struct.pack('><'[self.isLSB] + 'B', value))
+
+  def writeUInt16(self, value):
+    self.file.write(struct.pack('><'[self.isLSB] + 'H', value))
+
+  def writeUInt32(self, value):
+    self.file.write(struct.pack('><'[self.isLSB] + 'I', value))
+
+  def writeUInt64(self, value):
+    self.file.write(struct.pack('><'[self.isLSB] + 'Q', value))
+
+  def word(self):
+    if self.is64Bit:
+      return self.uint64()
+    else:
+      return self.uint32()
+
+  def writeWord(self, value):
+    if self.is64Bit:
+      self.writeUInt64(value)
+    else:
+      self.writeUInt32(value)
+
+class StringTable:
+  def __init__(self, strings):
+    self.string_table = strings
+
+  def __getitem__(self, index):
+    end = self.string_table.index('\x00', index)
+    return self.string_table[index:end]
+
+class ElfSection:
+  def __init__(self, f):
+    self.sh_name = f.uint32()
+    self.sh_type = f.uint32()
+    self.sh_flags = f.word()
+    self.sh_addr = f.word()
+    self.sh_offset = f.word()
+    self.sh_size = f.word()
+    self.sh_link = f.uint32()
+    self.sh_info = f.uint32()
+    self.sh_addralign = f.word()
+    self.sh_entsize = f.word()
+
+  def patch(self, f, relocs):
+    if self.sh_type == 4 or self.sh_type == 9: # SHT_RELA / SHT_REL
+      self.patchRelocs(f, relocs)
+
+  def patchRelocs(self, f, relocs):
+    entries = self.sh_size // self.sh_entsize
+
+    for index in range(entries):
+      f.seek(self.sh_offset + index * self.sh_entsize)
+      r_offset = f.word()
+
+      if index < len(relocs):
+        ri = index
+      else:
+        ri = 0
+
+      if f.isN64:
+        r_sym =   f.uint32()
+        r_ssym =  f.uint8()
+        f.seek(f.tell())
+        f.writeUInt8(relocs[ri][1])
+        f.writeUInt8(relocs[ri][1])
+        f.writeUInt8(relocs[ri][1])
+      else:
+        pos = f.tell()
+        r_info = f.word()
+
+        r_type = relocs[ri][1]
+        if f.is64Bit:
+          r_info = (r_info & 0xFFFFFFFF00000000) | (r_type & 0xFFFFFFFF)
+        else:
+          r_info = (r_info & 0xFF00) | (r_type & 0xFF)
+
+        print("    %s" % relocs[ri][0])
+        f.seek(pos)
+        f.writeWord(r_info)
+
+
+class CoffSection:
+  def __init__(self, f):
+    self.raw_name                = f.read(8)
+    self.virtual_size            = f.uint32()
+    self.virtual_address         = f.uint32()
+    self.raw_data_size           = f.uint32()
+    self.pointer_to_raw_data     = f.uint32()
+    self.pointer_to_relocations  = f.uint32()
+    self.pointer_to_line_numbers = f.uint32()
+    self.relocation_count        = f.uint16()
+    self.line_number_count       = f.uint16()
+    self.characteristics         = f.uint32()
+
+
+def compileAsm(filename, triple, src):
+  cmd = ["llvm-mc", "-triple=" + triple, "-filetype=obj", "-o", filename]
+  print("  Running: " + " ".join(cmd))
+  p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
+  p.communicate(input=src)
+  p.wait()
+
+def compileIR(filename, triple, src):
+  cmd = ["llc", "-mtriple=" + triple, "-filetype=obj", "-o", filename]
+  print("  Running: " + " ".join(cmd))
+  p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
+  p.communicate(input=src)
+  p.wait()
+
+
+def craftElf(filename, triple, relocs, dummyReloc):
+  print("Crafting " + filename + " for " + triple)
+  if type(dummyReloc) is tuple:
+    preSrc, dummyReloc, relocsPerDummy = dummyReloc
+    src = preSrc + "\n"
+    for i in range((len(relocs) + relocsPerDummy - 1) / relocsPerDummy):
+      src += dummyReloc.format(i) + "\n"
+    compileIR(filename, triple, src)
+  else:
+    src = (dummyReloc + "\n") * len(relocs)
+    compileAsm(filename, triple, src)
+
+  print("  Patching relocations...")
+  patchElf(filename, relocs)
+
+def patchElf(path, relocs):
+  f = BinaryReader(path)
+
+  magic = f.read(4)
+  assert magic == '\x7FELF'
+
+  fileclass = f.uint8()
+  if fileclass == 1:
+    f.is64Bit = False
+  elif fileclass == 2:
+    f.is64Bit = True
+  else:
+    raise ValueError, "Unknown file class %x" % fileclass
+
+  byteordering = f.uint8()
+  if byteordering == 1:
+      f.isLSB = True
+  elif byteordering == 2:
+      f.isLSB = False
+  else:
+      raise ValueError, "Unknown byte ordering %x" % byteordering
+
+  f.seek(18)
+  e_machine = f.uint16()
+  if e_machine == 0x0008 and f.is64Bit: # EM_MIPS && 64 bit
+      f.isN64 = True
+
+  e_version = f.uint32()
+  e_entry = f.word()
+  e_phoff = f.word()
+  e_shoff = f.word()
+  e_flags = f.uint32()
+  e_ehsize = f.uint16()
+  e_phentsize = f.uint16()
+  e_phnum = f.uint16()
+  e_shentsize = f.uint16()
+  e_shnum = f.uint16()
+  e_shstrndx = f.uint16()
+
+  sections = []
+  for index in range(e_shnum):
+    f.seek(e_shoff + index * e_shentsize)
+    s = ElfSection(f)
+    sections.append(s)
+
+  f.seek(sections[e_shstrndx].sh_offset)
+  shstrtab = StringTable(f.read(sections[e_shstrndx].sh_size))
+
+  strtab = None
+  for section in sections:
+    if shstrtab[section.sh_name] == ".strtab":
+      f.seek(section.sh_offset)
+      strtab = StringTable(f.read(section.sh_size))
+      break
+
+  for index in range(e_shnum):
+    sections[index].patch(f, relocs)
+
+
+def craftCoff(filename, triple, relocs, dummyReloc):
+  print("Crafting " + filename + " for " + triple)
+  src = (dummyReloc + "\n") * len(relocs)
+  compileAsm(filename, triple, src)
+
+  print("  Patching relocations...")
+  patchCoff(filename, relocs)
+
+def patchCoff(path, relocs):
+  f = BinaryReader(path)
+  f.isLSB = True
+
+  machine_type            = f.uint16()
+  section_count           = f.uint16()
+  f.seek(20)
+  sections = [CoffSection(f) for idx in range(section_count)]
+
+  section = sections[0]
+  f.seek(section.pointer_to_relocations)
+  for i in range(section.relocation_count):
+    virtual_addr = f.uint32()
+    symtab_idx   = f.uint32()
+    print("    %s" % relocs[i][0])
+    f.writeUInt16(relocs[i][1])
+
+
+def craftMacho(filename, triple, relocs, dummyReloc):
+  print("Crafting " + filename + " for " + triple)
+
+  if type(dummyReloc) is tuple:
+    srcType, preSrc, dummyReloc, relocsPerDummy = dummyReloc
+    src = preSrc + "\n"
+    for i in range((len(relocs) + relocsPerDummy - 1) / relocsPerDummy):
+      src += dummyReloc.format(i) + "\n"
+    if srcType == "asm":
+      compileAsm(filename, triple, src)
+    elif srcType == "ir":
+      compileIR(filename, triple, src)
+  else:
+    src = (dummyReloc + "\n") * len(relocs)
+    compileAsm(filename, triple, src)
+
+  print("  Patching relocations...")
+  patchMacho(filename, relocs)
+
+def patchMacho(filename, relocs):
+  f = BinaryReader(filename)
+
+  magic = f.read(4)
+  if magic == '\xFE\xED\xFA\xCE':
+    f.isLSB, f.is64Bit = False, False
+  elif magic == '\xCE\xFA\xED\xFE':
+    f.isLSB, f.is64Bit = True, False
+  elif magic == '\xFE\xED\xFA\xCF':
+    f.isLSB, f.is64Bit = False, True
+  elif magic == '\xCF\xFA\xED\xFE':
+    f.isLSB, f.is64Bit = True, True
+  else:
+    raise ValueError,"Not a Mach-O object file: %r (bad magic)" % path
+
+  cputype = f.uint32()
+  cpusubtype = f.uint32()
+  filetype = f.uint32()
+  numLoadCommands = f.uint32()
+  loadCommandsSize = f.uint32()
+  flag = f.uint32()
+  if f.is64Bit:
+    reserved = f.uint32()
+
+  start = f.tell()
+
+  for i in range(numLoadCommands):
+    patchMachoLoadCommand(f, relocs)
+
+  if f.tell() - start != loadCommandsSize:
+    raise ValueError,"%s: warning: invalid load commands size: %r" % (
+      sys.argv[0], loadCommandsSize)
+
+def patchMachoLoadCommand(f, relocs):
+  start = f.tell()
+  cmd = f.uint32()
+  cmdSize = f.uint32()
+
+  if cmd == 1:
+    patchMachoSegmentLoadCommand(f, relocs)
+  elif cmd == 25:
+    patchMachoSegmentLoadCommand(f, relocs)
+  else:
+    f.read(cmdSize - 8)
+
+  if f.tell() - start != cmdSize:
+    raise ValueError,"%s: warning: invalid load command size: %r" % (
+      sys.argv[0], cmdSize)
+
+def patchMachoSegmentLoadCommand(f, relocs):
+  segment_name = f.read(16)
+  vm_addr = f.word()
+  vm_size = f.word()
+  file_offset = f.word()
+  file_size = f.word()
+  maxprot = f.uint32()
+  initprot = f.uint32()
+  numSections = f.uint32()
+  flags = f.uint32()
+  for i in range(numSections):
+    patchMachoSection(f, relocs)
+
+def patchMachoSection(f, relocs):
+  section_name = f.read(16)
+  segment_name = f.read(16)
+  address = f.word()
+  size = f.word()
+  offset = f.uint32()
+  alignment = f.uint32()
+  relocOffset = f.uint32()
+  numReloc = f.uint32()
+  flags = f.uint32()
+  reserved1 = f.uint32()
+  reserved2 = f.uint32()
+  if f.is64Bit:
+    reserved3 = f.uint32()
+
+  prev_pos = f.tell()
+
+  f.seek(relocOffset)
+  for i in range(numReloc):
+    ri = i < len(relocs) and i or 0
+    print("    %s" % relocs[ri][0])
+    word1 = f.uint32()
+    pos = f.tell()
+    value = f.uint32()
+    f.seek(pos)
+    value = (value & 0x0FFFFFFF) | ((relocs[ri][1] & 0xF) << 28)
+    f.writeUInt32(value)
+  f.seek(prev_pos)
+
+
+class Relocs_Elf_X86_64(Enum):
+  R_X86_64_NONE       = 0
+  R_X86_64_64         = 1
+  R_X86_64_PC32       = 2
+  R_X86_64_GOT32      = 3
+  R_X86_64_PLT32      = 4
+  R_X86_64_COPY       = 5
+  R_X86_64_GLOB_DAT   = 6
+  R_X86_64_JUMP_SLOT  = 7
+  R_X86_64_RELATIVE   = 8
+  R_X86_64_GOTPCREL   = 9
+  R_X86_64_32         = 10
+  R_X86_64_32S        = 11
+  R_X86_64_16         = 12
+  R_X86_64_PC16       = 13
+  R_X86_64_8          = 14
+  R_X86_64_PC8        = 15
+  R_X86_64_DTPMOD64   = 16
+  R_X86_64_DTPOFF64   = 17
+  R_X86_64_TPOFF64    = 18
+  R_X86_64_TLSGD      = 19
+  R_X86_64_TLSLD      = 20
+  R_X86_64_DTPOFF32   = 21
+  R_X86_64_GOTTPOFF   = 22
+  R_X86_64_TPOFF32    = 23
+  R_X86_64_PC64       = 24
+  R_X86_64_GOTOFF64   = 25
+  R_X86_64_GOTPC32    = 26
+  R_X86_64_GOT64      = 27
+  R_X86_64_GOTPCREL64 = 28
+  R_X86_64_GOTPC64    = 29
+  R_X86_64_GOTPLT64   = 30
+  R_X86_64_PLTOFF64   = 31
+  R_X86_64_SIZE32     = 32
+  R_X86_64_SIZE64     = 33
+  R_X86_64_GOTPC32_TLSDESC = 34
+  R_X86_64_TLSDESC_CALL    = 35
+  R_X86_64_TLSDESC    = 36
+  R_X86_64_IRELATIVE  = 37
+
+class Relocs_Elf_i386(Enum):
+  R_386_NONE          = 0
+  R_386_32            = 1
+  R_386_PC32          = 2
+  R_386_GOT32         = 3
+  R_386_PLT32         = 4
+  R_386_COPY          = 5
+  R_386_GLOB_DAT      = 6
+  R_386_JUMP_SLOT     = 7
+  R_386_RELATIVE      = 8
+  R_386_GOTOFF        = 9
+  R_386_GOTPC         = 10
+  R_386_32PLT         = 11
+  R_386_TLS_TPOFF     = 14
+  R_386_TLS_IE        = 15
+  R_386_TLS_GOTIE     = 16
+  R_386_TLS_LE        = 17
+  R_386_TLS_GD        = 18
+  R_386_TLS_LDM       = 19
+  R_386_16            = 20
+  R_386_PC16          = 21
+  R_386_8             = 22
+  R_386_PC8           = 23
+  R_386_TLS_GD_32     = 24
+  R_386_TLS_GD_PUSH   = 25
+  R_386_TLS_GD_CALL   = 26
+  R_386_TLS_GD_POP    = 27
+  R_386_TLS_LDM_32    = 28
+  R_386_TLS_LDM_PUSH  = 29
+  R_386_TLS_LDM_CALL  = 30
+  R_386_TLS_LDM_POP   = 31
+  R_386_TLS_LDO_32    = 32
+  R_386_TLS_IE_32     = 33
+  R_386_TLS_LE_32     = 34
+  R_386_TLS_DTPMOD32  = 35
+  R_386_TLS_DTPOFF32  = 36
+  R_386_TLS_TPOFF32   = 37
+  R_386_TLS_GOTDESC   = 39
+  R_386_TLS_DESC_CALL = 40
+  R_386_TLS_DESC      = 41
+  R_386_IRELATIVE     = 42
+  R_386_NUM           = 43
+
+class Relocs_Elf_MBlaze(Enum):
+  R_MICROBLAZE_NONE           = 0
+  R_MICROBLAZE_32             = 1
+  R_MICROBLAZE_32_PCREL       = 2
+  R_MICROBLAZE_64_PCREL       = 3
+  R_MICROBLAZE_32_PCREL_LO    = 4
+  R_MICROBLAZE_64             = 5
+  R_MICROBLAZE_32_LO          = 6
+  R_MICROBLAZE_SRO32          = 7
+  R_MICROBLAZE_SRW32          = 8
+  R_MICROBLAZE_64_NONE        = 9
+  R_MICROBLAZE_32_SYM_OP_SYM  = 10
+  R_MICROBLAZE_GNU_VTINHERIT  = 11
+  R_MICROBLAZE_GNU_VTENTRY    = 12
+  R_MICROBLAZE_GOTPC_64       = 13
+  R_MICROBLAZE_GOT_64         = 14
+  R_MICROBLAZE_PLT_64         = 15
+  R_MICROBLAZE_REL            = 16
+  R_MICROBLAZE_JUMP_SLOT      = 17
+  R_MICROBLAZE_GLOB_DAT       = 18
+  R_MICROBLAZE_GOTOFF_64      = 19
+  R_MICROBLAZE_GOTOFF_32      = 20
+  R_MICROBLAZE_COPY           = 21
+
+class Relocs_Elf_PPC32(Enum):
+  R_PPC_NONE                  = 0
+  R_PPC_ADDR32                = 1
+  R_PPC_ADDR24                = 2
+  R_PPC_ADDR16                = 3
+  R_PPC_ADDR16_LO             = 4
+  R_PPC_ADDR16_HI             = 5
+  R_PPC_ADDR16_HA             = 6
+  R_PPC_ADDR14                = 7
+  R_PPC_ADDR14_BRTAKEN        = 8
+  R_PPC_ADDR14_BRNTAKEN       = 9
+  R_PPC_REL24                 = 10
+  R_PPC_REL14                 = 11
+  R_PPC_REL14_BRTAKEN         = 12
+  R_PPC_REL14_BRNTAKEN        = 13
+  R_PPC_REL32                 = 26
+  R_PPC_TPREL16_LO            = 70
+  R_PPC_TPREL16_HA            = 72
+
+class Relocs_Elf_PPC64(Enum):
+  R_PPC64_NONE                = 0
+  R_PPC64_ADDR32              = 1
+  R_PPC64_ADDR16_LO           = 4
+  R_PPC64_ADDR16_HI           = 5
+  R_PPC64_ADDR14              = 7
+  R_PPC64_REL24               = 10
+  R_PPC64_REL32               = 26
+  R_PPC64_ADDR64              = 38
+  R_PPC64_ADDR16_HIGHER       = 39
+  R_PPC64_ADDR16_HIGHEST      = 41
+  R_PPC64_REL64               = 44
+  R_PPC64_TOC16               = 47
+  R_PPC64_TOC16_LO            = 48
+  R_PPC64_TOC16_HA            = 50
+  R_PPC64_TOC                 = 51
+  R_PPC64_ADDR16_DS           = 56
+  R_PPC64_ADDR16_LO_DS        = 57
+  R_PPC64_TOC16_DS            = 63
+  R_PPC64_TOC16_LO_DS         = 64
+  R_PPC64_TLS                 = 67
+  R_PPC64_TPREL16_LO          = 70
+  R_PPC64_TPREL16_HA          = 72
+  R_PPC64_DTPREL16_LO         = 75
+  R_PPC64_DTPREL16_HA         = 77
+  R_PPC64_GOT_TLSGD16_LO      = 80
+  R_PPC64_GOT_TLSGD16_HA      = 82
+  R_PPC64_GOT_TLSLD16_LO      = 84
+  R_PPC64_GOT_TLSLD16_HA      = 86
+  R_PPC64_GOT_TPREL16_LO_DS   = 88
+  R_PPC64_GOT_TPREL16_HA      = 90
+  R_PPC64_TLSGD               = 107
+  R_PPC64_TLSLD               = 108
+
+class Relocs_Elf_AArch64(Enum):
+  R_AARCH64_NONE                        = 0x100
+  R_AARCH64_ABS64                       = 0x101
+  R_AARCH64_ABS32                       = 0x102
+  R_AARCH64_ABS16                       = 0x103
+  R_AARCH64_PREL64                      = 0x104
+  R_AARCH64_PREL32                      = 0x105
+  R_AARCH64_PREL16                      = 0x106
+  R_AARCH64_MOVW_UABS_G0                = 0x107
+  R_AARCH64_MOVW_UABS_G0_NC             = 0x108
+  R_AARCH64_MOVW_UABS_G1                = 0x109
+  R_AARCH64_MOVW_UABS_G1_NC             = 0x10a
+  R_AARCH64_MOVW_UABS_G2                = 0x10b
+  R_AARCH64_MOVW_UABS_G2_NC             = 0x10c
+  R_AARCH64_MOVW_UABS_G3                = 0x10d
+  R_AARCH64_MOVW_SABS_G0                = 0x10e
+  R_AARCH64_MOVW_SABS_G1                = 0x10f
+  R_AARCH64_MOVW_SABS_G2                = 0x110
+  R_AARCH64_LD_PREL_LO19                = 0x111
+  R_AARCH64_ADR_PREL_LO21               = 0x112
+  R_AARCH64_ADR_PREL_PG_HI21            = 0x113
+  R_AARCH64_ADD_ABS_LO12_NC             = 0x115
+  R_AARCH64_LDST8_ABS_LO12_NC           = 0x116
+  R_AARCH64_TSTBR14                     = 0x117
+  R_AARCH64_CONDBR19                    = 0x118
+  R_AARCH64_JUMP26                      = 0x11a
+  R_AARCH64_CALL26                      = 0x11b
+  R_AARCH64_LDST16_ABS_LO12_NC          = 0x11c
+  R_AARCH64_LDST32_ABS_LO12_NC          = 0x11d
+  R_AARCH64_LDST64_ABS_LO12_NC          = 0x11e
+  R_AARCH64_LDST128_ABS_LO12_NC         = 0x12b
+  R_AARCH64_ADR_GOT_PAGE                = 0x137
+  R_AARCH64_LD64_GOT_LO12_NC            = 0x138
+  R_AARCH64_TLSLD_MOVW_DTPREL_G2        = 0x20b
+  R_AARCH64_TLSLD_MOVW_DTPREL_G1        = 0x20c
+  R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC     = 0x20d
+  R_AARCH64_TLSLD_MOVW_DTPREL_G0        = 0x20e
+  R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC     = 0x20f
+  R_AARCH64_TLSLD_ADD_DTPREL_HI12       = 0x210
+  R_AARCH64_TLSLD_ADD_DTPREL_LO12       = 0x211
+  R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC    = 0x212
+  R_AARCH64_TLSLD_LDST8_DTPREL_LO12     = 0x213
+  R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC  = 0x214
+  R_AARCH64_TLSLD_LDST16_DTPREL_LO12    = 0x215
+  R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC = 0x216
+  R_AARCH64_TLSLD_LDST32_DTPREL_LO12    = 0x217
+  R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC = 0x218
+  R_AARCH64_TLSLD_LDST64_DTPREL_LO12    = 0x219
+  R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC = 0x21a
+  R_AARCH64_TLSIE_MOVW_GOTTPREL_G1      = 0x21b
+  R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC   = 0x21c
+  R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21   = 0x21d
+  R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC = 0x21e
+  R_AARCH64_TLSIE_LD_GOTTPREL_PREL19    = 0x21f
+  R_AARCH64_TLSLE_MOVW_TPREL_G2         = 0x220
+  R_AARCH64_TLSLE_MOVW_TPREL_G1         = 0x221
+  R_AARCH64_TLSLE_MOVW_TPREL_G1_NC      = 0x222
+  R_AARCH64_TLSLE_MOVW_TPREL_G0         = 0x223
+  R_AARCH64_TLSLE_MOVW_TPREL_G0_NC      = 0x224
+  R_AARCH64_TLSLE_ADD_TPREL_HI12        = 0x225
+  R_AARCH64_TLSLE_ADD_TPREL_LO12        = 0x226
+  R_AARCH64_TLSLE_ADD_TPREL_LO12_NC     = 0x227
+  R_AARCH64_TLSLE_LDST8_TPREL_LO12      = 0x228
+  R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC   = 0x229
+  R_AARCH64_TLSLE_LDST16_TPREL_LO12     = 0x22a
+  R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC  = 0x22b
+  R_AARCH64_TLSLE_LDST32_TPREL_LO12     = 0x22c
+  R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC  = 0x22d
+  R_AARCH64_TLSLE_LDST64_TPREL_LO12     = 0x22e
+  R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC  = 0x22f
+  R_AARCH64_TLSDESC_ADR_PAGE            = 0x232
+  R_AARCH64_TLSDESC_LD64_LO12_NC        = 0x233
+  R_AARCH64_TLSDESC_ADD_LO12_NC         = 0x234
+  R_AARCH64_TLSDESC_CALL                = 0x239
+
+class Relocs_Elf_ARM(Enum):
+  R_ARM_NONE                  = 0x00
+  R_ARM_PC24                  = 0x01
+  R_ARM_ABS32                 = 0x02
+  R_ARM_REL32                 = 0x03
+  R_ARM_LDR_PC_G0             = 0x04
+  R_ARM_ABS16                 = 0x05
+  R_ARM_ABS12                 = 0x06
+  R_ARM_THM_ABS5              = 0x07
+  R_ARM_ABS8                  = 0x08
+  R_ARM_SBREL32               = 0x09
+  R_ARM_THM_CALL              = 0x0a
+  R_ARM_THM_PC8               = 0x0b
+  R_ARM_BREL_ADJ              = 0x0c
+  R_ARM_TLS_DESC              = 0x0d
+  R_ARM_THM_SWI8              = 0x0e
+  R_ARM_XPC25                 = 0x0f
+  R_ARM_THM_XPC22             = 0x10
+  R_ARM_TLS_DTPMOD32          = 0x11
+  R_ARM_TLS_DTPOFF32          = 0x12
+  R_ARM_TLS_TPOFF32           = 0x13
+  R_ARM_COPY                  = 0x14
+  R_ARM_GLOB_DAT              = 0x15
+  R_ARM_JUMP_SLOT             = 0x16
+  R_ARM_RELATIVE              = 0x17
+  R_ARM_GOTOFF32              = 0x18
+  R_ARM_BASE_PREL             = 0x19
+  R_ARM_GOT_BREL              = 0x1a
+  R_ARM_PLT32                 = 0x1b
+  R_ARM_CALL                  = 0x1c
+  R_ARM_JUMP24                = 0x1d
+  R_ARM_THM_JUMP24            = 0x1e
+  R_ARM_BASE_ABS              = 0x1f
+  R_ARM_ALU_PCREL_7_0         = 0x20
+  R_ARM_ALU_PCREL_15_8        = 0x21
+  R_ARM_ALU_PCREL_23_15       = 0x22
+  R_ARM_LDR_SBREL_11_0_NC     = 0x23
+  R_ARM_ALU_SBREL_19_12_NC    = 0x24
+  R_ARM_ALU_SBREL_27_20_CK    = 0x25
+  R_ARM_TARGET1               = 0x26
+  R_ARM_SBREL31               = 0x27
+  R_ARM_V4BX                  = 0x28
+  R_ARM_TARGET2               = 0x29
+  R_ARM_PREL31                = 0x2a
+  R_ARM_MOVW_ABS_NC           = 0x2b
+  R_ARM_MOVT_ABS              = 0x2c
+  R_ARM_MOVW_PREL_NC          = 0x2d
+  R_ARM_MOVT_PREL             = 0x2e
+  R_ARM_THM_MOVW_ABS_NC       = 0x2f
+  R_ARM_THM_MOVT_ABS          = 0x30
+  R_ARM_THM_MOVW_PREL_NC      = 0x31
+  R_ARM_THM_MOVT_PREL         = 0x32
+  R_ARM_THM_JUMP19            = 0x33
+  R_ARM_THM_JUMP6             = 0x34
+  R_ARM_THM_ALU_PREL_11_0     = 0x35
+  R_ARM_THM_PC12              = 0x36
+  R_ARM_ABS32_NOI             = 0x37
+  R_ARM_REL32_NOI             = 0x38
+  R_ARM_ALU_PC_G0_NC          = 0x39
+  R_ARM_ALU_PC_G0             = 0x3a
+  R_ARM_ALU_PC_G1_NC          = 0x3b
+  R_ARM_ALU_PC_G1             = 0x3c
+  R_ARM_ALU_PC_G2             = 0x3d
+  R_ARM_LDR_PC_G1             = 0x3e
+  R_ARM_LDR_PC_G2             = 0x3f
+  R_ARM_LDRS_PC_G0            = 0x40
+  R_ARM_LDRS_PC_G1            = 0x41
+  R_ARM_LDRS_PC_G2            = 0x42
+  R_ARM_LDC_PC_G0             = 0x43
+  R_ARM_LDC_PC_G1             = 0x44
+  R_ARM_LDC_PC_G2             = 0x45
+  R_ARM_ALU_SB_G0_NC          = 0x46
+  R_ARM_ALU_SB_G0             = 0x47
+  R_ARM_ALU_SB_G1_NC          = 0x48
+  R_ARM_ALU_SB_G1             = 0x49
+  R_ARM_ALU_SB_G2             = 0x4a
+  R_ARM_LDR_SB_G0             = 0x4b
+  R_ARM_LDR_SB_G1             = 0x4c
+  R_ARM_LDR_SB_G2             = 0x4d
+  R_ARM_LDRS_SB_G0            = 0x4e
+  R_ARM_LDRS_SB_G1            = 0x4f
+  R_ARM_LDRS_SB_G2            = 0x50
+  R_ARM_LDC_SB_G0             = 0x51
+  R_ARM_LDC_SB_G1             = 0x52
+  R_ARM_LDC_SB_G2             = 0x53
+  R_ARM_MOVW_BREL_NC          = 0x54
+  R_ARM_MOVT_BREL             = 0x55
+  R_ARM_MOVW_BREL             = 0x56
+  R_ARM_THM_MOVW_BREL_NC      = 0x57
+  R_ARM_THM_MOVT_BREL         = 0x58
+  R_ARM_THM_MOVW_BREL         = 0x59
+  R_ARM_TLS_GOTDESC           = 0x5a
+  R_ARM_TLS_CALL              = 0x5b
+  R_ARM_TLS_DESCSEQ           = 0x5c
+  R_ARM_THM_TLS_CALL          = 0x5d
+  R_ARM_PLT32_ABS             = 0x5e
+  R_ARM_GOT_ABS               = 0x5f
+  R_ARM_GOT_PREL              = 0x60
+  R_ARM_GOT_BREL12            = 0x61
+  R_ARM_GOTOFF12              = 0x62
+  R_ARM_GOTRELAX              = 0x63
+  R_ARM_GNU_VTENTRY           = 0x64
+  R_ARM_GNU_VTINHERIT         = 0x65
+  R_ARM_THM_JUMP11            = 0x66
+  R_ARM_THM_JUMP8             = 0x67
+  R_ARM_TLS_GD32              = 0x68
+  R_ARM_TLS_LDM32             = 0x69
+  R_ARM_TLS_LDO32             = 0x6a
+  R_ARM_TLS_IE32              = 0x6b
+  R_ARM_TLS_LE32              = 0x6c
+  R_ARM_TLS_LDO12             = 0x6d
+  R_ARM_TLS_LE12              = 0x6e
+  R_ARM_TLS_IE12GP            = 0x6f
+  R_ARM_PRIVATE_0             = 0x70
+  R_ARM_PRIVATE_1             = 0x71
+  R_ARM_PRIVATE_2             = 0x72
+  R_ARM_PRIVATE_3             = 0x73
+  R_ARM_PRIVATE_4             = 0x74
+  R_ARM_PRIVATE_5             = 0x75
+  R_ARM_PRIVATE_6             = 0x76
+  R_ARM_PRIVATE_7             = 0x77
+  R_ARM_PRIVATE_8             = 0x78
+  R_ARM_PRIVATE_9             = 0x79
+  R_ARM_PRIVATE_10            = 0x7a
+  R_ARM_PRIVATE_11            = 0x7b
+  R_ARM_PRIVATE_12            = 0x7c
+  R_ARM_PRIVATE_13            = 0x7d
+  R_ARM_PRIVATE_14            = 0x7e
+  R_ARM_PRIVATE_15            = 0x7f
+  R_ARM_ME_TOO                = 0x80
+  R_ARM_THM_TLS_DESCSEQ16     = 0x81
+  R_ARM_THM_TLS_DESCSEQ32     = 0x82
+
+class Relocs_Elf_Mips(Enum):
+  R_MIPS_NONE              =  0
+  R_MIPS_16                =  1
+  R_MIPS_32                =  2
+  R_MIPS_REL32             =  3
+  R_MIPS_26                =  4
+  R_MIPS_HI16              =  5
+  R_MIPS_LO16              =  6
+  R_MIPS_GPREL16           =  7
+  R_MIPS_LITERAL           =  8
+  R_MIPS_GOT16             =  9
+  R_MIPS_PC16              = 10
+  R_MIPS_CALL16            = 11
+  R_MIPS_GPREL32           = 12
+  R_MIPS_SHIFT5            = 16
+  R_MIPS_SHIFT6            = 17
+  R_MIPS_64                = 18
+  R_MIPS_GOT_DISP          = 19
+  R_MIPS_GOT_PAGE          = 20
+  R_MIPS_GOT_OFST          = 21
+  R_MIPS_GOT_HI16          = 22
+  R_MIPS_GOT_LO16          = 23
+  R_MIPS_SUB               = 24
+  R_MIPS_INSERT_A          = 25
+  R_MIPS_INSERT_B          = 26
+  R_MIPS_DELETE            = 27
+  R_MIPS_HIGHER            = 28
+  R_MIPS_HIGHEST           = 29
+  R_MIPS_CALL_HI16         = 30
+  R_MIPS_CALL_LO16         = 31
+  R_MIPS_SCN_DISP          = 32
+  R_MIPS_REL16             = 33
+  R_MIPS_ADD_IMMEDIATE     = 34
+  R_MIPS_PJUMP             = 35
+  R_MIPS_RELGOT            = 36
+  R_MIPS_JALR              = 37
+  R_MIPS_TLS_DTPMOD32      = 38
+  R_MIPS_TLS_DTPREL32      = 39
+  R_MIPS_TLS_DTPMOD64      = 40
+  R_MIPS_TLS_DTPREL64      = 41
+  R_MIPS_TLS_GD            = 42
+  R_MIPS_TLS_LDM           = 43
+  R_MIPS_TLS_DTPREL_HI16   = 44
+  R_MIPS_TLS_DTPREL_LO16   = 45
+  R_MIPS_TLS_GOTTPREL      = 46
+  R_MIPS_TLS_TPREL32       = 47
+  R_MIPS_TLS_TPREL64       = 48
+  R_MIPS_TLS_TPREL_HI16    = 49
+  R_MIPS_TLS_TPREL_LO16    = 50
+  R_MIPS_GLOB_DAT          = 51
+  R_MIPS_COPY              = 126
+  R_MIPS_JUMP_SLOT         = 127
+  R_MIPS_NUM               = 218
+
+class Relocs_Elf_Hexagon(Enum):
+  R_HEX_NONE              =  0
+  R_HEX_B22_PCREL         =  1
+  R_HEX_B15_PCREL         =  2
+  R_HEX_B7_PCREL          =  3
+  R_HEX_LO16              =  4
+  R_HEX_HI16              =  5
+  R_HEX_32                =  6
+  R_HEX_16                =  7
+  R_HEX_8                 =  8
+  R_HEX_GPREL16_0         =  9
+  R_HEX_GPREL16_1         =  10
+  R_HEX_GPREL16_2         =  11
+  R_HEX_GPREL16_3         =  12
+  R_HEX_HL16              =  13
+  R_HEX_B13_PCREL         =  14
+  R_HEX_B9_PCREL          =  15
+  R_HEX_B32_PCREL_X       =  16
+  R_HEX_32_6_X            =  17
+  R_HEX_B22_PCREL_X       =  18
+  R_HEX_B15_PCREL_X       =  19
+  R_HEX_B13_PCREL_X       =  20
+  R_HEX_B9_PCREL_X        =  21
+  R_HEX_B7_PCREL_X        =  22
+  R_HEX_16_X              =  23
+  R_HEX_12_X              =  24
+  R_HEX_11_X              =  25
+  R_HEX_10_X              =  26
+  R_HEX_9_X               =  27
+  R_HEX_8_X               =  28
+  R_HEX_7_X               =  29
+  R_HEX_6_X               =  30
+  R_HEX_32_PCREL          =  31
+  R_HEX_COPY              =  32
+  R_HEX_GLOB_DAT          =  33
+  R_HEX_JMP_SLOT          =  34
+  R_HEX_RELATIVE          =  35
+  R_HEX_PLT_B22_PCREL     =  36
+  R_HEX_GOTREL_LO16       =  37
+  R_HEX_GOTREL_HI16       =  38
+  R_HEX_GOTREL_32         =  39
+  R_HEX_GOT_LO16          =  40
+  R_HEX_GOT_HI16          =  41
+  R_HEX_GOT_32            =  42
+  R_HEX_GOT_16            =  43
+  R_HEX_DTPMOD_32         =  44
+  R_HEX_DTPREL_LO16       =  45
+  R_HEX_DTPREL_HI16       =  46
+  R_HEX_DTPREL_32         =  47
+  R_HEX_DTPREL_16         =  48
+  R_HEX_GD_PLT_B22_PCREL  =  49
+  R_HEX_GD_GOT_LO16       =  50
+  R_HEX_GD_GOT_HI16       =  51
+  R_HEX_GD_GOT_32         =  52
+  R_HEX_GD_GOT_16         =  53
+  R_HEX_IE_LO16           =  54
+  R_HEX_IE_HI16           =  55
+  R_HEX_IE_32             =  56
+  R_HEX_IE_GOT_LO16       =  57
+  R_HEX_IE_GOT_HI16       =  58
+  R_HEX_IE_GOT_32         =  59
+  R_HEX_IE_GOT_16         =  60
+  R_HEX_TPREL_LO16        =  61
+  R_HEX_TPREL_HI16        =  62
+  R_HEX_TPREL_32          =  63
+  R_HEX_TPREL_16          =  64
+  R_HEX_6_PCREL_X         =  65
+  R_HEX_GOTREL_32_6_X     =  66
+  R_HEX_GOTREL_16_X       =  67
+  R_HEX_GOTREL_11_X       =  68
+  R_HEX_GOT_32_6_X        =  69
+  R_HEX_GOT_16_X          =  70
+  R_HEX_GOT_11_X          =  71
+  R_HEX_DTPREL_32_6_X     =  72
+  R_HEX_DTPREL_16_X       =  73
+  R_HEX_DTPREL_11_X       =  74
+  R_HEX_GD_GOT_32_6_X     =  75
+  R_HEX_GD_GOT_16_X       =  76
+  R_HEX_GD_GOT_11_X       =  77
+  R_HEX_IE_32_6_X         =  78
+  R_HEX_IE_16_X           =  79
+  R_HEX_IE_GOT_32_6_X     =  80
+  R_HEX_IE_GOT_16_X       =  81
+  R_HEX_IE_GOT_11_X       =  82
+  R_HEX_TPREL_32_6_X      =  83
+  R_HEX_TPREL_16_X        =  84
+  R_HEX_TPREL_11_X        =  85
+
+
+class Relocs_Coff_i386(Enum):
+  IMAGE_REL_I386_ABSOLUTE = 0x0000
+  IMAGE_REL_I386_DIR16    = 0x0001
+  IMAGE_REL_I386_REL16    = 0x0002
+  IMAGE_REL_I386_DIR32    = 0x0006
+  IMAGE_REL_I386_DIR32NB  = 0x0007
+  IMAGE_REL_I386_SEG12    = 0x0009
+  IMAGE_REL_I386_SECTION  = 0x000A
+  IMAGE_REL_I386_SECREL   = 0x000B
+  IMAGE_REL_I386_TOKEN    = 0x000C
+  IMAGE_REL_I386_SECREL7  = 0x000D
+  IMAGE_REL_I386_REL32    = 0x0014
+
+class Relocs_Coff_X86_64(Enum):
+  IMAGE_REL_AMD64_ABSOLUTE  = 0x0000
+  IMAGE_REL_AMD64_ADDR64    = 0x0001
+  IMAGE_REL_AMD64_ADDR32    = 0x0002
+  IMAGE_REL_AMD64_ADDR32NB  = 0x0003
+  IMAGE_REL_AMD64_REL32     = 0x0004
+  IMAGE_REL_AMD64_REL32_1   = 0x0005
+  IMAGE_REL_AMD64_REL32_2   = 0x0006
+  IMAGE_REL_AMD64_REL32_3   = 0x0007
+  IMAGE_REL_AMD64_REL32_4   = 0x0008
+  IMAGE_REL_AMD64_REL32_5   = 0x0009
+  IMAGE_REL_AMD64_SECTION   = 0x000A
+  IMAGE_REL_AMD64_SECREL    = 0x000B
+  IMAGE_REL_AMD64_SECREL7   = 0x000C
+  IMAGE_REL_AMD64_TOKEN     = 0x000D
+  IMAGE_REL_AMD64_SREL32    = 0x000E
+  IMAGE_REL_AMD64_PAIR      = 0x000F
+  IMAGE_REL_AMD64_SSPAN32   = 0x0010
+
+class Relocs_Coff_ARM(Enum):
+  IMAGE_REL_ARM_ABSOLUTE  = 0x0000
+  IMAGE_REL_ARM_ADDR32    = 0x0001
+  IMAGE_REL_ARM_ADDR32NB  = 0x0002
+  IMAGE_REL_ARM_BRANCH24  = 0x0003
+  IMAGE_REL_ARM_BRANCH11  = 0x0004
+  IMAGE_REL_ARM_TOKEN     = 0x0005
+  IMAGE_REL_ARM_BLX24     = 0x0008
+  IMAGE_REL_ARM_BLX11     = 0x0009
+  IMAGE_REL_ARM_SECTION   = 0x000E
+  IMAGE_REL_ARM_SECREL    = 0x000F
+  IMAGE_REL_ARM_MOV32A    = 0x0010
+  IMAGE_REL_ARM_MOV32T    = 0x0011
+  IMAGE_REL_ARM_BRANCH20T = 0x0012
+  IMAGE_REL_ARM_BRANCH24T = 0x0014
+  IMAGE_REL_ARM_BLX23T    = 0x0015
+
+
+class Relocs_Macho_i386(Enum):
+  RIT_Vanilla                     = 0
+  RIT_Pair                        = 1
+  RIT_Difference                  = 2
+  RIT_Generic_PreboundLazyPointer = 3
+  RIT_Generic_LocalDifference     = 4
+  RIT_Generic_TLV                 = 5
+
+class Relocs_Macho_X86_64(Enum):
+  RIT_X86_64_Unsigned   = 0
+  RIT_X86_64_Signed     = 1
+  RIT_X86_64_Branch     = 2
+  RIT_X86_64_GOTLoad    = 3
+  RIT_X86_64_GOT        = 4
+  RIT_X86_64_Subtractor = 5
+  RIT_X86_64_Signed1    = 6
+  RIT_X86_64_Signed2    = 7
+  RIT_X86_64_Signed4    = 8
+  RIT_X86_64_TLV        = 9
+
+class Relocs_Macho_ARM(Enum):
+  RIT_Vanilla                     = 0
+  RIT_Pair                        = 1
+  RIT_Difference                  = 2
+  RIT_ARM_LocalDifference         = 3
+  RIT_ARM_PreboundLazyPointer     = 4
+  RIT_ARM_Branch24Bit             = 5
+  RIT_ARM_ThumbBranch22Bit        = 6
+  RIT_ARM_ThumbBranch32Bit        = 7
+  RIT_ARM_Half                    = 8
+  RIT_ARM_HalfDifference          = 9
+
+class Relocs_Macho_PPC(Enum):
+  PPC_RELOC_VANILLA        = 0
+  PPC_RELOC_PAIR           = 1
+  PPC_RELOC_BR14           = 2
+  PPC_RELOC_BR24           = 3
+  PPC_RELOC_HI16           = 4
+  PPC_RELOC_LO16           = 5
+  PPC_RELOC_HA16           = 6
+  PPC_RELOC_LO14           = 7
+  PPC_RELOC_SECTDIFF       = 8
+  PPC_RELOC_PB_LA_PTR      = 9
+  PPC_RELOC_HI16_SECTDIFF  = 10
+  PPC_RELOC_LO16_SECTDIFF  = 11
+  PPC_RELOC_HA16_SECTDIFF  = 12
+  PPC_RELOC_JBSR           = 13
+  PPC_RELOC_LO14_SECTDIFF  = 14
+  PPC_RELOC_LOCAL_SECTDIFF = 15
+
+
+craftElf("relocs.obj.elf-x86_64",   "x86_64-pc-linux-gnu",         Relocs_Elf_X86_64.entries(), "leaq sym@GOTTPOFF(%rip), %rax")
+craftElf("relocs.obj.elf-i386",     "i386-pc-linux-gnu",           Relocs_Elf_i386.entries(),   "mov sym@GOTOFF(%ebx), %eax")
+#craftElf("relocs-elf-ppc32",   "powerpc-unknown-linux-gnu",   Relocs_Elf_PPC32.entries(), ...)
+craftElf("relocs.obj.elf-ppc64",   "powerpc64-unknown-linux-gnu", Relocs_Elf_PPC64.entries(),
+         ("@t = thread_local global i32 0, align 4", "define i32* @f{0}() nounwind {{ ret i32* @t }}", 2))
+craftElf("relocs.obj.elf-aarch64",  "aarch64",                     Relocs_Elf_AArch64.entries(), "movz x0, #:abs_g0:sym")
+craftElf("relocs.obj.elf-arm",      "arm-unknown-unknown",         Relocs_Elf_ARM.entries(), "b sym")
+craftElf("relocs.obj.elf-mips",     "mips-unknown-linux",          Relocs_Elf_Mips.entries(), "lui $2, %hi(sym)")
+craftElf("relocs.obj.elf-mips64el", "mips64el-unknown-linux",        Relocs_Elf_Mips.entries(), "lui $2, %hi(sym)")
+#craftElf("relocs.obj.elf-mblaze",   "mblaze-unknown-unknown",      Relocs_Elf_MBlaze.entries(), ...)
+#craftElf("relocs.obj.elf-hexagon",  "hexagon-unknown-unknown",     Relocs_Elf_Hexagon.entries(), ...)
+
+craftCoff("relocs.obj.coff-i386",   "i386-pc-win32",   Relocs_Coff_i386.entries(),   "mov foo@imgrel(%ebx, %ecx, 4), %eax")
+craftCoff("relocs.obj.coff-x86_64", "x86_64-pc-win32", Relocs_Coff_X86_64.entries(), "mov foo@imgrel(%ebx, %ecx, 4), %eax")
+#craftCoff("relocs.obj.coff-arm",    "arm-pc-win32",    Relocs_Coff_ARM.entries(), "...")
+
+craftMacho("relocs.obj.macho-i386",   "i386-apple-darwin9", Relocs_Macho_i386.entries(),
+          ("asm", ".subsections_via_symbols; .text; a: ; b:", "call a", 1))
+craftMacho("relocs.obj.macho-x86_64", "x86_64-apple-darwin9", Relocs_Macho_X86_64.entries(),
+          ("asm", ".subsections_via_symbols; .text; a: ; b:", "call a", 1))
+craftMacho("relocs.obj.macho-arm",    "armv7-apple-darwin10", Relocs_Macho_ARM.entries(), "bl sym")
+#craftMacho("relocs.obj.macho-ppc",   "powerpc-apple-darwin10", Relocs_Macho_PPC.entries(), ...)
diff --git a/test/tools/llvm-readobj/Inputs/trivial.ll b/test/tools/llvm-readobj/Inputs/trivial.ll
new file mode 100644
index 000000000000..2cd7ec89e246
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.ll
@@ -0,0 +1,19 @@
+; llc -mtriple=i386-pc-win32 trivial.ll -filetype=obj -o trivial-object-test.coff-i386
+; llc -mtriple=x86_64-pc-win32 trivial.ll -filetype=obj -o trivial-object-test.coff-x86-64
+; llc -mtriple=i386-linux-gnu trivial.ll -filetype=obj -o trivial-object-test.elf-i386 -relocation-model=pic
+; llc -mtriple=x86_64-linux-gnu trivial.ll -filetype=obj -o trivial-object-test.elf-x86-64 -relocation-model=pic
+; llc -mtriple=i386-apple-darwin10 trivial.ll -filetype=obj -o trivial-object-test.macho-i386 -relocation-model=pic
+; llc -mtriple=x86_64-apple-darwin10 trivial.ll -filetype=obj -o trivial-object-test.macho-x86-64 -relocation-model=pic
+
+@.str = private unnamed_addr constant [13 x i8] c"Hello World\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %call = tail call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) nounwind
+  tail call void bitcast (void (...)* @SomeOtherFunction to void ()*)() nounwind
+  ret i32 0
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+declare void @SomeOtherFunction(...)
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.coff-i386 b/test/tools/llvm-readobj/Inputs/trivial.obj.coff-i386
new file mode 100644
index 000000000000..282e5699a767
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/trivial.obj.coff-i386 differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.coff-x86-64 b/test/tools/llvm-readobj/Inputs/trivial.obj.coff-x86-64
new file mode 100644
index 000000000000..8a7060e61076
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/trivial.obj.coff-x86-64 differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.elf-i386 b/test/tools/llvm-readobj/Inputs/trivial.obj.elf-i386
new file mode 100644
index 000000000000..f85e40d6261f
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/trivial.obj.elf-i386 differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.elf-x86-64 b/test/tools/llvm-readobj/Inputs/trivial.obj.elf-x86-64
new file mode 100644
index 000000000000..95285c1f230c
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/trivial.obj.elf-x86-64 differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.macho-arm b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-arm
new file mode 100644
index 000000000000..117df9efc1c4
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-arm differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.macho-i386 b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-i386
new file mode 100644
index 000000000000..5048171ccb0c
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-i386 differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc
new file mode 100644
index 000000000000..dd2e956499e4
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc64 b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc64
new file mode 100644
index 000000000000..20ec8ef23f0f
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc64 differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.macho-x86-64 b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-x86-64
new file mode 100644
index 000000000000..bcdfc8aa6967
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-x86-64 differ
diff --git a/test/tools/llvm-readobj/file-headers.test b/test/tools/llvm-readobj/file-headers.test
new file mode 100644
index 000000000000..226eb9342334
--- /dev/null
+++ b/test/tools/llvm-readobj/file-headers.test
@@ -0,0 +1,100 @@
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.coff-i386 \
+RUN:   | FileCheck %s -check-prefix COFF32
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.coff-x86-64 \
+RUN:   | FileCheck %s -check-prefix COFF64
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.elf-i386 \
+RUN:   | FileCheck %s -check-prefix ELF32
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.elf-x86-64 \
+RUN:   | FileCheck %s -check-prefix ELF64
+
+COFF32:      File: {{(.*[/\\])?}}trivial.obj.coff-i386
+COFF32-NEXT: Format: COFF-i386
+COFF32-NEXT: Arch: i386
+COFF32-NEXT: AddressSize: 32bit
+COFF32-NEXT: ImageFileHeader {
+COFF32-NEXT:   Machine: IMAGE_FILE_MACHINE_I386 (0x14C)
+COFF32-NEXT:   SectionCount: 2
+COFF32-NEXT:   TimeDateStamp: 2013-03-20 17:56:46 (0x5149F85E)
+COFF32-NEXT:   PointerToSymbolTable: 0xA5
+COFF32-NEXT:   SymbolCount: 7
+COFF32-NEXT:   OptionalHeaderSize: 0
+COFF32-NEXT:   Characteristics [ (0x0)
+COFF32-NEXT:   ]
+COFF32-NEXT: }
+
+COFF64:      File: {{(.*[/\\])?}}trivial.obj.coff-x86-64
+COFF64-NEXT: Format: COFF-x86-64
+COFF64-NEXT: Arch: x86_64
+COFF64-NEXT: AddressSize: 64bit
+COFF64-NEXT: ImageFileHeader {
+COFF64-NEXT:   Machine: IMAGE_FILE_MACHINE_AMD64 (0x8664)
+COFF64-NEXT:   SectionCount: 2
+COFF64-NEXT:   TimeDateStamp: 2013-03-20 17:56:46 (0x5149F85E)
+COFF64-NEXT:   PointerToSymbolTable: 0xAB
+COFF64-NEXT:   SymbolCount: 7
+COFF64-NEXT:   OptionalHeaderSize: 0
+COFF64-NEXT:   Characteristics [ (0x0)
+COFF64-NEXT:   ]
+COFF64-NEXT: }
+
+ELF32:      File: {{(.*[/\\])?}}trivial.obj.elf-i386
+ELF32-NEXT: Format: ELF32-i386
+ELF32-NEXT: Arch: i386
+ELF32-NEXT: AddressSize: 32bit
+ELF32-NEXT: LoadName:
+ELF32-NEXT: ElfHeader {
+ELF32-NEXT:   Ident {
+ELF32-NEXT:     Magic: (7F 45 4C 46)
+ELF32-NEXT:     Class: 32-bit (0x1)
+ELF32-NEXT:     DataEncoding: LittleEndian (0x1)
+ELF32-NEXT:     FileVersion: 1
+ELF32-NEXT:     OS/ABI: GNU/Linux (0x3)
+ELF32-NEXT:     ABIVersion: 0
+ELF32-NEXT:     Unused: (00 00 00 00 00 00 00)
+ELF32-NEXT:   }
+ELF32-NEXT:   Type: Relocatable (0x1)
+ELF32-NEXT:   Machine: EM_386 (0x3)
+ELF32-NEXT:   Version: 1
+ELF32-NEXT:   Entry: 0x0
+ELF32-NEXT:   ProgramHeaderOffset: 0x0
+ELF32-NEXT:   SectionHeaderOffset: 0xC8
+ELF32-NEXT:   Flags [ (0x0)
+ELF32-NEXT:   ]
+ELF32-NEXT:   HeaderSize: 52
+ELF32-NEXT:   ProgramHeaderEntrySize: 0
+ELF32-NEXT:   ProgramHeaderCount: 0
+ELF32-NEXT:   SectionHeaderEntrySize: 40
+ELF32-NEXT:   SectionHeaderCount: 10
+ELF32-NEXT:   StringTableSectionIndex: 7
+ELF32-NEXT: }
+
+ELF64:      File: {{(.*[/\\])?}}trivial.obj.elf-x86-64
+ELF64-NEXT: Format: ELF64-x86-64
+ELF64-NEXT: Arch: x86_64
+ELF64-NEXT: AddressSize: 64bit
+ELF64-NEXT: LoadName:
+ELF64-NEXT: ElfHeader {
+ELF64-NEXT:   Ident {
+ELF64-NEXT:     Magic: (7F 45 4C 46)
+ELF64-NEXT:     Class: 64-bit (0x2)
+ELF64-NEXT:     DataEncoding: LittleEndian (0x1)
+ELF64-NEXT:     FileVersion: 1
+ELF64-NEXT:     OS/ABI: GNU/Linux (0x3)
+ELF64-NEXT:     ABIVersion: 0
+ELF64-NEXT:     Unused: (00 00 00 00 00 00 00)
+ELF64-NEXT:   }
+ELF64-NEXT:   Type: Relocatable (0x1)
+ELF64-NEXT:   Machine: EM_X86_64 (0x3E)
+ELF64-NEXT:   Version: 1
+ELF64-NEXT:   Entry: 0x0
+ELF64-NEXT:   ProgramHeaderOffset: 0x0
+ELF64-NEXT:   SectionHeaderOffset: 0xB8
+ELF64-NEXT:   Flags [ (0x0)
+ELF64-NEXT:   ]
+ELF64-NEXT:   HeaderSize: 64
+ELF64-NEXT:   ProgramHeaderEntrySize: 0
+ELF64-NEXT:   ProgramHeaderCount: 0
+ELF64-NEXT:   SectionHeaderEntrySize: 64
+ELF64-NEXT:   SectionHeaderCount: 10
+ELF64-NEXT:   StringTableSectionIndex: 7
+ELF64-NEXT: }
diff --git a/test/tools/llvm-readobj/lit.local.cfg b/test/tools/llvm-readobj/lit.local.cfg
new file mode 100644
index 000000000000..df9b335dd131
--- /dev/null
+++ b/test/tools/llvm-readobj/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.test']
diff --git a/test/tools/llvm-readobj/program-headers.test b/test/tools/llvm-readobj/program-headers.test
new file mode 100644
index 000000000000..2a574bb2e646
--- /dev/null
+++ b/test/tools/llvm-readobj/program-headers.test
@@ -0,0 +1,74 @@
+RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.elf-i386 \
+RUN:     | FileCheck %s -check-prefix ELF-I386
+RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.elf-x86-64 \
+RUN:     | FileCheck %s -check-prefix ELF-X86-64
+
+ELF-I386:      ProgramHeaders [
+ELF-I386-NEXT:   ProgramHeader {
+ELF-I386-NEXT:     Type: PT_LOAD (0x1)
+ELF-I386-NEXT:     Offset: 0x0
+ELF-I386-NEXT:     VirtualAddress: 0x8048000
+ELF-I386-NEXT:     PhysicalAddress: 0x8048000
+ELF-I386-NEXT:     FileSize: 308
+ELF-I386-NEXT:     MemSize: 308
+ELF-I386-NEXT:     Flags [ (0x5)
+ELF-I386-NEXT:       PF_R (0x4)
+ELF-I386-NEXT:       PF_X (0x1)
+ELF-I386-NEXT:     ]
+ELF-I386-NEXT:     Alignment: 4096
+ELF-I386-NEXT:   }
+ELF-I386-NEXT:   ProgramHeader {
+ELF-I386-NEXT:     Type: PT_GNU_STACK (0x6474E551)
+ELF-I386-NEXT:     Offset: 0x0
+ELF-I386-NEXT:     VirtualAddress: 0x0
+ELF-I386-NEXT:     PhysicalAddress: 0x0
+ELF-I386-NEXT:     FileSize: 0
+ELF-I386-NEXT:     MemSize: 0
+ELF-I386-NEXT:     Flags [ (0x6)
+ELF-I386-NEXT:       PF_R (0x4)
+ELF-I386-NEXT:       PF_W (0x2)
+ELF-I386-NEXT:     ]
+ELF-I386-NEXT:     Alignment: 4
+ELF-I386-NEXT:   }
+ELF-I386-NEXT: ]
+
+ELF-X86-64:      ProgramHeaders [
+ELF-X86-64-NEXT:   ProgramHeader {
+ELF-X86-64-NEXT:     Type: PT_LOAD (0x1)
+ELF-X86-64-NEXT:     Offset: 0x0
+ELF-X86-64-NEXT:     VirtualAddress: 0x400000
+ELF-X86-64-NEXT:     PhysicalAddress: 0x400000
+ELF-X86-64-NEXT:     FileSize: 312
+ELF-X86-64-NEXT:     MemSize: 312
+ELF-X86-64-NEXT:     Flags [ (0x5)
+ELF-X86-64-NEXT:       PF_R (0x4)
+ELF-X86-64-NEXT:       PF_X (0x1)
+ELF-X86-64-NEXT:     ]
+ELF-X86-64-NEXT:     Alignment: 2097152
+ELF-X86-64-NEXT:   }
+ELF-X86-64-NEXT:   ProgramHeader {
+ELF-X86-64-NEXT:     Type: PT_GNU_EH_FRAME (0x6474E550)
+ELF-X86-64-NEXT:     Offset: 0xF4
+ELF-X86-64-NEXT:     VirtualAddress: 0x4000F4
+ELF-X86-64-NEXT:     PhysicalAddress: 0x4000F4
+ELF-X86-64-NEXT:     FileSize: 20
+ELF-X86-64-NEXT:     MemSize: 20
+ELF-X86-64-NEXT:     Flags [ (0x4)
+ELF-X86-64-NEXT:       PF_R (0x4)
+ELF-X86-64-NEXT:     ]
+ELF-X86-64-NEXT:     Alignment: 4
+ELF-X86-64-NEXT:   }
+ELF-X86-64-NEXT:   ProgramHeader {
+ELF-X86-64-NEXT:     Type: PT_GNU_STACK (0x6474E551)
+ELF-X86-64-NEXT:     Offset: 0x0
+ELF-X86-64-NEXT:     VirtualAddress: 0x0
+ELF-X86-64-NEXT:     PhysicalAddress: 0x0
+ELF-X86-64-NEXT:     FileSize: 0
+ELF-X86-64-NEXT:     MemSize: 0
+ELF-X86-64-NEXT:     Flags [ (0x6)
+ELF-X86-64-NEXT:       PF_R (0x4)
+ELF-X86-64-NEXT:       PF_W (0x2)
+ELF-X86-64-NEXT:     ]
+ELF-X86-64-NEXT:     Alignment: 8
+ELF-X86-64-NEXT:   }
+ELF-X86-64-NEXT: ]
diff --git a/test/tools/llvm-readobj/reloc-types.test b/test/tools/llvm-readobj/reloc-types.test
new file mode 100644
index 000000000000..08603bc7fb55
--- /dev/null
+++ b/test/tools/llvm-readobj/reloc-types.test
@@ -0,0 +1,663 @@
+// Test that libObject and subsequently llvm-readobj shows proper relocation type
+// names and values.
+
+// Todo: ELF-PPC, ELF-HEXAGON
+
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-i386     | FileCheck %s -check-prefix ELF-32
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-x86_64   | FileCheck %s -check-prefix ELF-64
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-aarch64  | FileCheck %s -check-prefix ELF-AARCH64
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-arm      | FileCheck %s -check-prefix ELF-ARM
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-mips     | FileCheck %s -check-prefix ELF-MIPS
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-mips64el | FileCheck %s -check-prefix ELF-MIPS64EL
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-ppc64    | FileCheck %s -check-prefix ELF-PPC64
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.coff-i386    | FileCheck %s -check-prefix COFF-32
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.coff-x86_64  | FileCheck %s -check-prefix COFF-64
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.macho-arm    | FileCheck %s -check-prefix MACHO-ARM
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.macho-i386   | FileCheck %s -check-prefix MACHO-32
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.macho-x86_64 | FileCheck %s -check-prefix MACHO-64
+
+
+ELF-32: Type: R_386_NONE (0)
+ELF-32: Type: R_386_32 (1)
+ELF-32: Type: R_386_PC32 (2)
+ELF-32: Type: R_386_GOT32 (3)
+ELF-32: Type: R_386_PLT32 (4)
+ELF-32: Type: R_386_COPY (5)
+ELF-32: Type: R_386_GLOB_DAT (6)
+ELF-32: Type: R_386_JUMP_SLOT (7)
+ELF-32: Type: R_386_RELATIVE (8)
+ELF-32: Type: R_386_GOTOFF (9)
+ELF-32: Type: R_386_GOTPC (10)
+ELF-32: Type: R_386_32PLT (11)
+ELF-32: Type: R_386_TLS_TPOFF (14)
+ELF-32: Type: R_386_TLS_IE (15)
+ELF-32: Type: R_386_TLS_GOTIE (16)
+ELF-32: Type: R_386_TLS_LE (17)
+ELF-32: Type: R_386_TLS_GD (18)
+ELF-32: Type: R_386_TLS_LDM (19)
+ELF-32: Type: R_386_16 (20)
+ELF-32: Type: R_386_PC16 (21)
+ELF-32: Type: R_386_8 (22)
+ELF-32: Type: R_386_PC8 (23)
+ELF-32: Type: R_386_TLS_GD_32 (24)
+ELF-32: Type: R_386_TLS_GD_PUSH (25)
+ELF-32: Type: R_386_TLS_GD_CALL (26)
+ELF-32: Type: R_386_TLS_GD_POP (27)
+ELF-32: Type: R_386_TLS_LDM_32 (28)
+ELF-32: Type: R_386_TLS_LDM_PUSH (29)
+ELF-32: Type: R_386_TLS_LDM_CALL (30)
+ELF-32: Type: R_386_TLS_LDM_POP (31)
+ELF-32: Type: R_386_TLS_LDO_32 (32)
+ELF-32: Type: R_386_TLS_IE_32 (33)
+ELF-32: Type: R_386_TLS_LE_32 (34)
+ELF-32: Type: R_386_TLS_DTPMOD32 (35)
+ELF-32: Type: R_386_TLS_DTPOFF32 (36)
+ELF-32: Type: R_386_TLS_TPOFF32 (37)
+ELF-32: Type: R_386_TLS_GOTDESC (39)
+ELF-32: Type: R_386_TLS_DESC_CALL (40)
+ELF-32: Type: R_386_TLS_DESC (41)
+ELF-32: Type: R_386_IRELATIVE (42)
+_LF-32: Type: R_386_NUM (43)
+
+ELF-64: Type: R_X86_64_NONE (0)
+ELF-64: Type: R_X86_64_64 (1)
+ELF-64: Type: R_X86_64_PC32 (2)
+ELF-64: Type: R_X86_64_GOT32 (3)
+ELF-64: Type: R_X86_64_PLT32 (4)
+ELF-64: Type: R_X86_64_COPY (5)
+ELF-64: Type: R_X86_64_GLOB_DAT (6)
+ELF-64: Type: R_X86_64_JUMP_SLOT (7)
+ELF-64: Type: R_X86_64_RELATIVE (8)
+ELF-64: Type: R_X86_64_GOTPCREL (9)
+ELF-64: Type: R_X86_64_32 (10)
+ELF-64: Type: R_X86_64_32S (11)
+ELF-64: Type: R_X86_64_16 (12)
+ELF-64: Type: R_X86_64_PC16 (13)
+ELF-64: Type: R_X86_64_8 (14)
+ELF-64: Type: R_X86_64_PC8 (15)
+ELF-64: Type: R_X86_64_DTPMOD64 (16)
+ELF-64: Type: R_X86_64_DTPOFF64 (17)
+ELF-64: Type: R_X86_64_TPOFF64 (18)
+ELF-64: Type: R_X86_64_TLSGD (19)
+ELF-64: Type: R_X86_64_TLSLD (20)
+ELF-64: Type: R_X86_64_DTPOFF32 (21)
+ELF-64: Type: R_X86_64_GOTTPOFF (22)
+ELF-64: Type: R_X86_64_TPOFF32 (23)
+ELF-64: Type: R_X86_64_PC64 (24)
+ELF-64: Type: R_X86_64_GOTOFF64 (25)
+ELF-64: Type: R_X86_64_GOTPC32 (26)
+ELF-64: Type: R_X86_64_GOT64 (27)
+ELF-64: Type: R_X86_64_GOTPCREL64 (28)
+ELF-64: Type: R_X86_64_GOTPC64 (29)
+ELF-64: Type: R_X86_64_GOTPLT64 (30)
+ELF-64: Type: R_X86_64_PLTOFF64 (31)
+ELF-64: Type: R_X86_64_SIZE32 (32)
+ELF-64: Type: R_X86_64_SIZE64 (33)
+ELF-64: Type: R_X86_64_GOTPC32_TLSDESC (34)
+ELF-64: Type: R_X86_64_TLSDESC_CALL (35)
+ELF-64: Type: R_X86_64_TLSDESC (36)
+ELF-64: Type: R_X86_64_IRELATIVE (37)
+
+ELF-PPC: Type: R_PPC_NONE (0)
+ELF-PPC: Type: R_PPC_ADDR32 (1)
+ELF-PPC: Type: R_PPC_ADDR24 (2)
+ELF-PPC: Type: R_PPC_ADDR16 (3)
+ELF-PPC: Type: R_PPC_ADDR16_LO (4)
+ELF-PPC: Type: R_PPC_ADDR16_HI (5)
+ELF-PPC: Type: R_PPC_ADDR16_HA (6)
+ELF-PPC: Type: R_PPC_ADDR14 (7)
+ELF-PPC: Type: R_PPC_ADDR14_BRTAKEN (8)
+ELF-PPC: Type: R_PPC_ADDR14_BRNTAKEN (9)
+ELF-PPC: Type: R_PPC_REL24 (10)
+ELF-PPC: Type: R_PPC_REL14 (11)
+ELF-PPC: Type: R_PPC_REL14_BRTAKEN (12)
+ELF-PPC: Type: R_PPC_REL14_BRNTAKEN (13)
+ELF-PPC: Type: R_PPC_REL32 (26)
+ELF-PPC: Type: R_PPC_TPREL16_LO (70)
+ELF-PPC: Type: R_PPC_TPREL16_HA (72)
+
+ELF-PPC64: Type: R_PPC64_NONE (0)
+ELF-PPC64: Type: R_PPC64_ADDR32 (1)
+ELF-PPC64: Type: R_PPC64_ADDR16_LO (4)
+ELF-PPC64: Type: R_PPC64_ADDR16_HI (5)
+ELF-PPC64: Type: R_PPC64_ADDR14 (7)
+ELF-PPC64: Type: R_PPC64_REL24 (10)
+ELF-PPC64: Type: R_PPC64_REL32 (26)
+ELF-PPC64: Type: R_PPC64_ADDR64 (38)
+ELF-PPC64: Type: R_PPC64_ADDR16_HIGHER (39)
+ELF-PPC64: Type: R_PPC64_ADDR16_HIGHEST (41)
+ELF-PPC64: Type: R_PPC64_REL64 (44)
+ELF-PPC64: Type: R_PPC64_TOC16 (47)
+ELF-PPC64: Type: R_PPC64_TOC16_LO (48)
+ELF-PPC64: Type: R_PPC64_TOC16_HA (50)
+ELF-PPC64: Type: R_PPC64_TOC (51)
+ELF-PPC64: Type: R_PPC64_ADDR16_DS (56)
+ELF-PPC64: Type: R_PPC64_ADDR16_LO_DS (57)
+ELF-PPC64: Type: R_PPC64_TOC16_DS (63)
+ELF-PPC64: Type: R_PPC64_TOC16_LO_DS (64)
+ELF-PPC64: Type: R_PPC64_TLS (67)
+ELF-PPC64: Type: R_PPC64_TPREL16_LO (70)
+ELF-PPC64: Type: R_PPC64_TPREL16_HA (72)
+ELF-PPC64: Type: R_PPC64_DTPREL16_LO (75)
+ELF-PPC64: Type: R_PPC64_DTPREL16_HA (77)
+ELF-PPC64: Type: R_PPC64_GOT_TLSGD16_LO (80)
+ELF-PPC64: Type: R_PPC64_GOT_TLSGD16_HA (82)
+ELF-PPC64: Type: R_PPC64_GOT_TLSLD16_LO (84)
+ELF-PPC64: Type: R_PPC64_GOT_TLSLD16_HA (86)
+ELF-PPC64: Type: R_PPC64_GOT_TPREL16_LO_DS (88)
+ELF-PPC64: Type: R_PPC64_GOT_TPREL16_HA (90)
+ELF-PPC64: Type: R_PPC64_TLSGD (107)
+ELF-PPC64: Type: R_PPC64_TLSLD (108)
+
+ELF-AARCH64: Type: R_AARCH64_NONE (256)
+ELF-AARCH64: Type: R_AARCH64_ABS64 (257)
+ELF-AARCH64: Type: R_AARCH64_ABS32 (258)
+ELF-AARCH64: Type: R_AARCH64_ABS16 (259)
+ELF-AARCH64: Type: R_AARCH64_PREL64 (260)
+ELF-AARCH64: Type: R_AARCH64_PREL32 (261)
+ELF-AARCH64: Type: R_AARCH64_PREL16 (262)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G0 (263)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G0_NC (264)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G1 (265)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G1_NC (266)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G2 (267)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G2_NC (268)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G3 (269)
+ELF-AARCH64: Type: R_AARCH64_MOVW_SABS_G0 (270)
+ELF-AARCH64: Type: R_AARCH64_MOVW_SABS_G1 (271)
+ELF-AARCH64: Type: R_AARCH64_MOVW_SABS_G2 (272)
+ELF-AARCH64: Type: R_AARCH64_LD_PREL_LO19 (273)
+ELF-AARCH64: Type: R_AARCH64_ADR_PREL_LO21 (274)
+ELF-AARCH64: Type: R_AARCH64_ADR_PREL_PG_HI21 (275)
+ELF-AARCH64: Type: R_AARCH64_ADD_ABS_LO12_NC (277)
+ELF-AARCH64: Type: R_AARCH64_LDST8_ABS_LO12_NC (278)
+ELF-AARCH64: Type: R_AARCH64_TSTBR14 (279)
+ELF-AARCH64: Type: R_AARCH64_CONDBR19 (280)
+ELF-AARCH64: Type: R_AARCH64_JUMP26 (282)
+ELF-AARCH64: Type: R_AARCH64_CALL26 (283)
+ELF-AARCH64: Type: R_AARCH64_LDST16_ABS_LO12_NC (284)
+ELF-AARCH64: Type: R_AARCH64_LDST32_ABS_LO12_NC (285)
+ELF-AARCH64: Type: R_AARCH64_LDST64_ABS_LO12_NC (286)
+ELF-AARCH64: Type: R_AARCH64_LDST128_ABS_LO12_NC (299)
+ELF-AARCH64: Type: R_AARCH64_ADR_GOT_PAGE (311)
+ELF-AARCH64: Type: R_AARCH64_LD64_GOT_LO12_NC (312)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G2 (523)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G1 (524)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC (525)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G0 (526)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC (527)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_ADD_DTPREL_HI12 (528)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_ADD_DTPREL_LO12 (529)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC (530)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST8_DTPREL_LO12 (531)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC (532)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST16_DTPREL_LO12 (533)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC (534)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST32_DTPREL_LO12 (535)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC (536)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST64_DTPREL_LO12 (537)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC (538)
+ELF-AARCH64: Type: R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 (539)
+ELF-AARCH64: Type: R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC (540)
+ELF-AARCH64: Type: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 (541)
+ELF-AARCH64: Type: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC (542)
+ELF-AARCH64: Type: R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 (543)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G2 (544)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G1 (545)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G1_NC (546)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G0 (547)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC (548)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_ADD_TPREL_HI12 (549)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_ADD_TPREL_LO12 (550)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC (551)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST8_TPREL_LO12 (552)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC (553)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST16_TPREL_LO12 (554)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC (555)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST32_TPREL_LO12 (556)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC (557)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST64_TPREL_LO12 (558)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC (559)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADR_PAGE (562)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_LD64_LO12_NC (563)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADD_LO12_NC (564)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_CALL (569)
+
+ELF-ARM: Type: R_ARM_NONE (0)
+ELF-ARM: Type: R_ARM_PC24 (1)
+ELF-ARM: Type: R_ARM_ABS32 (2)
+ELF-ARM: Type: R_ARM_REL32 (3)
+ELF-ARM: Type: R_ARM_LDR_PC_G0 (4)
+ELF-ARM: Type: R_ARM_ABS16 (5)
+ELF-ARM: Type: R_ARM_ABS12 (6)
+ELF-ARM: Type: R_ARM_THM_ABS5 (7)
+ELF-ARM: Type: R_ARM_ABS8 (8)
+ELF-ARM: Type: R_ARM_SBREL32 (9)
+ELF-ARM: Type: R_ARM_THM_CALL (10)
+ELF-ARM: Type: R_ARM_THM_PC8 (11)
+ELF-ARM: Type: R_ARM_BREL_ADJ (12)
+ELF-ARM: Type: R_ARM_TLS_DESC (13)
+ELF-ARM: Type: R_ARM_THM_SWI8 (14)
+ELF-ARM: Type: R_ARM_XPC25 (15)
+ELF-ARM: Type: R_ARM_THM_XPC22 (16)
+ELF-ARM: Type: R_ARM_TLS_DTPMOD32 (17)
+ELF-ARM: Type: R_ARM_TLS_DTPOFF32 (18)
+ELF-ARM: Type: R_ARM_TLS_TPOFF32 (19)
+ELF-ARM: Type: R_ARM_COPY (20)
+ELF-ARM: Type: R_ARM_GLOB_DAT (21)
+ELF-ARM: Type: R_ARM_JUMP_SLOT (22)
+ELF-ARM: Type: R_ARM_RELATIVE (23)
+ELF-ARM: Type: R_ARM_GOTOFF32 (24)
+ELF-ARM: Type: R_ARM_BASE_PREL (25)
+ELF-ARM: Type: R_ARM_GOT_BREL (26)
+ELF-ARM: Type: R_ARM_PLT32 (27)
+ELF-ARM: Type: R_ARM_CALL (28)
+ELF-ARM: Type: R_ARM_JUMP24 (29)
+ELF-ARM: Type: R_ARM_THM_JUMP24 (30)
+ELF-ARM: Type: R_ARM_BASE_ABS (31)
+ELF-ARM: Type: R_ARM_ALU_PCREL_7_0 (32)
+ELF-ARM: Type: R_ARM_ALU_PCREL_15_8 (33)
+ELF-ARM: Type: R_ARM_ALU_PCREL_23_15 (34)
+ELF-ARM: Type: R_ARM_LDR_SBREL_11_0_NC (35)
+ELF-ARM: Type: R_ARM_ALU_SBREL_19_12_NC (36)
+ELF-ARM: Type: R_ARM_ALU_SBREL_27_20_CK (37)
+ELF-ARM: Type: R_ARM_TARGET1 (38)
+ELF-ARM: Type: R_ARM_SBREL31 (39)
+ELF-ARM: Type: R_ARM_V4BX (40)
+ELF-ARM: Type: R_ARM_TARGET2 (41)
+ELF-ARM: Type: R_ARM_PREL31 (42)
+ELF-ARM: Type: R_ARM_MOVW_ABS_NC (43)
+ELF-ARM: Type: R_ARM_MOVT_ABS (44)
+ELF-ARM: Type: R_ARM_MOVW_PREL_NC (45)
+ELF-ARM: Type: R_ARM_MOVT_PREL (46)
+ELF-ARM: Type: R_ARM_THM_MOVW_ABS_NC (47)
+ELF-ARM: Type: R_ARM_THM_MOVT_ABS (48)
+ELF-ARM: Type: R_ARM_THM_MOVW_PREL_NC (49)
+ELF-ARM: Type: R_ARM_THM_MOVT_PREL (50)
+ELF-ARM: Type: R_ARM_THM_JUMP19 (51)
+ELF-ARM: Type: R_ARM_THM_JUMP6 (52)
+ELF-ARM: Type: R_ARM_THM_ALU_PREL_11_0 (53)
+ELF-ARM: Type: R_ARM_THM_PC12 (54)
+ELF-ARM: Type: R_ARM_ABS32_NOI (55)
+ELF-ARM: Type: R_ARM_REL32_NOI (56)
+ELF-ARM: Type: R_ARM_ALU_PC_G0_NC (57)
+ELF-ARM: Type: R_ARM_ALU_PC_G0 (58)
+ELF-ARM: Type: R_ARM_ALU_PC_G1_NC (59)
+ELF-ARM: Type: R_ARM_ALU_PC_G1 (60)
+ELF-ARM: Type: R_ARM_ALU_PC_G2 (61)
+ELF-ARM: Type: R_ARM_LDR_PC_G1 (62)
+ELF-ARM: Type: R_ARM_LDR_PC_G2 (63)
+ELF-ARM: Type: R_ARM_LDRS_PC_G0 (64)
+ELF-ARM: Type: R_ARM_LDRS_PC_G1 (65)
+ELF-ARM: Type: R_ARM_LDRS_PC_G2 (66)
+ELF-ARM: Type: R_ARM_LDC_PC_G0 (67)
+ELF-ARM: Type: R_ARM_LDC_PC_G1 (68)
+ELF-ARM: Type: R_ARM_LDC_PC_G2 (69)
+ELF-ARM: Type: R_ARM_ALU_SB_G0_NC (70)
+ELF-ARM: Type: R_ARM_ALU_SB_G0 (71)
+ELF-ARM: Type: R_ARM_ALU_SB_G1_NC (72)
+ELF-ARM: Type: R_ARM_ALU_SB_G1 (73)
+ELF-ARM: Type: R_ARM_ALU_SB_G2 (74)
+ELF-ARM: Type: R_ARM_LDR_SB_G0 (75)
+ELF-ARM: Type: R_ARM_LDR_SB_G1 (76)
+ELF-ARM: Type: R_ARM_LDR_SB_G2 (77)
+ELF-ARM: Type: R_ARM_LDRS_SB_G0 (78)
+ELF-ARM: Type: R_ARM_LDRS_SB_G1 (79)
+ELF-ARM: Type: R_ARM_LDRS_SB_G2 (80)
+ELF-ARM: Type: R_ARM_LDC_SB_G0 (81)
+ELF-ARM: Type: R_ARM_LDC_SB_G1 (82)
+ELF-ARM: Type: R_ARM_LDC_SB_G2 (83)
+ELF-ARM: Type: R_ARM_MOVW_BREL_NC (84)
+ELF-ARM: Type: R_ARM_MOVT_BREL (85)
+ELF-ARM: Type: R_ARM_MOVW_BREL (86)
+ELF-ARM: Type: R_ARM_THM_MOVW_BREL_NC (87)
+ELF-ARM: Type: R_ARM_THM_MOVT_BREL (88)
+ELF-ARM: Type: R_ARM_THM_MOVW_BREL (89)
+ELF-ARM: Type: R_ARM_TLS_GOTDESC (90)
+ELF-ARM: Type: R_ARM_TLS_CALL (91)
+ELF-ARM: Type: R_ARM_TLS_DESCSEQ (92)
+ELF-ARM: Type: R_ARM_THM_TLS_CALL (93)
+ELF-ARM: Type: R_ARM_PLT32_ABS (94)
+ELF-ARM: Type: R_ARM_GOT_ABS (95)
+ELF-ARM: Type: R_ARM_GOT_PREL (96)
+ELF-ARM: Type: R_ARM_GOT_BREL12 (97)
+ELF-ARM: Type: R_ARM_GOTOFF12 (98)
+ELF-ARM: Type: R_ARM_GOTRELAX (99)
+ELF-ARM: Type: R_ARM_GNU_VTENTRY (100)
+ELF-ARM: Type: R_ARM_GNU_VTINHERIT (101)
+ELF-ARM: Type: R_ARM_THM_JUMP11 (102)
+ELF-ARM: Type: R_ARM_THM_JUMP8 (103)
+ELF-ARM: Type: R_ARM_TLS_GD32 (104)
+ELF-ARM: Type: R_ARM_TLS_LDM32 (105)
+ELF-ARM: Type: R_ARM_TLS_LDO32 (106)
+ELF-ARM: Type: R_ARM_TLS_IE32 (107)
+ELF-ARM: Type: R_ARM_TLS_LE32 (108)
+ELF-ARM: Type: R_ARM_TLS_LDO12 (109)
+ELF-ARM: Type: R_ARM_TLS_LE12 (110)
+ELF-ARM: Type: R_ARM_TLS_IE12GP (111)
+ELF-ARM: Type: R_ARM_PRIVATE_0 (112)
+ELF-ARM: Type: R_ARM_PRIVATE_1 (113)
+ELF-ARM: Type: R_ARM_PRIVATE_2 (114)
+ELF-ARM: Type: R_ARM_PRIVATE_3 (115)
+ELF-ARM: Type: R_ARM_PRIVATE_4 (116)
+ELF-ARM: Type: R_ARM_PRIVATE_5 (117)
+ELF-ARM: Type: R_ARM_PRIVATE_6 (118)
+ELF-ARM: Type: R_ARM_PRIVATE_7 (119)
+ELF-ARM: Type: R_ARM_PRIVATE_8 (120)
+ELF-ARM: Type: R_ARM_PRIVATE_9 (121)
+ELF-ARM: Type: R_ARM_PRIVATE_10 (122)
+ELF-ARM: Type: R_ARM_PRIVATE_11 (123)
+ELF-ARM: Type: R_ARM_PRIVATE_12 (124)
+ELF-ARM: Type: R_ARM_PRIVATE_13 (125)
+ELF-ARM: Type: R_ARM_PRIVATE_14 (126)
+ELF-ARM: Type: R_ARM_PRIVATE_15 (127)
+ELF-ARM: Type: R_ARM_ME_TOO (128)
+ELF-ARM: Type: R_ARM_THM_TLS_DESCSEQ16 (129)
+ELF-ARM: Type: R_ARM_THM_TLS_DESCSEQ32 (130)
+
+ELF-MIPS: Type: R_MIPS_NONE (0)
+ELF-MIPS: Type: R_MIPS_16 (1)
+ELF-MIPS: Type: R_MIPS_32 (2)
+ELF-MIPS: Type: R_MIPS_REL32 (3)
+ELF-MIPS: Type: R_MIPS_26 (4)
+ELF-MIPS: Type: R_MIPS_HI16 (5)
+ELF-MIPS: Type: R_MIPS_LO16 (6)
+ELF-MIPS: Type: R_MIPS_GPREL16 (7)
+ELF-MIPS: Type: R_MIPS_LITERAL (8)
+ELF-MIPS: Type: R_MIPS_GOT16 (9)
+ELF-MIPS: Type: R_MIPS_PC16 (10)
+ELF-MIPS: Type: R_MIPS_CALL16 (11)
+ELF-MIPS: Type: R_MIPS_GPREL32 (12)
+ELF-MIPS: Type: R_MIPS_SHIFT5 (16)
+ELF-MIPS: Type: R_MIPS_SHIFT6 (17)
+ELF-MIPS: Type: R_MIPS_64 (18)
+ELF-MIPS: Type: R_MIPS_GOT_DISP (19)
+ELF-MIPS: Type: R_MIPS_GOT_PAGE (20)
+ELF-MIPS: Type: R_MIPS_GOT_OFST (21)
+ELF-MIPS: Type: R_MIPS_GOT_HI16 (22)
+ELF-MIPS: Type: R_MIPS_GOT_LO16 (23)
+ELF-MIPS: Type: R_MIPS_SUB (24)
+ELF-MIPS: Type: R_MIPS_INSERT_A (25)
+ELF-MIPS: Type: R_MIPS_INSERT_B (26)
+ELF-MIPS: Type: R_MIPS_DELETE (27)
+ELF-MIPS: Type: R_MIPS_HIGHER (28)
+ELF-MIPS: Type: R_MIPS_HIGHEST (29)
+ELF-MIPS: Type: R_MIPS_CALL_HI16 (30)
+ELF-MIPS: Type: R_MIPS_CALL_LO16 (31)
+ELF-MIPS: Type: R_MIPS_SCN_DISP (32)
+ELF-MIPS: Type: R_MIPS_REL16 (33)
+ELF-MIPS: Type: R_MIPS_ADD_IMMEDIATE (34)
+ELF-MIPS: Type: R_MIPS_PJUMP (35)
+ELF-MIPS: Type: R_MIPS_RELGOT (36)
+ELF-MIPS: Type: R_MIPS_JALR (37)
+ELF-MIPS: Type: R_MIPS_TLS_DTPMOD32 (38)
+ELF-MIPS: Type: R_MIPS_TLS_DTPREL32 (39)
+ELF-MIPS: Type: R_MIPS_TLS_DTPMOD64 (40)
+ELF-MIPS: Type: R_MIPS_TLS_DTPREL64 (41)
+ELF-MIPS: Type: R_MIPS_TLS_GD (42)
+ELF-MIPS: Type: R_MIPS_TLS_LDM (43)
+ELF-MIPS: Type: R_MIPS_TLS_DTPREL_HI16 (44)
+ELF-MIPS: Type: R_MIPS_TLS_DTPREL_LO16 (45)
+ELF-MIPS: Type: R_MIPS_TLS_GOTTPREL (46)
+ELF-MIPS: Type: R_MIPS_TLS_TPREL32 (47)
+ELF-MIPS: Type: R_MIPS_TLS_TPREL64 (48)
+ELF-MIPS: Type: R_MIPS_TLS_TPREL_HI16 (49)
+ELF-MIPS: Type: R_MIPS_TLS_TPREL_LO16 (50)
+ELF-MIPS: Type: R_MIPS_GLOB_DAT (51)
+ELF-MIPS: Type: R_MIPS_COPY (126)
+ELF-MIPS: Type: R_MIPS_JUMP_SLOT (127)
+ELF-MIPS: Type: R_MIPS_NUM (218)
+ELF-MIPS64EL: Type: R_MIPS_NONE/R_MIPS_NONE/R_MIPS_NONE (0)
+ELF-MIPS64EL: Type: R_MIPS_16/R_MIPS_16/R_MIPS_16 (65793)
+ELF-MIPS64EL: Type: R_MIPS_32/R_MIPS_32/R_MIPS_32 (131586)
+ELF-MIPS64EL: Type: R_MIPS_REL32/R_MIPS_REL32/R_MIPS_REL32 (197379)
+ELF-MIPS64EL: Type: R_MIPS_26/R_MIPS_26/R_MIPS_26 (263172)
+ELF-MIPS64EL: Type: R_MIPS_HI16/R_MIPS_HI16/R_MIPS_HI16 (328965)
+ELF-MIPS64EL: Type: R_MIPS_LO16/R_MIPS_LO16/R_MIPS_LO16 (394758)
+ELF-MIPS64EL: Type: R_MIPS_GPREL16/R_MIPS_GPREL16/R_MIPS_GPREL16 (460551)
+ELF-MIPS64EL: Type: R_MIPS_LITERAL/R_MIPS_LITERAL/R_MIPS_LITERAL (526344)
+ELF-MIPS64EL: Type: R_MIPS_GOT16/R_MIPS_GOT16/R_MIPS_GOT16 (592137)
+ELF-MIPS64EL: Type: R_MIPS_PC16/R_MIPS_PC16/R_MIPS_PC16 (657930)
+ELF-MIPS64EL: Type: R_MIPS_CALL16/R_MIPS_CALL16/R_MIPS_CALL16 (723723)
+ELF-MIPS64EL: Type: R_MIPS_GPREL32/R_MIPS_GPREL32/R_MIPS_GPREL32 (789516)
+ELF-MIPS64EL: Type: R_MIPS_SHIFT5/R_MIPS_SHIFT5/R_MIPS_SHIFT5 (1052688)
+ELF-MIPS64EL: Type: R_MIPS_SHIFT6/R_MIPS_SHIFT6/R_MIPS_SHIFT6 (1118481)
+ELF-MIPS64EL: Type: R_MIPS_64/R_MIPS_64/R_MIPS_64 (1184274)
+ELF-MIPS64EL: Type: R_MIPS_GOT_DISP/R_MIPS_GOT_DISP/R_MIPS_GOT_DISP (1250067)
+ELF-MIPS64EL: Type: R_MIPS_GOT_PAGE/R_MIPS_GOT_PAGE/R_MIPS_GOT_PAGE (1315860)
+ELF-MIPS64EL: Type: R_MIPS_GOT_OFST/R_MIPS_GOT_OFST/R_MIPS_GOT_OFST (1381653)
+ELF-MIPS64EL: Type: R_MIPS_GOT_HI16/R_MIPS_GOT_HI16/R_MIPS_GOT_HI16 (1447446)
+ELF-MIPS64EL: Type: R_MIPS_GOT_LO16/R_MIPS_GOT_LO16/R_MIPS_GOT_LO16 (1513239)
+ELF-MIPS64EL: Type: R_MIPS_SUB/R_MIPS_SUB/R_MIPS_SUB (1579032)
+ELF-MIPS64EL: Type: R_MIPS_INSERT_A/R_MIPS_INSERT_A/R_MIPS_INSERT_A (1644825)
+ELF-MIPS64EL: Type: R_MIPS_INSERT_B/R_MIPS_INSERT_B/R_MIPS_INSERT_B (1710618)
+ELF-MIPS64EL: Type: R_MIPS_DELETE/R_MIPS_DELETE/R_MIPS_DELETE (1776411)
+ELF-MIPS64EL: Type: R_MIPS_HIGHER/R_MIPS_HIGHER/R_MIPS_HIGHER (1842204)
+ELF-MIPS64EL: Type: R_MIPS_HIGHEST/R_MIPS_HIGHEST/R_MIPS_HIGHEST (1907997)
+ELF-MIPS64EL: Type: R_MIPS_CALL_HI16/R_MIPS_CALL_HI16/R_MIPS_CALL_HI16 (1973790)
+ELF-MIPS64EL: Type: R_MIPS_CALL_LO16/R_MIPS_CALL_LO16/R_MIPS_CALL_LO16 (2039583)
+ELF-MIPS64EL: Type: R_MIPS_SCN_DISP/R_MIPS_SCN_DISP/R_MIPS_SCN_DISP (2105376)
+ELF-MIPS64EL: Type: R_MIPS_REL16/R_MIPS_REL16/R_MIPS_REL16 (2171169)
+ELF-MIPS64EL: Type: R_MIPS_ADD_IMMEDIATE/R_MIPS_ADD_IMMEDIATE/R_MIPS_ADD_IMMEDIATE (2236962)
+ELF-MIPS64EL: Type: R_MIPS_PJUMP/R_MIPS_PJUMP/R_MIPS_PJUMP (2302755)
+ELF-MIPS64EL: Type: R_MIPS_RELGOT/R_MIPS_RELGOT/R_MIPS_RELGOT (2368548)
+ELF-MIPS64EL: Type: R_MIPS_JALR/R_MIPS_JALR/R_MIPS_JALR (2434341)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPMOD32/R_MIPS_TLS_DTPMOD32/R_MIPS_TLS_DTPMOD32 (2500134)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL32/R_MIPS_TLS_DTPREL32/R_MIPS_TLS_DTPREL32 (2565927)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPMOD64/R_MIPS_TLS_DTPMOD64/R_MIPS_TLS_DTPMOD64 (2631720)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL64/R_MIPS_TLS_DTPREL64/R_MIPS_TLS_DTPREL64 (2697513)
+ELF-MIPS64EL: Type: R_MIPS_TLS_GD/R_MIPS_TLS_GD/R_MIPS_TLS_GD (2763306)
+ELF-MIPS64EL: Type: R_MIPS_TLS_LDM/R_MIPS_TLS_LDM/R_MIPS_TLS_LDM (2829099)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL_HI16/R_MIPS_TLS_DTPREL_HI16/R_MIPS_TLS_DTPREL_HI16 (2894892)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL_LO16/R_MIPS_TLS_DTPREL_LO16/R_MIPS_TLS_DTPREL_LO16 (2960685)
+ELF-MIPS64EL: Type: R_MIPS_TLS_GOTTPREL/R_MIPS_TLS_GOTTPREL/R_MIPS_TLS_GOTTPREL (3026478)
+ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL32/R_MIPS_TLS_TPREL32/R_MIPS_TLS_TPREL32 (3092271)
+ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL64/R_MIPS_TLS_TPREL64/R_MIPS_TLS_TPREL64 (3158064)
+ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL_HI16/R_MIPS_TLS_TPREL_HI16/R_MIPS_TLS_TPREL_HI16 (3223857)
+ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL_LO16/R_MIPS_TLS_TPREL_LO16/R_MIPS_TLS_TPREL_LO16 (3289650)
+ELF-MIPS64EL: Type: R_MIPS_GLOB_DAT/R_MIPS_GLOB_DAT/R_MIPS_GLOB_DAT (3355443)
+ELF-MIPS64EL: Type: R_MIPS_COPY/R_MIPS_COPY/R_MIPS_COPY (8289918)
+ELF-MIPS64EL: Type: R_MIPS_JUMP_SLOT/R_MIPS_JUMP_SLOT/R_MIPS_JUMP_SLOT (8355711)
+ELF-MIPS64EL: Type: R_MIPS_NUM/R_MIPS_NUM/R_MIPS_NUM (14342874)
+
+ELF-MBLAZE: Type: R_MICROBLAZE_NONE (0)
+ELF-MBLAZE: Type: R_MICROBLAZE_32 (1)
+ELF-MBLAZE: Type: R_MICROBLAZE_32_PCREL (2)
+ELF-MBLAZE: Type: R_MICROBLAZE_64_PCREL (3)
+ELF-MBLAZE: Type: R_MICROBLAZE_32_PCREL_LO (4)
+ELF-MBLAZE: Type: R_MICROBLAZE_64 (5)
+ELF-MBLAZE: Type: R_MICROBLAZE_32_LO (6)
+ELF-MBLAZE: Type: R_MICROBLAZE_SRO32 (7)
+ELF-MBLAZE: Type: R_MICROBLAZE_SRW32 (8)
+ELF-MBLAZE: Type: R_MICROBLAZE_64_NONE (9)
+ELF-MBLAZE: Type: R_MICROBLAZE_32_SYM_OP_SYM (10)
+ELF-MBLAZE: Type: R_MICROBLAZE_GNU_VTINHERIT (11)
+ELF-MBLAZE: Type: R_MICROBLAZE_GNU_VTENTRY (12)
+ELF-MBLAZE: Type: R_MICROBLAZE_GOTPC_64 (13)
+ELF-MBLAZE: Type: R_MICROBLAZE_GOT_64 (14)
+ELF-MBLAZE: Type: R_MICROBLAZE_PLT_64 (15)
+ELF-MBLAZE: Type: R_MICROBLAZE_REL (16)
+ELF-MBLAZE: Type: R_MICROBLAZE_JUMP_SLOT (17)
+ELF-MBLAZE: Type: R_MICROBLAZE_GLOB_DAT (18)
+ELF-MBLAZE: Type: R_MICROBLAZE_GOTOFF_64 (19)
+ELF-MBLAZE: Type: R_MICROBLAZE_GOTOFF_32 (20)
+ELF-MBLAZE: Type: R_MICROBLAZE_COPY (21)
+
+ELF-HEXAGON: Type: R_HEX_NONE (0)
+ELF-HEXAGON: Type: R_HEX_B22_PCREL (1)
+ELF-HEXAGON: Type: R_HEX_B15_PCREL (2)
+ELF-HEXAGON: Type: R_HEX_B7_PCREL (3)
+ELF-HEXAGON: Type: R_HEX_LO16 (4)
+ELF-HEXAGON: Type: R_HEX_HI16 (5)
+ELF-HEXAGON: Type: R_HEX_32 (6)
+ELF-HEXAGON: Type: R_HEX_16 (7)
+ELF-HEXAGON: Type: R_HEX_8 (8)
+ELF-HEXAGON: Type: R_HEX_GPREL16_0 (9)
+ELF-HEXAGON: Type: R_HEX_GPREL16_1 (10)
+ELF-HEXAGON: Type: R_HEX_GPREL16_2 (11)
+ELF-HEXAGON: Type: R_HEX_GPREL16_3 (12)
+ELF-HEXAGON: Type: R_HEX_HL16 (13)
+ELF-HEXAGON: Type: R_HEX_B13_PCREL (14)
+ELF-HEXAGON: Type: R_HEX_B9_PCREL (15)
+ELF-HEXAGON: Type: R_HEX_B32_PCREL_X (16)
+ELF-HEXAGON: Type: R_HEX_32_6_X (17)
+ELF-HEXAGON: Type: R_HEX_B22_PCREL_X (18)
+ELF-HEXAGON: Type: R_HEX_B15_PCREL_X (19)
+ELF-HEXAGON: Type: R_HEX_B13_PCREL_X (20)
+ELF-HEXAGON: Type: R_HEX_B9_PCREL_X (21)
+ELF-HEXAGON: Type: R_HEX_B7_PCREL_X (22)
+ELF-HEXAGON: Type: R_HEX_16_X (23)
+ELF-HEXAGON: Type: R_HEX_12_X (24)
+ELF-HEXAGON: Type: R_HEX_11_X (25)
+ELF-HEXAGON: Type: R_HEX_10_X (26)
+ELF-HEXAGON: Type: R_HEX_9_X (27)
+ELF-HEXAGON: Type: R_HEX_8_X (28)
+ELF-HEXAGON: Type: R_HEX_7_X (29)
+ELF-HEXAGON: Type: R_HEX_6_X (30)
+ELF-HEXAGON: Type: R_HEX_32_PCREL (31)
+ELF-HEXAGON: Type: R_HEX_COPY (32)
+ELF-HEXAGON: Type: R_HEX_GLOB_DAT (33)
+ELF-HEXAGON: Type: R_HEX_JMP_SLOT (34)
+ELF-HEXAGON: Type: R_HEX_RELATIVE (35)
+ELF-HEXAGON: Type: R_HEX_PLT_B22_PCREL (36)
+ELF-HEXAGON: Type: R_HEX_GOTREL_LO16 (37)
+ELF-HEXAGON: Type: R_HEX_GOTREL_HI16 (38)
+ELF-HEXAGON: Type: R_HEX_GOTREL_32 (39)
+ELF-HEXAGON: Type: R_HEX_GOT_LO16 (40)
+ELF-HEXAGON: Type: R_HEX_GOT_HI16 (41)
+ELF-HEXAGON: Type: R_HEX_GOT_32 (42)
+ELF-HEXAGON: Type: R_HEX_GOT_16 (43)
+ELF-HEXAGON: Type: R_HEX_DTPMOD_32 (44)
+ELF-HEXAGON: Type: R_HEX_DTPREL_LO16 (45)
+ELF-HEXAGON: Type: R_HEX_DTPREL_HI16 (46)
+ELF-HEXAGON: Type: R_HEX_DTPREL_32 (47)
+ELF-HEXAGON: Type: R_HEX_DTPREL_16 (48)
+ELF-HEXAGON: Type: R_HEX_GD_PLT_B22_PCREL (49)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_LO16 (50)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_HI16 (51)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_32 (52)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_16 (53)
+ELF-HEXAGON: Type: R_HEX_IE_LO16 (54)
+ELF-HEXAGON: Type: R_HEX_IE_HI16 (55)
+ELF-HEXAGON: Type: R_HEX_IE_32 (56)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_LO16 (57)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_HI16 (58)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_32 (59)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_16 (60)
+ELF-HEXAGON: Type: R_HEX_TPREL_LO16 (61)
+ELF-HEXAGON: Type: R_HEX_TPREL_HI16 (62)
+ELF-HEXAGON: Type: R_HEX_TPREL_32 (63)
+ELF-HEXAGON: Type: R_HEX_TPREL_16 (64)
+ELF-HEXAGON: Type: R_HEX_6_PCREL_X (65)
+ELF-HEXAGON: Type: R_HEX_GOTREL_32_6_X (66)
+ELF-HEXAGON: Type: R_HEX_GOTREL_16_X (67)
+ELF-HEXAGON: Type: R_HEX_GOTREL_11_X (68)
+ELF-HEXAGON: Type: R_HEX_GOT_32_6_X (69)
+ELF-HEXAGON: Type: R_HEX_GOT_16_X (70)
+ELF-HEXAGON: Type: R_HEX_GOT_11_X (71)
+ELF-HEXAGON: Type: R_HEX_DTPREL_32_6_X (72)
+ELF-HEXAGON: Type: R_HEX_DTPREL_16_X (73)
+ELF-HEXAGON: Type: R_HEX_DTPREL_11_X (74)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_32_6_X (75)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_16_X (76)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_11_X (77)
+ELF-HEXAGON: Type: R_HEX_IE_32_6_X (78)
+ELF-HEXAGON: Type: R_HEX_IE_16_X (79)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_32_6_X (80)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_16_X (81)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_11_X (82)
+ELF-HEXAGON: Type: R_HEX_TPREL_32_6_X (83)
+ELF-HEXAGON: Type: R_HEX_TPREL_16_X (84)
+ELF-HEXAGON: Type: R_HEX_TPREL_11_X (85)
+
+COFF-32: Type: IMAGE_REL_I386_ABSOLUTE (0)
+COFF-32: Type: IMAGE_REL_I386_DIR16 (1)
+COFF-32: Type: IMAGE_REL_I386_REL16 (2)
+COFF-32: Type: IMAGE_REL_I386_DIR32 (6)
+COFF-32: Type: IMAGE_REL_I386_DIR32NB (7)
+COFF-32: Type: IMAGE_REL_I386_SEG12 (9)
+COFF-32: Type: IMAGE_REL_I386_SECTION (10)
+COFF-32: Type: IMAGE_REL_I386_SECREL (11)
+COFF-32: Type: IMAGE_REL_I386_TOKEN (12)
+COFF-32: Type: IMAGE_REL_I386_SECREL7 (13)
+COFF-32: Type: IMAGE_REL_I386_REL32 (20)
+
+COFF-64: Type: IMAGE_REL_AMD64_ABSOLUTE (0)
+COFF-64: Type: IMAGE_REL_AMD64_ADDR64 (1)
+COFF-64: Type: IMAGE_REL_AMD64_ADDR32 (2)
+COFF-64: Type: IMAGE_REL_AMD64_ADDR32NB (3)
+COFF-64: Type: IMAGE_REL_AMD64_REL32 (4)
+COFF-64: Type: IMAGE_REL_AMD64_REL32_1 (5)
+COFF-64: Type: IMAGE_REL_AMD64_REL32_2 (6)
+COFF-64: Type: IMAGE_REL_AMD64_REL32_3 (7)
+COFF-64: Type: IMAGE_REL_AMD64_REL32_4 (8)
+COFF-64: Type: IMAGE_REL_AMD64_REL32_5 (9)
+COFF-64: Type: IMAGE_REL_AMD64_SECTION (10)
+COFF-64: Type: IMAGE_REL_AMD64_SECREL (11)
+COFF-64: Type: IMAGE_REL_AMD64_SECREL7 (12)
+COFF-64: Type: IMAGE_REL_AMD64_TOKEN (13)
+COFF-64: Type: IMAGE_REL_AMD64_SREL32 (14)
+COFF-64: Type: IMAGE_REL_AMD64_PAIR (15)
+COFF-64: Type: IMAGE_REL_AMD64_SSPAN32 (16)
+
+COFF-ARM: Type: IMAGE_REL_ARM_ABSOLUTE (0x0000)
+COFF-ARM: Type: IMAGE_REL_ARM_ADDR32 (0x0001)
+COFF-ARM: Type: IMAGE_REL_ARM_ADDR32NB (0x0002)
+COFF-ARM: Type: IMAGE_REL_ARM_BRANCH24 (0x0003)
+COFF-ARM: Type: IMAGE_REL_ARM_BRANCH11 (0x0004)
+COFF-ARM: Type: IMAGE_REL_ARM_TOKEN (0x0005)
+COFF-ARM: Type: IMAGE_REL_ARM_BLX24 (0x0008)
+COFF-ARM: Type: IMAGE_REL_ARM_BLX11 (0x0009)
+COFF-ARM: Type: IMAGE_REL_ARM_SECTION (0x000E)
+COFF-ARM: Type: IMAGE_REL_ARM_SECREL (0x000F)
+COFF-ARM: Type: IMAGE_REL_ARM_MOV32A (0x0010)
+COFF-ARM: Type: IMAGE_REL_ARM_MOV32T (0x0011)
+COFF-ARM: Type: IMAGE_REL_ARM_BRANCH20T (0x0012)
+COFF-ARM: Type: IMAGE_REL_ARM_BRANCH24T (0x0014)
+COFF-ARM: Type: IMAGE_REL_ARM_BLX23T (0x0015)
+
+MACHO-32: Type: GENERIC_RELOC_VANILLA (0)
+MACHO-32: Type: GENERIC_RELOC_PAIR (1)
+MACHO-32: Type: GENERIC_RELOC_SECTDIFF (2)
+MACHO-32: Type: GENERIC_RELOC_PB_LA_PTR (3)
+MACHO-32: Type: GENERIC_RELOC_LOCAL_SECTDIFF (4)
+MACHO-32: Type: GENERIC_RELOC_TLV (5)
+
+MACHO-64: Type: X86_64_RELOC_UNSIGNED (0)
+MACHO-64: Type: X86_64_RELOC_SIGNED (1)
+MACHO-64: Type: X86_64_RELOC_BRANCH (2)
+MACHO-64: Type: X86_64_RELOC_GOT_LOAD (3)
+MACHO-64: Type: X86_64_RELOC_GOT (4)
+MACHO-64: Type: X86_64_RELOC_SUBTRACTOR (5)
+MACHO-64: Type: X86_64_RELOC_SIGNED_1 (6)
+MACHO-64: Type: X86_64_RELOC_SIGNED_2 (7)
+MACHO-64: Type: X86_64_RELOC_SIGNED_4 (8)
+MACHO-64: Type: X86_64_RELOC_TLV (9)
+
+MACHO-ARM: Type: ARM_RELOC_VANILLA (0)
+MACHO-ARM: Type: ARM_RELOC_PAIR (1)
+MACHO-ARM: Type: ARM_RELOC_SECTDIFF (2)
+MACHO-ARM: Type: ARM_RELOC_LOCAL_SECTDIFF (3)
+MACHO-ARM: Type: ARM_RELOC_PB_LA_PTR (4)
+MACHO-ARM: Type: ARM_RELOC_BR24 (5)
+MACHO-ARM: Type: ARM_THUMB_RELOC_BR22 (6)
+MACHO-ARM: Type: ARM_THUMB_32BIT_BRANCH (7)
+MACHO-ARM: Type: ARM_RELOC_HALF (8)
+MACHO-ARM: Type: ARM_RELOC_HALF_SECTDIFF (9)
+
+MACHO-PPC: PPC_RELOC_VANILLA (0)
+MACHO-PPC: PPC_RELOC_PAIR (1)
+MACHO-PPC: PPC_RELOC_BR14 (2)
+MACHO-PPC: PPC_RELOC_BR24 (3)
+MACHO-PPC: PPC_RELOC_HI16 (4)
+MACHO-PPC: PPC_RELOC_LO16 (5)
+MACHO-PPC: PPC_RELOC_HA16 (6)
+MACHO-PPC: PPC_RELOC_LO14 (7)
+MACHO-PPC: PPC_RELOC_SECTDIFF (8)
+MACHO-PPC: PPC_RELOC_PB_LA_PTR (9)
+MACHO-PPC: PPC_RELOC_HI16_SECTDIFF (10)
+MACHO-PPC: PPC_RELOC_LO16_SECTDIFF (11)
+MACHO-PPC: PPC_RELOC_HA16_SECTDIFF (12)
+MACHO-PPC: PPC_RELOC_JBSR (13)
+MACHO-PPC: PPC_RELOC_LO14_SECTDIFF (14)
+MACHO-PPC: PPC_RELOC_LOCAL_SECTDIFF (15)
diff --git a/test/tools/llvm-readobj/relocations.test b/test/tools/llvm-readobj/relocations.test
new file mode 100644
index 000000000000..c23a7fe5bb0a
--- /dev/null
+++ b/test/tools/llvm-readobj/relocations.test
@@ -0,0 +1,173 @@
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.coff-i386 \
+RUN:   | FileCheck %s -check-prefix COFF
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.elf-i386 \
+RUN:   | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.macho-i386 \
+RUN:   | FileCheck %s -check-prefix MACHO-I386
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.macho-x86-64 \
+RUN:   | FileCheck %s -check-prefix MACHO-X86-64
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.macho-ppc \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.macho-ppc64 \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC64
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/trivial.obj.macho-arm \
+RUN:   | FileCheck %s -check-prefix MACHO-ARM
+
+COFF:      Relocations [
+COFF-NEXT:   Section (1) .text {
+COFF-NEXT:     0x4 IMAGE_REL_I386_DIR32 .data
+COFF-NEXT:     0x9 IMAGE_REL_I386_REL32 _puts
+COFF-NEXT:     0xE IMAGE_REL_I386_REL32 _SomeOtherFunction
+COFF-NEXT:   }
+COFF-NEXT: ]
+
+ELF:      Relocations [
+ELF-NEXT:   Section (1) .text {
+ELF-NEXT:     0xC R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 0x0
+ELF-NEXT:     0x12 R_386_GOTOFF .L.str 0x0
+ELF-NEXT:     0x1A R_386_PLT32 puts 0x0
+ELF-NEXT:     0x1F R_386_PLT32 SomeOtherFunction 0x0
+ELF-NEXT:   }
+ELF-NEXT: ]
+
+MACHO-I386:      Relocations [
+MACHO-I386-NEXT:   Section __text {
+MACHO-I386-NEXT:     0x18 1 2 1 GENERIC_RELOC_VANILLA 0 _SomeOtherFunction
+MACHO-I386-NEXT:     0x13 1 2 1 GENERIC_RELOC_VANILLA 0 _puts
+MACHO-I386-NEXT:     0xB 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 _main
+MACHO-I386-NEXT:     0x0 0 2 n/a GENERIC_RELOC_PAIR 1 _main
+MACHO-I386-NEXT:   }
+MACHO-I386-NEXT: ]
+
+MACHO-X86-64: Relocations [
+MACHO-X86-64-NEXT:  Section __text {
+MACHO-X86-64-NEXT:    0xE 1 2 1 X86_64_RELOC_BRANCH 0 _SomeOtherFunction
+MACHO-X86-64-NEXT:    0x9 1 2 1 X86_64_RELOC_BRANCH 0 _puts
+MACHO-X86-64-NEXT:    0x4 1 2 1 X86_64_RELOC_SIGNED 0 L_.str
+MACHO-X86-64-NEXT:  }
+MACHO-X86-64-NEXT:]
+
+MACHO-PPC: Relocations [
+MACHO-PPC-NEXT:   Section __text {
+MACHO-PPC-NEXT:     0x24 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _b
+MACHO-PPC-NEXT:     0x0 0 2 n/a PPC_RELOC_PAIR 1 _b
+MACHO-PPC-NEXT:     0x1C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _b
+MACHO-PPC-NEXT:     0x58 0 2 n/a PPC_RELOC_PAIR 1 _b
+MACHO-PPC-NEXT:     0x18 1 2 0 PPC_RELOC_BR24 0 _b
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section __picsymbolstub1 {
+MACHO-PPC-NEXT:     0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _b
+MACHO-PPC-NEXT:     0x0 0 2 n/a PPC_RELOC_PAIR 1 _b
+MACHO-PPC-NEXT:     0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _b
+MACHO-PPC-NEXT:     0x20 0 2 n/a PPC_RELOC_PAIR 1 _b
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section __la_symbol_ptr {
+MACHO-PPC-NEXT:     0x0 0 2 1 PPC_RELOC_VANILLA 0 dyld_stub_binding_helper
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT: ]
+
+MACHO-PPC64: Relocations [
+MACHO-PPC64-NEXT:   Section __text {
+MACHO-PPC64-NEXT:     0x24 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:     0x0 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:     0x1C 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:     0x58 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:     0x18 1 2 0 0 _b
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section __picsymbolstub1 {
+MACHO-PPC64-NEXT:     0x14 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:     0x0 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:     0xC 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:     0x24 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section __la_symbol_ptr {
+MACHO-PPC64-NEXT:     0x0 0 3 1 0 dyld_stub_binding_helper
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT: ]
+
+
+MACHO-ARM:       Relocations [
+MACHO-ARM-NEXT:    Section __text {
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x38
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 2
+MACHO-ARM-NEXT:        Extern: N/A
+MACHO-ARM-NEXT:        Type: ARM_RELOC_SECTDIFF (2)
+MACHO-ARM-NEXT:        Symbol: _b
+MACHO-ARM-NEXT:        Scattered: 1
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x0
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 2
+MACHO-ARM-NEXT:        Extern: N/A
+MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:        Symbol: _b
+MACHO-ARM-NEXT:        Scattered: 1
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x20
+MACHO-ARM-NEXT:        PCRel: 1
+MACHO-ARM-NEXT:        Length: 2
+MACHO-ARM-NEXT:        Extern: 1
+MACHO-ARM-NEXT:        Type: ARM_RELOC_BR24 (5)
+MACHO-ARM-NEXT:        Symbol: _g
+MACHO-ARM-NEXT:        Scattered: 0
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x1C
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 1
+MACHO-ARM-NEXT:        Extern: 1
+MACHO-ARM-NEXT:        Type: ARM_RELOC_HALF (8)
+MACHO-ARM-NEXT:        Symbol: _g
+MACHO-ARM-NEXT:        Scattered: 0
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x0
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 1
+MACHO-ARM-NEXT:        Extern: 0
+MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:        Symbol: _b
+MACHO-ARM-NEXT:        Scattered: 0
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x18
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 0
+MACHO-ARM-NEXT:        Extern: 1
+MACHO-ARM-NEXT:        Type: ARM_RELOC_HALF (8)
+MACHO-ARM-NEXT:        Symbol: _g
+MACHO-ARM-NEXT:        Scattered: 0
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x0
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 0
+MACHO-ARM-NEXT:        Extern: 0
+MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:        Symbol: _b
+MACHO-ARM-NEXT:        Scattered: 0
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0xC
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 2
+MACHO-ARM-NEXT:        Extern: N/A
+MACHO-ARM-NEXT:        Type: ARM_RELOC_SECTDIFF (2)
+MACHO-ARM-NEXT:        Symbol: _b
+MACHO-ARM-NEXT:        Scattered: 1
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x0
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 2
+MACHO-ARM-NEXT:        Extern: N/A
+MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:        Symbol: _b
+MACHO-ARM-NEXT:        Scattered: 1
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:    }
+MACHO-ARM-NEXT:  ]
diff --git a/test/tools/llvm-readobj/sections-ext.test b/test/tools/llvm-readobj/sections-ext.test
new file mode 100644
index 000000000000..526ddfe82c1c
--- /dev/null
+++ b/test/tools/llvm-readobj/sections-ext.test
@@ -0,0 +1,841 @@
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.coff-i386 \
+RUN:   | FileCheck %s -check-prefix COFF
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.elf-i386 \
+RUN:   | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.macho-i386 \
+RUN:   | FileCheck %s -check-prefix MACHO-I386
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.macho-x86-64 \
+RUN:   | FileCheck %s -check-prefix MACHO-X86-64
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.macho-ppc \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.macho-ppc64 \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC64
+RUN: llvm-readobj -expand-relocs -s -st -sr -sd %p/Inputs/trivial.obj.macho-arm \
+RUN:   | FileCheck %s -check-prefix MACHO-ARM
+
+COFF:      Sections [
+COFF-NEXT:   Section {
+COFF-NEXT:     Number: 1
+COFF-NEXT:     Name: .text (2E 74 65 78 74 00 00 00)
+COFF-NEXT:     VirtualSize: 0x0
+COFF-NEXT:     VirtualAddress: 0x0
+COFF-NEXT:     RawDataSize: 22
+COFF-NEXT:     PointerToRawData: 0x64
+COFF-NEXT:     PointerToRelocations: 0x7A
+COFF-NEXT:     PointerToLineNumbers: 0x0
+COFF-NEXT:     RelocationCount: 3
+COFF-NEXT:     LineNumberCount: 0
+COFF-NEXT:     Characteristics [ (0x60500020)
+COFF-NEXT:       IMAGE_SCN_ALIGN_16BYTES (0x500000)
+COFF-NEXT:       IMAGE_SCN_CNT_CODE (0x20)
+COFF-NEXT:       IMAGE_SCN_MEM_EXECUTE (0x20000000)
+COFF-NEXT:       IMAGE_SCN_MEM_READ (0x40000000)
+COFF-NEXT:     ]
+COFF-NEXT:     Relocations [
+COFF-NEXT:       0x4 IMAGE_REL_I386_DIR32 .data
+COFF-NEXT:       0x9 IMAGE_REL_I386_REL32 _puts
+COFF-NEXT:       0xE IMAGE_REL_I386_REL32 _SomeOtherFunction
+COFF-NEXT:     ]
+COFF-NEXT:     Symbols [
+COFF-NEXT:       Symbol {
+COFF-NEXT:         Name: .text
+COFF-NEXT:         Value: 0
+COFF-NEXT:         Section: .text (1)
+COFF-NEXT:         BaseType: Null (0x0)
+COFF-NEXT:         ComplexType: Null (0x0)
+COFF-NEXT:         StorageClass: Static (0x3)
+COFF-NEXT:         AuxSymbolCount: 1
+COFF-NEXT:         AuxSectionDef {
+COFF-NEXT:           Length: 22
+COFF-NEXT:           RelocationCount: 3
+COFF-NEXT:           LineNumberCount: 0
+COFF-NEXT:           Checksum: 0x0
+COFF-NEXT:           Number: 1
+COFF-NEXT:           Selection: 0x0
+COFF-NEXT:           Unused: (00 00 00)
+COFF-NEXT:         }
+COFF-NEXT:       }
+COFF-NEXT:       Symbol {
+COFF-NEXT:         Name: _main
+COFF-NEXT:         Value: 0
+COFF-NEXT:         Section: .text (1)
+COFF-NEXT:         BaseType: Null (0x0)
+COFF-NEXT:         ComplexType: Function (0x2)
+COFF-NEXT:         StorageClass: External (0x2)
+COFF-NEXT:         AuxSymbolCount: 0
+COFF-NEXT:       }
+COFF-NEXT:     ]
+COFF-NEXT:     SectionData (
+COFF-NEXT:       0000: 50C70424 00000000 E8000000 00E80000  |P..$............|
+COFF-NEXT:       0010: 000031C0 5AC3                        |..1.Z.|
+COFF-NEXT:     )
+COFF-NEXT:   }
+
+ELF:      Sections [
+ELF-NEXT:   Section {
+ELF-NEXT:     Index: 0
+ELF-NEXT:     Name:  (0)
+ELF-NEXT:     Type: SHT_NULL (0x0)
+ELF-NEXT:     Flags [ (0x0)
+ELF-NEXT:     ]
+ELF-NEXT:     Address: 0x0
+ELF-NEXT:     Offset: 0x0
+ELF-NEXT:     Size: 0
+ELF-NEXT:     Link: 0
+ELF-NEXT:     Info: 0
+ELF-NEXT:     AddressAlignment: 0
+ELF-NEXT:     EntrySize: 0
+ELF-NEXT:     Relocations [
+ELF-NEXT:     ]
+ELF-NEXT:     Symbols [
+ELF-NEXT:     ]
+ELF-NEXT:     SectionData (
+ELF-NEXT:     )
+ELF-NEXT:   }
+ELF-NEXT:   Section {
+ELF-NEXT:     Index: 1
+ELF-NEXT:     Name: .text (5)
+ELF-NEXT:     Type: SHT_PROGBITS (0x1)
+ELF-NEXT:     Flags [ (0x6)
+ELF-NEXT:       SHF_ALLOC (0x2)
+ELF-NEXT:       SHF_EXECINSTR (0x4)
+ELF-NEXT:     ]
+ELF-NEXT:     Address: 0x0
+ELF-NEXT:     Offset: 0x40
+ELF-NEXT:     Size: 42
+ELF-NEXT:     Link: 0
+ELF-NEXT:     Info: 0
+ELF-NEXT:     AddressAlignment: 16
+ELF-NEXT:     EntrySize: 0
+ELF-NEXT:     Relocations [
+ELF-NEXT:       0xC R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 0x0
+ELF-NEXT:       0x12 R_386_GOTOFF .L.str 0x0
+ELF-NEXT:       0x1A R_386_PLT32 puts 0x0
+ELF-NEXT:       0x1F R_386_PLT32 SomeOtherFunction 0x0
+ELF-NEXT:     ]
+ELF-NEXT:     Symbols [
+ELF-NEXT:       Symbol {
+ELF-NEXT:         Name: .text (0)
+ELF-NEXT:         Value: 0x0
+ELF-NEXT:         Size: 0
+ELF-NEXT:         Binding: Local (0x0)
+ELF-NEXT:         Type: Section (0x3)
+ELF-NEXT:         Other: 0
+ELF-NEXT:         Section: .text (0x1)
+ELF-NEXT:       }
+ELF-NEXT:       Symbol {
+ELF-NEXT:         Name: main (12)
+ELF-NEXT:         Value: 0x0
+ELF-NEXT:         Size: 42
+ELF-NEXT:         Binding: Global (0x1)
+ELF-NEXT:         Type: Function (0x2)
+ELF-NEXT:         Other: 0
+ELF-NEXT:         Section: .text (0x1)
+ELF-NEXT:       }
+ELF-NEXT:     ]
+ELF-NEXT:     SectionData (
+ELF-NEXT:       0000: 5383EC08 E8000000 005B81C3 03000000  |S........[......|
+ELF-NEXT:       0010: 8D830000 00008904 24E8FCFF FFFFE8FC  |........$.......|
+ELF-NEXT:       0020: FFFFFF31 C083C408 5BC3               |...1....[.|
+ELF-NEXT:     )
+ELF-NEXT:   }
+
+MACHO-I386:      Sections [
+MACHO-I386-NEXT:   Section {
+MACHO-I386-NEXT:     Index: 0
+MACHO-I386-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Address: 0x0
+MACHO-I386-NEXT:     Size: 0x22
+MACHO-I386-NEXT:     Offset: 324
+MACHO-I386-NEXT:     Alignment: 4
+MACHO-I386-NEXT:     RelocationOffset: 0x174
+MACHO-I386-NEXT:     RelocationCount: 4
+MACHO-I386-NEXT:     Type: 0x0
+MACHO-I386-NEXT:     Attributes [ (0x800004)
+MACHO-I386-NEXT:       PureInstructions (0x800000)
+MACHO-I386-NEXT:       SomeInstructions (0x4)
+MACHO-I386-NEXT:     ]
+MACHO-I386-NEXT:     Reserved1: 0x0
+MACHO-I386-NEXT:     Reserved2: 0x0
+MACHO-I386-NEXT:     Relocations [
+MACHO-I386-NEXT:       0x18 1 2 1 GENERIC_RELOC_VANILLA 0 _SomeOtherFunction
+MACHO-I386-NEXT:       0x13 1 2 1 GENERIC_RELOC_VANILLA 0 _puts
+MACHO-I386-NEXT:       0xB 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 _main
+MACHO-I386-NEXT:       0x0 0 2 n/a GENERIC_RELOC_PAIR 1 _main
+MACHO-I386-NEXT:     ]
+MACHO-I386-NEXT:     Symbols [
+MACHO-I386-NEXT:       Symbol {
+MACHO-I386-NEXT:         Name: _main (1)
+MACHO-I386-NEXT:         Type: 0xF
+MACHO-I386-NEXT:         Section: __text (0x1)
+MACHO-I386-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-I386-NEXT:         Flags [ (0x0)
+MACHO-I386-NEXT:         ]
+MACHO-I386-NEXT:         Value: 0x0
+MACHO-I386-NEXT:       }
+MACHO-I386-NEXT:     ]
+MACHO-I386-NEXT:     SectionData (
+MACHO-I386-NEXT:       0000: 83EC0CE8 00000000 588D801A 00000089  |........X.......|
+MACHO-I386-NEXT:       0010: 0424E8E9 FFFFFFE8 E4FFFFFF 31C083C4  |.$..........1...|
+MACHO-I386-NEXT:       0020: 0CC3                                 |..|
+MACHO-I386-NEXT:     )
+MACHO-I386-NEXT:   }
+
+
+MACHO-X86-64:     Sections [
+MACHO-X86-64-NEXT:  Section {
+MACHO-X86-64-NEXT:    Index: 0
+MACHO-X86-64-NEXT:    Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Address: 0x0
+MACHO-X86-64-NEXT:    Size: 0x16
+MACHO-X86-64-NEXT:    Offset: 368
+MACHO-X86-64-NEXT:    Alignment: 4
+MACHO-X86-64-NEXT:    RelocationOffset: 0x194
+MACHO-X86-64-NEXT:    RelocationCount: 3
+MACHO-X86-64-NEXT:    Type: 0x0
+MACHO-X86-64-NEXT:    Attributes [ (0x800004)
+MACHO-X86-64-NEXT:      PureInstructions (0x800000)
+MACHO-X86-64-NEXT:      SomeInstructions (0x4)
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Reserved1: 0x0
+MACHO-X86-64-NEXT:    Reserved2: 0x0
+MACHO-X86-64-NEXT:    Relocations [
+MACHO-X86-64-NEXT:      0xE 1 2 1 X86_64_RELOC_BRANCH 0 _SomeOtherFunction
+MACHO-X86-64-NEXT:      0x9 1 2 1 X86_64_RELOC_BRANCH 0 _puts
+MACHO-X86-64-NEXT:      0x4 1 2 1 X86_64_RELOC_SIGNED 0 L_.str
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Symbols [
+MACHO-X86-64-NEXT:      Symbol {
+MACHO-X86-64-NEXT:        Name: _main (1)
+MACHO-X86-64-NEXT:        Type: 0xF
+MACHO-X86-64-NEXT:        Section: __text (0x1)
+MACHO-X86-64-NEXT:        RefType: UndefinedNonLazy (0x0)
+MACHO-X86-64-NEXT:        Flags [ (0x0)
+MACHO-X86-64-NEXT:        ]
+MACHO-X86-64-NEXT:        Value: 0x0
+MACHO-X86-64-NEXT:      }
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    SectionData (
+MACHO-X86-64-NEXT:      0000: 50488D3D 00000000 E8000000 00E80000  |PH.=............|
+MACHO-X86-64-NEXT:      0010: 000031C0 5AC3                        |..1.Z.|
+MACHO-X86-64-NEXT:    )
+MACHO-X86-64-NEXT:  }
+MACHO-X86-64-NEXT:  Section {
+MACHO-X86-64-NEXT:    Index: 1
+MACHO-X86-64-NEXT:    Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Address: 0x16
+MACHO-X86-64-NEXT:    Size: 0xD
+MACHO-X86-64-NEXT:    Offset: 390
+MACHO-X86-64-NEXT:    Alignment: 0
+MACHO-X86-64-NEXT:    RelocationOffset: 0x0
+MACHO-X86-64-NEXT:    RelocationCount: 0
+MACHO-X86-64-NEXT:    Type: ExtReloc (0x2)
+MACHO-X86-64-NEXT:    Attributes [ (0x0)
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Reserved1: 0x0
+MACHO-X86-64-NEXT:    Reserved2: 0x0
+MACHO-X86-64-NEXT:    Relocations [
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Symbols [
+MACHO-X86-64-NEXT:      Symbol {
+MACHO-X86-64-NEXT:        Name: L_.str (32)
+MACHO-X86-64-NEXT:        Type: Section (0xE)
+MACHO-X86-64-NEXT:        Section: __cstring (0x2)
+MACHO-X86-64-NEXT:        RefType: UndefinedNonLazy (0x0)
+MACHO-X86-64-NEXT:        Flags [ (0x0)
+MACHO-X86-64-NEXT:        ]
+MACHO-X86-64-NEXT:        Value: 0x16
+MACHO-X86-64-NEXT:      }
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    SectionData (
+MACHO-X86-64-NEXT:      0000: 48656C6C 6F20576F 726C640A 00        |Hello World..|
+MACHO-X86-64-NEXT:    )
+MACHO-X86-64-NEXT:  }
+MACHO-X86-64-NEXT:]
+
+MACHO-PPC: Sections [
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 0
+MACHO-PPC-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x0
+MACHO-PPC-NEXT:     Size: 0x3C
+MACHO-PPC-NEXT:     Offset: 528
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x27C
+MACHO-PPC-NEXT:     RelocationCount: 5
+MACHO-PPC-NEXT:     Type: 0x0
+MACHO-PPC-NEXT:     Attributes [ (0x800004)
+MACHO-PPC-NEXT:       PureInstructions (0x800000)
+MACHO-PPC-NEXT:       SomeInstructions (0x4)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:     Relocations [
+MACHO-PPC-NEXT:       0x24 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _b
+MACHO-PPC-NEXT:       0x0 0 2 n/a PPC_RELOC_PAIR 1 _b
+MACHO-PPC-NEXT:       0x1C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _b
+MACHO-PPC-NEXT:       0x58 0 2 n/a PPC_RELOC_PAIR 1 _b
+MACHO-PPC-NEXT:       0x18 1 2 0 PPC_RELOC_BR24 0 _b
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Symbols [
+MACHO-PPC-NEXT:       Symbol {
+MACHO-PPC-NEXT:         Name: _f (4)
+MACHO-PPC-NEXT:         Type: 0xF
+MACHO-PPC-NEXT:         Section: __text (0x1)
+MACHO-PPC-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-PPC-NEXT:         Flags [ (0x0)
+MACHO-PPC-NEXT:         ]
+MACHO-PPC-NEXT:         Value: 0x0
+MACHO-PPC-NEXT:       }
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     SectionData (
+MACHO-PPC-NEXT:       0000: 7C0802A6 93E1FFFC 429F0005 7FE802A6  ||.......B.......|
+MACHO-PPC-NEXT:       0010: 90010008 9421FFB0 48000029 3C5F0000  |.....!..H..)<_..|
+MACHO-PPC-NEXT:       0020: 38210050 80420058 80010008 83E1FFFC  |8!.P.B.X........|
+MACHO-PPC-NEXT:       0030: 7C0803A6 80620000 4E800020           ||....b..N.. |
+MACHO-PPC-NEXT:     )
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 1
+MACHO-PPC-NEXT:     Name: __picsymbolstub1 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 31)
+MACHO-PPC-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x40
+MACHO-PPC-NEXT:     Size: 0x20
+MACHO-PPC-NEXT:     Offset: 592
+MACHO-PPC-NEXT:     Alignment: 5
+MACHO-PPC-NEXT:     RelocationOffset: 0x2A4
+MACHO-PPC-NEXT:     RelocationCount: 4
+MACHO-PPC-NEXT:     Type: 0x8
+MACHO-PPC-NEXT:     Attributes [ (0x800004)
+MACHO-PPC-NEXT:       PureInstructions (0x800000)
+MACHO-PPC-NEXT:       SomeInstructions (0x4)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x20
+MACHO-PPC-NEXT:     Relocations [
+MACHO-PPC-NEXT:       0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _b
+MACHO-PPC-NEXT:       0x0 0 2 n/a PPC_RELOC_PAIR 1 _b
+MACHO-PPC-NEXT:       0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _b
+MACHO-PPC-NEXT:       0x20 0 2 n/a PPC_RELOC_PAIR 1 _b
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Symbols [
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     SectionData (
+MACHO-PPC-NEXT:       0000: 7C0802A6 429F0005 7D6802A6 3D6B0000  ||...B...}h..=k..|
+MACHO-PPC-NEXT:       0010: 7C0803A6 858B0020 7D8903A6 4E800420  ||...... }...N.. |
+MACHO-PPC-NEXT:     )
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 2
+MACHO-PPC-NEXT:     Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x60
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 624
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x0
+MACHO-PPC-NEXT:     RelocationCount: 0
+MACHO-PPC-NEXT:     Type: 0x0
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:     Relocations [
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Symbols [
+MACHO-PPC-NEXT:       Symbol {
+MACHO-PPC-NEXT:         Name: _b (1)
+MACHO-PPC-NEXT:         Type: 0xF
+MACHO-PPC-NEXT:         Section: __data (0x3)
+MACHO-PPC-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-PPC-NEXT:         Flags [ (0x0)
+MACHO-PPC-NEXT:         ]
+MACHO-PPC-NEXT:         Value: 0x60
+MACHO-PPC-NEXT:       }
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     SectionData (
+MACHO-PPC-NEXT:       0000: 0000002A                             |...*|
+MACHO-PPC-NEXT:     )
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 3
+MACHO-PPC-NEXT:     Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x64
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 628
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x0
+MACHO-PPC-NEXT:     RelocationCount: 0
+MACHO-PPC-NEXT:     Type: 0x6
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x1
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:     Relocations [
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Symbols [
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     SectionData (
+MACHO-PPC-NEXT:       0000: 00000000                             |....|
+MACHO-PPC-NEXT:     )
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 4
+MACHO-PPC-NEXT:     Name: __la_symbol_ptr (5F 5F 6C 61 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x68
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 632
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x2C4
+MACHO-PPC-NEXT:     RelocationCount: 1
+MACHO-PPC-NEXT:     Type: 0x7
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x2
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:     Relocations [
+MACHO-PPC-NEXT:       0x0 0 2 1 PPC_RELOC_VANILLA 0 dyld_stub_binding_helper
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Symbols [
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     SectionData (
+MACHO-PPC-NEXT:       0000: 00000000                             |....|
+MACHO-PPC-NEXT:     )
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT: ]
+
+
+MACHO-PPC64:  Sections [
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 0
+MACHO-PPC64-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x0
+MACHO-PPC64-NEXT:     Size: 0x3C
+MACHO-PPC64-NEXT:     Offset: 608
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x2D4
+MACHO-PPC64-NEXT:     RelocationCount: 5
+MACHO-PPC64-NEXT:     Type: 0x0
+MACHO-PPC64-NEXT:     Attributes [ (0x800004)
+MACHO-PPC64-NEXT:       PureInstructions (0x800000)
+MACHO-PPC64-NEXT:       SomeInstructions (0x4)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:     Relocations [
+MACHO-PPC64-NEXT:       0x24 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:       0x0 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:       0x1C 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:       0x58 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:       0x18 1 2 0 0 _b
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Symbols [
+MACHO-PPC64-NEXT:       Symbol {
+MACHO-PPC64-NEXT:         Name: _f (4)
+MACHO-PPC64-NEXT:         Type: 0xF
+MACHO-PPC64-NEXT:         Section: __text (0x1)
+MACHO-PPC64-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-PPC64-NEXT:         Flags [ (0x0)
+MACHO-PPC64-NEXT:         ]
+MACHO-PPC64-NEXT:         Value: 0x0
+MACHO-PPC64-NEXT:       }
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     SectionData (
+MACHO-PPC64-NEXT:       0000: 7C0802A6 FBE1FFF8 429F0005 7FE802A6  ||.......B.......|
+MACHO-PPC64-NEXT:       0010: F8010010 F821FF81 48000029 3C5F0000  |.....!..H..)<_..|
+MACHO-PPC64-NEXT:       0020: 38210080 E8420058 E8010010 EBE1FFF8  |8!...B.X........|
+MACHO-PPC64-NEXT:       0030: 7C0803A6 E8620002 4E800020           ||....b..N.. |
+MACHO-PPC64-NEXT:     )
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 1
+MACHO-PPC64-NEXT:     Name: __picsymbolstub1 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 31)
+MACHO-PPC64-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x40
+MACHO-PPC64-NEXT:     Size: 0x20
+MACHO-PPC64-NEXT:     Offset: 672
+MACHO-PPC64-NEXT:     Alignment: 5
+MACHO-PPC64-NEXT:     RelocationOffset: 0x2FC
+MACHO-PPC64-NEXT:     RelocationCount: 4
+MACHO-PPC64-NEXT:     Type: 0x8
+MACHO-PPC64-NEXT:     Attributes [ (0x800004)
+MACHO-PPC64-NEXT:       PureInstructions (0x800000)
+MACHO-PPC64-NEXT:       SomeInstructions (0x4)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x20
+MACHO-PPC64-NEXT:     Relocations [
+MACHO-PPC64-NEXT:       0x14 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:       0x0 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:       0xC 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:       0x24 0 2 n/a 1 _b
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Symbols [
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     SectionData (
+MACHO-PPC64-NEXT:       0000: 7C0802A6 429F0005 7D6802A6 3D6B0000  ||...B...}h..=k..|
+MACHO-PPC64-NEXT:       0010: 7C0803A6 E98B0025 7D8903A6 4E800420  ||......%}...N.. |
+MACHO-PPC64-NEXT:     )
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 2
+MACHO-PPC64-NEXT:     Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x60
+MACHO-PPC64-NEXT:     Size: 0x4
+MACHO-PPC64-NEXT:     Offset: 704
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x0
+MACHO-PPC64-NEXT:     RelocationCount: 0
+MACHO-PPC64-NEXT:     Type: 0x0
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:     Relocations [
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Symbols [
+MACHO-PPC64-NEXT:       Symbol {
+MACHO-PPC64-NEXT:         Name: _b (1)
+MACHO-PPC64-NEXT:         Type: 0xF
+MACHO-PPC64-NEXT:         Section: __data (0x3)
+MACHO-PPC64-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-PPC64-NEXT:         Flags [ (0x0)
+MACHO-PPC64-NEXT:         ]
+MACHO-PPC64-NEXT:         Value: 0x60
+MACHO-PPC64-NEXT:       }
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     SectionData (
+MACHO-PPC64-NEXT:       0000: 0000002A                             |...*|
+MACHO-PPC64-NEXT:     )
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 3
+MACHO-PPC64-NEXT:     Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x64
+MACHO-PPC64-NEXT:     Size: 0x8
+MACHO-PPC64-NEXT:     Offset: 708
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x0
+MACHO-PPC64-NEXT:     RelocationCount: 0
+MACHO-PPC64-NEXT:     Type: 0x6
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x1
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:     Relocations [
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Symbols [
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     SectionData (
+MACHO-PPC64-NEXT:       0000: 00000000 00000000                    |........|
+MACHO-PPC64-NEXT:     )
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 4
+MACHO-PPC64-NEXT:     Name: __la_symbol_ptr (5F 5F 6C 61 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x6C
+MACHO-PPC64-NEXT:     Size: 0x8
+MACHO-PPC64-NEXT:     Offset: 716
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x31C
+MACHO-PPC64-NEXT:     RelocationCount: 1
+MACHO-PPC64-NEXT:     Type: 0x7
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x2
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:     Relocations [
+MACHO-PPC64-NEXT:       0x0 0 3 1 0 dyld_stub_binding_helper
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Symbols [
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     SectionData (
+MACHO-PPC64-NEXT:       0000: 00000000 00000000                    |........|
+MACHO-PPC64-NEXT:     )
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT: ]
+
+MACHO-ARM:      Sections [
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 0
+MACHO-ARM-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x0
+MACHO-ARM-NEXT:     Size: 0x3C
+MACHO-ARM-NEXT:     Offset: 664
+MACHO-ARM-NEXT:     Alignment: 2
+MACHO-ARM-NEXT:     RelocationOffset: 0x2E0
+MACHO-ARM-NEXT:     RelocationCount: 9
+MACHO-ARM-NEXT:     Type: 0x0
+MACHO-ARM-NEXT:     Attributes [ (0x800004)
+MACHO-ARM-NEXT:       PureInstructions (0x800000)
+MACHO-ARM-NEXT:       SomeInstructions (0x4)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x38
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 2
+MACHO-ARM-NEXT:         Extern: N/A
+MACHO-ARM-NEXT:         Type: ARM_RELOC_SECTDIFF (2)
+MACHO-ARM-NEXT:         Symbol: _b
+MACHO-ARM-NEXT:         Scattered: 1
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x0
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 2
+MACHO-ARM-NEXT:         Extern: N/A
+MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:         Symbol: _b
+MACHO-ARM-NEXT:         Scattered: 1
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x20
+MACHO-ARM-NEXT:         PCRel: 1
+MACHO-ARM-NEXT:         Length: 2
+MACHO-ARM-NEXT:         Extern: 1
+MACHO-ARM-NEXT:         Type: ARM_RELOC_BR24 (5)
+MACHO-ARM-NEXT:         Symbol: _g
+MACHO-ARM-NEXT:         Scattered: 0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x1C
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 1
+MACHO-ARM-NEXT:         Extern: 1
+MACHO-ARM-NEXT:         Type: ARM_RELOC_HALF (8)
+MACHO-ARM-NEXT:         Symbol: _g
+MACHO-ARM-NEXT:         Scattered: 0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x0
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 1
+MACHO-ARM-NEXT:         Extern: 0
+MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:         Symbol: _b
+MACHO-ARM-NEXT:         Scattered: 0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x18
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 0
+MACHO-ARM-NEXT:         Extern: 1
+MACHO-ARM-NEXT:         Type: ARM_RELOC_HALF (8)
+MACHO-ARM-NEXT:         Symbol: _g
+MACHO-ARM-NEXT:         Scattered: 0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x0
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 0
+MACHO-ARM-NEXT:         Extern: 0
+MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:         Symbol: _b
+MACHO-ARM-NEXT:         Scattered: 0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0xC
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 2
+MACHO-ARM-NEXT:         Extern: N/A
+MACHO-ARM-NEXT:         Type: ARM_RELOC_SECTDIFF (2)
+MACHO-ARM-NEXT:         Symbol: _b
+MACHO-ARM-NEXT:         Scattered: 1
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x0
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 2
+MACHO-ARM-NEXT:         Extern: N/A
+MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:         Symbol: _b
+MACHO-ARM-NEXT:         Scattered: 1
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:       Symbol {
+MACHO-ARM-NEXT:         Name: _f (4)
+MACHO-ARM-NEXT:         Type: 0xF
+MACHO-ARM-NEXT:         Section: __text (0x1)
+MACHO-ARM-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-ARM-NEXT:         Flags [ (0x0)
+MACHO-ARM-NEXT:         ]
+MACHO-ARM-NEXT:         Value: 0x10
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Symbol {
+MACHO-ARM-NEXT:         Name: _h (1)
+MACHO-ARM-NEXT:         Type: 0xF
+MACHO-ARM-NEXT:         Section: __text (0x1)
+MACHO-ARM-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-ARM-NEXT:         Flags [ (0x0)
+MACHO-ARM-NEXT:         ]
+MACHO-ARM-NEXT:         Value: 0x0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:       0000: 04009FE5 00009FE7 1EFF2FE1 38000000  |........../.8...|
+MACHO-ARM-NEXT:       0010: 80402DE9 0D70A0E1 000000E3 000040E3  |.@-..p........@.|
+MACHO-ARM-NEXT:       0020: F6FFFFEB 0C009FE5 00009FE7 000090E5  |................|
+MACHO-ARM-NEXT:       0030: 8040BDE8 1EFF2FE1 10000000           |.@..../.....|
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 1
+MACHO-ARM-NEXT:     Name: __textcoal_nt (5F 5F 74 65 78 74 63 6F 61 6C 5F 6E 74 00 00 00)
+MACHO-ARM-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x3C
+MACHO-ARM-NEXT:     Size: 0x0
+MACHO-ARM-NEXT:     Offset: 724
+MACHO-ARM-NEXT:     Alignment: 0
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0xB
+MACHO-ARM-NEXT:     Attributes [ (0x800000)
+MACHO-ARM-NEXT:       PureInstructions (0x800000)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 2
+MACHO-ARM-NEXT:     Name: __const_coal (5F 5F 63 6F 6E 73 74 5F 63 6F 61 6C 00 00 00 00)
+MACHO-ARM-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x3C
+MACHO-ARM-NEXT:     Size: 0x0
+MACHO-ARM-NEXT:     Offset: 724
+MACHO-ARM-NEXT:     Alignment: 0
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0xB
+MACHO-ARM-NEXT:     Attributes [ (0x0)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 3
+MACHO-ARM-NEXT:     Name: __picsymbolstub4 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 34)
+MACHO-ARM-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x3C
+MACHO-ARM-NEXT:     Size: 0x0
+MACHO-ARM-NEXT:     Offset: 724
+MACHO-ARM-NEXT:     Alignment: 0
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0x8
+MACHO-ARM-NEXT:     Attributes [ (0x0)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x10
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 4
+MACHO-ARM-NEXT:     Name: __StaticInit (5F 5F 53 74 61 74 69 63 49 6E 69 74 00 00 00 00)
+MACHO-ARM-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x3C
+MACHO-ARM-NEXT:     Size: 0x0
+MACHO-ARM-NEXT:     Offset: 724
+MACHO-ARM-NEXT:     Alignment: 0
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0x0
+MACHO-ARM-NEXT:     Attributes [ (0x800000)
+MACHO-ARM-NEXT:       PureInstructions (0x800000)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 5
+MACHO-ARM-NEXT:     Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x3C
+MACHO-ARM-NEXT:     Size: 0x4
+MACHO-ARM-NEXT:     Offset: 724
+MACHO-ARM-NEXT:     Alignment: 2
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0x0
+MACHO-ARM-NEXT:     Attributes [ (0x0)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:       Symbol {
+MACHO-ARM-NEXT:         Name: _b (10)
+MACHO-ARM-NEXT:         Type: 0xF
+MACHO-ARM-NEXT:         Section: __data (0x6)
+MACHO-ARM-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-ARM-NEXT:         Flags [ (0x0)
+MACHO-ARM-NEXT:         ]
+MACHO-ARM-NEXT:         Value: 0x3C
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:       0000: 2A000000                             |*...|
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 6
+MACHO-ARM-NEXT:     Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-ARM-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x40
+MACHO-ARM-NEXT:     Size: 0x8
+MACHO-ARM-NEXT:     Offset: 728
+MACHO-ARM-NEXT:     Alignment: 2
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0x6
+MACHO-ARM-NEXT:     Attributes [ (0x0)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:       0000: 00000000 00000000                    |........|
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT: ]
diff --git a/test/tools/llvm-readobj/sections.test b/test/tools/llvm-readobj/sections.test
new file mode 100644
index 000000000000..16f1131e05bd
--- /dev/null
+++ b/test/tools/llvm-readobj/sections.test
@@ -0,0 +1,452 @@
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.coff-i386 \
+RUN:   | FileCheck %s -check-prefix COFF
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.elf-i386 \
+RUN:   | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.macho-i386 \
+RUN:   | FileCheck %s -check-prefix MACHO-I386
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.macho-x86-64 \
+RUN:   | FileCheck %s -check-prefix MACHO-X86-64
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.macho-ppc \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.macho-ppc64 \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC64
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.macho-arm \
+RUN:   | FileCheck %s -check-prefix MACHO-ARM
+
+COFF:      Sections [
+COFF-NEXT:   Section {
+COFF-NEXT:     Number: 1
+COFF-NEXT:     Name: .text (2E 74 65 78 74 00 00 00)
+COFF-NEXT:     VirtualSize: 0x0
+COFF-NEXT:     VirtualAddress: 0x0
+COFF-NEXT:     RawDataSize: 22
+COFF-NEXT:     PointerToRawData: 0x64
+COFF-NEXT:     PointerToRelocations: 0x7A
+COFF-NEXT:     PointerToLineNumbers: 0x0
+COFF-NEXT:     RelocationCount: 3
+COFF-NEXT:     LineNumberCount: 0
+COFF-NEXT:     Characteristics [ (0x60500020)
+COFF-NEXT:       IMAGE_SCN_ALIGN_16BYTES (0x500000)
+COFF-NEXT:       IMAGE_SCN_CNT_CODE (0x20)
+COFF-NEXT:       IMAGE_SCN_MEM_EXECUTE (0x20000000)
+COFF-NEXT:       IMAGE_SCN_MEM_READ (0x40000000)
+COFF-NEXT:     ]
+COFF-NEXT:   }
+COFF-NEXT:   Section {
+COFF-NEXT:     Number: 2
+COFF-NEXT:     Name: .data (2E 64 61 74 61 00 00 00)
+COFF-NEXT:     VirtualSize: 0x0
+COFF-NEXT:     VirtualAddress: 0x0
+COFF-NEXT:     RawDataSize: 13
+COFF-NEXT:     PointerToRawData: 0x98
+COFF-NEXT:     PointerToRelocations: 0x0
+COFF-NEXT:     PointerToLineNumbers: 0x0
+COFF-NEXT:     RelocationCount: 0
+COFF-NEXT:     LineNumberCount: 0
+COFF-NEXT:     Characteristics [ (0xC0300040)
+COFF-NEXT:       IMAGE_SCN_ALIGN_4BYTES (0x300000)
+COFF-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA (0x40)
+COFF-NEXT:       IMAGE_SCN_MEM_READ (0x40000000)
+COFF-NEXT:       IMAGE_SCN_MEM_WRITE (0x80000000)
+COFF-NEXT:     ]
+COFF-NEXT:   }
+COFF-NEXT: ]
+
+ELF:      Sections [
+ELF-NEXT:   Section {
+ELF-NEXT:     Index: 0
+ELF-NEXT:     Name:  (0)
+ELF-NEXT:     Type: SHT_NULL (0x0)
+ELF-NEXT:     Flags [ (0x0)
+ELF-NEXT:     ]
+ELF-NEXT:     Address: 0x0
+ELF-NEXT:     Offset: 0x0
+ELF-NEXT:     Size: 0
+ELF-NEXT:     Link: 0
+ELF-NEXT:     Info: 0
+ELF-NEXT:     AddressAlignment: 0
+ELF-NEXT:     EntrySize: 0
+ELF-NEXT:   }
+ELF-NEXT:   Section {
+ELF-NEXT:     Index: 1
+ELF-NEXT:     Name: .text (5)
+ELF-NEXT:     Type: SHT_PROGBITS (0x1)
+ELF-NEXT:     Flags [ (0x6)
+ELF-NEXT:       SHF_ALLOC (0x2)
+ELF-NEXT:       SHF_EXECINSTR (0x4)
+ELF-NEXT:     ]
+ELF-NEXT:     Address: 0x0
+ELF-NEXT:     Offset: 0x40
+ELF-NEXT:     Size: 42
+ELF-NEXT:     Link: 0
+ELF-NEXT:     Info: 0
+ELF-NEXT:     AddressAlignment: 16
+ELF-NEXT:     EntrySize: 0
+ELF-NEXT:   }
+
+MACHO-I386:      Sections [
+MACHO-I386-NEXT:   Section {
+MACHO-I386-NEXT:     Index: 0
+MACHO-I386-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Address: 0x0
+MACHO-I386-NEXT:     Size: 0x22
+MACHO-I386-NEXT:     Offset: 324
+MACHO-I386-NEXT:     Alignment: 4
+MACHO-I386-NEXT:     RelocationOffset: 0x174
+MACHO-I386-NEXT:     RelocationCount: 4
+MACHO-I386-NEXT:     Type: 0x0
+MACHO-I386-NEXT:     Attributes [ (0x800004)
+MACHO-I386-NEXT:       PureInstructions (0x800000)
+MACHO-I386-NEXT:       SomeInstructions (0x4)
+MACHO-I386-NEXT:     ]
+MACHO-I386-NEXT:     Reserved1: 0x0
+MACHO-I386-NEXT:     Reserved2: 0x0
+MACHO-I386-NEXT:   }
+MACHO-I386-NEXT:   Section {
+MACHO-I386-NEXT:     Index: 1
+MACHO-I386-NEXT:     Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Address: 0x22
+MACHO-I386-NEXT:     Size: 0xD
+MACHO-I386-NEXT:     Offset: 358
+MACHO-I386-NEXT:     Alignment: 0
+MACHO-I386-NEXT:     RelocationOffset: 0x0
+MACHO-I386-NEXT:     RelocationCount: 0
+MACHO-I386-NEXT:     Type: ExtReloc (0x2)
+MACHO-I386-NEXT:     Attributes [ (0x0)
+MACHO-I386-NEXT:     ]
+MACHO-I386-NEXT:     Reserved1: 0x0
+MACHO-I386-NEXT:     Reserved2: 0x0
+MACHO-I386-NEXT:   }
+
+
+MACHO-X86-64:     Sections [
+MACHO-X86-64-NEXT:  Section {
+MACHO-X86-64-NEXT:    Index: 0
+MACHO-X86-64-NEXT:    Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Address: 0x0
+MACHO-X86-64-NEXT:    Size: 0x16
+MACHO-X86-64-NEXT:    Offset: 368
+MACHO-X86-64-NEXT:    Alignment: 4
+MACHO-X86-64-NEXT:    RelocationOffset: 0x194
+MACHO-X86-64-NEXT:    RelocationCount: 3
+MACHO-X86-64-NEXT:    Type: 0x0
+MACHO-X86-64-NEXT:    Attributes [ (0x800004)
+MACHO-X86-64-NEXT:      PureInstructions (0x800000)
+MACHO-X86-64-NEXT:      SomeInstructions (0x4)
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Reserved1: 0x0
+MACHO-X86-64-NEXT:    Reserved2: 0x0
+MACHO-X86-64-NEXT:  }
+MACHO-X86-64-NEXT:  Section {
+MACHO-X86-64-NEXT:    Index: 1
+MACHO-X86-64-NEXT:    Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Address: 0x16
+MACHO-X86-64-NEXT:    Size: 0xD
+MACHO-X86-64-NEXT:    Offset: 390
+MACHO-X86-64-NEXT:    Alignment: 0
+MACHO-X86-64-NEXT:    RelocationOffset: 0x0
+MACHO-X86-64-NEXT:    RelocationCount: 0
+MACHO-X86-64-NEXT:    Type: ExtReloc (0x2)
+MACHO-X86-64-NEXT:    Attributes [ (0x0)
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Reserved1: 0x0
+MACHO-X86-64-NEXT:    Reserved2: 0x0
+MACHO-X86-64-NEXT:  }
+MACHO-X86-64-NEXT:]
+
+MACHO-PPC: Sections [
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 0
+MACHO-PPC-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x0
+MACHO-PPC-NEXT:     Size: 0x3C
+MACHO-PPC-NEXT:     Offset: 528
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x27C
+MACHO-PPC-NEXT:     RelocationCount: 5
+MACHO-PPC-NEXT:     Type: 0x0
+MACHO-PPC-NEXT:     Attributes [ (0x800004)
+MACHO-PPC-NEXT:       PureInstructions (0x800000)
+MACHO-PPC-NEXT:       SomeInstructions (0x4)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 1
+MACHO-PPC-NEXT:     Name: __picsymbolstub1 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 31)
+MACHO-PPC-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x40
+MACHO-PPC-NEXT:     Size: 0x20
+MACHO-PPC-NEXT:     Offset: 592
+MACHO-PPC-NEXT:     Alignment: 5
+MACHO-PPC-NEXT:     RelocationOffset: 0x2A4
+MACHO-PPC-NEXT:     RelocationCount: 4
+MACHO-PPC-NEXT:     Type: 0x8
+MACHO-PPC-NEXT:     Attributes [ (0x800004)
+MACHO-PPC-NEXT:       PureInstructions (0x800000)
+MACHO-PPC-NEXT:       SomeInstructions (0x4)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x20
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 2
+MACHO-PPC-NEXT:     Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x60
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 624
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x0
+MACHO-PPC-NEXT:     RelocationCount: 0
+MACHO-PPC-NEXT:     Type: 0x0
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 3
+MACHO-PPC-NEXT:     Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x64
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 628
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x0
+MACHO-PPC-NEXT:     RelocationCount: 0
+MACHO-PPC-NEXT:     Type: 0x6
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x1
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 4
+MACHO-PPC-NEXT:     Name: __la_symbol_ptr (5F 5F 6C 61 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x68
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 632
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x2C4
+MACHO-PPC-NEXT:     RelocationCount: 1
+MACHO-PPC-NEXT:     Type: 0x7
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x2
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT: ]
+
+MACHO-PPC64: Sections [
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 0
+MACHO-PPC64-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x0
+MACHO-PPC64-NEXT:     Size: 0x3C
+MACHO-PPC64-NEXT:     Offset: 608
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x2D4
+MACHO-PPC64-NEXT:     RelocationCount: 5
+MACHO-PPC64-NEXT:     Type: 0x0
+MACHO-PPC64-NEXT:     Attributes [ (0x800004)
+MACHO-PPC64-NEXT:       PureInstructions (0x800000)
+MACHO-PPC64-NEXT:       SomeInstructions (0x4)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 1
+MACHO-PPC64-NEXT:     Name: __picsymbolstub1 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 31)
+MACHO-PPC64-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x40
+MACHO-PPC64-NEXT:     Size: 0x20
+MACHO-PPC64-NEXT:     Offset: 672
+MACHO-PPC64-NEXT:     Alignment: 5
+MACHO-PPC64-NEXT:     RelocationOffset: 0x2FC
+MACHO-PPC64-NEXT:     RelocationCount: 4
+MACHO-PPC64-NEXT:     Type: 0x8
+MACHO-PPC64-NEXT:     Attributes [ (0x800004)
+MACHO-PPC64-NEXT:       PureInstructions (0x800000)
+MACHO-PPC64-NEXT:       SomeInstructions (0x4)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x20
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 2
+MACHO-PPC64-NEXT:     Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x60
+MACHO-PPC64-NEXT:     Size: 0x4
+MACHO-PPC64-NEXT:     Offset: 704
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x0
+MACHO-PPC64-NEXT:     RelocationCount: 0
+MACHO-PPC64-NEXT:     Type: 0x0
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 3
+MACHO-PPC64-NEXT:     Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x64
+MACHO-PPC64-NEXT:     Size: 0x8
+MACHO-PPC64-NEXT:     Offset: 708
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x0
+MACHO-PPC64-NEXT:     RelocationCount: 0
+MACHO-PPC64-NEXT:     Type: 0x6
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x1
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 4
+MACHO-PPC64-NEXT:     Name: __la_symbol_ptr (5F 5F 6C 61 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x6C
+MACHO-PPC64-NEXT:     Size: 0x8
+MACHO-PPC64-NEXT:     Offset: 716
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x31C
+MACHO-PPC64-NEXT:     RelocationCount: 1
+MACHO-PPC64-NEXT:     Type: 0x7
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x2
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT: ]
+
+MACHO-ARM:      Sections [
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:    Index: 0
+MACHO-ARM-NEXT:    Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x0
+MACHO-ARM-NEXT:    Size: 0x3C
+MACHO-ARM-NEXT:    Offset: 664
+MACHO-ARM-NEXT:    Alignment: 2
+MACHO-ARM-NEXT:    RelocationOffset: 0x2E0
+MACHO-ARM-NEXT:    RelocationCount: 9
+MACHO-ARM-NEXT:    Type: 0x0
+MACHO-ARM-NEXT:    Attributes [ (0x800004)
+MACHO-ARM-NEXT:      PureInstructions (0x800000)
+MACHO-ARM-NEXT:      SomeInstructions (0x4)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 1
+MACHO-ARM-NEXT:    Name: __textcoal_nt (5F 5F 74 65 78 74 63 6F 61 6C 5F 6E 74 00 00 00)
+MACHO-ARM-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x3C
+MACHO-ARM-NEXT:    Size: 0x0
+MACHO-ARM-NEXT:    Offset: 724
+MACHO-ARM-NEXT:    Alignment: 0
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0xB
+MACHO-ARM-NEXT:    Attributes [ (0x800000)
+MACHO-ARM-NEXT:      PureInstructions (0x800000)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 2
+MACHO-ARM-NEXT:    Name: __const_coal (5F 5F 63 6F 6E 73 74 5F 63 6F 61 6C 00 00 00 00)
+MACHO-ARM-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x3C
+MACHO-ARM-NEXT:    Size: 0x0
+MACHO-ARM-NEXT:    Offset: 724
+MACHO-ARM-NEXT:    Alignment: 0
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0xB
+MACHO-ARM-NEXT:    Attributes [ (0x0)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 3
+MACHO-ARM-NEXT:    Name: __picsymbolstub4 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 34)
+MACHO-ARM-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x3C
+MACHO-ARM-NEXT:    Size: 0x0
+MACHO-ARM-NEXT:    Offset: 724
+MACHO-ARM-NEXT:    Alignment: 0
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0x8
+MACHO-ARM-NEXT:    Attributes [ (0x0)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x10
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 4
+MACHO-ARM-NEXT:    Name: __StaticInit (5F 5F 53 74 61 74 69 63 49 6E 69 74 00 00 00 00)
+MACHO-ARM-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x3C
+MACHO-ARM-NEXT:    Size: 0x0
+MACHO-ARM-NEXT:    Offset: 724
+MACHO-ARM-NEXT:    Alignment: 0
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0x0
+MACHO-ARM-NEXT:    Attributes [ (0x800000)
+MACHO-ARM-NEXT:      PureInstructions (0x800000)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 5
+MACHO-ARM-NEXT:    Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x3C
+MACHO-ARM-NEXT:    Size: 0x4
+MACHO-ARM-NEXT:    Offset: 724
+MACHO-ARM-NEXT:    Alignment: 2
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0x0
+MACHO-ARM-NEXT:    Attributes [ (0x0)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 6
+MACHO-ARM-NEXT:    Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-ARM-NEXT:    Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x40
+MACHO-ARM-NEXT:    Size: 0x8
+MACHO-ARM-NEXT:    Offset: 728
+MACHO-ARM-NEXT:    Alignment: 2
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0x6
+MACHO-ARM-NEXT:    Attributes [ (0x0)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:]
diff --git a/test/tools/llvm-readobj/symbols.test b/test/tools/llvm-readobj/symbols.test
new file mode 100644
index 000000000000..d33bd8ed2cd0
--- /dev/null
+++ b/test/tools/llvm-readobj/symbols.test
@@ -0,0 +1,44 @@
+RUN: llvm-readobj -t %p/Inputs/trivial.obj.coff-i386 \
+RUN:   | FileCheck %s -check-prefix COFF
+RUN: llvm-readobj -t %p/Inputs/trivial.obj.elf-i386 \
+RUN:   | FileCheck %s -check-prefix ELF
+
+COFF:      Symbols [
+COFF-NEXT:   Symbol {
+COFF-NEXT:     Name: .text
+COFF-NEXT:     Value: 0
+COFF-NEXT:     Section: .text (1)
+COFF-NEXT:     BaseType: Null (0x0)
+COFF-NEXT:     ComplexType: Null (0x0)
+COFF-NEXT:     StorageClass: Static (0x3)
+COFF-NEXT:     AuxSymbolCount: 1
+COFF-NEXT:     AuxSectionDef {
+COFF-NEXT:       Length: 22
+COFF-NEXT:       RelocationCount: 3
+COFF-NEXT:       LineNumberCount: 0
+COFF-NEXT:       Checksum: 0x0
+COFF-NEXT:       Number: 1
+COFF-NEXT:       Selection: 0x0
+COFF-NEXT:       Unused: (00 00 00)
+COFF-NEXT:     }
+COFF-NEXT:   }
+
+ELF:      Symbols [
+ELF-NEXT:   Symbol {
+ELF-NEXT:     Name: trivial.ll (1)
+ELF-NEXT:     Value: 0x0
+ELF-NEXT:     Size: 0
+ELF-NEXT:     Binding: Local (0x0)
+ELF-NEXT:     Type: File (0x4)
+ELF-NEXT:     Other: 0
+ELF-NEXT:     Section:  (0xFFF1)
+ELF-NEXT:   }
+ELF-NEXT:   Symbol {
+ELF-NEXT:     Name: .L.str (39)
+ELF-NEXT:     Value: 0x0
+ELF-NEXT:     Size: 13
+ELF-NEXT:     Binding: Local (0x0)
+ELF-NEXT:     Type: Object (0x1)
+ELF-NEXT:     Other: 0
+ELF-NEXT:     Section: .rodata.str1.1 (0x5)
+ELF-NEXT:   }
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 5e9560491e98..6b7c884516a5 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -43,6 +43,9 @@ add_subdirectory(llvm-mcmarkup)
 
 add_subdirectory(llvm-symbolizer)
 
+add_subdirectory(obj2yaml)
+add_subdirectory(yaml2obj)
+
 if( NOT WIN32 )
   add_subdirectory(lto)
 endif()
diff --git a/tools/Makefile b/tools/Makefile
index c405868cead5..eaf9ed357727 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -35,7 +35,7 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \
                  llvm-diff macho-dump llvm-objdump llvm-readobj \
 	         llvm-rtdyld llvm-dwarfdump llvm-cov \
 	         llvm-size llvm-stress llvm-mcmarkup \
-	         llvm-symbolizer
+	         llvm-symbolizer obj2yaml yaml2obj
 
 # If Intel JIT Events support is configured, build an extra tool to test it.
 ifeq ($(USE_INTEL_JITEVENTS), 1)
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index cede4ac3ae41..937d86a5b8aa 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -16,12 +16,12 @@
 #include "BugDriver.h"
 #include "ToolRunner.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
 #include "llvm/Linker.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/Host.h"
-#include "llvm/Support/IRReader.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include <memory>
@@ -122,7 +122,7 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
   outs() << "Read input file      : '" << Filenames[0] << "'\n";
 
   for (unsigned i = 1, e = Filenames.size(); i != e; ++i) {
-    std::auto_ptr<Module> M(ParseInputFile(Filenames[i], Context));
+    OwningPtr<Module> M(ParseInputFile(Filenames[i], Context));
     if (M.get() == 0) return true;
 
     outs() << "Linking in input file: '" << Filenames[i] << "'\n";
diff --git a/tools/bugpoint/CMakeLists.txt b/tools/bugpoint/CMakeLists.txt
index 3c5e64fdab12..0000d977acf3 100644
--- a/tools/bugpoint/CMakeLists.txt
+++ b/tools/bugpoint/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(LLVM_LINK_COMPONENTS asmparser instrumentation scalaropts ipo
-  linker bitreader bitwriter vectorize objcarcopts)
+  linker bitreader bitwriter irreader vectorize objcarcopts)
 
 add_llvm_tool(bugpoint
   BugDriver.cpp
@@ -12,3 +12,4 @@ add_llvm_tool(bugpoint
   ToolRunner.cpp
   bugpoint.cpp
   )
+set_target_properties(bugpoint PROPERTIES ENABLE_EXPORTS 1)
diff --git a/tools/bugpoint/LLVMBuild.txt b/tools/bugpoint/LLVMBuild.txt
index e03c594bf936..01643553c5b5 100644
--- a/tools/bugpoint/LLVMBuild.txt
+++ b/tools/bugpoint/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Tool
 name = bugpoint
 parent = Tools
-required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Linker Scalar ObjCARC
+required_libraries = AsmParser BitReader BitWriter IRReader IPO Instrumentation Linker Scalar ObjCARC
diff --git a/tools/bugpoint/Makefile b/tools/bugpoint/Makefile
index 65ffc13022f2..20493218b064 100644
--- a/tools/bugpoint/Makefile
+++ b/tools/bugpoint/Makefile
@@ -10,6 +10,6 @@
 LEVEL := ../..
 TOOLNAME := bugpoint
 LINK_COMPONENTS := asmparser instrumentation scalaropts ipo linker bitreader \
-                   bitwriter vectorize objcarcopts
+                   bitwriter irreader vectorize objcarcopts
 
 include $(LEVEL)/Makefile.common
diff --git a/tools/llc/CMakeLists.txt b/tools/llc/CMakeLists.txt
index 683f29862d5c..e5a5550e9ea6 100644
--- a/tools/llc/CMakeLists.txt
+++ b/tools/llc/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser)
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser irreader)
 
 add_llvm_tool(llc
   llc.cpp
diff --git a/tools/llc/LLVMBuild.txt b/tools/llc/LLVMBuild.txt
index 8c8794f62069..45cdc6498f86 100644
--- a/tools/llc/LLVMBuild.txt
+++ b/tools/llc/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Tool
 name = llc
 parent = Tools
-required_libraries = AsmParser BitReader all-targets
+required_libraries = AsmParser BitReader IRReader all-targets
diff --git a/tools/llc/Makefile b/tools/llc/Makefile
index b32d5575d53e..c24f378bc538 100644
--- a/tools/llc/Makefile
+++ b/tools/llc/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL := ../..
 TOOLNAME := llc
-LINK_COMPONENTS := all-targets bitreader asmparser
+LINK_COMPONENTS := all-targets bitreader asmparser irreader
 
 include $(LEVEL)/Makefile.common
 
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index aa652234731a..8a462c608a84 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Pass.h"
 #include "llvm/PassManager.h"
@@ -28,11 +29,11 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/Host.h"
-#include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ToolOutputFile.h"
@@ -199,7 +200,7 @@ int main(int argc, char **argv) {
 static int compileModule(char **argv, LLVMContext &Context) {
   // Load the module to be compiled...
   SMDiagnostic Err;
-  std::auto_ptr<Module> M;
+  OwningPtr<Module> M;
   Module *mod = 0;
   Triple TheTriple;
 
@@ -280,7 +281,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
   Options.UseInitArray = UseInitArray;
   Options.SSPBufferSize = SSPBufferSize;
 
-  std::auto_ptr<TargetMachine>
+  OwningPtr<TargetMachine>
     target(TheTarget->createTargetMachine(TheTriple.getTriple(),
                                           MCPU, FeaturesStr, Options,
                                           RelocModel, CMModel, OLvl));
diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt
index 356233f39742..aaa6598e7118 100644
--- a/tools/lli/CMakeLists.txt
+++ b/tools/lli/CMakeLists.txt
@@ -1,5 +1,5 @@
 
-set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser selectiondag native)
+set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser irreader selectiondag native)
 
 if( LLVM_USE_OPROFILE )
   set(LLVM_LINK_COMPONENTS
diff --git a/tools/lli/LLVMBuild.txt b/tools/lli/LLVMBuild.txt
index 36ceb39b1270..5823792ff01c 100644
--- a/tools/lli/LLVMBuild.txt
+++ b/tools/lli/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Tool
 name = lli
 parent = Tools
-required_libraries = AsmParser BitReader Interpreter JIT MCJIT NativeCodeGen SelectionDAG Native
+required_libraries = AsmParser BitReader IRReader Interpreter JIT MCJIT NativeCodeGen SelectionDAG Native
diff --git a/tools/lli/Makefile b/tools/lli/Makefile
index 85ac6b46bb5b..a6530584a2e9 100644
--- a/tools/lli/Makefile
+++ b/tools/lli/Makefile
@@ -12,7 +12,7 @@ TOOLNAME := lli
 
 include $(LEVEL)/Makefile.config
 
-LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser selectiondag native
+LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser irreader selectiondag native
 
 # If Intel JIT Events support is confiured, link against the LLVM Intel JIT
 # Events interface library
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 332660fc1e0b..297763fcfbd8 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -29,11 +29,11 @@
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IRReader/IRReader.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/Format.h"
-#include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Memory.h"
@@ -42,6 +42,7 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cerrno>
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index 273c4274b5d8..d6f191961daa 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -94,7 +94,7 @@ int main(int argc, char **argv) {
 
   // Parse the file now...
   SMDiagnostic Err;
-  std::auto_ptr<Module> M(ParseAssemblyFile(InputFilename, Err, Context));
+  OwningPtr<Module> M(ParseAssemblyFile(InputFilename, Err, Context));
   if (M.get() == 0) {
     Err.print(argv[0], errs());
     return 1;
diff --git a/tools/llvm-diff/CMakeLists.txt b/tools/llvm-diff/CMakeLists.txt
index c59d69ea0d45..0df8b9ed79e2 100644
--- a/tools/llvm-diff/CMakeLists.txt
+++ b/tools/llvm-diff/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS support asmparser bitreader)
+set(LLVM_LINK_COMPONENTS support asmparser bitreader irreader)
 
 add_llvm_tool(llvm-diff
   llvm-diff.cpp
diff --git a/tools/llvm-diff/LLVMBuild.txt b/tools/llvm-diff/LLVMBuild.txt
index fa06a03353bb..5adfdc2bd6e9 100644
--- a/tools/llvm-diff/LLVMBuild.txt
+++ b/tools/llvm-diff/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Tool
 name = llvm-diff
 parent = Tools
-required_libraries = AsmParser BitReader
+required_libraries = AsmParser BitReader IRReader
diff --git a/tools/llvm-diff/Makefile b/tools/llvm-diff/Makefile
index f7fa7159c54f..bd97a6a9f5e9 100644
--- a/tools/llvm-diff/Makefile
+++ b/tools/llvm-diff/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL := ../..
 TOOLNAME := llvm-diff
-LINK_COMPONENTS := asmparser bitreader
+LINK_COMPONENTS := asmparser bitreader irreader
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS := 1
diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp
index 0b9e92b1b8e2..6eca1e2bfcde 100644
--- a/tools/llvm-diff/llvm-diff.cpp
+++ b/tools/llvm-diff/llvm-diff.cpp
@@ -19,8 +19,8 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IRReader/IRReader.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/IRReader.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 2baa91da509e..067955e5cc61 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -123,7 +123,7 @@ int main(int argc, char **argv) {
   cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n");
 
   std::string ErrorMessage;
-  std::auto_ptr<Module> M;
+  OwningPtr<Module> M;
 
   // Use the bitcode streaming interface
   DataStreamer *streamer = getDataFileStreamer(InputFilename, &ErrorMessage);
diff --git a/tools/llvm-extract/CMakeLists.txt b/tools/llvm-extract/CMakeLists.txt
index a4e3266e3532..3163c4bbbddb 100644
--- a/tools/llvm-extract/CMakeLists.txt
+++ b/tools/llvm-extract/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS asmparser ipo bitreader bitwriter)
+set(LLVM_LINK_COMPONENTS asmparser ipo bitreader bitwriter irreader)
 
 add_llvm_tool(llvm-extract
   llvm-extract.cpp
diff --git a/tools/llvm-extract/LLVMBuild.txt b/tools/llvm-extract/LLVMBuild.txt
index 1b1a4c36cdd1..70e3507a73eb 100644
--- a/tools/llvm-extract/LLVMBuild.txt
+++ b/tools/llvm-extract/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Tool
 name = llvm-extract
 parent = Tools
-required_libraries = AsmParser BitReader BitWriter IPO
+required_libraries = AsmParser BitReader BitWriter IRReader IPO
diff --git a/tools/llvm-extract/Makefile b/tools/llvm-extract/Makefile
index a1e93f5ce468..d371c5475926 100644
--- a/tools/llvm-extract/Makefile
+++ b/tools/llvm-extract/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL := ../..
 TOOLNAME := llvm-extract
-LINK_COMPONENTS := ipo bitreader bitwriter asmparser
+LINK_COMPONENTS := ipo bitreader bitwriter asmparser irreader
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS := 1
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index 85a921118a85..2f45b4eae5be 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -19,13 +19,14 @@
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
 #include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Transforms/IPO.h"
@@ -99,7 +100,7 @@ int main(int argc, char **argv) {
 
   // Use lazy loading, since we only care about selected global values.
   SMDiagnostic Err;
-  std::auto_ptr<Module> M;
+  OwningPtr<Module> M;
   M.reset(getLazyIRFileModule(InputFilename, Err, Context));
 
   if (M.get() == 0) {
diff --git a/tools/llvm-jitlistener/CMakeLists.txt b/tools/llvm-jitlistener/CMakeLists.txt
index d429af928f09..c9704fb22489 100644
--- a/tools/llvm-jitlistener/CMakeLists.txt
+++ b/tools/llvm-jitlistener/CMakeLists.txt
@@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS
   debuginfo
   inteljitevents
   interpreter
+  irreader
   jit
   mcjit
   nativecodegen
diff --git a/tools/llvm-jitlistener/LLVMBuild.txt b/tools/llvm-jitlistener/LLVMBuild.txt
index c436dd90f980..1ce78acecbb6 100644
--- a/tools/llvm-jitlistener/LLVMBuild.txt
+++ b/tools/llvm-jitlistener/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Tool
 name = llvm-jitlistener
 parent = Tools
-required_libraries = AsmParser BitReader Interpreter JIT MCJIT NativeCodeGen Object SelectionDAG Native
+required_libraries = AsmParser BitReader IRReader Interpreter JIT MCJIT NativeCodeGen Object SelectionDAG Native
diff --git a/tools/llvm-jitlistener/Makefile b/tools/llvm-jitlistener/Makefile
index 30182355c9c0..b13222731745 100644
--- a/tools/llvm-jitlistener/Makefile
+++ b/tools/llvm-jitlistener/Makefile
@@ -12,7 +12,7 @@ TOOLNAME := llvm-jitlistener
 
 include $(LEVEL)/Makefile.config
 
-LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser selectiondag Object
+LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser irreader selectiondag Object
 
 # If Intel JIT Events support is configured, link against the LLVM Intel JIT
 # Events interface library.  If not, this tool will do nothing useful, but it
diff --git a/tools/llvm-jitlistener/llvm-jitlistener.cpp b/tools/llvm-jitlistener/llvm-jitlistener.cpp
index d6f5032d6e62..dbaf075e91cf 100644
--- a/tools/llvm-jitlistener/llvm-jitlistener.cpp
+++ b/tools/llvm-jitlistener/llvm-jitlistener.cpp
@@ -22,9 +22,9 @@
 #include "llvm/ExecutionEngine/MCJIT.h"
 #include "llvm/ExecutionEngine/ObjectImage.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Host.h"
-#include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
diff --git a/tools/llvm-link/CMakeLists.txt b/tools/llvm-link/CMakeLists.txt
index 11933f7f959e..4df53564e182 100644
--- a/tools/llvm-link/CMakeLists.txt
+++ b/tools/llvm-link/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS linker bitreader bitwriter asmparser)
+set(LLVM_LINK_COMPONENTS linker bitreader bitwriter asmparser irreader)
 
 add_llvm_tool(llvm-link
   llvm-link.cpp
diff --git a/tools/llvm-link/LLVMBuild.txt b/tools/llvm-link/LLVMBuild.txt
index 6399dede784e..2e386f3c2316 100644
--- a/tools/llvm-link/LLVMBuild.txt
+++ b/tools/llvm-link/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Tool
 name = llvm-link
 parent = Tools
-required_libraries = AsmParser BitReader BitWriter Linker
+required_libraries = AsmParser BitReader BitWriter IRReader Linker
diff --git a/tools/llvm-link/Makefile b/tools/llvm-link/Makefile
index 2553db0cd39c..ed30d2d256b8 100644
--- a/tools/llvm-link/Makefile
+++ b/tools/llvm-link/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL := ../..
 TOOLNAME := llvm-link
-LINK_COMPONENTS := linker bitreader bitwriter asmparser
+LINK_COMPONENTS := linker bitreader bitwriter asmparser irreader
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS := 1
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index f6c9f11a5e3c..3a2d84a53311 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -17,12 +17,13 @@
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include <memory>
@@ -52,13 +53,12 @@ DumpAsm("d", cl::desc("Print assembly as linked"), cl::Hidden);
 // LoadFile - Read the specified bitcode file in and return it.  This routine
 // searches the link path for the specified file to try to find it...
 //
-static inline std::auto_ptr<Module> LoadFile(const char *argv0,
-                                             const std::string &FN, 
-                                             LLVMContext& Context) {
+static inline Module *LoadFile(const char *argv0, const std::string &FN,
+                               LLVMContext& Context) {
   sys::Path Filename;
   if (!Filename.set(FN)) {
     errs() << "Invalid file name: '" << FN << "'\n";
-    return std::auto_ptr<Module>();
+    return NULL;
   }
 
   SMDiagnostic Err;
@@ -67,10 +67,10 @@ static inline std::auto_ptr<Module> LoadFile(const char *argv0,
   
   const std::string &FNStr = Filename.str();
   Result = ParseIRFile(FNStr, Err, Context);
-  if (Result) return std::auto_ptr<Module>(Result);   // Load successful!
+  if (Result) return Result;   // Load successful!
 
   Err.print(argv0, errs());
-  return std::auto_ptr<Module>();
+  return NULL;
 }
 
 int main(int argc, char **argv) {
@@ -85,8 +85,8 @@ int main(int argc, char **argv) {
   unsigned BaseArg = 0;
   std::string ErrorMessage;
 
-  std::auto_ptr<Module> Composite(LoadFile(argv[0],
-                                           InputFilenames[BaseArg], Context));
+  OwningPtr<Module> Composite(LoadFile(argv[0],
+                                       InputFilenames[BaseArg], Context));
   if (Composite.get() == 0) {
     errs() << argv[0] << ": error loading file '"
            << InputFilenames[BaseArg] << "'\n";
@@ -94,8 +94,7 @@ int main(int argc, char **argv) {
   }
 
   for (unsigned i = BaseArg+1; i < InputFilenames.size(); ++i) {
-    std::auto_ptr<Module> M(LoadFile(argv[0],
-                                     InputFilenames[i], Context));
+    OwningPtr<Module> M(LoadFile(argv[0], InputFilenames[i], Context));
     if (M.get() == 0) {
       errs() << argv[0] << ": error loading file '" <<InputFilenames[i]<< "'\n";
       return 1;
@@ -111,9 +110,6 @@ int main(int argc, char **argv) {
     }
   }
 
-  // TODO: Iterate over the -l list and link in any modules containing
-  // global symbols that have not been resolved so far.
-
   if (DumpAsm) errs() << "Here's the assembly:\n" << *Composite;
 
   std::string ErrorInfo;
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index c324ff13a6b8..d78d7f31a6ca 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -27,6 +27,7 @@
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Object/MachO.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
@@ -52,27 +53,11 @@ static cl::opt<bool>
 static cl::opt<std::string>
   DSYMFile("dsym", cl::desc("Use .dSYM file for debug info"));
 
-static const Target *GetTarget(const MachOObject *MachOObj) {
+static const Target *GetTarget(const MachOObjectFile *MachOObj) {
   // Figure out the target triple.
   if (TripleName.empty()) {
     llvm::Triple TT("unknown-unknown-unknown");
-    switch (MachOObj->getHeader().CPUType) {
-    case llvm::MachO::CPUTypeI386:
-      TT.setArch(Triple::ArchType(Triple::x86));
-      break;
-    case llvm::MachO::CPUTypeX86_64:
-      TT.setArch(Triple::ArchType(Triple::x86_64));
-      break;
-    case llvm::MachO::CPUTypeARM:
-      TT.setArch(Triple::ArchType(Triple::arm));
-      break;
-    case llvm::MachO::CPUTypePowerPC:
-      TT.setArch(Triple::ArchType(Triple::ppc));
-      break;
-    case llvm::MachO::CPUTypePowerPC64:
-      TT.setArch(Triple::ArchType(Triple::ppc64));
-      break;
-    }
+    TT.setArch(Triple::ArchType(MachOObj->getArch()));
     TripleName = TT.str();
   }
 
@@ -108,7 +93,7 @@ struct SymbolSorter {
 
 // Print additional information about an address, if available.
 static void DumpAddress(uint64_t Address, ArrayRef<SectionRef> Sections,
-                        MachOObject *MachOObj, raw_ostream &OS) {
+                        const MachOObjectFile *MachOObj, raw_ostream &OS) {
   for (unsigned i = 0; i != Sections.size(); ++i) {
     uint64_t SectAddr = 0, SectSize = 0;
     Sections[i].getAddress(SectAddr);
@@ -199,12 +184,12 @@ static void emitDOTFile(const char *FileName, const MCFunction &f,
   Out << "}\n";
 }
 
-static void getSectionsAndSymbols(const macho::Header &Header,
-                                  MachOObjectFile *MachOObj,
-                             InMemoryStruct<macho::SymtabLoadCommand> *SymtabLC,
-                                  std::vector<SectionRef> &Sections,
-                                  std::vector<SymbolRef> &Symbols,
-                                  SmallVectorImpl<uint64_t> &FoundFns) {
+static void
+getSectionsAndSymbols(const macho::Header Header,
+                      MachOObjectFile *MachOObj,
+                      std::vector<SectionRef> &Sections,
+                      std::vector<SymbolRef> &Symbols,
+                      SmallVectorImpl<uint64_t> &FoundFns) {
   error_code ec;
   for (symbol_iterator SI = MachOObj->begin_symbols(),
        SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
@@ -218,20 +203,28 @@ static void getSectionsAndSymbols(const macho::Header &Header,
     Sections.push_back(*SI);
   }
 
-  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
-    const MachOObject::LoadCommandInfo &LCI =
-       MachOObj->getObject()->getLoadCommandInfo(i);
-    if (LCI.Command.Type == macho::LCT_FunctionStarts) {
+  MachOObjectFile::LoadCommandInfo Command =
+    MachOObj->getFirstLoadCommandInfo();
+  for (unsigned i = 0; ; ++i) {
+    if (Command.C.Type == macho::LCT_FunctionStarts) {
       // We found a function starts segment, parse the addresses for later
       // consumption.
-      InMemoryStruct<macho::LinkeditDataLoadCommand> LLC;
-      MachOObj->getObject()->ReadLinkeditDataLoadCommand(LCI, LLC);
+      macho::LinkeditDataLoadCommand LLC =
+        MachOObj->getLinkeditDataLoadCommand(Command);
 
-      MachOObj->getObject()->ReadULEB128s(LLC->DataOffset, FoundFns);
+      MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
     }
+
+    if (i == Header.NumLoadCommands - 1)
+      break;
+    else
+      Command = MachOObj->getNextLoadCommandInfo(Command);
   }
 }
 
+static void DisassembleInputMachO2(StringRef Filename,
+                                   MachOObjectFile *MachOOF);
+
 void llvm::DisassembleInputMachO(StringRef Filename) {
   OwningPtr<MemoryBuffer> Buff;
 
@@ -242,9 +235,13 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
 
   OwningPtr<MachOObjectFile> MachOOF(static_cast<MachOObjectFile*>(
         ObjectFile::createMachOObjectFile(Buff.take())));
-  MachOObject *MachOObj = MachOOF->getObject();
 
-  const Target *TheTarget = GetTarget(MachOObj);
+  DisassembleInputMachO2(Filename, MachOOF.get());
+}
+
+static void DisassembleInputMachO2(StringRef Filename,
+                                   MachOObjectFile *MachOOF) {
+  const Target *TheTarget = GetTarget(MachOOF);
   if (!TheTarget) {
     // GetTarget prints out stuff.
     return;
@@ -272,31 +269,13 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
 
   outs() << '\n' << Filename << ":\n\n";
 
-  const macho::Header &Header = MachOObj->getHeader();
-
-  const MachOObject::LoadCommandInfo *SymtabLCI = 0;
-  // First, find the symbol table segment.
-  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
-    const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i);
-    if (LCI.Command.Type == macho::LCT_Symtab) {
-      SymtabLCI = &LCI;
-      break;
-    }
-  }
-
-  // Read and register the symbol table data.
-  InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
-  if (SymtabLCI) {
-    MachOObj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
-    MachOObj->RegisterStringTable(*SymtabLC);
-  }
+  macho::Header Header = MachOOF->getHeader();
 
   std::vector<SectionRef> Sections;
   std::vector<SymbolRef> Symbols;
   SmallVector<uint64_t, 8> FoundFns;
 
-  getSectionsAndSymbols(Header, MachOOF.get(), &SymtabLC, Sections, Symbols,
-                        FoundFns);
+  getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns);
 
   // Make a copy of the unsorted symbol list. FIXME: duplication
   std::vector<SymbolRef> UnsortedSymbols(Symbols);
@@ -310,7 +289,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
 #endif
 
   OwningPtr<DIContext> diContext;
-  ObjectFile *DbgObj = MachOOF.get();
+  ObjectFile *DbgObj = MachOOF;
   // Try to find debug info and set up the DIContext for it.
   if (UseDbg) {
     // A separate DSym file path was specified, parse it as a macho file,
@@ -337,10 +316,9 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
         SectName != "__text")
       continue; // Skip non-text sections
 
-    StringRef SegmentName;
     DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl();
-    if (MachOOF->getSectionFinalSegmentName(DR, SegmentName) ||
-        SegmentName != "__TEXT")
+    StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR);
+    if (SegmentName != "__TEXT")
       continue;
 
     // Insert the functions from the function starts segment into our map.
@@ -600,7 +578,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
                 Relocs[j].second.getName(SymName);
 
                 outs() << "\t# " << SymName << ' ';
-                DumpAddress(Addr, Sections, MachOObj, outs());
+                DumpAddress(Addr, Sections, MachOOF, outs());
               }
 
             // If this instructions contains an address, see if we can evaluate
@@ -609,7 +587,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
                                                           Inst.Address,
                                                           Inst.Size);
             if (targ != -1ULL)
-              DumpAddress(targ, Sections, MachOObj, outs());
+              DumpAddress(targ, Sections, MachOOF, outs());
 
             // Print debug info.
             if (diContext) {
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 322bd21b289d..99855995651e 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -228,6 +228,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
       if (!error(i->containsSymbol(*si, contains)) && contains) {
         uint64_t Address;
         if (error(si->getAddress(Address))) break;
+        if (Address == UnknownAddressOrSize) continue;
         Address -= SectionAddr;
 
         StringRef Name;
@@ -254,10 +255,10 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
     std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
 
     StringRef SegmentName = "";
-    if (const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj)) {
+    if (const MachOObjectFile *MachO =
+        dyn_cast<const MachOObjectFile>(Obj)) {
       DataRefImpl DR = i->getRawDataRefImpl();
-      if (error(MachO->getSectionFinalSegmentName(DR, SegmentName)))
-        break;
+      SegmentName = MachO->getSectionFinalSegmentName(DR);
     }
     StringRef name;
     if (error(i->getName(name))) break;
@@ -459,11 +460,19 @@ static void PrintSectionContents(const ObjectFile *o) {
     StringRef Name;
     StringRef Contents;
     uint64_t BaseAddr;
+    bool BSS;
     if (error(si->getName(Name))) continue;
     if (error(si->getContents(Contents))) continue;
     if (error(si->getAddress(BaseAddr))) continue;
+    if (error(si->isBSS(BSS))) continue;
 
     outs() << "Contents of section " << Name << ":\n";
+    if (BSS) {
+      outs() << format("<skipping contents of bss section at [%04" PRIx64
+                       ", %04" PRIx64 ")>\n", BaseAddr,
+                       BaseAddr + Contents.size());
+      continue;
+    }
 
     // Dump out the content as hex and printable ascii characters.
     for (std::size_t addr = 0, end = Contents.size(); addr < end; addr += 16) {
@@ -591,11 +600,10 @@ static void PrintSymbolTable(const ObjectFile *o) {
       else if (Section == o->end_sections())
         outs() << "*UND*";
       else {
-        if (const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(o)) {
-          StringRef SegmentName;
+        if (const MachOObjectFile *MachO =
+            dyn_cast<const MachOObjectFile>(o)) {
           DataRefImpl DR = Section->getRawDataRefImpl();
-          if (error(MachO->getSectionFinalSegmentName(DR, SegmentName)))
-            SegmentName = "";
+          StringRef SegmentName = MachO->getSectionFinalSegmentName(DR);
           outs() << SegmentName << ",";
         }
         StringRef SectionName;
diff --git a/tools/llvm-ranlib/llvm-ranlib.cpp b/tools/llvm-ranlib/llvm-ranlib.cpp
index fe9d3e2954af..e3e3bad8d638 100644
--- a/tools/llvm-ranlib/llvm-ranlib.cpp
+++ b/tools/llvm-ranlib/llvm-ranlib.cpp
@@ -78,7 +78,7 @@ int main(int argc, char **argv) {
   }
 
   std::string err_msg;
-  std::auto_ptr<Archive>
+  OwningPtr<Archive>
     AutoArchive(Archive::OpenAndLoad(ArchivePath, Context, &err_msg));
   Archive* TheArchive = AutoArchive.get();
   if (!TheArchive) {
diff --git a/tools/llvm-readobj/CMakeLists.txt b/tools/llvm-readobj/CMakeLists.txt
index 676c23d7ae69..3d20def8f51c 100644
--- a/tools/llvm-readobj/CMakeLists.txt
+++ b/tools/llvm-readobj/CMakeLists.txt
@@ -1,6 +1,15 @@
-set(LLVM_LINK_COMPONENTS archive bitreader object)
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  archive
+  bitreader
+  object)
 
 add_llvm_tool(llvm-readobj
-  ELF.cpp
   llvm-readobj.cpp
+  ObjDumper.cpp
+  COFFDumper.cpp
+  ELFDumper.cpp
+  MachODumper.cpp
+  Error.cpp
+  StreamWriter.cpp
   )
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
new file mode 100644
index 000000000000..d600bc3fd2f6
--- /dev/null
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -0,0 +1,1021 @@
+//===-- COFFDumper.cpp - COFF-specific dumper -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the COFF-specific dumper for llvm-readobj.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm-readobj.h"
+#include "ObjDumper.h"
+
+#include "Error.h"
+#include "StreamWriter.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/Win64EH.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+#include <algorithm>
+#include <cstring>
+#include <time.h>
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::Win64EH;
+
+namespace {
+
+class COFFDumper : public ObjDumper {
+public:
+  COFFDumper(const llvm::object::COFFObjectFile *Obj, StreamWriter& Writer)
+    : ObjDumper(Writer)
+    , Obj(Obj) {
+    cacheRelocations();
+  }
+
+  virtual void printFileHeaders() LLVM_OVERRIDE;
+  virtual void printSections() LLVM_OVERRIDE;
+  virtual void printRelocations() LLVM_OVERRIDE;
+  virtual void printSymbols() LLVM_OVERRIDE;
+  virtual void printDynamicSymbols() LLVM_OVERRIDE;
+  virtual void printUnwindInfo() LLVM_OVERRIDE;
+
+private:
+  void printSymbol(symbol_iterator SymI);
+
+  void printRelocation(section_iterator SecI, relocation_iterator RelI);
+
+  void printX64UnwindInfo();
+
+  void printRuntimeFunction(
+    const RuntimeFunction& RTF,
+    uint64_t OffsetInSection,
+    const std::vector<RelocationRef> &Rels);
+
+  void printUnwindInfo(
+    const Win64EH::UnwindInfo& UI,
+    uint64_t OffsetInSection,
+    const std::vector<RelocationRef> &Rels);
+
+  void printUnwindCode(const Win64EH::UnwindInfo& UI, ArrayRef<UnwindCode> UCs);
+
+  void cacheRelocations();
+
+  error_code getSectionContents(
+    const std::vector<RelocationRef> &Rels,
+    uint64_t Offset,
+    ArrayRef<uint8_t> &Contents,
+    uint64_t &Addr);
+
+  error_code getSection(
+    const std::vector<RelocationRef> &Rels,
+    uint64_t Offset,
+    const coff_section **Section,
+    uint64_t *AddrPtr);
+
+  typedef DenseMap<const coff_section*, std::vector<RelocationRef> > RelocMapTy;
+
+  const llvm::object::COFFObjectFile *Obj;
+  RelocMapTy RelocMap;
+  std::vector<RelocationRef> EmptyRelocs;
+};
+
+} // namespace
+
+
+namespace llvm {
+
+error_code createCOFFDumper(const object::ObjectFile *Obj,
+                            StreamWriter& Writer,
+                            OwningPtr<ObjDumper> &Result) {
+  const COFFObjectFile *COFFObj = dyn_cast<COFFObjectFile>(Obj);
+  if (!COFFObj)
+    return readobj_error::unsupported_obj_file_format;
+
+  Result.reset(new COFFDumper(COFFObj, Writer));
+  return readobj_error::success;
+}
+
+} // namespace llvm
+
+
+// Returns the name of the unwind code.
+static StringRef getUnwindCodeTypeName(uint8_t Code) {
+  switch(Code) {
+  default: llvm_unreachable("Invalid unwind code");
+  case UOP_PushNonVol: return "PUSH_NONVOL";
+  case UOP_AllocLarge: return "ALLOC_LARGE";
+  case UOP_AllocSmall: return "ALLOC_SMALL";
+  case UOP_SetFPReg: return "SET_FPREG";
+  case UOP_SaveNonVol: return "SAVE_NONVOL";
+  case UOP_SaveNonVolBig: return "SAVE_NONVOL_FAR";
+  case UOP_SaveXMM128: return "SAVE_XMM128";
+  case UOP_SaveXMM128Big: return "SAVE_XMM128_FAR";
+  case UOP_PushMachFrame: return "PUSH_MACHFRAME";
+  }
+}
+
+// Returns the name of a referenced register.
+static StringRef getUnwindRegisterName(uint8_t Reg) {
+  switch(Reg) {
+  default: llvm_unreachable("Invalid register");
+  case 0: return "RAX";
+  case 1: return "RCX";
+  case 2: return "RDX";
+  case 3: return "RBX";
+  case 4: return "RSP";
+  case 5: return "RBP";
+  case 6: return "RSI";
+  case 7: return "RDI";
+  case 8: return "R8";
+  case 9: return "R9";
+  case 10: return "R10";
+  case 11: return "R11";
+  case 12: return "R12";
+  case 13: return "R13";
+  case 14: return "R14";
+  case 15: return "R15";
+  }
+}
+
+// Calculates the number of array slots required for the unwind code.
+static unsigned getNumUsedSlots(const UnwindCode &UnwindCode) {
+  switch (UnwindCode.getUnwindOp()) {
+  default: llvm_unreachable("Invalid unwind code");
+  case UOP_PushNonVol:
+  case UOP_AllocSmall:
+  case UOP_SetFPReg:
+  case UOP_PushMachFrame:
+    return 1;
+  case UOP_SaveNonVol:
+  case UOP_SaveXMM128:
+    return 2;
+  case UOP_SaveNonVolBig:
+  case UOP_SaveXMM128Big:
+    return 3;
+  case UOP_AllocLarge:
+    return (UnwindCode.getOpInfo() == 0) ? 2 : 3;
+  }
+}
+
+// Given a symbol sym this functions returns the address and section of it.
+static error_code resolveSectionAndAddress(const COFFObjectFile *Obj,
+                                           const SymbolRef &Sym,
+                                           const coff_section *&ResolvedSection,
+                                           uint64_t &ResolvedAddr) {
+  if (error_code EC = Sym.getAddress(ResolvedAddr))
+    return EC;
+
+  section_iterator iter(Obj->begin_sections());
+  if (error_code EC = Sym.getSection(iter))
+    return EC;
+
+  ResolvedSection = Obj->getCOFFSection(iter);
+  return object_error::success;
+}
+
+// Given a vector of relocations for a section and an offset into this section
+// the function returns the symbol used for the relocation at the offset.
+static error_code resolveSymbol(const std::vector<RelocationRef> &Rels,
+                                uint64_t Offset, SymbolRef &Sym) {
+  for (std::vector<RelocationRef>::const_iterator RelI = Rels.begin(),
+                                                  RelE = Rels.end();
+                                                  RelI != RelE; ++RelI) {
+    uint64_t Ofs;
+    if (error_code EC = RelI->getOffset(Ofs))
+      return EC;
+
+    if (Ofs == Offset) {
+      if (error_code EC = RelI->getSymbol(Sym))
+        return EC;
+      return readobj_error::success;
+    }
+  }
+
+  return readobj_error::unknown_symbol;
+}
+
+// Given a vector of relocations for a section and an offset into this section
+// the function returns the name of the symbol used for the relocation at the
+// offset.
+static error_code resolveSymbolName(const std::vector<RelocationRef> &Rels,
+                                    uint64_t Offset, StringRef &Name) {
+  SymbolRef Sym;
+  if (error_code EC = resolveSymbol(Rels, Offset, Sym)) return EC;
+  if (error_code EC = Sym.getName(Name)) return EC;
+  return object_error::success;
+}
+
+static const EnumEntry<COFF::MachineTypes> ImageFileMachineType[] = {
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_UNKNOWN  ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_AM33     ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_AMD64    ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARM      ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARMV7    ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_EBC      ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_I386     ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_IA64     ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_M32R     ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_MIPS16   ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_MIPSFPU  ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_MIPSFPU16),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_POWERPC  ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_POWERPCFP),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_R4000    ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_SH3      ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_SH3DSP   ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_SH4      ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_SH5      ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_THUMB    ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_WCEMIPSV2)
+};
+
+static const EnumEntry<COFF::Characteristics> ImageFileCharacteristics[] = {
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_RELOCS_STRIPPED        ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_EXECUTABLE_IMAGE       ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_LINE_NUMS_STRIPPED     ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_LOCAL_SYMS_STRIPPED    ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_AGGRESSIVE_WS_TRIM     ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_LARGE_ADDRESS_AWARE    ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_BYTES_REVERSED_LO      ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_32BIT_MACHINE          ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_DEBUG_STRIPPED         ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_NET_RUN_FROM_SWAP      ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_SYSTEM                 ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_DLL                    ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_UP_SYSTEM_ONLY         ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_BYTES_REVERSED_HI      )
+};
+
+static const EnumEntry<COFF::SectionCharacteristics>
+ImageSectionCharacteristics[] = {
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_TYPE_NO_PAD           ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_CNT_CODE              ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_CNT_INITIALIZED_DATA  ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_CNT_UNINITIALIZED_DATA),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_LNK_OTHER             ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_LNK_INFO              ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_LNK_REMOVE            ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_LNK_COMDAT            ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_GPREL                 ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_PURGEABLE         ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_16BIT             ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_LOCKED            ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_PRELOAD           ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_1BYTES          ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_2BYTES          ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_4BYTES          ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_8BYTES          ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_16BYTES         ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_32BYTES         ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_64BYTES         ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_128BYTES        ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_256BYTES        ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_512BYTES        ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_1024BYTES       ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_2048BYTES       ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_4096BYTES       ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_8192BYTES       ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_LNK_NRELOC_OVFL       ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_DISCARDABLE       ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_NOT_CACHED        ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_NOT_PAGED         ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_SHARED            ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_EXECUTE           ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_READ              ),
+  LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_WRITE             )
+};
+
+static const EnumEntry<COFF::SymbolBaseType> ImageSymType[] = {
+  { "Null"  , COFF::IMAGE_SYM_TYPE_NULL   },
+  { "Void"  , COFF::IMAGE_SYM_TYPE_VOID   },
+  { "Char"  , COFF::IMAGE_SYM_TYPE_CHAR   },
+  { "Short" , COFF::IMAGE_SYM_TYPE_SHORT  },
+  { "Int"   , COFF::IMAGE_SYM_TYPE_INT    },
+  { "Long"  , COFF::IMAGE_SYM_TYPE_LONG   },
+  { "Float" , COFF::IMAGE_SYM_TYPE_FLOAT  },
+  { "Double", COFF::IMAGE_SYM_TYPE_DOUBLE },
+  { "Struct", COFF::IMAGE_SYM_TYPE_STRUCT },
+  { "Union" , COFF::IMAGE_SYM_TYPE_UNION  },
+  { "Enum"  , COFF::IMAGE_SYM_TYPE_ENUM   },
+  { "MOE"   , COFF::IMAGE_SYM_TYPE_MOE    },
+  { "Byte"  , COFF::IMAGE_SYM_TYPE_BYTE   },
+  { "Word"  , COFF::IMAGE_SYM_TYPE_WORD   },
+  { "UInt"  , COFF::IMAGE_SYM_TYPE_UINT   },
+  { "DWord" , COFF::IMAGE_SYM_TYPE_DWORD  }
+};
+
+static const EnumEntry<COFF::SymbolComplexType> ImageSymDType[] = {
+  { "Null"    , COFF::IMAGE_SYM_DTYPE_NULL     },
+  { "Pointer" , COFF::IMAGE_SYM_DTYPE_POINTER  },
+  { "Function", COFF::IMAGE_SYM_DTYPE_FUNCTION },
+  { "Array"   , COFF::IMAGE_SYM_DTYPE_ARRAY    }
+};
+
+static const EnumEntry<COFF::SymbolStorageClass> ImageSymClass[] = {
+  { "EndOfFunction"  , COFF::IMAGE_SYM_CLASS_END_OF_FUNCTION  },
+  { "Null"           , COFF::IMAGE_SYM_CLASS_NULL             },
+  { "Automatic"      , COFF::IMAGE_SYM_CLASS_AUTOMATIC        },
+  { "External"       , COFF::IMAGE_SYM_CLASS_EXTERNAL         },
+  { "Static"         , COFF::IMAGE_SYM_CLASS_STATIC           },
+  { "Register"       , COFF::IMAGE_SYM_CLASS_REGISTER         },
+  { "ExternalDef"    , COFF::IMAGE_SYM_CLASS_EXTERNAL_DEF     },
+  { "Label"          , COFF::IMAGE_SYM_CLASS_LABEL            },
+  { "UndefinedLabel" , COFF::IMAGE_SYM_CLASS_UNDEFINED_LABEL  },
+  { "MemberOfStruct" , COFF::IMAGE_SYM_CLASS_MEMBER_OF_STRUCT },
+  { "Argument"       , COFF::IMAGE_SYM_CLASS_ARGUMENT         },
+  { "StructTag"      , COFF::IMAGE_SYM_CLASS_STRUCT_TAG       },
+  { "MemberOfUnion"  , COFF::IMAGE_SYM_CLASS_MEMBER_OF_UNION  },
+  { "UnionTag"       , COFF::IMAGE_SYM_CLASS_UNION_TAG        },
+  { "TypeDefinition" , COFF::IMAGE_SYM_CLASS_TYPE_DEFINITION  },
+  { "UndefinedStatic", COFF::IMAGE_SYM_CLASS_UNDEFINED_STATIC },
+  { "EnumTag"        , COFF::IMAGE_SYM_CLASS_ENUM_TAG         },
+  { "MemberOfEnum"   , COFF::IMAGE_SYM_CLASS_MEMBER_OF_ENUM   },
+  { "RegisterParam"  , COFF::IMAGE_SYM_CLASS_REGISTER_PARAM   },
+  { "BitField"       , COFF::IMAGE_SYM_CLASS_BIT_FIELD        },
+  { "Block"          , COFF::IMAGE_SYM_CLASS_BLOCK            },
+  { "Function"       , COFF::IMAGE_SYM_CLASS_FUNCTION         },
+  { "EndOfStruct"    , COFF::IMAGE_SYM_CLASS_END_OF_STRUCT    },
+  { "File"           , COFF::IMAGE_SYM_CLASS_FILE             },
+  { "Section"        , COFF::IMAGE_SYM_CLASS_SECTION          },
+  { "WeakExternal"   , COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL    },
+  { "CLRToken"       , COFF::IMAGE_SYM_CLASS_CLR_TOKEN        }
+};
+
+static const EnumEntry<COFF::COMDATType> ImageCOMDATSelect[] = {
+  { "NoDuplicates", COFF::IMAGE_COMDAT_SELECT_NODUPLICATES },
+  { "Any"         , COFF::IMAGE_COMDAT_SELECT_ANY          },
+  { "SameSize"    , COFF::IMAGE_COMDAT_SELECT_SAME_SIZE    },
+  { "ExactMatch"  , COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH  },
+  { "Associative" , COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE  },
+  { "Largest"     , COFF::IMAGE_COMDAT_SELECT_LARGEST      },
+  { "Newest"      , COFF::IMAGE_COMDAT_SELECT_NEWEST       }
+};
+
+static const EnumEntry<COFF::WeakExternalCharacteristics>
+WeakExternalCharacteristics[] = {
+  { "NoLibrary", COFF::IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY },
+  { "Library"  , COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY   },
+  { "Alias"    , COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS     }
+};
+
+static const EnumEntry<unsigned> UnwindFlags[] = {
+  { "ExceptionHandler", Win64EH::UNW_ExceptionHandler },
+  { "TerminateHandler", Win64EH::UNW_TerminateHandler },
+  { "ChainInfo"       , Win64EH::UNW_ChainInfo        }
+};
+
+static const EnumEntry<unsigned> UnwindOpInfo[] = {
+  { "RAX",  0 },
+  { "RCX",  1 },
+  { "RDX",  2 },
+  { "RBX",  3 },
+  { "RSP",  4 },
+  { "RBP",  5 },
+  { "RSI",  6 },
+  { "RDI",  7 },
+  { "R8",   8 },
+  { "R9",   9 },
+  { "R10", 10 },
+  { "R11", 11 },
+  { "R12", 12 },
+  { "R13", 13 },
+  { "R14", 14 },
+  { "R15", 15 }
+};
+
+// Some additional COFF structures not defined by llvm::object.
+namespace {
+  struct coff_aux_function_definition {
+    support::ulittle32_t TagIndex;
+    support::ulittle32_t TotalSize;
+    support::ulittle32_t PointerToLineNumber;
+    support::ulittle32_t PointerToNextFunction;
+    uint8_t Unused[2];
+  };
+
+  struct coff_aux_weak_external_definition {
+    support::ulittle32_t TagIndex;
+    support::ulittle32_t Characteristics;
+    uint8_t Unused[10];
+  };
+
+  struct coff_aux_file_record {
+    char FileName[18];
+  };
+
+  struct coff_aux_clr_token {
+    support::ulittle8_t AuxType;
+    support::ulittle8_t Reserved;
+    support::ulittle32_t SymbolTableIndex;
+    uint8_t Unused[12];
+  };
+} // namespace
+
+static uint64_t getOffsetOfLSDA(const Win64EH::UnwindInfo& UI) {
+  return static_cast<const char*>(UI.getLanguageSpecificData())
+         - reinterpret_cast<const char*>(&UI);
+}
+
+static uint32_t getLargeSlotValue(ArrayRef<UnwindCode> UCs) {
+  if (UCs.size() < 3)
+    return 0;
+
+  return UCs[1].FrameOffset + (static_cast<uint32_t>(UCs[2].FrameOffset) << 16);
+}
+
+template<typename T>
+static error_code getSymbolAuxData(const COFFObjectFile *Obj,
+                                   const coff_symbol *Symbol, const T* &Aux) {
+  ArrayRef<uint8_t> AuxData = Obj->getSymbolAuxData(Symbol);
+  Aux = reinterpret_cast<const T*>(AuxData.data());
+  return readobj_error::success;
+}
+
+static std::string formatSymbol(const std::vector<RelocationRef> &Rels,
+                                uint64_t Offset, uint32_t Disp) {
+  std::string Buffer;
+  raw_string_ostream Str(Buffer);
+
+  StringRef Sym;
+  if (resolveSymbolName(Rels, Offset, Sym)) {
+    Str << format(" (0x%X)", Offset);
+    return Str.str();
+  }
+
+  Str << Sym;
+  if (Disp > 0) {
+    Str << format(" +0x%X (0x%X)", Disp, Offset);
+  } else {
+    Str << format(" (0x%X)", Offset);
+  }
+
+  return Str.str();
+}
+
+// Given a vector of relocations for a section and an offset into this section
+// the function resolves the symbol used for the relocation at the offset and
+// returns the section content and the address inside the content pointed to
+// by the symbol.
+error_code COFFDumper::getSectionContents(
+    const std::vector<RelocationRef> &Rels, uint64_t Offset,
+    ArrayRef<uint8_t> &Contents, uint64_t &Addr) {
+
+  SymbolRef Sym;
+  const coff_section *Section;
+
+  if (error_code EC = resolveSymbol(Rels, Offset, Sym))
+    return EC;
+  if (error_code EC = resolveSectionAndAddress(Obj, Sym, Section, Addr))
+    return EC;
+  if (error_code EC = Obj->getSectionContents(Section, Contents))
+    return EC;
+
+  return object_error::success;
+}
+
+error_code COFFDumper::getSection(
+    const std::vector<RelocationRef> &Rels, uint64_t Offset,
+    const coff_section **SectionPtr, uint64_t *AddrPtr) {
+
+  SymbolRef Sym;
+  if (error_code EC = resolveSymbol(Rels, Offset, Sym))
+    return EC;
+
+  const coff_section *Section;
+  uint64_t Addr;
+  if (error_code EC = resolveSectionAndAddress(Obj, Sym, Section, Addr))
+    return EC;
+
+  if (SectionPtr)
+    *SectionPtr = Section;
+  if (AddrPtr)
+    *AddrPtr = Addr;
+
+  return object_error::success;
+}
+
+void COFFDumper::cacheRelocations() {
+  error_code EC;
+  for (section_iterator SecI = Obj->begin_sections(),
+                        SecE = Obj->end_sections();
+                        SecI != SecE; SecI.increment(EC)) {
+    if (error(EC))
+      break;
+
+    const coff_section *Section = Obj->getCOFFSection(SecI);
+
+    for (relocation_iterator RelI = SecI->begin_relocations(),
+                             RelE = SecI->end_relocations();
+                             RelI != RelE; RelI.increment(EC)) {
+      if (error(EC))
+        break;
+
+      RelocMap[Section].push_back(*RelI);
+    }
+
+    // Sort relocations by address.
+    std::sort(RelocMap[Section].begin(), RelocMap[Section].end(),
+              relocAddressLess);
+  }
+}
+
+void COFFDumper::printFileHeaders() {
+  const coff_file_header *Header = 0;
+  if (error(Obj->getHeader(Header)))
+    return;
+
+  time_t TDS = Header->TimeDateStamp;
+  char FormattedTime[20] = { };
+  strftime(FormattedTime, 20, "%Y-%m-%d %H:%M:%S", gmtime(&TDS));
+
+  {
+    DictScope D(W, "ImageFileHeader");
+    W.printEnum  ("Machine", Header->Machine,
+                    makeArrayRef(ImageFileMachineType));
+    W.printNumber("SectionCount", Header->NumberOfSections);
+    W.printHex   ("TimeDateStamp", FormattedTime, Header->TimeDateStamp);
+    W.printHex   ("PointerToSymbolTable", Header->PointerToSymbolTable);
+    W.printNumber("SymbolCount", Header->NumberOfSymbols);
+    W.printNumber("OptionalHeaderSize", Header->SizeOfOptionalHeader);
+    W.printFlags ("Characteristics", Header->Characteristics,
+                    makeArrayRef(ImageFileCharacteristics));
+  }
+}
+
+void COFFDumper::printSections() {
+  error_code EC;
+
+  ListScope SectionsD(W, "Sections");
+  int SectionNumber = 0;
+  for (section_iterator SecI = Obj->begin_sections(),
+                        SecE = Obj->end_sections();
+                        SecI != SecE; SecI.increment(EC)) {
+    if (error(EC))
+      break;
+
+    ++SectionNumber;
+    const coff_section *Section = Obj->getCOFFSection(SecI);
+
+    StringRef Name;
+    if (error(SecI->getName(Name)))
+        Name = "";
+
+    DictScope D(W, "Section");
+    W.printNumber("Number", SectionNumber);
+    W.printBinary("Name", Name, Section->Name);
+    W.printHex   ("VirtualSize", Section->VirtualSize);
+    W.printHex   ("VirtualAddress", Section->VirtualAddress);
+    W.printNumber("RawDataSize", Section->SizeOfRawData);
+    W.printHex   ("PointerToRawData", Section->PointerToRawData);
+    W.printHex   ("PointerToRelocations", Section->PointerToRelocations);
+    W.printHex   ("PointerToLineNumbers", Section->PointerToLinenumbers);
+    W.printNumber("RelocationCount", Section->NumberOfRelocations);
+    W.printNumber("LineNumberCount", Section->NumberOfLinenumbers);
+    W.printFlags ("Characteristics", Section->Characteristics,
+                    makeArrayRef(ImageSectionCharacteristics),
+                    COFF::SectionCharacteristics(0x00F00000));
+
+    if (opts::SectionRelocations) {
+      ListScope D(W, "Relocations");
+      for (relocation_iterator RelI = SecI->begin_relocations(),
+                               RelE = SecI->end_relocations();
+                               RelI != RelE; RelI.increment(EC)) {
+        if (error(EC)) break;
+
+        printRelocation(SecI, RelI);
+      }
+    }
+
+    if (opts::SectionSymbols) {
+      ListScope D(W, "Symbols");
+      for (symbol_iterator SymI = Obj->begin_symbols(),
+                           SymE = Obj->end_symbols();
+                           SymI != SymE; SymI.increment(EC)) {
+        if (error(EC)) break;
+
+        bool Contained = false;
+        if (SecI->containsSymbol(*SymI, Contained) || !Contained)
+          continue;
+
+        printSymbol(SymI);
+      }
+    }
+
+    if (opts::SectionData) {
+      StringRef Data;
+      if (error(SecI->getContents(Data))) break;
+
+      W.printBinaryBlock("SectionData", Data);
+    }
+  }
+}
+
+void COFFDumper::printRelocations() {
+  ListScope D(W, "Relocations");
+
+  error_code EC;
+  int SectionNumber = 0;
+  for (section_iterator SecI = Obj->begin_sections(),
+                        SecE = Obj->end_sections();
+                        SecI != SecE; SecI.increment(EC)) {
+    ++SectionNumber;
+    if (error(EC))
+      break;
+
+    StringRef Name;
+    if (error(SecI->getName(Name)))
+      continue;
+
+    bool PrintedGroup = false;
+    for (relocation_iterator RelI = SecI->begin_relocations(),
+                             RelE = SecI->end_relocations();
+                             RelI != RelE; RelI.increment(EC)) {
+      if (error(EC)) break;
+
+      if (!PrintedGroup) {
+        W.startLine() << "Section (" << SectionNumber << ") " << Name << " {\n";
+        W.indent();
+        PrintedGroup = true;
+      }
+
+      printRelocation(SecI, RelI);
+    }
+
+    if (PrintedGroup) {
+      W.unindent();
+      W.startLine() << "}\n";
+    }
+  }
+}
+
+void COFFDumper::printRelocation(section_iterator SecI,
+                                 relocation_iterator RelI) {
+  uint64_t Offset;
+  uint64_t RelocType;
+  SmallString<32> RelocName;
+  SymbolRef Symbol;
+  StringRef SymbolName;
+  StringRef Contents;
+  if (error(RelI->getOffset(Offset))) return;
+  if (error(RelI->getType(RelocType))) return;
+  if (error(RelI->getTypeName(RelocName))) return;
+  if (error(RelI->getSymbol(Symbol))) return;
+  if (error(Symbol.getName(SymbolName))) return;
+  if (error(SecI->getContents(Contents))) return;
+
+  if (opts::ExpandRelocs) {
+    DictScope Group(W, "Relocation");
+    W.printHex("Offset", Offset);
+    W.printNumber("Type", RelocName, RelocType);
+    W.printString("Symbol", SymbolName.size() > 0 ? SymbolName : "-");
+  } else {
+    raw_ostream& OS = W.startLine();
+    OS << W.hex(Offset)
+       << " " << RelocName
+       << " " << (SymbolName.size() > 0 ? SymbolName : "-")
+       << "\n";
+  }
+}
+
+void COFFDumper::printSymbols() {
+  ListScope Group(W, "Symbols");
+
+  error_code EC;
+  for (symbol_iterator SymI = Obj->begin_symbols(),
+                       SymE = Obj->end_symbols();
+                       SymI != SymE; SymI.increment(EC)) {
+    if (error(EC)) break;
+
+    printSymbol(SymI);
+  }
+}
+
+void COFFDumper::printDynamicSymbols() {
+  ListScope Group(W, "DynamicSymbols");
+}
+
+void COFFDumper::printSymbol(symbol_iterator SymI) {
+  DictScope D(W, "Symbol");
+
+  const coff_symbol *Symbol = Obj->getCOFFSymbol(SymI);
+  const coff_section *Section;
+  if (error_code EC = Obj->getSection(Symbol->SectionNumber, Section)) {
+    W.startLine() << "Invalid section number: " << EC.message() << "\n";
+    W.flush();
+    return;
+  }
+
+  StringRef SymbolName;
+  if (Obj->getSymbolName(Symbol, SymbolName))
+    SymbolName = "";
+
+  StringRef SectionName;
+  if (Section && Obj->getSectionName(Section, SectionName))
+    SectionName = "";
+
+  W.printString("Name", SymbolName);
+  W.printNumber("Value", Symbol->Value);
+  W.printNumber("Section", SectionName, Symbol->SectionNumber);
+  W.printEnum  ("BaseType", Symbol->getBaseType(), makeArrayRef(ImageSymType));
+  W.printEnum  ("ComplexType", Symbol->getComplexType(),
+                                                   makeArrayRef(ImageSymDType));
+  W.printEnum  ("StorageClass", Symbol->StorageClass,
+                                                   makeArrayRef(ImageSymClass));
+  W.printNumber("AuxSymbolCount", Symbol->NumberOfAuxSymbols);
+
+  for (unsigned I = 0; I < Symbol->NumberOfAuxSymbols; ++I) {
+    if (Symbol->StorageClass     == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
+        Symbol->getBaseType()    == COFF::IMAGE_SYM_TYPE_NULL &&
+        Symbol->getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION &&
+        Symbol->SectionNumber > 0) {
+      const coff_aux_function_definition *Aux;
+      if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
+        break;
+
+      DictScope AS(W, "AuxFunctionDef");
+      W.printNumber("TagIndex", Aux->TagIndex);
+      W.printNumber("TotalSize", Aux->TotalSize);
+      W.printHex("PointerToLineNumber", Aux->PointerToLineNumber);
+      W.printHex("PointerToNextFunction", Aux->PointerToNextFunction);
+      W.printBinary("Unused", makeArrayRef(Aux->Unused));
+
+    } else if (
+        Symbol->StorageClass   == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL ||
+        (Symbol->StorageClass  == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
+         Symbol->SectionNumber == 0 &&
+         Symbol->Value         == 0)) {
+      const coff_aux_weak_external_definition *Aux;
+      if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
+        break;
+
+      const coff_symbol *Linked;
+      StringRef LinkedName;
+      error_code EC;
+      if ((EC = Obj->getSymbol(Aux->TagIndex, Linked)) ||
+          (EC = Obj->getSymbolName(Linked, LinkedName))) {
+        LinkedName = "";
+        error(EC);
+      }
+
+      DictScope AS(W, "AuxWeakExternal");
+      W.printNumber("Linked", LinkedName, Aux->TagIndex);
+      W.printEnum  ("Search", Aux->Characteristics,
+                    makeArrayRef(WeakExternalCharacteristics));
+      W.printBinary("Unused", Aux->Unused);
+
+    } else if (Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_FILE) {
+      const coff_aux_file_record *Aux;
+      if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
+        break;
+
+    } else if (Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC) {
+      const coff_aux_section_definition *Aux;
+      if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
+        break;
+
+      DictScope AS(W, "AuxSectionDef");
+      W.printNumber("Length", Aux->Length);
+      W.printNumber("RelocationCount", Aux->NumberOfRelocations);
+      W.printNumber("LineNumberCount", Aux->NumberOfLinenumbers);
+      W.printHex("Checksum", Aux->CheckSum);
+      W.printNumber("Number", Aux->Number);
+      W.printEnum("Selection", Aux->Selection, makeArrayRef(ImageCOMDATSelect));
+      W.printBinary("Unused", makeArrayRef(Aux->Unused));
+
+      if (Section->Characteristics & COFF::IMAGE_SCN_LNK_COMDAT
+          && Aux->Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+        const coff_section *Assoc;
+        StringRef AssocName;
+        error_code EC;
+        if ((EC = Obj->getSection(Aux->Number, Assoc)) ||
+            (EC = Obj->getSectionName(Assoc, AssocName))) {
+          AssocName = "";
+          error(EC);
+        }
+
+        W.printNumber("AssocSection", AssocName, Aux->Number);
+      }
+    } else if (Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_CLR_TOKEN) {
+      const coff_aux_clr_token *Aux;
+      if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
+        break;
+
+      DictScope AS(W, "AuxCLRToken");
+      W.printNumber("AuxType", Aux->AuxType);
+      W.printNumber("Reserved", Aux->Reserved);
+      W.printNumber("SymbolTableIndex", Aux->SymbolTableIndex);
+      W.printBinary("Unused", Aux->Unused);
+
+    } else {
+      W.startLine() << "<unhandled auxiliary record>\n";
+    }
+  }
+}
+
+void COFFDumper::printUnwindInfo() {
+  const coff_file_header *Header;
+  if (error(Obj->getHeader(Header)))
+    return;
+
+  ListScope D(W, "UnwindInformation");
+  if (Header->Machine != COFF::IMAGE_FILE_MACHINE_AMD64) {
+    W.startLine() << "Unsupported image machine type "
+              "(currently only AMD64 is supported).\n";
+    return;
+  }
+
+  printX64UnwindInfo();
+}
+
+void COFFDumper::printX64UnwindInfo() {
+  error_code EC;
+  for (section_iterator SecI = Obj->begin_sections(),
+                        SecE = Obj->end_sections();
+                        SecI != SecE; SecI.increment(EC)) {
+    if (error(EC)) break;
+
+    StringRef Name;
+    if (error(SecI->getName(Name)))
+      continue;
+    if (Name != ".pdata" && !Name.startswith(".pdata$"))
+      continue;
+
+    const coff_section *PData = Obj->getCOFFSection(SecI);
+
+    ArrayRef<uint8_t> Contents;
+    if (error(Obj->getSectionContents(PData, Contents)) ||
+        Contents.empty())
+      continue;
+
+    ArrayRef<RuntimeFunction> RFs(
+      reinterpret_cast<const RuntimeFunction *>(Contents.data()),
+      Contents.size() / sizeof(RuntimeFunction));
+
+    for (const RuntimeFunction *I = RFs.begin(), *E = RFs.end(); I < E; ++I) {
+      const uint64_t OffsetInSection = std::distance(RFs.begin(), I)
+                                     * sizeof(RuntimeFunction);
+
+      printRuntimeFunction(*I, OffsetInSection, RelocMap[PData]);
+    }
+  }
+}
+
+void COFFDumper::printRuntimeFunction(
+    const RuntimeFunction& RTF,
+    uint64_t OffsetInSection,
+    const std::vector<RelocationRef> &Rels) {
+
+  DictScope D(W, "RuntimeFunction");
+  W.printString("StartAddress",
+                formatSymbol(Rels, OffsetInSection + 0, RTF.StartAddress));
+  W.printString("EndAddress",
+                formatSymbol(Rels, OffsetInSection + 4, RTF.EndAddress));
+  W.printString("UnwindInfoAddress",
+                formatSymbol(Rels, OffsetInSection + 8, RTF.UnwindInfoOffset));
+
+  const coff_section* XData = 0;
+  uint64_t UnwindInfoOffset = 0;
+  if (error(getSection(Rels, OffsetInSection + 8, &XData, &UnwindInfoOffset)))
+    return;
+
+  ArrayRef<uint8_t> XContents;
+  if (error(Obj->getSectionContents(XData, XContents)) || XContents.empty())
+    return;
+
+  UnwindInfoOffset += RTF.UnwindInfoOffset;
+  if (UnwindInfoOffset > XContents.size())
+    return;
+
+  const Win64EH::UnwindInfo *UI =
+    reinterpret_cast<const Win64EH::UnwindInfo *>(
+      XContents.data() + UnwindInfoOffset);
+
+  printUnwindInfo(*UI, UnwindInfoOffset, RelocMap[XData]);
+}
+
+void COFFDumper::printUnwindInfo(
+    const Win64EH::UnwindInfo& UI,
+    uint64_t OffsetInSection,
+    const std::vector<RelocationRef> &Rels) {
+  DictScope D(W, "UnwindInfo");
+  W.printNumber("Version", UI.getVersion());
+  W.printFlags("Flags", UI.getFlags(), makeArrayRef(UnwindFlags));
+  W.printNumber("PrologSize", UI.PrologSize);
+  if (UI.getFrameRegister() != 0) {
+    W.printEnum("FrameRegister", UI.getFrameRegister(),
+                makeArrayRef(UnwindOpInfo));
+    W.printHex("FrameOffset", UI.getFrameOffset());
+  } else {
+    W.printString("FrameRegister", StringRef("-"));
+    W.printString("FrameOffset", StringRef("-"));
+  }
+
+  W.printNumber("UnwindCodeCount", UI.NumCodes);
+  {
+    ListScope CodesD(W, "UnwindCodes");
+    ArrayRef<UnwindCode> UCs(&UI.UnwindCodes[0], UI.NumCodes);
+    for (const UnwindCode *I = UCs.begin(), *E = UCs.end(); I < E; ++I) {
+      unsigned UsedSlots = getNumUsedSlots(*I);
+      if (UsedSlots > UCs.size()) {
+        errs() << "Corrupt unwind data";
+        return;
+      }
+      printUnwindCode(UI, ArrayRef<UnwindCode>(I, E));
+      I += UsedSlots - 1;
+    }
+  }
+
+  uint64_t LSDAOffset = OffsetInSection + getOffsetOfLSDA(UI);
+  if (UI.getFlags() & (UNW_ExceptionHandler | UNW_TerminateHandler)) {
+    W.printString("Handler", formatSymbol(Rels, LSDAOffset,
+                                        UI.getLanguageSpecificHandlerOffset()));
+  } else if (UI.getFlags() & UNW_ChainInfo) {
+    const RuntimeFunction *Chained = UI.getChainedFunctionEntry();
+    if (Chained) {
+      DictScope D(W, "Chained");
+      W.printString("StartAddress", formatSymbol(Rels, LSDAOffset + 0,
+                                                        Chained->StartAddress));
+      W.printString("EndAddress", formatSymbol(Rels, LSDAOffset + 4,
+                                                          Chained->EndAddress));
+      W.printString("UnwindInfoAddress", formatSymbol(Rels, LSDAOffset + 8,
+                                                    Chained->UnwindInfoOffset));
+    }
+  }
+}
+
+// Prints one unwind code. Because an unwind code can occupy up to 3 slots in
+// the unwind codes array, this function requires that the correct number of
+// slots is provided.
+void COFFDumper::printUnwindCode(const Win64EH::UnwindInfo& UI,
+                                 ArrayRef<UnwindCode> UCs) {
+  assert(UCs.size() >= getNumUsedSlots(UCs[0]));
+
+  W.startLine() << format("0x%02X: ", unsigned(UCs[0].u.CodeOffset))
+                << getUnwindCodeTypeName(UCs[0].getUnwindOp());
+
+  uint32_t AllocSize = 0;
+
+  switch (UCs[0].getUnwindOp()) {
+  case UOP_PushNonVol:
+    outs() << " reg=" << getUnwindRegisterName(UCs[0].getOpInfo());
+    break;
+
+  case UOP_AllocLarge:
+    if (UCs[0].getOpInfo() == 0) {
+      AllocSize = UCs[1].FrameOffset * 8;
+    } else {
+      AllocSize = getLargeSlotValue(UCs);
+    }
+    outs() << " size=" << AllocSize;
+    break;
+  case UOP_AllocSmall:
+    outs() << " size=" << ((UCs[0].getOpInfo() + 1) * 8);
+    break;
+  case UOP_SetFPReg:
+    if (UI.getFrameRegister() == 0) {
+      outs() << " reg=<invalid>";
+    } else {
+      outs() << " reg=" << getUnwindRegisterName(UI.getFrameRegister())
+             << format(", offset=0x%X", UI.getFrameOffset() * 16);
+    }
+    break;
+  case UOP_SaveNonVol:
+    outs() << " reg=" << getUnwindRegisterName(UCs[0].getOpInfo())
+           << format(", offset=0x%X", UCs[1].FrameOffset * 8);
+    break;
+  case UOP_SaveNonVolBig:
+    outs() << " reg=" << getUnwindRegisterName(UCs[0].getOpInfo())
+           << format(", offset=0x%X", getLargeSlotValue(UCs));
+    break;
+  case UOP_SaveXMM128:
+    outs() << " reg=XMM" << static_cast<uint32_t>(UCs[0].getOpInfo())
+           << format(", offset=0x%X", UCs[1].FrameOffset * 16);
+    break;
+  case UOP_SaveXMM128Big:
+    outs() << " reg=XMM" << static_cast<uint32_t>(UCs[0].getOpInfo())
+           << format(", offset=0x%X", getLargeSlotValue(UCs));
+    break;
+  case UOP_PushMachFrame:
+    outs() << " errcode=" << (UCs[0].getOpInfo() == 0 ? "no" : "yes");
+    break;
+  }
+
+  outs() << "\n";
+}
diff --git a/tools/llvm-readobj/ELF.cpp b/tools/llvm-readobj/ELF.cpp
deleted file mode 100644
index 07f15b3a6d42..000000000000
--- a/tools/llvm-readobj/ELF.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-//===- llvm-readobj/ELF.cpp - ELF Specific Dumper -------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm-readobj.h"
-
-#include "llvm/Object/ELF.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Format.h"
-
-namespace llvm {
-using namespace object;
-using namespace ELF;
-
-const char *getTypeString(uint64_t Type) {
-  switch (Type) {
-  case DT_BIND_NOW:
-    return "(BIND_NOW)";
-  case DT_DEBUG:
-    return "(DEBUG)";
-  case DT_FINI:
-    return "(FINI)";
-  case DT_FINI_ARRAY:
-    return "(FINI_ARRAY)";
-  case DT_FINI_ARRAYSZ:
-    return "(FINI_ARRAYSZ)";
-  case DT_FLAGS:
-    return "(FLAGS)";
-  case DT_HASH:
-    return "(HASH)";
-  case DT_INIT:
-    return "(INIT)";
-  case DT_INIT_ARRAY:
-    return "(INIT_ARRAY)";
-  case DT_INIT_ARRAYSZ:
-    return "(INIT_ARRAYSZ)";
-  case DT_PREINIT_ARRAY:
-    return "(PREINIT_ARRAY)";
-  case DT_PREINIT_ARRAYSZ:
-    return "(PREINIT_ARRAYSZ)";
-  case DT_JMPREL:
-    return "(JMPREL)";
-  case DT_NEEDED:
-    return "(NEEDED)";
-  case DT_NULL:
-    return "(NULL)";
-  case DT_PLTGOT:
-    return "(PLTGOT)";
-  case DT_PLTREL:
-    return "(PLTREL)";
-  case DT_PLTRELSZ:
-    return "(PLTRELSZ)";
-  case DT_REL:
-    return "(REL)";
-  case DT_RELA:
-    return "(RELA)";
-  case DT_RELENT:
-    return "(RELENT)";
-  case DT_RELSZ:
-    return "(RELSZ)";
-  case DT_RELAENT:
-    return "(RELAENT)";
-  case DT_RELASZ:
-    return "(RELASZ)";
-  case DT_RPATH:
-    return "(RPATH)";
-  case DT_RUNPATH:
-    return "(RUNPATH)";
-  case DT_SONAME:
-    return "(SONAME)";
-  case DT_STRSZ:
-    return "(STRSZ)";
-  case DT_STRTAB:
-    return "(STRTAB)";
-  case DT_SYMBOLIC:
-    return "(SYMBOLIC)";
-  case DT_SYMENT:
-    return "(SYMENT)";
-  case DT_SYMTAB:
-    return "(SYMTAB)";
-  case DT_TEXTREL:
-    return "(TEXTREL)";
-  default:
-    return "unknown";
-  }
-}
-
-template <class ELFT>
-void printValue(const ELFObjectFile<ELFT> *O, uint64_t Type, uint64_t Value,
-                bool Is64, raw_ostream &OS) {
-  switch (Type) {
-  case DT_PLTREL:
-    if (Value == DT_REL) {
-      OS << "REL";
-      break;
-    } else if (Value == DT_RELA) {
-      OS << "RELA";
-      break;
-    }
-  // Fallthrough.
-  case DT_PLTGOT:
-  case DT_HASH:
-  case DT_STRTAB:
-  case DT_SYMTAB:
-  case DT_RELA:
-  case DT_INIT:
-  case DT_FINI:
-  case DT_REL:
-  case DT_JMPREL:
-  case DT_INIT_ARRAY:
-  case DT_FINI_ARRAY:
-  case DT_PREINIT_ARRAY:
-  case DT_DEBUG:
-  case DT_NULL:
-    OS << format("0x%" PRIx64, Value);
-    break;
-  case DT_PLTRELSZ:
-  case DT_RELASZ:
-  case DT_RELAENT:
-  case DT_STRSZ:
-  case DT_SYMENT:
-  case DT_RELSZ:
-  case DT_RELENT:
-  case DT_INIT_ARRAYSZ:
-  case DT_FINI_ARRAYSZ:
-  case DT_PREINIT_ARRAYSZ:
-    OS << Value << " (bytes)";
-    break;
-  case DT_NEEDED:
-    OS << "Shared library: ["
-       << O->getString(O->getDynamicStringTableSectionHeader(), Value) << "]";
-    break;
-  case DT_SONAME:
-    OS << "Library soname: ["
-       << O->getString(O->getDynamicStringTableSectionHeader(), Value) << "]";
-    break;
-  }
-}
-
-template <class ELFT>
-ErrorOr<void> dumpDynamicTable(const ELFObjectFile<ELFT> *O, raw_ostream &OS) {
-  typedef ELFObjectFile<ELFT> ELFO;
-  typedef typename ELFO::Elf_Dyn_iterator EDI;
-  EDI Start = O->begin_dynamic_table(),
-      End = O->end_dynamic_table(true);
-
-  if (Start == End)
-    return error_code::success();
-
-  ptrdiff_t Total = std::distance(Start, End);
-  OS << "Dynamic section contains " << Total << " entries\n";
-
-  bool Is64 = O->getBytesInAddress() == 8;
-
-  OS << "  Tag" << (Is64 ? "                " : "        ") << "Type"
-     << "                 " << "Name/Value\n";
-  for (; Start != End; ++Start) {
-    OS << " "
-       << format(Is64 ? "0x%016" PRIx64 : "0x%08" PRIx64, Start->getTag())
-       << " " << format("%-21s", getTypeString(Start->getTag()));
-    printValue(O, Start->getTag(), Start->getVal(), Is64, OS);
-    OS << "\n";
-  }
-
-  OS << "  Total: " << Total << "\n\n";
-  return error_code::success();
-}
-
-ErrorOr<void> dumpELFDynamicTable(ObjectFile *O, raw_ostream &OS) {
-  // Little-endian 32-bit
-  if (const ELFObjectFile<ELFType<support::little, 4, false> > *ELFObj =
-          dyn_cast<ELFObjectFile<ELFType<support::little, 4, false> > >(O))
-    return dumpDynamicTable(ELFObj, OS);
-
-  // Big-endian 32-bit
-  if (const ELFObjectFile<ELFType<support::big, 4, false> > *ELFObj =
-          dyn_cast<ELFObjectFile<ELFType<support::big, 4, false> > >(O))
-    return dumpDynamicTable(ELFObj, OS);
-
-  // Little-endian 64-bit
-  if (const ELFObjectFile<ELFType<support::little, 8, true> > *ELFObj =
-          dyn_cast<ELFObjectFile<ELFType<support::little, 8, true> > >(O))
-    return dumpDynamicTable(ELFObj, OS);
-
-  // Big-endian 64-bit
-  if (const ELFObjectFile<ELFType<support::big, 8, true> > *ELFObj =
-          dyn_cast<ELFObjectFile<ELFType<support::big, 8, true> > >(O))
-    return dumpDynamicTable(ELFObj, OS);
-  return error_code(object_error::invalid_file_type);
-}
-} // end namespace llvm
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
new file mode 100644
index 000000000000..3757b09c394e
--- /dev/null
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -0,0 +1,856 @@
+//===-- ELFDumper.cpp - ELF-specific dumper ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the ELF-specific dumper for llvm-readobj.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm-readobj.h"
+#include "Error.h"
+#include "ObjDumper.h"
+#include "StreamWriter.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace ELF;
+
+
+#define LLVM_READOBJ_ENUM_CASE(ns, enum) \
+  case ns::enum: return #enum;
+
+namespace {
+
+template<typename ELFT>
+class ELFDumper : public ObjDumper {
+public:
+  ELFDumper(const ELFObjectFile<ELFT> *Obj, StreamWriter& Writer)
+    : ObjDumper(Writer)
+    , Obj(Obj) { }
+
+  virtual void printFileHeaders() LLVM_OVERRIDE;
+  virtual void printSections() LLVM_OVERRIDE;
+  virtual void printRelocations() LLVM_OVERRIDE;
+  virtual void printSymbols() LLVM_OVERRIDE;
+  virtual void printDynamicSymbols() LLVM_OVERRIDE;
+  virtual void printUnwindInfo() LLVM_OVERRIDE;
+
+  virtual void printDynamicTable() LLVM_OVERRIDE;
+  virtual void printNeededLibraries() LLVM_OVERRIDE;
+  virtual void printProgramHeaders() LLVM_OVERRIDE;
+
+private:
+  typedef ELFObjectFile<ELFT> ELFO;
+  typedef typename ELFO::Elf_Shdr Elf_Shdr;
+  typedef typename ELFO::Elf_Sym Elf_Sym;
+
+  void printSymbol(symbol_iterator SymI, bool IsDynamic = false);
+
+  void printRelocation(section_iterator SecI, relocation_iterator RelI);
+
+  const ELFO *Obj;
+};
+
+} // namespace
+
+
+namespace llvm {
+
+error_code createELFDumper(const object::ObjectFile *Obj,
+                           StreamWriter& Writer,
+                           OwningPtr<ObjDumper> &Result) {
+  typedef ELFType<support::little, 4, false> Little32ELF;
+  typedef ELFType<support::big,    4, false> Big32ELF;
+  typedef ELFType<support::little, 4, true > Little64ELF;
+  typedef ELFType<support::big,    8, true > Big64ELF;
+
+  typedef ELFObjectFile<Little32ELF> LittleELF32Obj;
+  typedef ELFObjectFile<Big32ELF   > BigELF32Obj;
+  typedef ELFObjectFile<Little64ELF> LittleELF64Obj;
+  typedef ELFObjectFile<Big64ELF   > BigELF64Obj;
+
+  // Little-endian 32-bit
+  if (const LittleELF32Obj *ELFObj = dyn_cast<LittleELF32Obj>(Obj)) {
+    Result.reset(new ELFDumper<Little32ELF>(ELFObj, Writer));
+    return readobj_error::success;
+  }
+
+  // Big-endian 32-bit
+  if (const BigELF32Obj *ELFObj = dyn_cast<BigELF32Obj>(Obj)) {
+    Result.reset(new ELFDumper<Big32ELF>(ELFObj, Writer));
+    return readobj_error::success;
+  }
+
+  // Little-endian 64-bit
+  if (const LittleELF64Obj *ELFObj = dyn_cast<LittleELF64Obj>(Obj)) {
+    Result.reset(new ELFDumper<Little64ELF>(ELFObj, Writer));
+    return readobj_error::success;
+  }
+
+  // Big-endian 64-bit
+  if (const BigELF64Obj *ELFObj = dyn_cast<BigELF64Obj>(Obj)) {
+    Result.reset(new ELFDumper<Big64ELF>(ELFObj, Writer));
+    return readobj_error::success;
+  }
+
+  return readobj_error::unsupported_obj_file_format;
+}
+
+} // namespace llvm
+
+
+static const EnumEntry<unsigned> ElfClass[] = {
+  { "None",   ELF::ELFCLASSNONE },
+  { "32-bit", ELF::ELFCLASS32   },
+  { "64-bit", ELF::ELFCLASS64   },
+};
+
+static const EnumEntry<unsigned> ElfDataEncoding[] = {
+  { "None",         ELF::ELFDATANONE },
+  { "LittleEndian", ELF::ELFDATA2LSB },
+  { "BigEndian",    ELF::ELFDATA2MSB },
+};
+
+static const EnumEntry<unsigned> ElfObjectFileType[] = {
+  { "None",         ELF::ET_NONE },
+  { "Relocatable",  ELF::ET_REL  },
+  { "Executable",   ELF::ET_EXEC },
+  { "SharedObject", ELF::ET_DYN  },
+  { "Core",         ELF::ET_CORE },
+};
+
+static const EnumEntry<unsigned> ElfOSABI[] = {
+  { "SystemV",      ELF::ELFOSABI_NONE         },
+  { "HPUX",         ELF::ELFOSABI_HPUX         },
+  { "NetBSD",       ELF::ELFOSABI_NETBSD       },
+  { "GNU/Linux",    ELF::ELFOSABI_LINUX        },
+  { "GNU/Hurd",     ELF::ELFOSABI_HURD         },
+  { "Solaris",      ELF::ELFOSABI_SOLARIS      },
+  { "AIX",          ELF::ELFOSABI_AIX          },
+  { "IRIX",         ELF::ELFOSABI_IRIX         },
+  { "FreeBSD",      ELF::ELFOSABI_FREEBSD      },
+  { "TRU64",        ELF::ELFOSABI_TRU64        },
+  { "Modesto",      ELF::ELFOSABI_MODESTO      },
+  { "OpenBSD",      ELF::ELFOSABI_OPENBSD      },
+  { "OpenVMS",      ELF::ELFOSABI_OPENVMS      },
+  { "NSK",          ELF::ELFOSABI_NSK          },
+  { "AROS",         ELF::ELFOSABI_AROS         },
+  { "FenixOS",      ELF::ELFOSABI_FENIXOS      },
+  { "C6000_ELFABI", ELF::ELFOSABI_C6000_ELFABI },
+  { "C6000_LINUX" , ELF::ELFOSABI_C6000_LINUX  },
+  { "ARM",          ELF::ELFOSABI_ARM          },
+  { "Standalone"  , ELF::ELFOSABI_STANDALONE   }
+};
+
+static const EnumEntry<unsigned> ElfMachineType[] = {
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_NONE         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_M32          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SPARC        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_386          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_68K          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_88K          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_486          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_860          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MIPS         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_S370         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MIPS_RS3_LE  ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_PARISC       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_VPP500       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SPARC32PLUS  ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_960          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_PPC          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_PPC64        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_S390         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SPU          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_V800         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_FR20         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_RH32         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_RCE          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ARM          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ALPHA        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SH           ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SPARCV9      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TRICORE      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ARC          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_H8_300       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_H8_300H      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_H8S          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_H8_500       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_IA_64        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MIPS_X       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_COLDFIRE     ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_68HC12       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MMA          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_PCP          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_NCPU         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_NDR1         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_STARCORE     ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ME16         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ST100        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TINYJ        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_X86_64       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_PDSP         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_PDP10        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_PDP11        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_FX66         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ST9PLUS      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ST7          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_68HC16       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_68HC11       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_68HC08       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_68HC05       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SVX          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ST19         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_VAX          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_CRIS         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_JAVELIN      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_FIREPATH     ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ZSP          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MMIX         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_HUANY        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_PRISM        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_AVR          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_FR30         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_D10V         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_D30V         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_V850         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_M32R         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MN10300      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MN10200      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_PJ           ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_OPENRISC     ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ARC_COMPACT  ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_XTENSA       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_VIDEOCORE    ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TMM_GPP      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_NS32K        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TPC          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SNP1K        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ST200        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_IP2K         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MAX          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_CR           ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_F2MC16       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MSP430       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_BLACKFIN     ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SE_C33       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SEP          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ARCA         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_UNICORE      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_EXCESS       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_DXP          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ALTERA_NIOS2 ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_CRX          ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_XGATE        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_C166         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_M16C         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_DSPIC30F     ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_CE           ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_M32C         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TSK3000      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_RS08         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SHARC        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ECOG2        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SCORE7       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_DSP24        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_VIDEOCORE3   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_LATTICEMICO32),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SE_C17       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TI_C6000     ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TI_C2000     ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TI_C5500     ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MMDSP_PLUS   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_CYPRESS_M8C  ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_R32C         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TRIMEDIA     ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_HEXAGON      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_8051         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_STXP7X       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_NDS32        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ECOG1        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ECOG1X       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MAXQ30       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_XIMO16       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MANIK        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_CRAYNV2      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_RX           ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_METAG        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MCST_ELBRUS  ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ECOG16       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_CR16         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ETPU         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_SLE9X        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_L10M         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_K10M         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_AARCH64      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_AVR32        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_STM8         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TILE64       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TILEPRO      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MICROBLAZE   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_CUDA         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_TILEGX       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_CLOUDSHIELD  ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_COREA_1ST    ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_COREA_2ND    ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_ARC_COMPACT2 ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_OPEN8        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_RL78         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_VIDEOCORE5   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_78KOR        ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_56800EX      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_MBLAZE       )
+};
+
+static const EnumEntry<unsigned> ElfSymbolBindings[] = {
+  { "Local",  ELF::STB_LOCAL  },
+  { "Global", ELF::STB_GLOBAL },
+  { "Weak",   ELF::STB_WEAK   }
+};
+
+static const EnumEntry<unsigned> ElfSymbolTypes[] = {
+  { "None",      ELF::STT_NOTYPE    },
+  { "Object",    ELF::STT_OBJECT    },
+  { "Function",  ELF::STT_FUNC      },
+  { "Section",   ELF::STT_SECTION   },
+  { "File",      ELF::STT_FILE      },
+  { "Common",    ELF::STT_COMMON    },
+  { "TLS",       ELF::STT_TLS       },
+  { "GNU_IFunc", ELF::STT_GNU_IFUNC }
+};
+
+static const char *getElfSectionType(unsigned Arch, unsigned Type) {
+  switch (Arch) {
+  case Triple::arm:
+    switch (Type) {
+    LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_EXIDX);
+    LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_PREEMPTMAP);
+    LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_ATTRIBUTES);
+    LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_DEBUGOVERLAY);
+    LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_OVERLAYSECTION);
+    }
+  case Triple::hexagon:
+    switch (Type) {
+    LLVM_READOBJ_ENUM_CASE(ELF, SHT_HEX_ORDERED);
+    }
+  case Triple::x86_64:
+    switch (Type) {
+    LLVM_READOBJ_ENUM_CASE(ELF, SHT_X86_64_UNWIND);
+    }
+  case Triple::mips:
+  case Triple::mipsel:
+    switch (Type) {
+    LLVM_READOBJ_ENUM_CASE(ELF, SHT_MIPS_REGINFO);
+    LLVM_READOBJ_ENUM_CASE(ELF, SHT_MIPS_OPTIONS);
+    }
+  }
+
+  switch (Type) {
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_NULL              );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_PROGBITS          );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_SYMTAB            );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_STRTAB            );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_RELA              );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_HASH              );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_DYNAMIC           );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_NOTE              );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_NOBITS            );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_REL               );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_SHLIB             );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_DYNSYM            );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_INIT_ARRAY        );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_FINI_ARRAY        );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_PREINIT_ARRAY     );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GROUP             );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX      );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES    );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_HASH          );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_verdef        );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_verneed       );
+  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_versym        );
+  default: return "";
+  }
+}
+
+static const EnumEntry<unsigned> ElfSectionFlags[] = {
+  LLVM_READOBJ_ENUM_ENT(ELF, SHF_WRITE           ),
+  LLVM_READOBJ_ENUM_ENT(ELF, SHF_ALLOC           ),
+  LLVM_READOBJ_ENUM_ENT(ELF, SHF_EXECINSTR       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, SHF_MERGE           ),
+  LLVM_READOBJ_ENUM_ENT(ELF, SHF_STRINGS         ),
+  LLVM_READOBJ_ENUM_ENT(ELF, SHF_INFO_LINK       ),
+  LLVM_READOBJ_ENUM_ENT(ELF, SHF_LINK_ORDER      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, SHF_OS_NONCONFORMING),
+  LLVM_READOBJ_ENUM_ENT(ELF, SHF_GROUP           ),
+  LLVM_READOBJ_ENUM_ENT(ELF, SHF_TLS             ),
+  LLVM_READOBJ_ENUM_ENT(ELF, XCORE_SHF_CP_SECTION),
+  LLVM_READOBJ_ENUM_ENT(ELF, XCORE_SHF_DP_SECTION),
+  LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_NOSTRIP    )
+};
+
+static const EnumEntry<unsigned> ElfSegmentTypes[] = {
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_NULL   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_LOAD   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_DYNAMIC),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_INTERP ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_NOTE   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_SHLIB  ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_PHDR   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_TLS    ),
+
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_GNU_EH_FRAME),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_SUNW_EH_FRAME),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_SUNW_UNWIND),
+
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_GNU_STACK),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_GNU_RELRO),
+
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_ARM_EXIDX),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_ARM_UNWIND)
+};
+
+static const EnumEntry<unsigned> ElfSegmentFlags[] = {
+  LLVM_READOBJ_ENUM_ENT(ELF, PF_X),
+  LLVM_READOBJ_ENUM_ENT(ELF, PF_W),
+  LLVM_READOBJ_ENUM_ENT(ELF, PF_R)
+};
+
+
+template<class ELFT>
+void ELFDumper<ELFT>::printFileHeaders() {
+  error_code EC;
+
+  const typename ELFO::Elf_Ehdr *Header = Obj->getElfHeader();
+
+  {
+    DictScope D(W, "ElfHeader");
+    {
+      DictScope D(W, "Ident");
+      W.printBinary("Magic", makeArrayRef(Header->e_ident).slice(ELF::EI_MAG0,
+                                                                 4));
+      W.printEnum  ("Class", Header->e_ident[ELF::EI_CLASS],
+                      makeArrayRef(ElfClass));
+      W.printEnum  ("DataEncoding", Header->e_ident[ELF::EI_DATA],
+                      makeArrayRef(ElfDataEncoding));
+      W.printNumber("FileVersion", Header->e_ident[ELF::EI_VERSION]);
+      W.printEnum  ("OS/ABI", Header->e_ident[ELF::EI_OSABI],
+                      makeArrayRef(ElfOSABI));
+      W.printNumber("ABIVersion", Header->e_ident[ELF::EI_ABIVERSION]);
+      W.printBinary("Unused", makeArrayRef(Header->e_ident).slice(ELF::EI_PAD));
+    }
+
+    W.printEnum  ("Type", Header->e_type, makeArrayRef(ElfObjectFileType));
+    W.printEnum  ("Machine", Header->e_machine, makeArrayRef(ElfMachineType));
+    W.printNumber("Version", Header->e_version);
+    W.printHex   ("Entry", Header->e_entry);
+    W.printHex   ("ProgramHeaderOffset", Header->e_phoff);
+    W.printHex   ("SectionHeaderOffset", Header->e_shoff);
+    W.printFlags ("Flags", Header->e_flags);
+    W.printNumber("HeaderSize", Header->e_ehsize);
+    W.printNumber("ProgramHeaderEntrySize", Header->e_phentsize);
+    W.printNumber("ProgramHeaderCount", Header->e_phnum);
+    W.printNumber("SectionHeaderEntrySize", Header->e_shentsize);
+    W.printNumber("SectionHeaderCount", Header->e_shnum);
+    W.printNumber("StringTableSectionIndex", Header->e_shstrndx);
+  }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printSections() {
+  ListScope SectionsD(W, "Sections");
+
+  int SectionIndex = -1;
+  error_code EC;
+  for (section_iterator SecI = Obj->begin_sections(),
+                        SecE = Obj->end_sections();
+                        SecI != SecE; SecI.increment(EC)) {
+    if (error(EC)) break;
+
+    ++SectionIndex;
+
+    const Elf_Shdr *Section = Obj->getElfSection(SecI);
+    StringRef Name;
+    if (error(SecI->getName(Name)))
+        Name = "";
+
+    DictScope SectionD(W, "Section");
+    W.printNumber("Index", SectionIndex);
+    W.printNumber("Name", Name, Section->sh_name);
+    W.printHex   ("Type", getElfSectionType(Obj->getArch(), Section->sh_type),
+                    Section->sh_type);
+    W.printFlags ("Flags", Section->sh_flags, makeArrayRef(ElfSectionFlags));
+    W.printHex   ("Address", Section->sh_addr);
+    W.printHex   ("Offset", Section->sh_offset);
+    W.printNumber("Size", Section->sh_size);
+    W.printNumber("Link", Section->sh_link);
+    W.printNumber("Info", Section->sh_info);
+    W.printNumber("AddressAlignment", Section->sh_addralign);
+    W.printNumber("EntrySize", Section->sh_entsize);
+
+    if (opts::SectionRelocations) {
+      ListScope D(W, "Relocations");
+      for (relocation_iterator RelI = SecI->begin_relocations(),
+                               RelE = SecI->end_relocations();
+                               RelI != RelE; RelI.increment(EC)) {
+        if (error(EC)) break;
+
+        printRelocation(SecI, RelI);
+      }
+    }
+
+    if (opts::SectionSymbols) {
+      ListScope D(W, "Symbols");
+      for (symbol_iterator SymI = Obj->begin_symbols(),
+                           SymE = Obj->end_symbols();
+                           SymI != SymE; SymI.increment(EC)) {
+        if (error(EC)) break;
+
+        bool Contained = false;
+        if (SecI->containsSymbol(*SymI, Contained) || !Contained)
+          continue;
+
+        printSymbol(SymI);
+      }
+    }
+
+    if (opts::SectionData) {
+      StringRef Data;
+      if (error(SecI->getContents(Data))) break;
+
+      W.printBinaryBlock("SectionData", Data);
+    }
+  }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printRelocations() {
+  ListScope D(W, "Relocations");
+
+  error_code EC;
+  int SectionNumber = -1;
+  for (section_iterator SecI = Obj->begin_sections(),
+                        SecE = Obj->end_sections();
+                        SecI != SecE; SecI.increment(EC)) {
+    if (error(EC)) break;
+
+    ++SectionNumber;
+    StringRef Name;
+    if (error(SecI->getName(Name)))
+      continue;
+
+    bool PrintedGroup = false;
+    for (relocation_iterator RelI = SecI->begin_relocations(),
+                             RelE = SecI->end_relocations();
+                             RelI != RelE; RelI.increment(EC)) {
+      if (error(EC)) break;
+
+      if (!PrintedGroup) {
+        W.startLine() << "Section (" << SectionNumber << ") " << Name << " {\n";
+        W.indent();
+        PrintedGroup = true;
+      }
+
+      printRelocation(SecI, RelI);
+    }
+
+    if (PrintedGroup) {
+      W.unindent();
+      W.startLine() << "}\n";
+    }
+  }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printRelocation(section_iterator Sec,
+                                      relocation_iterator RelI) {
+  uint64_t Offset;
+  uint64_t RelocType;
+  SmallString<32> RelocName;
+  int64_t Info;
+  StringRef SymbolName;
+  SymbolRef Symbol;
+  if (error(RelI->getOffset(Offset))) return;
+  if (error(RelI->getType(RelocType))) return;
+  if (error(RelI->getTypeName(RelocName))) return;
+  if (error(RelI->getAdditionalInfo(Info))) return;
+  if (error(RelI->getSymbol(Symbol))) return;
+  if (error(Symbol.getName(SymbolName))) return;
+
+  if (opts::ExpandRelocs) {
+    DictScope Group(W, "Relocation");
+    W.printHex("Offset", Offset);
+    W.printNumber("Type", RelocName, RelocType);
+    W.printString("Symbol", SymbolName.size() > 0 ? SymbolName : "-");
+    W.printHex("Info", Info);
+  } else {
+    raw_ostream& OS = W.startLine();
+    OS << W.hex(Offset)
+       << " " << RelocName
+       << " " << (SymbolName.size() > 0 ? SymbolName : "-")
+       << " " << W.hex(Info)
+       << "\n";
+  }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printSymbols() {
+  ListScope Group(W, "Symbols");
+
+  error_code EC;
+  for (symbol_iterator SymI = Obj->begin_symbols(), SymE = Obj->end_symbols();
+                       SymI != SymE; SymI.increment(EC)) {
+    if (error(EC)) break;
+
+    printSymbol(SymI);
+  }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printDynamicSymbols() {
+  ListScope Group(W, "DynamicSymbols");
+
+  error_code EC;
+  for (symbol_iterator SymI = Obj->begin_dynamic_symbols(),
+                       SymE = Obj->end_dynamic_symbols();
+                       SymI != SymE; SymI.increment(EC)) {
+    if (error(EC)) break;
+
+    printSymbol(SymI, true);
+  }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printSymbol(symbol_iterator SymI, bool IsDynamic) {
+  error_code EC;
+
+  const Elf_Sym *Symbol = Obj->getElfSymbol(SymI);
+  const Elf_Shdr *Section = Obj->getSection(Symbol);
+
+  StringRef SymbolName;
+  if (SymI->getName(SymbolName))
+    SymbolName = "";
+
+  StringRef SectionName;
+  if (Section && Obj->getSectionName(Section, SectionName))
+    SectionName = "";
+
+  std::string FullSymbolName(SymbolName);
+  if (IsDynamic) {
+    StringRef Version;
+    bool IsDefault;
+    if (error(Obj->getSymbolVersion(*SymI, Version, IsDefault)))
+      return;
+    if (!Version.empty()) {
+      FullSymbolName += (IsDefault ? "@@" : "@");
+      FullSymbolName += Version;
+    }
+  }
+
+  DictScope D(W, "Symbol");
+  W.printNumber("Name", FullSymbolName, Symbol->st_name);
+  W.printHex   ("Value", Symbol->st_value);
+  W.printNumber("Size", Symbol->st_size);
+  W.printEnum  ("Binding", Symbol->getBinding(),
+                  makeArrayRef(ElfSymbolBindings));
+  W.printEnum  ("Type", Symbol->getType(), makeArrayRef(ElfSymbolTypes));
+  W.printNumber("Other", Symbol->st_other);
+  W.printHex   ("Section", SectionName, Symbol->st_shndx);
+}
+
+#define LLVM_READOBJ_TYPE_CASE(name) \
+  case DT_##name: return #name
+
+static const char *getTypeString(uint64_t Type) {
+  switch (Type) {
+  LLVM_READOBJ_TYPE_CASE(BIND_NOW);
+  LLVM_READOBJ_TYPE_CASE(DEBUG);
+  LLVM_READOBJ_TYPE_CASE(FINI);
+  LLVM_READOBJ_TYPE_CASE(FINI_ARRAY);
+  LLVM_READOBJ_TYPE_CASE(FINI_ARRAYSZ);
+  LLVM_READOBJ_TYPE_CASE(FLAGS);
+  LLVM_READOBJ_TYPE_CASE(HASH);
+  LLVM_READOBJ_TYPE_CASE(INIT);
+  LLVM_READOBJ_TYPE_CASE(INIT_ARRAY);
+  LLVM_READOBJ_TYPE_CASE(INIT_ARRAYSZ);
+  LLVM_READOBJ_TYPE_CASE(PREINIT_ARRAY);
+  LLVM_READOBJ_TYPE_CASE(PREINIT_ARRAYSZ);
+  LLVM_READOBJ_TYPE_CASE(JMPREL);
+  LLVM_READOBJ_TYPE_CASE(NEEDED);
+  LLVM_READOBJ_TYPE_CASE(NULL);
+  LLVM_READOBJ_TYPE_CASE(PLTGOT);
+  LLVM_READOBJ_TYPE_CASE(PLTREL);
+  LLVM_READOBJ_TYPE_CASE(PLTRELSZ);
+  LLVM_READOBJ_TYPE_CASE(REL);
+  LLVM_READOBJ_TYPE_CASE(RELA);
+  LLVM_READOBJ_TYPE_CASE(RELENT);
+  LLVM_READOBJ_TYPE_CASE(RELSZ);
+  LLVM_READOBJ_TYPE_CASE(RELAENT);
+  LLVM_READOBJ_TYPE_CASE(RELASZ);
+  LLVM_READOBJ_TYPE_CASE(RPATH);
+  LLVM_READOBJ_TYPE_CASE(RUNPATH);
+  LLVM_READOBJ_TYPE_CASE(SONAME);
+  LLVM_READOBJ_TYPE_CASE(STRSZ);
+  LLVM_READOBJ_TYPE_CASE(STRTAB);
+  LLVM_READOBJ_TYPE_CASE(SYMBOLIC);
+  LLVM_READOBJ_TYPE_CASE(SYMENT);
+  LLVM_READOBJ_TYPE_CASE(SYMTAB);
+  LLVM_READOBJ_TYPE_CASE(TEXTREL);
+  default: return "unknown";
+  }
+}
+
+#undef LLVM_READOBJ_TYPE_CASE
+
+template<class ELFT>
+static void printValue(const ELFObjectFile<ELFT> *O, uint64_t Type,
+                       uint64_t Value, bool Is64, raw_ostream &OS) {
+  switch (Type) {
+  case DT_PLTREL:
+    if (Value == DT_REL) {
+      OS << "REL";
+      break;
+    } else if (Value == DT_RELA) {
+      OS << "RELA";
+      break;
+    }
+  // Fallthrough.
+  case DT_PLTGOT:
+  case DT_HASH:
+  case DT_STRTAB:
+  case DT_SYMTAB:
+  case DT_RELA:
+  case DT_INIT:
+  case DT_FINI:
+  case DT_REL:
+  case DT_JMPREL:
+  case DT_INIT_ARRAY:
+  case DT_FINI_ARRAY:
+  case DT_PREINIT_ARRAY:
+  case DT_DEBUG:
+  case DT_NULL:
+    OS << format("0x%" PRIX64, Value);
+    break;
+  case DT_PLTRELSZ:
+  case DT_RELASZ:
+  case DT_RELAENT:
+  case DT_STRSZ:
+  case DT_SYMENT:
+  case DT_RELSZ:
+  case DT_RELENT:
+  case DT_INIT_ARRAYSZ:
+  case DT_FINI_ARRAYSZ:
+  case DT_PREINIT_ARRAYSZ:
+    OS << Value << " (bytes)";
+    break;
+  case DT_NEEDED:
+    OS << "SharedLibrary ("
+       << O->getString(O->getDynamicStringTableSectionHeader(), Value) << ")";
+    break;
+  case DT_SONAME:
+    OS << "LibrarySoname ("
+       << O->getString(O->getDynamicStringTableSectionHeader(), Value) << ")";
+    break;
+  }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printUnwindInfo() {
+  W.startLine() << "UnwindInfo not implemented.\n";
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printDynamicTable() {
+  typedef typename ELFO::Elf_Dyn_iterator EDI;
+  EDI Start = Obj->begin_dynamic_table(),
+      End = Obj->end_dynamic_table(true);
+
+  if (Start == End)
+    return;
+
+  ptrdiff_t Total = std::distance(Start, End);
+  raw_ostream &OS = W.getOStream();
+  W.startLine() << "DynamicSection [ (" << Total << " entries)\n";
+
+  bool Is64 = Obj->getBytesInAddress() == 8;
+
+  W.startLine()
+     << "  Tag" << (Is64 ? "                " : "        ") << "Type"
+     << "                 " << "Name/Value\n";
+  for (; Start != End; ++Start) {
+    W.startLine()
+       << "  "
+       << format(Is64 ? "0x%016" PRIX64 : "0x%08" PRIX64, Start->getTag())
+       << " " << format("%-21s", getTypeString(Start->getTag()));
+    printValue(Obj, Start->getTag(), Start->getVal(), Is64, OS);
+    OS << "\n";
+  }
+
+  W.startLine() << "]\n";
+}
+
+static bool compareLibraryName(const LibraryRef &L, const LibraryRef &R) {
+  StringRef LPath, RPath;
+  L.getPath(LPath);
+  R.getPath(RPath);
+  return LPath < RPath;
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printNeededLibraries() {
+  ListScope D(W, "NeededLibraries");
+
+  error_code EC;
+
+  typedef std::vector<LibraryRef> LibsTy;
+  LibsTy Libs;
+
+  for (library_iterator I = Obj->begin_libraries_needed(),
+                        E = Obj->end_libraries_needed();
+                        I != E; I.increment(EC)) {
+    if (EC)
+      report_fatal_error("Needed libraries iteration failed");
+
+    Libs.push_back(*I);
+  }
+
+  std::sort(Libs.begin(), Libs.end(), &compareLibraryName);
+
+  for (LibsTy::const_iterator I = Libs.begin(), E = Libs.end();
+                                  I != E; ++I) {
+    StringRef Path;
+    I->getPath(Path);
+    outs() << "  " << Path << "\n";
+  }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printProgramHeaders() {
+  ListScope L(W, "ProgramHeaders");
+
+  for (typename ELFO::Elf_Phdr_Iter PI = Obj->begin_program_headers(),
+                                    PE = Obj->end_program_headers();
+                                    PI != PE; ++PI) {
+    DictScope P(W, "ProgramHeader");
+    W.printEnum  ("Type", PI->p_type, makeArrayRef(ElfSegmentTypes));
+    W.printHex   ("Offset", PI->p_offset);
+    W.printHex   ("VirtualAddress", PI->p_vaddr);
+    W.printHex   ("PhysicalAddress", PI->p_paddr);
+    W.printNumber("FileSize", PI->p_filesz);
+    W.printNumber("MemSize", PI->p_memsz);
+    W.printFlags ("Flags", PI->p_flags, makeArrayRef(ElfSegmentFlags));
+    W.printNumber("Alignment", PI->p_align);
+  }
+}
diff --git a/tools/llvm-readobj/Error.cpp b/tools/llvm-readobj/Error.cpp
new file mode 100644
index 000000000000..a6c61321c6dc
--- /dev/null
+++ b/tools/llvm-readobj/Error.cpp
@@ -0,0 +1,62 @@
+//===- Error.cpp - system_error extensions for llvm-readobj -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines a new error_category for the llvm-readobj tool.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Error.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class _readobj_error_category : public _do_message {
+public:
+  virtual const char* name() const;
+  virtual std::string message(int ev) const;
+  virtual error_condition default_error_condition(int ev) const;
+};
+} // namespace
+
+const char *_readobj_error_category::name() const {
+  return "llvm.readobj";
+}
+
+std::string _readobj_error_category::message(int ev) const {
+  switch (ev) {
+  case readobj_error::success: return "Success";
+  case readobj_error::file_not_found:
+    return "No such file.";
+  case readobj_error::unsupported_file_format:
+    return "The file was not recognized as a valid object file.";
+  case readobj_error::unrecognized_file_format:
+    return "Unrecognized file type.";
+  case readobj_error::unsupported_obj_file_format:
+    return "Unsupported object file format.";
+  case readobj_error::unknown_symbol:
+    return "Unknown symbol.";
+  default:
+    llvm_unreachable("An enumerator of readobj_error does not have a message "
+                     "defined.");
+  }
+}
+
+error_condition _readobj_error_category::default_error_condition(int ev) const {
+  if (ev == readobj_error::success)
+    return errc::success;
+  return errc::invalid_argument;
+}
+
+namespace llvm {
+const error_category &readobj_category() {
+  static _readobj_error_category o;
+  return o;
+}
+} // namespace llvm
diff --git a/tools/llvm-readobj/Error.h b/tools/llvm-readobj/Error.h
new file mode 100644
index 000000000000..cf68da89c1d3
--- /dev/null
+++ b/tools/llvm-readobj/Error.h
@@ -0,0 +1,48 @@
+//===- Error.h - system_error extensions for llvm-readobj -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares a new error_category for the llvm-readobj tool.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_READOBJ_ERROR_H
+#define LLVM_READOBJ_ERROR_H
+
+#include "llvm/Support/system_error.h"
+
+namespace llvm {
+
+const error_category &readobj_category();
+
+struct readobj_error {
+  enum _ {
+    success = 0,
+    file_not_found,
+    unsupported_file_format,
+    unrecognized_file_format,
+    unsupported_obj_file_format,
+    unknown_symbol
+  };
+  _ v_;
+
+  readobj_error(_ v) : v_(v) {}
+  explicit readobj_error(int v) : v_(_(v)) {}
+  operator int() const {return v_;}
+};
+
+inline error_code make_error_code(readobj_error e) {
+  return error_code(static_cast<int>(e), readobj_category());
+}
+
+template <> struct is_error_code_enum<readobj_error> : true_type { };
+template <> struct is_error_code_enum<readobj_error::_> : true_type { };
+
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-readobj/LLVMBuild.txt b/tools/llvm-readobj/LLVMBuild.txt
index c9f934f4b6fa..813c12b752bc 100644
--- a/tools/llvm-readobj/LLVMBuild.txt
+++ b/tools/llvm-readobj/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Tool
 name = llvm-readobj
 parent = Tools
-required_libraries = Archive BitReader Object
+required_libraries = all-targets Archive BitReader Object
diff --git a/tools/llvm-readobj/MachODumper.cpp b/tools/llvm-readobj/MachODumper.cpp
new file mode 100644
index 000000000000..54de49c310e3
--- /dev/null
+++ b/tools/llvm-readobj/MachODumper.cpp
@@ -0,0 +1,433 @@
+//===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachO-specific dumper for llvm-readobj.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-readobj.h"
+#include "Error.h"
+#include "ObjDumper.h"
+#include "StreamWriter.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/Casting.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+
+class MachODumper : public ObjDumper {
+public:
+  MachODumper(const MachOObjectFile *Obj, StreamWriter& Writer)
+    : ObjDumper(Writer)
+    , Obj(Obj) { }
+
+  virtual void printFileHeaders() LLVM_OVERRIDE;
+  virtual void printSections() LLVM_OVERRIDE;
+  virtual void printRelocations() LLVM_OVERRIDE;
+  virtual void printSymbols() LLVM_OVERRIDE;
+  virtual void printDynamicSymbols() LLVM_OVERRIDE;
+  virtual void printUnwindInfo() LLVM_OVERRIDE;
+
+private:
+  void printSymbol(symbol_iterator SymI);
+
+  void printRelocation(section_iterator SecI, relocation_iterator RelI);
+
+  void printRelocation(const MachOObjectFile *Obj,
+                       section_iterator SecI, relocation_iterator RelI);
+
+  void printSections(const MachOObjectFile *Obj);
+
+  const MachOObjectFile *Obj;
+};
+
+} // namespace
+
+
+namespace llvm {
+
+error_code createMachODumper(const object::ObjectFile *Obj,
+                             StreamWriter& Writer,
+                             OwningPtr<ObjDumper> &Result) {
+  const MachOObjectFile *MachOObj = dyn_cast<MachOObjectFile>(Obj);
+  if (!MachOObj)
+    return readobj_error::unsupported_obj_file_format;
+
+  Result.reset(new MachODumper(MachOObj, Writer));
+  return readobj_error::success;
+}
+
+} // namespace llvm
+
+
+static const EnumEntry<unsigned> MachOSectionTypes[] = {
+  { "Regular"                        , 0x00 },
+  { "ZeroFill"                       , 0x01 },
+  { "CStringLiterals"                , 0x02 },
+  { "4ByteLiterals"                  , 0x03 },
+  { "8ByteLiterals"                  , 0x04 },
+  { "LiteralPointers"                , 0x05 },
+  { "NonLazySymbolPointers"          , 0x06 },
+  { "LazySymbolPointers"             , 0x07 },
+  { "SymbolStubs"                    , 0x08 },
+  { "ModInitFuncs"                   , 0x09 },
+  { "ModTermFuncs"                   , 0x0A },
+  { "Coalesced"                      , 0x0B },
+  { "GBZeroFill"                     , 0x0C },
+  { "Interposing"                    , 0x0D },
+  { "16ByteLiterals"                 , 0x0E },
+  { "DTraceDOF"                      , 0x0F },
+  { "LazyDylibSymbolPoints"          , 0x10 },
+  { "ThreadLocalRegular"             , 0x11 },
+  { "ThreadLocalZerofill"            , 0x12 },
+  { "ThreadLocalVariables"           , 0x13 },
+  { "ThreadLocalVariablePointers"    , 0x14 },
+  { "ThreadLocalInitFunctionPointers", 0x15 }
+};
+
+static const EnumEntry<unsigned> MachOSectionAttributes[] = {
+  { "LocReloc"         , 1 <<  0 /*S_ATTR_LOC_RELOC          */ },
+  { "ExtReloc"         , 1 <<  1 /*S_ATTR_EXT_RELOC          */ },
+  { "SomeInstructions" , 1 <<  2 /*S_ATTR_SOME_INSTRUCTIONS  */ },
+  { "Debug"            , 1 << 17 /*S_ATTR_DEBUG              */ },
+  { "SelfModifyingCode", 1 << 18 /*S_ATTR_SELF_MODIFYING_CODE*/ },
+  { "LiveSupport"      , 1 << 19 /*S_ATTR_LIVE_SUPPORT       */ },
+  { "NoDeadStrip"      , 1 << 20 /*S_ATTR_NO_DEAD_STRIP      */ },
+  { "StripStaticSyms"  , 1 << 21 /*S_ATTR_STRIP_STATIC_SYMS  */ },
+  { "NoTOC"            , 1 << 22 /*S_ATTR_NO_TOC             */ },
+  { "PureInstructions" , 1 << 23 /*S_ATTR_PURE_INSTRUCTIONS  */ },
+};
+
+static const EnumEntry<unsigned> MachOSymbolRefTypes[] = {
+  { "UndefinedNonLazy",                     0 },
+  { "ReferenceFlagUndefinedLazy",           1 },
+  { "ReferenceFlagDefined",                 2 },
+  { "ReferenceFlagPrivateDefined",          3 },
+  { "ReferenceFlagPrivateUndefinedNonLazy", 4 },
+  { "ReferenceFlagPrivateUndefinedLazy",    5 }
+};
+
+static const EnumEntry<unsigned> MachOSymbolFlags[] = {
+  { "ReferencedDynamically", 0x10 },
+  { "NoDeadStrip",           0x20 },
+  { "WeakRef",               0x40 },
+  { "WeakDef",               0x80 }
+};
+
+static const EnumEntry<unsigned> MachOSymbolTypes[] = {
+  { "Undef",           0x0 },
+  { "External",        0x1 },
+  { "Abs",             0x2 },
+  { "Indirect",        0xA },
+  { "PreboundUndef",   0xC },
+  { "Section",         0xE },
+  { "PrivateExternal", 0x10 }
+};
+
+namespace {
+  enum {
+    N_STAB = 0xE0
+  };
+
+  struct MachOSection {
+    ArrayRef<char> Name;
+    ArrayRef<char> SegmentName;
+    uint64_t Address;
+    uint64_t Size;
+    uint32_t Offset;
+    uint32_t Alignment;
+    uint32_t RelocationTableOffset;
+    uint32_t NumRelocationTableEntries;
+    uint32_t Flags;
+    uint32_t Reserved1;
+    uint32_t Reserved2;
+  };
+
+  struct MachOSymbol {
+    uint32_t StringIndex;
+    uint8_t Type;
+    uint8_t SectionIndex;
+    uint16_t Flags;
+    uint64_t Value;
+  };
+}
+
+static void getSection(const MachOObjectFile *Obj,
+                       DataRefImpl Sec,
+                       MachOSection &Section) {
+  if (!Obj->is64Bit()) {
+    macho::Section Sect = Obj->getSection(Sec);
+    Section.Address     = Sect.Address;
+    Section.Size        = Sect.Size;
+    Section.Offset      = Sect.Offset;
+    Section.Alignment   = Sect.Align;
+    Section.RelocationTableOffset = Sect.RelocationTableOffset;
+    Section.NumRelocationTableEntries = Sect.NumRelocationTableEntries;
+    Section.Flags       = Sect.Flags;
+    Section.Reserved1   = Sect.Reserved1;
+    Section.Reserved2   = Sect.Reserved2;
+    return;
+  }
+  macho::Section64 Sect = Obj->getSection64(Sec);
+  Section.Address     = Sect.Address;
+  Section.Size        = Sect.Size;
+  Section.Offset      = Sect.Offset;
+  Section.Alignment   = Sect.Align;
+  Section.RelocationTableOffset = Sect.RelocationTableOffset;
+  Section.NumRelocationTableEntries = Sect.NumRelocationTableEntries;
+  Section.Flags       = Sect.Flags;
+  Section.Reserved1   = Sect.Reserved1;
+  Section.Reserved2   = Sect.Reserved2;
+}
+
+
+static void getSymbol(const MachOObjectFile *Obj,
+                      DataRefImpl DRI,
+                      MachOSymbol &Symbol) {
+  if (!Obj->is64Bit()) {
+    macho::SymbolTableEntry Entry = Obj->getSymbolTableEntry(DRI);
+    Symbol.StringIndex  = Entry.StringIndex;
+    Symbol.Type         = Entry.Type;
+    Symbol.SectionIndex = Entry.SectionIndex;
+    Symbol.Flags        = Entry.Flags;
+    Symbol.Value        = Entry.Value;
+    return;
+  }
+  macho::Symbol64TableEntry Entry = Obj->getSymbol64TableEntry(DRI);
+  Symbol.StringIndex  = Entry.StringIndex;
+  Symbol.Type         = Entry.Type;
+  Symbol.SectionIndex = Entry.SectionIndex;
+  Symbol.Flags        = Entry.Flags;
+  Symbol.Value        = Entry.Value;
+}
+
+void MachODumper::printFileHeaders() {
+  W.startLine() << "FileHeaders not implemented.\n";
+}
+
+void MachODumper::printSections() {
+  return printSections(Obj);
+}
+
+void MachODumper::printSections(const MachOObjectFile *Obj) {
+  ListScope Group(W, "Sections");
+
+  int SectionIndex = -1;
+  error_code EC;
+  for (section_iterator SecI = Obj->begin_sections(),
+                        SecE = Obj->end_sections();
+                        SecI != SecE; SecI.increment(EC)) {
+    if (error(EC)) break;
+
+    ++SectionIndex;
+
+    MachOSection Section;
+    getSection(Obj, SecI->getRawDataRefImpl(), Section);
+    DataRefImpl DR = SecI->getRawDataRefImpl();
+
+    StringRef Name;
+    if (error(SecI->getName(Name)))
+        Name = "";
+
+    ArrayRef<char> RawName = Obj->getSectionRawName(DR);
+    StringRef SegmentName = Obj->getSectionFinalSegmentName(DR);
+    ArrayRef<char> RawSegmentName = Obj->getSectionRawFinalSegmentName(DR);
+
+    DictScope SectionD(W, "Section");
+    W.printNumber("Index", SectionIndex);
+    W.printBinary("Name", Name, RawName);
+    W.printBinary("Segment", SegmentName, RawSegmentName);
+    W.printHex   ("Address", Section.Address);
+    W.printHex   ("Size", Section.Size);
+    W.printNumber("Offset", Section.Offset);
+    W.printNumber("Alignment", Section.Alignment);
+    W.printHex   ("RelocationOffset", Section.RelocationTableOffset);
+    W.printNumber("RelocationCount", Section.NumRelocationTableEntries);
+    W.printEnum  ("Type", Section.Flags & 0xFF,
+                  makeArrayRef(MachOSectionAttributes));
+    W.printFlags ("Attributes", Section.Flags >> 8,
+                  makeArrayRef(MachOSectionAttributes));
+    W.printHex   ("Reserved1", Section.Reserved1);
+    W.printHex   ("Reserved2", Section.Reserved2);
+
+    if (opts::SectionRelocations) {
+      ListScope D(W, "Relocations");
+      for (relocation_iterator RelI = SecI->begin_relocations(),
+                               RelE = SecI->end_relocations();
+                               RelI != RelE; RelI.increment(EC)) {
+        if (error(EC)) break;
+
+        printRelocation(SecI, RelI);
+      }
+    }
+
+    if (opts::SectionSymbols) {
+      ListScope D(W, "Symbols");
+      for (symbol_iterator SymI = Obj->begin_symbols(),
+                           SymE = Obj->end_symbols();
+                           SymI != SymE; SymI.increment(EC)) {
+        if (error(EC)) break;
+
+        bool Contained = false;
+        if (SecI->containsSymbol(*SymI, Contained) || !Contained)
+          continue;
+
+        printSymbol(SymI);
+      }
+    }
+
+    if (opts::SectionData) {
+      StringRef Data;
+      if (error(SecI->getContents(Data))) break;
+
+      W.printBinaryBlock("SectionData", Data);
+    }
+  }
+}
+
+void MachODumper::printRelocations() {
+  ListScope D(W, "Relocations");
+
+  error_code EC;
+  for (section_iterator SecI = Obj->begin_sections(),
+                        SecE = Obj->end_sections();
+                        SecI != SecE; SecI.increment(EC)) {
+    if (error(EC)) break;
+
+    StringRef Name;
+    if (error(SecI->getName(Name)))
+      continue;
+
+    bool PrintedGroup = false;
+    for (relocation_iterator RelI = SecI->begin_relocations(),
+                             RelE = SecI->end_relocations();
+                             RelI != RelE; RelI.increment(EC)) {
+      if (error(EC)) break;
+
+      if (!PrintedGroup) {
+        W.startLine() << "Section " << Name << " {\n";
+        W.indent();
+        PrintedGroup = true;
+      }
+
+      printRelocation(SecI, RelI);
+    }
+
+    if (PrintedGroup) {
+      W.unindent();
+      W.startLine() << "}\n";
+    }
+  }
+}
+
+void MachODumper::printRelocation(section_iterator SecI,
+                                  relocation_iterator RelI) {
+  return printRelocation(Obj, SecI, RelI);
+}
+
+void MachODumper::printRelocation(const MachOObjectFile *Obj,
+                                  section_iterator SecI,
+                                  relocation_iterator RelI) {
+  uint64_t Offset;
+  SmallString<32> RelocName;
+  StringRef SymbolName;
+  SymbolRef Symbol;
+  if (error(RelI->getOffset(Offset))) return;
+  if (error(RelI->getTypeName(RelocName))) return;
+  if (error(RelI->getSymbol(Symbol))) return;
+  if (error(Symbol.getName(SymbolName))) return;
+
+  DataRefImpl DR = RelI->getRawDataRefImpl();
+  macho::RelocationEntry RE = Obj->getRelocation(DR);
+  bool IsScattered = Obj->isRelocationScattered(RE);
+
+  if (opts::ExpandRelocs) {
+    DictScope Group(W, "Relocation");
+    W.printHex("Offset", Offset);
+    W.printNumber("PCRel", Obj->getAnyRelocationPCRel(RE));
+    W.printNumber("Length", Obj->getAnyRelocationLength(RE));
+    if (IsScattered)
+      W.printString("Extern", StringRef("N/A"));
+    else
+      W.printNumber("Extern", Obj->getPlainRelocationExternal(RE));
+    W.printNumber("Type", RelocName, Obj->getAnyRelocationType(RE));
+    W.printString("Symbol", SymbolName.size() > 0 ? SymbolName : "-");
+    W.printNumber("Scattered", IsScattered);
+  } else {
+    raw_ostream& OS = W.startLine();
+    OS << W.hex(Offset)
+       << " " << Obj->getAnyRelocationPCRel(RE)
+       << " " << Obj->getAnyRelocationLength(RE);
+    if (IsScattered)
+      OS << " n/a";
+    else
+      OS << " " << Obj->getPlainRelocationExternal(RE);
+    OS << " " << RelocName
+       << " " << IsScattered
+       << " " << (SymbolName.size() > 0 ? SymbolName : "-")
+       << "\n";
+  }
+}
+
+void MachODumper::printSymbols() {
+  ListScope Group(W, "Symbols");
+
+  error_code EC;
+  for (symbol_iterator SymI = Obj->begin_symbols(),
+                       SymE = Obj->end_symbols();
+                       SymI != SymE; SymI.increment(EC)) {
+    if (error(EC)) break;
+
+    printSymbol(SymI);
+  }
+}
+
+void MachODumper::printDynamicSymbols() {
+  ListScope Group(W, "DynamicSymbols");
+}
+
+void MachODumper::printSymbol(symbol_iterator SymI) {
+  error_code EC;
+
+  StringRef SymbolName;
+  if (SymI->getName(SymbolName))
+    SymbolName = "";
+
+  MachOSymbol Symbol;
+  getSymbol(Obj, SymI->getRawDataRefImpl(), Symbol);
+
+  StringRef SectionName;
+  section_iterator SecI(Obj->end_sections());
+  if (error(SymI->getSection(SecI)) ||
+      SecI == Obj->end_sections() ||
+      error(SecI->getName(SectionName)))
+    SectionName = "";
+
+  DictScope D(W, "Symbol");
+  W.printNumber("Name", SymbolName, Symbol.StringIndex);
+  if (Symbol.Type & N_STAB) {
+    W.printHex ("Type", "SymDebugTable", Symbol.Type);
+  } else {
+    W.printEnum("Type", Symbol.Type, makeArrayRef(MachOSymbolTypes));
+  }
+  W.printHex   ("Section", SectionName, Symbol.SectionIndex);
+  W.printEnum  ("RefType", static_cast<uint16_t>(Symbol.Flags & 0xF),
+                  makeArrayRef(MachOSymbolRefTypes));
+  W.printFlags ("Flags", static_cast<uint16_t>(Symbol.Flags & ~0xF),
+                  makeArrayRef(MachOSymbolFlags));
+  W.printHex   ("Value", Symbol.Value);
+}
+
+void MachODumper::printUnwindInfo() {
+  W.startLine() << "UnwindInfo not implemented.\n";
+}
diff --git a/tools/llvm-readobj/Makefile b/tools/llvm-readobj/Makefile
index a7a7de356303..1bb72955f08f 100644
--- a/tools/llvm-readobj/Makefile
+++ b/tools/llvm-readobj/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL := ../..
 TOOLNAME := llvm-readobj
-LINK_COMPONENTS := archive bitreader object
+LINK_COMPONENTS := archive bitreader object all-targets
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS := 1
diff --git a/tools/llvm-readobj/ObjDumper.cpp b/tools/llvm-readobj/ObjDumper.cpp
new file mode 100644
index 000000000000..61f511740a23
--- /dev/null
+++ b/tools/llvm-readobj/ObjDumper.cpp
@@ -0,0 +1,33 @@
+//===-- ObjDumper.cpp - Base dumper class -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements ObjDumper.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjDumper.h"
+
+#include "Error.h"
+#include "StreamWriter.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+ObjDumper::ObjDumper(StreamWriter& Writer)
+  : W(Writer) {
+}
+
+ObjDumper::~ObjDumper() {
+}
+
+} // namespace llvm
diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h
new file mode 100644
index 000000000000..6918e28cb93a
--- /dev/null
+++ b/tools/llvm-readobj/ObjDumper.h
@@ -0,0 +1,61 @@
+//===-- ObjDumper.h -------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_READOBJ_OBJDUMPER_H
+#define LLVM_READOBJ_OBJDUMPER_H
+
+namespace llvm {
+
+namespace object {
+  class ObjectFile;
+}
+
+class error_code;
+
+template<typename T>
+class OwningPtr;
+
+class StreamWriter;
+
+class ObjDumper {
+public:
+  ObjDumper(StreamWriter& Writer);
+  virtual ~ObjDumper();
+
+  virtual void printFileHeaders() = 0;
+  virtual void printSections() = 0;
+  virtual void printRelocations() = 0;
+  virtual void printSymbols() = 0;
+  virtual void printDynamicSymbols() = 0;
+  virtual void printUnwindInfo() = 0;
+
+  // Only implemented for ELF at this time.
+  virtual void printDynamicTable() { }
+  virtual void printNeededLibraries() { }
+  virtual void printProgramHeaders() { }
+
+protected:
+  StreamWriter& W;
+};
+
+error_code createCOFFDumper(const object::ObjectFile *Obj,
+                            StreamWriter& Writer,
+                            OwningPtr<ObjDumper> &Result);
+
+error_code createELFDumper(const object::ObjectFile *Obj,
+                           StreamWriter& Writer,
+                           OwningPtr<ObjDumper> &Result);
+
+error_code createMachODumper(const object::ObjectFile *Obj,
+                             StreamWriter& Writer,
+                             OwningPtr<ObjDumper> &Result);
+
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-readobj/StreamWriter.cpp b/tools/llvm-readobj/StreamWriter.cpp
new file mode 100644
index 000000000000..871811233a65
--- /dev/null
+++ b/tools/llvm-readobj/StreamWriter.cpp
@@ -0,0 +1,79 @@
+#include "StreamWriter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Format.h"
+#include <cctype>
+
+using namespace llvm::support;
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const HexNumber& Value) {
+  uint64_t N = Value.Value;
+  // Zero is a special case.
+  if (N == 0)
+    return OS << "0x0";
+
+  char NumberBuffer[20];
+  char *EndPtr = NumberBuffer + sizeof(NumberBuffer);
+  char *CurPtr = EndPtr;
+
+  while (N) {
+    uintptr_t X = N % 16;
+    *--CurPtr = (X < 10 ? '0' + X : 'A' + X - 10);
+    N /= 16;
+  }
+
+  OS << "0x";
+  return OS.write(CurPtr, EndPtr - CurPtr);
+}
+
+void StreamWriter::printBinaryImpl(StringRef Label, StringRef Str,
+                                   ArrayRef<uint8_t> Data, bool Block) {
+  if (Data.size() > 16)
+    Block = true;
+
+  if (Block) {
+    startLine() << Label;
+    if (Str.size() > 0)
+      OS << ": " << Str;
+    OS << " (\n";
+    for (size_t addr = 0, end = Data.size(); addr < end; addr += 16) {
+      startLine() << format("  %04" PRIX64 ": ", uint64_t(addr));
+      // Dump line of hex.
+      for (size_t i = 0; i < 16; ++i) {
+        if (i != 0 && i % 4 == 0)
+          OS << ' ';
+        if (addr + i < end)
+          OS << hexdigit((Data[addr + i] >> 4) & 0xF, false)
+             << hexdigit(Data[addr + i] & 0xF, false);
+        else
+          OS << "  ";
+      }
+      // Print ascii.
+      OS << "  |";
+      for (std::size_t i = 0; i < 16 && addr + i < end; ++i) {
+        if (std::isprint(Data[addr + i] & 0xFF))
+          OS << Data[addr + i];
+        else
+          OS << ".";
+      }
+      OS << "|\n";
+    }
+
+    startLine() << ")\n";
+  } else {
+    startLine() << Label << ":";
+    if (Str.size() > 0)
+      OS << " " << Str;
+    OS << " (";
+    for (size_t i = 0; i < Data.size(); ++i) {
+      if (i > 0)
+        OS << " ";
+
+      OS << format("%02X", static_cast<int>(Data[i]));
+    }
+    OS << ")\n";
+  }
+}
+
+} // namespace llvm
diff --git a/tools/llvm-readobj/StreamWriter.h b/tools/llvm-readobj/StreamWriter.h
new file mode 100644
index 000000000000..129f6e79336e
--- /dev/null
+++ b/tools/llvm-readobj/StreamWriter.h
@@ -0,0 +1,282 @@
+//===-- StreamWriter.h ----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_READOBJ_STREAMWRITER_H
+#define LLVM_READOBJ_STREAMWRITER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+using namespace llvm::support;
+
+namespace llvm {
+
+template<typename T>
+struct EnumEntry {
+  StringRef Name;
+  T Value;
+};
+
+struct HexNumber {
+  // To avoid sign-extension we have to explicitly cast to the appropriate
+  // unsigned type. The overloads are here so that every type that is implicitly
+  // convertible to an integer (including enums and endian helpers) can be used
+  // without requiring type traits or call-site changes.
+  HexNumber(int8_t   Value) : Value(static_cast<uint8_t >(Value)) { }
+  HexNumber(int16_t  Value) : Value(static_cast<uint16_t>(Value)) { }
+  HexNumber(int32_t  Value) : Value(static_cast<uint32_t>(Value)) { }
+  HexNumber(int64_t  Value) : Value(static_cast<uint64_t>(Value)) { }
+  HexNumber(uint8_t  Value) : Value(Value) { }
+  HexNumber(uint16_t Value) : Value(Value) { }
+  HexNumber(uint32_t Value) : Value(Value) { }
+  HexNumber(uint64_t Value) : Value(Value) { }
+  uint64_t Value;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const HexNumber& Value);
+
+class StreamWriter {
+public:
+  StreamWriter(raw_ostream &OS)
+    : OS(OS)
+    , IndentLevel(0) {
+  }
+
+  void flush() {
+    OS.flush();
+  }
+
+  void indent(int Levels = 1) {
+    IndentLevel += Levels;
+  }
+
+  void unindent(int Levels = 1) {
+    IndentLevel = std::max(0, IndentLevel - Levels);
+  }
+
+  void printIndent() {
+    for (int i = 0; i < IndentLevel; ++i)
+      OS << "  ";
+  }
+
+  template<typename T>
+  HexNumber hex(T Value) {
+    return HexNumber(Value);
+  }
+
+  template<typename T, typename TEnum>
+  void printEnum(StringRef Label, T Value,
+                 ArrayRef<EnumEntry<TEnum> > EnumValues) {
+    StringRef Name;
+    bool Found = false;
+    for (size_t i = 0; i < EnumValues.size(); ++i) {
+      if (EnumValues[i].Value == Value) {
+        Name = EnumValues[i].Name;
+        Found = true;
+        break;
+      }
+    }
+
+    if (Found) {
+      startLine() << Label << ": " << Name << " (" << hex(Value) << ")\n";
+    } else {
+      startLine() << Label << ": " << hex(Value) << "\n";
+    }
+  }
+
+  template<typename T, typename TFlag>
+  void printFlags(StringRef Label, T Value, ArrayRef<EnumEntry<TFlag> > Flags,
+                  TFlag EnumMask = TFlag(0)) {
+    typedef EnumEntry<TFlag> FlagEntry;
+    typedef SmallVector<FlagEntry, 10> FlagVector;
+    FlagVector SetFlags;
+
+    for (typename ArrayRef<FlagEntry>::const_iterator I = Flags.begin(),
+                                                 E = Flags.end(); I != E; ++I) {
+      if (I->Value == 0)
+        continue;
+
+      bool IsEnum = (I->Value & EnumMask) != 0;
+      if ((!IsEnum && (Value & I->Value) == I->Value) ||
+          (IsEnum  && (Value & EnumMask) == I->Value)) {
+        SetFlags.push_back(*I);
+      }
+    }
+
+    std::sort(SetFlags.begin(), SetFlags.end(), &flagName<TFlag>);
+
+    startLine() << Label << " [ (" << hex(Value) << ")\n";
+    for (typename FlagVector::const_iterator I = SetFlags.begin(),
+                                             E = SetFlags.end();
+                                             I != E; ++I) {
+      startLine() << "  " << I->Name << " (" << hex(I->Value) << ")\n";
+    }
+    startLine() << "]\n";
+  }
+
+  template<typename T>
+  void printFlags(StringRef Label, T Value) {
+    startLine() << Label << " [ (" << hex(Value) << ")\n";
+    uint64_t Flag = 1;
+    uint64_t Curr = Value;
+    while (Curr > 0) {
+      if (Curr & 1)
+        startLine() << "  " << hex(Flag) << "\n";
+      Curr >>= 1;
+      Flag <<= 1;
+    }
+    startLine() << "]\n";
+  }
+
+  void printNumber(StringRef Label, uint64_t Value) {
+    startLine() << Label << ": " << Value << "\n";
+  }
+
+  void printNumber(StringRef Label, uint32_t Value) {
+    startLine() << Label << ": " << Value << "\n";
+  }
+
+  void printNumber(StringRef Label, uint16_t Value) {
+    startLine() << Label << ": " << Value << "\n";
+  }
+
+  void printNumber(StringRef Label, uint8_t Value) {
+    startLine() << Label << ": " << unsigned(Value) << "\n";
+  }
+
+  void printNumber(StringRef Label, int64_t Value) {
+    startLine() << Label << ": " << Value << "\n";
+  }
+
+  void printNumber(StringRef Label, int32_t Value) {
+    startLine() << Label << ": " << Value << "\n";
+  }
+
+  void printNumber(StringRef Label, int16_t Value) {
+    startLine() << Label << ": " << Value << "\n";
+  }
+
+  void printNumber(StringRef Label, int8_t Value) {
+    startLine() << Label << ": " << int(Value) << "\n";
+  }
+
+  template<typename T>
+  void printHex(StringRef Label, T Value) {
+    startLine() << Label << ": " << hex(Value) << "\n";
+  }
+
+  template<typename T>
+  void printHex(StringRef Label, StringRef Str, T Value) {
+    startLine() << Label << ": " << Str << " (" << hex(Value) << ")\n";
+  }
+
+  void printString(StringRef Label, StringRef Value) {
+    startLine() << Label << ": " << Value << "\n";
+  }
+
+  void printString(StringRef Label, const std::string &Value) {
+    startLine() << Label << ": " << Value << "\n";
+  }
+
+  template<typename T>
+  void printNumber(StringRef Label, StringRef Str, T Value) {
+    startLine() << Label << ": " << Str << " (" << Value << ")\n";
+  }
+
+  void printBinary(StringRef Label, StringRef Str, ArrayRef<uint8_t> Value) {
+    printBinaryImpl(Label, Str, Value, false);
+  }
+
+  void printBinary(StringRef Label, StringRef Str, ArrayRef<char> Value) {
+    ArrayRef<uint8_t> V(reinterpret_cast<const uint8_t*>(Value.data()),
+                        Value.size());
+    printBinaryImpl(Label, Str, V, false);
+  }
+
+  void printBinary(StringRef Label, ArrayRef<uint8_t> Value) {
+    printBinaryImpl(Label, StringRef(), Value, false);
+  }
+
+  void printBinary(StringRef Label, ArrayRef<char> Value) {
+    ArrayRef<uint8_t> V(reinterpret_cast<const uint8_t*>(Value.data()),
+                        Value.size());
+    printBinaryImpl(Label, StringRef(), V, false);
+  }
+
+  void printBinary(StringRef Label, StringRef Value) {
+    ArrayRef<uint8_t> V(reinterpret_cast<const uint8_t*>(Value.data()),
+                        Value.size());
+    printBinaryImpl(Label, StringRef(), V, false);
+  }
+
+  void printBinaryBlock(StringRef Label, StringRef Value) {
+    ArrayRef<uint8_t> V(reinterpret_cast<const uint8_t*>(Value.data()),
+                        Value.size());
+    printBinaryImpl(Label, StringRef(), V, true);
+  }
+
+  raw_ostream& startLine() {
+    printIndent();
+    return OS;
+  }
+
+  raw_ostream& getOStream() {
+    return OS;
+  }
+
+private:
+  template<typename T>
+  static bool flagName(const EnumEntry<T>& lhs, const EnumEntry<T>& rhs) {
+    return lhs.Name < rhs.Name;
+  }
+
+  void printBinaryImpl(StringRef Label, StringRef Str, ArrayRef<uint8_t> Value,
+                       bool Block);
+
+  raw_ostream &OS;
+  int IndentLevel;
+};
+
+struct DictScope {
+  DictScope(StreamWriter& W, StringRef N) : W(W) {
+    W.startLine() << N << " {\n";
+    W.indent();
+  }
+
+  ~DictScope() {
+    W.unindent();
+    W.startLine() << "}\n";
+  }
+
+  StreamWriter& W;
+};
+
+struct ListScope {
+  ListScope(StreamWriter& W, StringRef N) : W(W) {
+    W.startLine() << N << " [\n";
+    W.indent();
+  }
+
+  ~ListScope() {
+    W.unindent();
+    W.startLine() << "]\n";
+  }
+
+  StreamWriter& W;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
index 8f0917fc91a5..7a4b4e4431c2 100644
--- a/tools/llvm-readobj/llvm-readobj.cpp
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -21,261 +21,273 @@
 
 #include "llvm-readobj.h"
 
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Object/ELF.h"
+#include "Error.h"
+#include "ObjDumper.h"
+#include "StreamWriter.h"
+
+#include "llvm/Object/Archive.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/system_error.h"
 
-using namespace llvm;
-using namespace llvm::object;
-
-static cl::opt<std::string>
-InputFilename(cl::Positional, cl::desc("<input object>"), cl::init(""));
-
-static void dumpSymbolHeader() {
-  outs() << format("  %-32s", (const char*)"Name")
-         << format("  %-4s", (const char*)"Type")
-         << format("  %-16s", (const char*)"Address")
-         << format("  %-16s", (const char*)"Size")
-         << format("  %-16s", (const char*)"FileOffset")
-         << format("  %-26s", (const char*)"Flags")
-         << "\n";
-}
+#include <string>
 
-static void dumpSectionHeader() {
-  outs() << format("  %-24s", (const char*)"Name")
-         << format("  %-16s", (const char*)"Address")
-         << format("  %-16s", (const char*)"Size")
-         << format("  %-8s", (const char*)"Align")
-         << format("  %-26s", (const char*)"Flags")
-         << "\n";
-}
 
-static const char *getTypeStr(SymbolRef::Type Type) {
-  switch (Type) {
-  case SymbolRef::ST_Unknown: return "?";
-  case SymbolRef::ST_Data: return "DATA";
-  case SymbolRef::ST_Debug: return "DBG";
-  case SymbolRef::ST_File: return "FILE";
-  case SymbolRef::ST_Function: return "FUNC";
-  case SymbolRef::ST_Other: return "-";
-  }
-  return "INV";
-}
+using namespace llvm;
+using namespace llvm::object;
 
-static std::string getSymbolFlagStr(uint32_t Flags) {
-  std::string result;
-  if (Flags & SymbolRef::SF_Undefined)
-    result += "undef,";
-  if (Flags & SymbolRef::SF_Global)
-    result += "global,";
-  if (Flags & SymbolRef::SF_Weak)
-    result += "weak,";
-  if (Flags & SymbolRef::SF_Absolute)
-    result += "absolute,";
-  if (Flags & SymbolRef::SF_ThreadLocal)
-    result += "threadlocal,";
-  if (Flags & SymbolRef::SF_Common)
-    result += "common,";
-  if (Flags & SymbolRef::SF_FormatSpecific)
-    result += "formatspecific,";
-
-  // Remove trailing comma
-  if (result.size() > 0) {
-    result.erase(result.size() - 1);
-  }
-  return result;
+namespace opts {
+  cl::list<std::string> InputFilenames(cl::Positional,
+    cl::desc("<input object files>"),
+    cl::ZeroOrMore);
+
+  // -file-headers, -h
+  cl::opt<bool> FileHeaders("file-headers",
+    cl::desc("Display file headers "));
+  cl::alias FileHeadersShort("h",
+    cl::desc("Alias for --file-headers"),
+    cl::aliasopt(FileHeaders));
+
+  // -sections, -s
+  cl::opt<bool> Sections("sections",
+    cl::desc("Display all sections."));
+  cl::alias SectionsShort("s",
+    cl::desc("Alias for --sections"),
+    cl::aliasopt(Sections));
+
+  // -section-relocations, -sr
+  cl::opt<bool> SectionRelocations("section-relocations",
+    cl::desc("Display relocations for each section shown."));
+  cl::alias SectionRelocationsShort("sr",
+    cl::desc("Alias for --section-relocations"),
+    cl::aliasopt(SectionRelocations));
+
+  // -section-symbols, -st
+  cl::opt<bool> SectionSymbols("section-symbols",
+    cl::desc("Display symbols for each section shown."));
+  cl::alias SectionSymbolsShort("st",
+    cl::desc("Alias for --section-symbols"),
+    cl::aliasopt(SectionSymbols));
+
+  // -section-data, -sd
+  cl::opt<bool> SectionData("section-data",
+    cl::desc("Display section data for each section shown."));
+  cl::alias SectionDataShort("sd",
+    cl::desc("Alias for --section-data"),
+    cl::aliasopt(SectionData));
+
+  // -relocations, -r
+  cl::opt<bool> Relocations("relocations",
+    cl::desc("Display the relocation entries in the file"));
+  cl::alias RelocationsShort("r",
+    cl::desc("Alias for --relocations"),
+    cl::aliasopt(Relocations));
+
+  // -symbols, -t
+  cl::opt<bool> Symbols("symbols",
+    cl::desc("Display the symbol table"));
+  cl::alias SymbolsShort("t",
+    cl::desc("Alias for --symbols"),
+    cl::aliasopt(Symbols));
+
+  // -dyn-symbols, -dt
+  cl::opt<bool> DynamicSymbols("dyn-symbols",
+    cl::desc("Display the dynamic symbol table"));
+  cl::alias DynamicSymbolsShort("dt",
+    cl::desc("Alias for --dyn-symbols"),
+    cl::aliasopt(DynamicSymbols));
+
+  // -unwind, -u
+  cl::opt<bool> UnwindInfo("unwind",
+    cl::desc("Display unwind information"));
+  cl::alias UnwindInfoShort("u",
+    cl::desc("Alias for --unwind"),
+    cl::aliasopt(UnwindInfo));
+
+  // -dynamic-table
+  cl::opt<bool> DynamicTable("dynamic-table",
+    cl::desc("Display the ELF .dynamic section table"));
+
+  // -needed-libs
+  cl::opt<bool> NeededLibraries("needed-libs",
+    cl::desc("Display the needed libraries"));
+
+  // -program-headers
+  cl::opt<bool> ProgramHeaders("program-headers",
+    cl::desc("Display ELF program headers"));
+
+  // -expand-relocs
+  cl::opt<bool> ExpandRelocs("expand-relocs",
+    cl::desc("Expand each shown relocation to multiple lines"));
+} // namespace opts
+
+namespace llvm {
+
+bool error(error_code EC) {
+  if (!EC)
+    return false;
+
+  outs() << "\nError reading file: " << EC.message() << ".\n";
+  outs().flush();
+  return true;
 }
 
-static void checkError(error_code ec, const char *msg) {
-  if (ec)
-    report_fatal_error(std::string(msg) + ": " + ec.message());
+bool relocAddressLess(RelocationRef a, RelocationRef b) {
+  uint64_t a_addr, b_addr;
+  if (error(a.getAddress(a_addr))) return false;
+  if (error(b.getAddress(b_addr))) return false;
+  return a_addr < b_addr;
 }
 
-static std::string getSectionFlagStr(const SectionRef &Section) {
-  const struct {
-    error_code (SectionRef::*MemF)(bool &) const;
-    const char *FlagStr, *ErrorStr;
-  } Work[] =
-      {{ &SectionRef::isText, "text,", "Section.isText() failed" },
-       { &SectionRef::isData, "data,", "Section.isData() failed" },
-       { &SectionRef::isBSS, "bss,", "Section.isBSS() failed"  },
-       { &SectionRef::isRequiredForExecution, "required,",
-         "Section.isRequiredForExecution() failed" },
-       { &SectionRef::isVirtual, "virtual,", "Section.isVirtual() failed" },
-       { &SectionRef::isZeroInit, "zeroinit,", "Section.isZeroInit() failed" },
-       { &SectionRef::isReadOnlyData, "rodata,",
-         "Section.isReadOnlyData() failed" }};
-
-  std::string result;
-  for (uint32_t I = 0; I < sizeof(Work)/sizeof(*Work); ++I) {
-    bool B;
-    checkError((Section.*Work[I].MemF)(B), Work[I].ErrorStr);
-    if (B)
-      result += Work[I].FlagStr;
-  }
+} // namespace llvm
 
-  // Remove trailing comma
-  if (result.size() > 0) {
-    result.erase(result.size() - 1);
-  }
-  return result;
-}
 
-static void
-dumpSymbol(const SymbolRef &Sym, const ObjectFile *obj, bool IsDynamic) {
-  StringRef Name;
-  SymbolRef::Type Type;
-  uint32_t Flags;
-  uint64_t Address;
-  uint64_t Size;
-  uint64_t FileOffset;
-  checkError(Sym.getName(Name), "SymbolRef.getName() failed");
-  checkError(Sym.getAddress(Address), "SymbolRef.getAddress() failed");
-  checkError(Sym.getSize(Size), "SymbolRef.getSize() failed");
-  checkError(Sym.getFileOffset(FileOffset),
-             "SymbolRef.getFileOffset() failed");
-  checkError(Sym.getType(Type), "SymbolRef.getType() failed");
-  checkError(Sym.getFlags(Flags), "SymbolRef.getFlags() failed");
-  std::string FullName = Name;
-
-  // If this is a dynamic symbol from an ELF object, append
-  // the symbol's version to the name.
-  if (IsDynamic && obj->isELF()) {
-    StringRef Version;
-    bool IsDefault;
-    GetELFSymbolVersion(obj, Sym, Version, IsDefault);
-    if (!Version.empty()) {
-      FullName += (IsDefault ? "@@" : "@");
-      FullName += Version;
-    }
-  }
+static void reportError(StringRef Input, error_code EC) {
+  if (Input == "-")
+    Input = "<stdin>";
 
-  // format() can't handle StringRefs
-  outs() << format("  %-32s", FullName.c_str())
-         << format("  %-4s", getTypeStr(Type))
-         << format("  %16" PRIx64, Address)
-         << format("  %16" PRIx64, Size)
-         << format("  %16" PRIx64, FileOffset)
-         << "  " << getSymbolFlagStr(Flags)
-         << "\n";
+  errs() << Input << ": " << EC.message() << "\n";
+  errs().flush();
 }
 
-static void dumpStaticSymbol(const SymbolRef &Sym, const ObjectFile *obj) {
-  return dumpSymbol(Sym, obj, false);
-}
+static void reportError(StringRef Input, StringRef Message) {
+  if (Input == "-")
+    Input = "<stdin>";
 
-static void dumpDynamicSymbol(const SymbolRef &Sym, const ObjectFile *obj) {
-  return dumpSymbol(Sym, obj, true);
+  errs() << Input << ": " << Message << "\n";
 }
 
-static void dumpSection(const SectionRef &Section, const ObjectFile *obj) {
-  StringRef Name;
-  checkError(Section.getName(Name), "SectionRef::getName() failed");
-  uint64_t Addr, Size, Align;
-  checkError(Section.getAddress(Addr), "SectionRef::getAddress() failed");
-  checkError(Section.getSize(Size), "SectionRef::getSize() failed");
-  checkError(Section.getAlignment(Align), "SectionRef::getAlignment() failed");
-  outs() << format("  %-24s", std::string(Name).c_str())
-         << format("  %16" PRIx64, Addr)
-         << format("  %16" PRIx64, Size)
-         << format("  %8" PRIx64, Align)
-         << "  " << getSectionFlagStr(Section)
-         << "\n";
+/// @brief Creates an format-specific object file dumper.
+static error_code createDumper(const ObjectFile *Obj,
+                               StreamWriter &Writer,
+                               OwningPtr<ObjDumper> &Result) {
+  if (!Obj)
+    return readobj_error::unsupported_file_format;
+
+  if (Obj->isCOFF())
+    return createCOFFDumper(Obj, Writer, Result);
+  if (Obj->isELF())
+    return createELFDumper(Obj, Writer, Result);
+  if (Obj->isMachO())
+    return createMachODumper(Obj, Writer, Result);
+
+  return readobj_error::unsupported_obj_file_format;
 }
 
-static void dumpLibrary(const LibraryRef &lib, const ObjectFile *obj) {
-  StringRef path;
-  lib.getPath(path);
-  outs() << "  " << path << "\n";
-}
 
-template<typename Iterator, typename Func>
-static void dump(const ObjectFile *obj, Func f, Iterator begin, Iterator end,
-                 const char *errStr) {
-  error_code ec;
-  uint32_t count = 0;
-  Iterator it = begin, ie = end;
-  while (it != ie) {
-    f(*it, obj);
-    it.increment(ec);
-    if (ec)
-      report_fatal_error(errStr);
-    ++count;
+/// @brief Dumps the specified object file.
+static void dumpObject(const ObjectFile *Obj) {
+  StreamWriter Writer(outs());
+  OwningPtr<ObjDumper> Dumper;
+  if (error_code EC = createDumper(Obj, Writer, Dumper)) {
+    reportError(Obj->getFileName(), EC);
+    return;
   }
-  outs() << "  Total: " << count << "\n\n";
-}
 
-static void dumpHeaders(const ObjectFile *obj) {
-  outs() << "File Format : " << obj->getFileFormatName() << "\n";
-  outs() << "Arch        : "
-         << Triple::getArchTypeName((llvm::Triple::ArchType)obj->getArch())
+  outs() << '\n';
+  outs() << "File: " << Obj->getFileName() << "\n";
+  outs() << "Format: " << Obj->getFileFormatName() << "\n";
+  outs() << "Arch: "
+         << Triple::getArchTypeName((llvm::Triple::ArchType)Obj->getArch())
          << "\n";
-  outs() << "Address Size: " << (8*obj->getBytesInAddress()) << " bits\n";
-  outs() << "Load Name   : " << obj->getLoadName() << "\n";
-  outs() << "\n";
+  outs() << "AddressSize: " << (8*Obj->getBytesInAddress()) << "bit\n";
+  if (Obj->isELF())
+    outs() << "LoadName: " << Obj->getLoadName() << "\n";
+
+  if (opts::FileHeaders)
+    Dumper->printFileHeaders();
+  if (opts::Sections)
+    Dumper->printSections();
+  if (opts::Relocations)
+    Dumper->printRelocations();
+  if (opts::Symbols)
+    Dumper->printSymbols();
+  if (opts::DynamicSymbols)
+    Dumper->printDynamicSymbols();
+  if (opts::UnwindInfo)
+    Dumper->printUnwindInfo();
+  if (opts::DynamicTable)
+    Dumper->printDynamicTable();
+  if (opts::NeededLibraries)
+    Dumper->printNeededLibraries();
+  if (opts::ProgramHeaders)
+    Dumper->printProgramHeaders();
 }
 
-int main(int argc, char** argv) {
-  error_code ec;
-  sys::PrintStackTraceOnErrorSignal();
-  PrettyStackTraceProgram X(argc, argv);
 
-  cl::ParseCommandLineOptions(argc, argv,
-                              "LLVM Object Reader\n");
+/// @brief Dumps each object file in \a Arc;
+static void dumpArchive(const Archive *Arc) {
+  for (Archive::child_iterator ArcI = Arc->begin_children(),
+                               ArcE = Arc->end_children();
+                               ArcI != ArcE; ++ArcI) {
+    OwningPtr<Binary> child;
+    if (error_code EC = ArcI->getAsBinary(child)) {
+      // Ignore non-object files.
+      if (EC != object_error::invalid_file_type)
+        reportError(Arc->getFileName(), EC.message());
+      continue;
+    }
 
-  if (InputFilename.empty()) {
-    errs() << "Please specify an input filename\n";
-    return 1;
+    if (ObjectFile *Obj = dyn_cast<ObjectFile>(child.get()))
+      dumpObject(Obj);
+    else
+      reportError(Arc->getFileName(), readobj_error::unrecognized_file_format);
   }
+}
 
-  // Open the object file
-  OwningPtr<MemoryBuffer> File;
-  if (MemoryBuffer::getFile(InputFilename, File)) {
-    errs() << InputFilename << ": Open failed\n";
-    return 1;
+
+/// @brief Opens \a File and dumps it.
+static void dumpInput(StringRef File) {
+  // If file isn't stdin, check that it exists.
+  if (File != "-" && !sys::fs::exists(File)) {
+    reportError(File, readobj_error::file_not_found);
+    return;
   }
 
-  OwningPtr<ObjectFile> o(ObjectFile::createObjectFile(File.take()));
-  ObjectFile *obj = o.get();
-  if (!obj) {
-    errs() << InputFilename << ": Object type not recognized\n";
+  // Attempt to open the binary.
+  OwningPtr<Binary> Binary;
+  if (error_code EC = createBinary(File, Binary)) {
+    reportError(File, EC);
+    return;
   }
 
-  dumpHeaders(obj);
+  if (Archive *Arc = dyn_cast<Archive>(Binary.get()))
+    dumpArchive(Arc);
+  else if (ObjectFile *Obj = dyn_cast<ObjectFile>(Binary.get()))
+    dumpObject(Obj);
+  else
+    reportError(File, readobj_error::unrecognized_file_format);
+}
 
-  outs() << "Symbols:\n";
-  dumpSymbolHeader();
-  dump(obj, dumpStaticSymbol, obj->begin_symbols(), obj->end_symbols(),
-       "Symbol iteration failed");
 
-  outs() << "Dynamic Symbols:\n";
-  dumpSymbolHeader();
-  dump(obj, dumpDynamicSymbol, obj->begin_dynamic_symbols(),
-       obj->end_dynamic_symbols(), "Symbol iteration failed");
+int main(int argc, const char *argv[]) {
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y;
 
-  outs() << "Sections:\n";
-  dumpSectionHeader();
-  dump(obj, &dumpSection, obj->begin_sections(), obj->end_sections(),
-       "Section iteration failed");
+  // Initialize targets.
+  llvm::InitializeAllTargetInfos();
 
-  if (obj->isELF()) {
-    if (ErrorOr<void> e = dumpELFDynamicTable(obj, outs()))
-      ;
-    else
-      errs() << "InputFilename" << ": " << error_code(e).message() << "\n";
-  }
+  // Register the target printer for --version.
+  cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
+
+  cl::ParseCommandLineOptions(argc, argv, "LLVM Object Reader\n");
 
-  outs() << "Libraries needed:\n";
-  dump(obj, &dumpLibrary, obj->begin_libraries_needed(),
-       obj->end_libraries_needed(), "Needed libraries iteration failed");
+  // Default to stdin if no filename is specified.
+  if (opts::InputFilenames.size() == 0)
+    opts::InputFilenames.push_back("-");
+
+  std::for_each(opts::InputFilenames.begin(), opts::InputFilenames.end(),
+                dumpInput);
 
   return 0;
 }
-
diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h
index cf492b2a8da5..3f756106c978 100644
--- a/tools/llvm-readobj/llvm-readobj.h
+++ b/tools/llvm-readobj/llvm-readobj.h
@@ -1,4 +1,4 @@
-//===- llvm-readobj.h - Dump contents of an Object File -------------------===//
+//===-- llvm-readobj.h ----------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,13 +10,37 @@
 #ifndef LLVM_TOOLS_READ_OBJ_H
 #define LLVM_TOOLS_READ_OBJ_H
 
-#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/CommandLine.h"
+#include <string>
 
 namespace llvm {
-namespace object { class ObjectFile; }
-class raw_ostream;
+  namespace object {
+    class RelocationRef;
+  }
 
-ErrorOr<void> dumpELFDynamicTable(object::ObjectFile *O, raw_ostream &OS);
-} // end namespace llvm
+  class error_code;
+
+  // Various helper functions.
+  bool error(error_code ec);
+  bool relocAddressLess(object::RelocationRef A,
+                        object::RelocationRef B);
+} // namespace llvm
+
+namespace opts {
+  extern llvm::cl::list<std::string> InputFilenames;
+  extern llvm::cl::opt<bool> FileHeaders;
+  extern llvm::cl::opt<bool> Sections;
+  extern llvm::cl::opt<bool> SectionRelocations;
+  extern llvm::cl::opt<bool> SectionSymbols;
+  extern llvm::cl::opt<bool> SectionData;
+  extern llvm::cl::opt<bool> Relocations;
+  extern llvm::cl::opt<bool> Symbols;
+  extern llvm::cl::opt<bool> DynamicSymbols;
+  extern llvm::cl::opt<bool> UnwindInfo;
+  extern llvm::cl::opt<bool> ExpandRelocs;
+} // namespace opts
+
+#define LLVM_READOBJ_ENUM_ENT(ns, enum) \
+  { #enum, ns::enum }
 
 #endif
diff --git a/tools/llvm-stress/Makefile b/tools/llvm-stress/Makefile
index 90d57c3fa98a..8767cbe41767 100644
--- a/tools/llvm-stress/Makefile
+++ b/tools/llvm-stress/Makefile
@@ -10,7 +10,7 @@
 LEVEL := ../..
 TOOLNAME := llvm-stress
 LINK_COMPONENTS := object
-LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo
+LINK_COMPONENTS := bitreader bitwriter asmparser irreader instrumentation scalaropts ipo
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS = 1
diff --git a/tools/llvm-symbolizer/LLVMSymbolize.cpp b/tools/llvm-symbolizer/LLVMSymbolize.cpp
index 86ea34bff6f1..29d91a0e92a3 100644
--- a/tools/llvm-symbolizer/LLVMSymbolize.cpp
+++ b/tools/llvm-symbolizer/LLVMSymbolize.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "LLVMSymbolize.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Path.h"
@@ -186,6 +187,10 @@ std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
   return ss.str();
 }
 
+void LLVMSymbolizer::flush() {
+  DeleteContainerSeconds(Modules);
+}
+
 // Returns true if the object endianness is known.
 static bool getObjectEndianness(const ObjectFile *Obj, bool &IsLittleEndian) {
   // FIXME: Implement this when libLLVMObject allows to do it easily.
diff --git a/tools/llvm-symbolizer/LLVMSymbolize.h b/tools/llvm-symbolizer/LLVMSymbolize.h
index e6220aa4ce9c..0733dfbbc52e 100644
--- a/tools/llvm-symbolizer/LLVMSymbolize.h
+++ b/tools/llvm-symbolizer/LLVMSymbolize.h
@@ -50,6 +50,7 @@ class LLVMSymbolizer {
   symbolizeCode(const std::string &ModuleName, uint64_t ModuleOffset);
   std::string
   symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset);
+  void flush();
 private:
   ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName);
   std::string printDILineInfo(DILineInfo LineInfo) const;
diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp
index d039ec691f1e..0cafffaf7126 100644
--- a/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -70,12 +70,25 @@ static bool parseCommand(bool &IsData, std::string &ModuleName,
     // If no cmd, assume it's CODE.
     IsData = false;
   }
-  // FIXME: Handle case when filename is given in quotes.
-  if (char *FilePath = strtok(pos, kDelimiters)) {
-    ModuleName = FilePath;
-    if (char *OffsetStr = strtok((char *)0, kDelimiters))
-      ModuleOffsetStr = OffsetStr;
+  // Skip delimiters and parse input filename.
+  pos += strspn(pos, kDelimiters);
+  if (*pos == '"' || *pos == '\'') {
+    char quote = *pos;
+    pos++;
+    char *end = strchr(pos, quote);
+    if (end == 0)
+      return false;
+    ModuleName = std::string(pos, end - pos);
+    pos = end + 1;
+  } else {
+    int name_length = strcspn(pos, kDelimiters);
+    ModuleName = std::string(pos, name_length);
+    pos += name_length;
   }
+  // Skip delimiters and parse module offset.
+  pos += strspn(pos, kDelimiters);
+  int offset_length = strcspn(pos, kDelimiters);
+  ModuleOffsetStr = std::string(pos, offset_length);
   if (StringRef(ModuleOffsetStr).getAsInteger(0, ModuleOffset))
     return false;
   return true;
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index cf7ffe28009e..e7c83f94f536 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -44,6 +44,7 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/ObjCARC.h"
 using namespace llvm;
 
 static cl::opt<bool>
@@ -397,6 +398,10 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
 
   formatted_raw_ostream Out(out);
 
+  // If the bitcode files contain ARC code and were compiled with optimization,
+  // the ObjCARCContractPass must be run, so do it unconditionally here.
+  codeGenPasses.add(createObjCARCContractPass());
+
   if (_target->addPassesToEmitFile(codeGenPasses, Out,
                                    TargetMachine::CGFT_ObjectFile)) {
     errMsg = "target file type not supported";
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index ff67769192c3..d805f49f9a90 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -743,7 +743,7 @@ namespace {
           AddValueSymbols(Inst.getOperand(i).getExpr());
     }
     virtual void EmitLabel(MCSymbol *Symbol) {
-      Symbol->setSection(*getCurrentSection());
+      Symbol->setSection(*getCurrentSection().first);
       markDefined(*Symbol);
     }
     virtual void EmitDebugLabel(MCSymbol *Symbol) {
@@ -771,7 +771,8 @@ namespace {
     virtual void EmitBundleUnlock() {}
 
     // Noop calls.
-    virtual void ChangeSection(const MCSection *Section) {}
+    virtual void ChangeSection(const MCSection *Section,
+                               const MCExpr *Subsection) {}
     virtual void InitToTextSection() {}
     virtual void InitSections() {}
     virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
diff --git a/utils/obj2yaml/CMakeLists.txt b/tools/obj2yaml/CMakeLists.txt
similarity index 100%
rename from utils/obj2yaml/CMakeLists.txt
rename to tools/obj2yaml/CMakeLists.txt
diff --git a/utils/obj2yaml/Makefile b/tools/obj2yaml/Makefile
similarity index 93%
rename from utils/obj2yaml/Makefile
rename to tools/obj2yaml/Makefile
index 5b96bdd5b9d8..95f393ddd6f4 100644
--- a/utils/obj2yaml/Makefile
+++ b/tools/obj2yaml/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL = ../..
 TOOLNAME = obj2yaml
-USEDLIBS = LLVMObject.a LLVMSupport.a
+LINK_COMPONENTS := object
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS = 1
diff --git a/utils/obj2yaml/coff2yaml.cpp b/tools/obj2yaml/coff2yaml.cpp
similarity index 67%
rename from utils/obj2yaml/coff2yaml.cpp
rename to tools/obj2yaml/coff2yaml.cpp
index f0241d931e60..4a8ffbd81e20 100644
--- a/utils/obj2yaml/coff2yaml.cpp
+++ b/tools/obj2yaml/coff2yaml.cpp
@@ -10,6 +10,7 @@
 #include "obj2yaml.h"
 #include "llvm/Object/COFF.h"
 
+using namespace llvm;
 
 template <typename One, typename Two>
 struct pod_pair { // I'd much rather use std::pair, but it's not a POD
@@ -17,8 +18,8 @@ struct pod_pair { // I'd much rather use std::pair, but it's not a POD
   Two second;
 };
 
-#define STRING_PAIR(x)  {llvm::COFF::x, #x}
-static const pod_pair<llvm::COFF::MachineTypes, const char *> 
+#define STRING_PAIR(x)  {COFF::x, #x}
+static const pod_pair<COFF::MachineTypes, const char *>
 MachineTypePairs [] = {
   STRING_PAIR(IMAGE_FILE_MACHINE_UNKNOWN),
   STRING_PAIR(IMAGE_FILE_MACHINE_AM33),
@@ -43,7 +44,7 @@ MachineTypePairs [] = {
   STRING_PAIR(IMAGE_FILE_MACHINE_WCEMIPSV2)
 };
 
-static const pod_pair<llvm::COFF::SectionCharacteristics, const char *> 
+static const pod_pair<COFF::SectionCharacteristics, const char *>
 SectionCharacteristicsPairs1 [] = {
   STRING_PAIR(IMAGE_SCN_TYPE_NO_PAD),
   STRING_PAIR(IMAGE_SCN_CNT_CODE),
@@ -60,7 +61,7 @@ SectionCharacteristicsPairs1 [] = {
   STRING_PAIR(IMAGE_SCN_MEM_PRELOAD)
 };
 
-static const pod_pair<llvm::COFF::SectionCharacteristics, const char *> 
+static const pod_pair<COFF::SectionCharacteristics, const char *>
 SectionCharacteristicsPairsAlignment [] = {
   STRING_PAIR(IMAGE_SCN_ALIGN_1BYTES),
   STRING_PAIR(IMAGE_SCN_ALIGN_2BYTES),
@@ -78,7 +79,7 @@ SectionCharacteristicsPairsAlignment [] = {
   STRING_PAIR(IMAGE_SCN_ALIGN_8192BYTES)
 };
 
-static const pod_pair<llvm::COFF::SectionCharacteristics, const char *> 
+static const pod_pair<COFF::SectionCharacteristics, const char *>
 SectionCharacteristicsPairs2 [] = {
   STRING_PAIR(IMAGE_SCN_LNK_NRELOC_OVFL),
   STRING_PAIR(IMAGE_SCN_MEM_DISCARDABLE),
@@ -89,8 +90,8 @@ SectionCharacteristicsPairs2 [] = {
   STRING_PAIR(IMAGE_SCN_MEM_READ),
   STRING_PAIR(IMAGE_SCN_MEM_WRITE)
 };
-  
-static const pod_pair<llvm::COFF::SymbolBaseType, const char *> 
+
+static const pod_pair<COFF::SymbolBaseType, const char *>
 SymbolBaseTypePairs [] = {
   STRING_PAIR(IMAGE_SYM_TYPE_NULL),
   STRING_PAIR(IMAGE_SYM_TYPE_VOID),
@@ -110,15 +111,15 @@ SymbolBaseTypePairs [] = {
   STRING_PAIR(IMAGE_SYM_TYPE_DWORD)
 };
 
-static const pod_pair<llvm::COFF::SymbolComplexType, const char *> 
+static const pod_pair<COFF::SymbolComplexType, const char *>
 SymbolComplexTypePairs [] = {
   STRING_PAIR(IMAGE_SYM_DTYPE_NULL),
   STRING_PAIR(IMAGE_SYM_DTYPE_POINTER),
   STRING_PAIR(IMAGE_SYM_DTYPE_FUNCTION),
   STRING_PAIR(IMAGE_SYM_DTYPE_ARRAY),
 };
-  
-static const pod_pair<llvm::COFF::SymbolStorageClass, const char *> 
+
+static const pod_pair<COFF::SymbolStorageClass, const char *>
 SymbolStorageClassPairs [] = {
   STRING_PAIR(IMAGE_SYM_CLASS_END_OF_FUNCTION),
   STRING_PAIR(IMAGE_SYM_CLASS_NULL),
@@ -149,7 +150,7 @@ SymbolStorageClassPairs [] = {
   STRING_PAIR(IMAGE_SYM_CLASS_CLR_TOKEN),
 };
 
-static const pod_pair<llvm::COFF::RelocationTypeX86, const char *> 
+static const pod_pair<COFF::RelocationTypeX86, const char *>
 RelocationTypeX86Pairs [] = {
   STRING_PAIR(IMAGE_REL_I386_ABSOLUTE),
   STRING_PAIR(IMAGE_REL_I386_DIR16),
@@ -181,7 +182,7 @@ RelocationTypeX86Pairs [] = {
   STRING_PAIR(IMAGE_REL_AMD64_SSPAN32)
 };
 
-static const pod_pair<llvm::COFF::RelocationTypesARM, const char *> 
+static const pod_pair<COFF::RelocationTypesARM, const char *>
 RelocationTypesARMPairs [] = {
   STRING_PAIR(IMAGE_REL_ARM_ABSOLUTE),
   STRING_PAIR(IMAGE_REL_ARM_ADDR32),
@@ -201,13 +202,10 @@ RelocationTypesARMPairs [] = {
 };
 #undef STRING_PAIR
 
-
-static const char endl = '\n';
-
 namespace yaml {  // COFF-specific yaml-writing specific routines
 
-static llvm::raw_ostream &writeName(llvm::raw_ostream &Out, 
-                             const char *Name, std::size_t NameSize) {
+static raw_ostream &writeName(raw_ostream &Out,
+                              const char *Name, std::size_t NameSize) {
   for (std::size_t i = 0; i < NameSize; ++i) {
     if (!Name[i]) break;
     Out << Name[i];
@@ -217,8 +215,9 @@ static llvm::raw_ostream &writeName(llvm::raw_ostream &Out,
 
 // Given an array of pod_pair<enum, const char *>, write all enums that match
 template <typename T, std::size_t N>
-static llvm::raw_ostream &writeBitMask(llvm::raw_ostream &Out, 
-              const pod_pair<T, const char *> (&Arr)[N], unsigned long Val) {
+static raw_ostream &writeBitMask(raw_ostream &Out,
+                                 const pod_pair<T, const char *> (&Arr)[N],
+                                 unsigned long Val) {
   for (std::size_t i = 0; i < N; ++i)
     if (Val & Arr[i].first)
       Out << Arr[i].second << ", ";
@@ -229,8 +228,8 @@ static llvm::raw_ostream &writeBitMask(llvm::raw_ostream &Out,
 
 // Given an array of pod_pair<enum, const char *>, look up a value
 template <typename T, std::size_t N>
-const char *nameLookup(const pod_pair<T, const char *> (&Arr)[N], 
-                           unsigned long Val, const char *NotFound = NULL) {
+const char *nameLookup(const pod_pair<T, const char *> (&Arr)[N],
+                       unsigned long Val, const char *NotFound = NULL) {
   T n = static_cast<T>(Val);
   for (std::size_t i = 0; i < N; ++i)
     if (n == Arr[i].first)
@@ -239,123 +238,128 @@ const char *nameLookup(const pod_pair<T, const char *> (&Arr)[N],
 }
 
 
-static llvm::raw_ostream &yamlCOFFHeader(
-          const llvm::object::coff_file_header *Header,llvm::raw_ostream &Out) {
+static raw_ostream &yamlCOFFHeader(const object::coff_file_header *Header,
+                                   raw_ostream &Out) {
 
-  Out << "header: !Header" << endl;
+  Out << "header: !Header\n";
   Out << "  Machine: ";
   Out << nameLookup(MachineTypePairs, Header->Machine, "# Unknown_MachineTypes")
       << " # (";
-  return yaml::writeHexNumber(Out, Header->Machine) << ")" << endl << endl;
+  return yaml::writeHexNumber(Out, Header->Machine) << ")\n\n";
 }
 
 
-static llvm::raw_ostream &yamlCOFFSections(llvm::object::COFFObjectFile &Obj, 
-                            std::size_t NumSections, llvm::raw_ostream &Out) {
-  llvm::error_code ec;
-  Out << "sections:" << endl;
-  for (llvm::object::section_iterator iter = Obj.begin_sections(); 
-                             iter != Obj.end_sections(); iter.increment(ec)) {
-    const llvm::object::coff_section *sect = Obj.getCOFFSection(iter);
-  
-    Out << "  - !Section" << endl;
+static raw_ostream &yamlCOFFSections(object::COFFObjectFile &Obj,
+                                     std::size_t NumSections,
+                                     raw_ostream &Out) {
+  error_code ec;
+  Out << "sections:\n";
+  for (object::section_iterator iter = Obj.begin_sections();
+       iter != Obj.end_sections(); iter.increment(ec)) {
+    const object::coff_section *sect = Obj.getCOFFSection(iter);
+
+    Out << "  - !Section\n";
     Out << "    Name: ";
-    yaml::writeName(Out, sect->Name, sizeof(sect->Name)) << endl;
+    yaml::writeName(Out, sect->Name, sizeof(sect->Name)) << '\n';
 
     Out << "    Characteristics: [";
     yaml::writeBitMask(Out, SectionCharacteristicsPairs1, sect->Characteristics);
-    Out << nameLookup(SectionCharacteristicsPairsAlignment, 
-        sect->Characteristics & 0x00F00000, "# Unrecognized_IMAGE_SCN_ALIGN") 
+    Out << nameLookup(SectionCharacteristicsPairsAlignment,
+        sect->Characteristics & 0x00F00000, "# Unrecognized_IMAGE_SCN_ALIGN")
         << ", ";
     yaml::writeBitMask(Out, SectionCharacteristicsPairs2, sect->Characteristics);
     Out << "] # ";
-    yaml::writeHexNumber(Out, sect->Characteristics) << endl;
+    yaml::writeHexNumber(Out, sect->Characteristics) << '\n';
 
-    llvm::ArrayRef<uint8_t> sectionData;
-    Obj.getSectionContents(sect, sectionData);    
+    ArrayRef<uint8_t> sectionData;
+    Obj.getSectionContents(sect, sectionData);
     Out << "    SectionData: ";
-    yaml::writeHexStream(Out, sectionData) << endl;
+    yaml::writeHexStream(Out, sectionData) << '\n';
     if (iter->begin_relocations() != iter->end_relocations())
       Out << "    Relocations:\n";
-    for (llvm::object::relocation_iterator rIter = iter->begin_relocations();
+    for (object::relocation_iterator rIter = iter->begin_relocations();
                        rIter != iter->end_relocations(); rIter.increment(ec)) {
-      const llvm::object::coff_relocation *reloc = Obj.getCOFFRelocation(rIter);
+      const object::coff_relocation *reloc = Obj.getCOFFRelocation(rIter);
 
-        Out << "      - !Relocation" << endl;
+        Out << "      - !Relocation\n";
         Out << "        VirtualAddress: " ;
-        yaml::writeHexNumber(Out, reloc->VirtualAddress) << endl;
-        Out << "        SymbolTableIndex: " << reloc->SymbolTableIndex << endl;
-        Out << "        Type: " 
-            << nameLookup(RelocationTypeX86Pairs, reloc->Type) << endl;
+        yaml::writeHexNumber(Out, reloc->VirtualAddress) << '\n';
+        Out << "        SymbolTableIndex: " << reloc->SymbolTableIndex << '\n';
+        Out << "        Type: "
+            << nameLookup(RelocationTypeX86Pairs, reloc->Type) << '\n';
     // TODO: Use the correct reloc type for the machine.
-        Out << endl;
+        Out << '\n';
       }
 
-  } 
+  }
   return Out;
 }
 
-static llvm::raw_ostream& yamlCOFFSymbols(llvm::object::COFFObjectFile &Obj, 
-                              std::size_t NumSymbols, llvm::raw_ostream &Out) {
-  llvm::error_code ec;
-  Out << "symbols:" << endl;
-  for (llvm::object::symbol_iterator iter = Obj.begin_symbols(); 
-                             iter != Obj.end_symbols(); iter.increment(ec)) {
+static raw_ostream& yamlCOFFSymbols(object::COFFObjectFile &Obj,
+                                    std::size_t NumSymbols,
+                                    raw_ostream &Out) {
+  error_code ec;
+  Out << "symbols:\n";
+  for (object::symbol_iterator iter = Obj.begin_symbols();
+       iter != Obj.end_symbols(); iter.increment(ec)) {
  // Gather all the info that we need
-    llvm::StringRef str;
-    const llvm::object::coff_symbol *symbol = Obj.getCOFFSymbol(iter);
+    StringRef str;
+    const object::coff_symbol *symbol = Obj.getCOFFSymbol(iter);
     Obj.getSymbolName(symbol, str);
     std::size_t  simpleType  = symbol->getBaseType();
     std::size_t complexType  = symbol->getComplexType();
     std::size_t storageClass = symbol->StorageClass;
-    
-    Out << "  - !Symbol" << endl;
-    Out << "    Name: " << str << endl; 
-
-    Out << "    Value: "         << symbol->Value << endl;
-    Out << "    SectionNumber: " << symbol->SectionNumber << endl;
-
-    Out << "    SimpleType: " 
-        << nameLookup(SymbolBaseTypePairs, simpleType, 
-            "# Unknown_SymbolBaseType") 
-        << " # (" << simpleType << ")" << endl;
-    
-    Out << "    ComplexType: " 
-        << nameLookup(SymbolComplexTypePairs, complexType, 
-                "# Unknown_SymbolComplexType") 
-        << " # (" << complexType << ")" << endl;
-    
-    Out << "    StorageClass: " 
+
+    Out << "  - !Symbol\n";
+    Out << "    Name: " << str << '\n';
+
+    Out << "    Value: "         << symbol->Value << '\n';
+    Out << "    SectionNumber: " << symbol->SectionNumber << '\n';
+
+    Out << "    SimpleType: "
+        << nameLookup(SymbolBaseTypePairs, simpleType,
+            "# Unknown_SymbolBaseType")
+        << " # (" << simpleType << ")\n";
+
+    Out << "    ComplexType: "
+        << nameLookup(SymbolComplexTypePairs, complexType,
+                "# Unknown_SymbolComplexType")
+        << " # (" << complexType << ")\n";
+
+    Out << "    StorageClass: "
         << nameLookup(SymbolStorageClassPairs, storageClass,
-              "# Unknown_StorageClass") 
-        << " # (" << (int) storageClass << ")" << endl;
+              "# Unknown_StorageClass")
+        << " # (" << (int) storageClass << ")\n";
 
     if (symbol->NumberOfAuxSymbols > 0) {
-      llvm::ArrayRef<uint8_t> aux = Obj.getSymbolAuxData(symbol);
-      Out << "    NumberOfAuxSymbols: " 
-          << (int) symbol->NumberOfAuxSymbols << endl;
+      ArrayRef<uint8_t> aux = Obj.getSymbolAuxData(symbol);
+      Out << "    NumberOfAuxSymbols: "
+          << (int) symbol->NumberOfAuxSymbols << '\n';
       Out << "    AuxillaryData: ";
       yaml::writeHexStream(Out, aux);
     }
-      
-    Out << endl;
+
+    Out << '\n';
   }
 
   return Out;
 }
 
 
-llvm::error_code coff2yaml(llvm::raw_ostream &Out, llvm::MemoryBuffer *TheObj) {
-  llvm::error_code ec;
-  llvm::object::COFFObjectFile obj(TheObj, ec);
-  if (!ec) {
-    const llvm::object::coff_file_header *hd;
-    ec = obj.getHeader(hd);
-    if (!ec) {
-      yamlCOFFHeader(hd, Out);
-      yamlCOFFSections(obj, hd->NumberOfSections, Out);
-      yamlCOFFSymbols(obj, hd->NumberOfSymbols, Out);
-    }
-  }
+error_code coff2yaml(raw_ostream &Out, MemoryBuffer *TheObj) {
+  error_code ec;
+  object::COFFObjectFile obj(TheObj, ec);
+  if (ec)
+    return ec;
+
+  const object::coff_file_header *hd;
+  ec = obj.getHeader(hd);
+  if (ec)
+    return ec;
+
+  yamlCOFFHeader(hd, Out);
+  yamlCOFFSections(obj, hd->NumberOfSections, Out);
+  yamlCOFFSymbols(obj, hd->NumberOfSymbols, Out);
+
   return ec;
 }
diff --git a/utils/obj2yaml/obj2yaml.cpp b/tools/obj2yaml/obj2yaml.cpp
similarity index 61%
rename from utils/obj2yaml/obj2yaml.cpp
rename to tools/obj2yaml/obj2yaml.cpp
index bdc461a94715..f089e0aba5ae 100644
--- a/utils/obj2yaml/obj2yaml.cpp
+++ b/tools/obj2yaml/obj2yaml.cpp
@@ -16,20 +16,19 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
 
-const char endl = '\n';
+using namespace llvm;
 
 namespace yaml {  // generic yaml-writing specific routines
 
 unsigned char printable(unsigned char Ch) {
   return Ch >= ' ' && Ch <= '~' ? Ch : '.';
 }
-  
-llvm::raw_ostream &writeHexStream(llvm::raw_ostream &Out, 
-                                     const llvm::ArrayRef<uint8_t> arr) {
+
+raw_ostream &writeHexStream(raw_ostream &Out, const ArrayRef<uint8_t> arr) {
   const char *hex = "0123456789ABCDEF";
   Out << " !hex \"";
 
-  typedef llvm::ArrayRef<uint8_t>::const_iterator iter_t;
+  typedef ArrayRef<uint8_t>::const_iterator iter_t;
   const iter_t end = arr.end();
   for (iter_t iter = arr.begin(); iter != end; ++iter)
     Out << hex[(*iter >> 4) & 0x0F] << hex[(*iter & 0x0F)];
@@ -37,49 +36,50 @@ llvm::raw_ostream &writeHexStream(llvm::raw_ostream &Out,
   Out << "\" # |";
   for (iter_t iter = arr.begin(); iter != end; ++iter)
     Out << printable(*iter);
-  Out << "|" << endl;
+  Out << "|\n";
 
   return Out;
-  }
+}
 
-llvm::raw_ostream &writeHexNumber(llvm::raw_ostream &Out, unsigned long long N) {
+raw_ostream &writeHexNumber(raw_ostream &Out, unsigned long long N) {
   if (N >= 10)
     Out << "0x";
   Out.write_hex(N);
   return Out;
 }
 
+} // end namespace yaml
+
+namespace {
+enum ObjectFileType {
+  coff
+};
 }
 
+cl::opt<ObjectFileType> InputFormat(
+    cl::desc("Choose input format"),
+    cl::values(clEnumVal(coff, "process COFF object files"), clEnumValEnd));
 
-using namespace llvm;
-enum ObjectFileType { coff };
+cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"),
+                                   cl::init("-"));
 
-cl::opt<ObjectFileType> InputFormat(
-  cl::desc("Choose input format"),
-    cl::values(
-      clEnumVal(coff, "process COFF object files"),
-    clEnumValEnd));
-    
-cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
-
-int main(int argc, char * argv[]) {
+int main(int argc, char *argv[]) {
   cl::ParseCommandLineOptions(argc, argv);
   sys::PrintStackTraceOnErrorSignal();
   PrettyStackTraceProgram X(argc, argv);
-  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
 
-// Process the input file  
+  // Process the input file
   OwningPtr<MemoryBuffer> buf;
 
-// TODO: If this is an archive, then burst it and dump each entry
-  if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, buf))
-    llvm::errs() << "Error: '" << ec.message() << "' opening file '" 
-              << InputFilename << "'" << endl;
-  else {
-    ec = coff2yaml(llvm::outs(), buf.take());
+  // TODO: If this is an archive, then burst it and dump each entry
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, buf)) {
+    errs() << "Error: '" << ec.message() << "' opening file '" << InputFilename
+           << "'\n";
+  } else {
+    ec = coff2yaml(outs(), buf.take());
     if (ec)
-      llvm::errs() << "Error: " << ec.message() << " dumping COFF file" << endl;
+      errs() << "Error: " << ec.message() << " dumping COFF file\n";
   }
 
   return 0;
diff --git a/utils/obj2yaml/obj2yaml.h b/tools/obj2yaml/obj2yaml.h
similarity index 94%
rename from utils/obj2yaml/obj2yaml.h
rename to tools/obj2yaml/obj2yaml.h
index 0bc376a6db82..b4a012607abb 100644
--- a/utils/obj2yaml/obj2yaml.h
+++ b/tools/obj2yaml/obj2yaml.h
@@ -10,8 +10,8 @@
 // source file, implement it.
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_UTILS_OBJ2YAML_H
-#define LLVM_UTILS_OBJ2YAML_H
+#ifndef LLVM_TOOLS_OBJ2YAML_H
+#define LLVM_TOOLS_OBJ2YAML_H
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/MemoryBuffer.h"
diff --git a/tools/opt/CMakeLists.txt b/tools/opt/CMakeLists.txt
index cf5e5a83cf96..91959119e491 100644
--- a/tools/opt/CMakeLists.txt
+++ b/tools/opt/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser bitwriter instrumentation scalaropts objcarcopts ipo vectorize)
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser bitwriter irreader instrumentation scalaropts objcarcopts ipo vectorize)
 
 add_llvm_tool(opt
   AnalysisWrappers.cpp
@@ -6,3 +6,4 @@ add_llvm_tool(opt
   PrintSCC.cpp
   opt.cpp
   )
+set_target_properties(opt PROPERTIES ENABLE_EXPORTS 1)
diff --git a/tools/opt/LLVMBuild.txt b/tools/opt/LLVMBuild.txt
index a866d12a263c..77b94469edd5 100644
--- a/tools/opt/LLVMBuild.txt
+++ b/tools/opt/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Tool
 name = opt
 parent = Tools
-required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar ObjCARC all-targets
+required_libraries = AsmParser BitReader BitWriter IRReader IPO Instrumentation Scalar ObjCARC all-targets
diff --git a/tools/opt/Makefile b/tools/opt/Makefile
index 79ed815dcec8..a451005574a5 100644
--- a/tools/opt/Makefile
+++ b/tools/opt/Makefile
@@ -9,6 +9,6 @@
 
 LEVEL := ../..
 TOOLNAME := opt
-LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts objcarcopts ipo vectorize all-targets
+LINK_COMPONENTS := bitreader bitwriter asmparser irreader instrumentation scalaropts objcarcopts ipo vectorize all-targets
 
 include $(LEVEL)/Makefile.common
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 81a2de2d5581..e385d7f57716 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -26,17 +26,18 @@
 #include "llvm/DebugInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
 #include "llvm/LinkAllIR.h"
 #include "llvm/LinkAllPasses.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/PassManager.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PassNameParser.h"
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
@@ -588,7 +589,7 @@ int main(int argc, char **argv) {
   SMDiagnostic Err;
 
   // Load the input module...
-  std::auto_ptr<Module> M;
+  OwningPtr<Module> M;
   M.reset(ParseIRFile(InputFilename, Err, Context));
 
   if (M.get() == 0) {
@@ -655,7 +656,7 @@ int main(int argc, char **argv) {
   TargetMachine *Machine = 0;
   if (ModuleTriple.getArch())
     Machine = GetTargetMachine(Triple(ModuleTriple));
-  std::auto_ptr<TargetMachine> TM(Machine);
+  OwningPtr<TargetMachine> TM(Machine);
 
   // Add internal analysis passes from the target machine.
   if (TM.get())
diff --git a/utils/yaml2obj/CMakeLists.txt b/tools/yaml2obj/CMakeLists.txt
similarity index 100%
rename from utils/yaml2obj/CMakeLists.txt
rename to tools/yaml2obj/CMakeLists.txt
diff --git a/utils/yaml2obj/Makefile b/tools/yaml2obj/Makefile
similarity index 95%
rename from utils/yaml2obj/Makefile
rename to tools/yaml2obj/Makefile
index e746d8519008..cb6f47724b69 100644
--- a/utils/yaml2obj/Makefile
+++ b/tools/yaml2obj/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL = ../..
 TOOLNAME = yaml2obj
-USEDLIBS = LLVMSupport.a
+LINK_COMPONENTS := support
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS = 1
diff --git a/tools/yaml2obj/yaml2obj.cpp b/tools/yaml2obj/yaml2obj.cpp
new file mode 100644
index 000000000000..5741d95c5422
--- /dev/null
+++ b/tools/yaml2obj/yaml2obj.cpp
@@ -0,0 +1,736 @@
+//===- yaml2obj - Convert YAML to a binary object file --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program takes a YAML description of an object file and outputs the
+// binary equivalent.
+//
+// This is used for writing tests that require binary files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<std::string>
+  Input(cl::Positional, cl::desc("<input>"), cl::init("-"));
+
+template<class T>
+typename llvm::enable_if_c<std::numeric_limits<T>::is_integer, bool>::type
+getAs(const llvm::yaml::ScalarNode *SN, T &Result) {
+  SmallString<4> Storage;
+  StringRef Value = SN->getValue(Storage);
+  if (Value.getAsInteger(0, Result))
+    return false;
+  return true;
+}
+
+// Given a container with begin and end with ::value_type of a character type.
+// Iterate through pairs of characters in the the set of [a-fA-F0-9] ignoring
+// all other characters.
+struct hex_pair_iterator {
+  StringRef::const_iterator Current, End;
+  typedef SmallVector<char, 2> value_type;
+  value_type Pair;
+  bool IsDone;
+
+  hex_pair_iterator(StringRef C)
+    : Current(C.begin()), End(C.end()), IsDone(false) {
+    // Initalize Pair.
+    ++*this;
+  }
+
+  // End iterator.
+  hex_pair_iterator() : Current(), End(), IsDone(true) {}
+
+  value_type operator *() const {
+    return Pair;
+  }
+
+  hex_pair_iterator operator ++() {
+    // We're at the end of the input.
+    if (Current == End) {
+      IsDone = true;
+      return *this;
+    }
+    Pair = value_type();
+    for (; Current != End && Pair.size() != 2; ++Current) {
+      // Is a valid hex digit.
+      if ((*Current >= '0' && *Current <= '9') ||
+          (*Current >= 'a' && *Current <= 'f') ||
+          (*Current >= 'A' && *Current <= 'F'))
+        Pair.push_back(*Current);
+    }
+    // Hit the end without getting 2 hex digits. Pair is invalid.
+    if (Pair.size() != 2)
+      IsDone = true;
+    return *this;
+  }
+
+  bool operator ==(const hex_pair_iterator Other) {
+    return (IsDone == Other.IsDone) ||
+           (Current == Other.Current && End == Other.End);
+  }
+
+  bool operator !=(const hex_pair_iterator Other) {
+    return !(*this == Other);
+  }
+};
+
+template <class ContainerOut>
+static bool hexStringToByteArray(StringRef Str, ContainerOut &Out) {
+  for (hex_pair_iterator I(Str), E; I != E; ++I) {
+    typename hex_pair_iterator::value_type Pair = *I;
+    typename ContainerOut::value_type Byte;
+    if (StringRef(Pair.data(), 2).getAsInteger(16, Byte))
+      return false;
+    Out.push_back(Byte);
+  }
+  return true;
+}
+
+// The structure of the yaml files is not an exact 1:1 match to COFF. In order
+// to use yaml::IO, we use these structures which are closer to the source.
+namespace COFFYAML {
+  struct Section {
+    COFF::SectionCharacteristics Characteristics;
+    StringRef SectionData;
+    std::vector<COFF::relocation> Relocations;
+    StringRef Name;
+  };
+
+  struct Symbol {
+    COFF::SymbolBaseType SimpleType;
+    uint8_t NumberOfAuxSymbols;
+    StringRef Name;
+    COFF::SymbolStorageClass StorageClass;
+    StringRef AuxillaryData;
+    COFF::SymbolComplexType ComplexType;
+    uint32_t Value;
+    uint16_t SectionNumber;
+  };
+
+  struct Object {
+    COFF::header HeaderData;
+    std::vector<Section> Sections;
+    std::vector<Symbol> Symbols;
+  };
+}
+
+/// This parses a yaml stream that represents a COFF object file.
+/// See docs/yaml2obj for the yaml scheema.
+struct COFFParser {
+  COFFParser(COFFYAML::Object &Obj) : Obj(Obj) {
+    std::memset(&Header, 0, sizeof(Header));
+    // A COFF string table always starts with a 4 byte size field. Offsets into
+    // it include this size, so allocate it now.
+    StringTable.append(4, 0);
+  }
+
+  void parseHeader() {
+    Header.Machine = Obj.HeaderData.Machine;
+    Header.Characteristics = Obj.HeaderData.Characteristics;
+  }
+
+  bool parseSections() {
+    for (std::vector<COFFYAML::Section>::iterator i = Obj.Sections.begin(),
+           e = Obj.Sections.end(); i != e; ++i) {
+      const COFFYAML::Section &YamlSection = *i;
+      Section Sec;
+      std::memset(&Sec.Header, 0, sizeof(Sec.Header));
+
+      // If the name is less than 8 bytes, store it in place, otherwise
+      // store it in the string table.
+      StringRef Name = YamlSection.Name;
+      std::fill_n(Sec.Header.Name, unsigned(COFF::NameSize), 0);
+      if (Name.size() <= COFF::NameSize) {
+        std::copy(Name.begin(), Name.end(), Sec.Header.Name);
+      } else {
+        // Add string to the string table and format the index for output.
+        unsigned Index = getStringIndex(Name);
+        std::string str = utostr(Index);
+        if (str.size() > 7) {
+          errs() << "String table got too large";
+          return false;
+        }
+        Sec.Header.Name[0] = '/';
+        std::copy(str.begin(), str.end(), Sec.Header.Name + 1);
+      }
+
+      Sec.Header.Characteristics = YamlSection.Characteristics;
+
+      StringRef Data = YamlSection.SectionData;
+      if (!hexStringToByteArray(Data, Sec.Data)) {
+        errs() << "SectionData must be a collection of pairs of hex bytes";
+        return false;
+      }
+      Sections.push_back(Sec);
+    }
+    return true;
+  }
+
+  bool parseSymbols() {
+    for (std::vector<COFFYAML::Symbol>::iterator i = Obj.Symbols.begin(),
+           e = Obj.Symbols.end(); i != e; ++i) {
+      COFFYAML::Symbol YamlSymbol = *i;
+      Symbol Sym;
+      std::memset(&Sym.Header, 0, sizeof(Sym.Header));
+
+      // If the name is less than 8 bytes, store it in place, otherwise
+      // store it in the string table.
+      StringRef Name = YamlSymbol.Name;
+      std::fill_n(Sym.Header.Name, unsigned(COFF::NameSize), 0);
+      if (Name.size() <= COFF::NameSize) {
+        std::copy(Name.begin(), Name.end(), Sym.Header.Name);
+      } else {
+        // Add string to the string table and format the index for output.
+        unsigned Index = getStringIndex(Name);
+        *reinterpret_cast<support::aligned_ulittle32_t*>(
+            Sym.Header.Name + 4) = Index;
+      }
+
+      Sym.Header.Value = YamlSymbol.Value;
+      Sym.Header.Type |= YamlSymbol.SimpleType;
+      Sym.Header.Type |= YamlSymbol.ComplexType << COFF::SCT_COMPLEX_TYPE_SHIFT;
+      Sym.Header.StorageClass = YamlSymbol.StorageClass;
+      Sym.Header.SectionNumber = YamlSymbol.SectionNumber;
+
+      StringRef Data = YamlSymbol.AuxillaryData;
+      if (!hexStringToByteArray(Data, Sym.AuxSymbols)) {
+        errs() << "AuxillaryData must be a collection of pairs of hex bytes";
+        return false;
+      }
+      Symbols.push_back(Sym);
+    }
+    return true;
+  }
+
+  bool parse() {
+    parseHeader();
+    if (!parseSections())
+      return false;
+    if (!parseSymbols())
+      return false;
+    return true;
+  }
+
+  unsigned getStringIndex(StringRef Str) {
+    StringMap<unsigned>::iterator i = StringTableMap.find(Str);
+    if (i == StringTableMap.end()) {
+      unsigned Index = StringTable.size();
+      StringTable.append(Str.begin(), Str.end());
+      StringTable.push_back(0);
+      StringTableMap[Str] = Index;
+      return Index;
+    }
+    return i->second;
+  }
+
+  COFFYAML::Object &Obj;
+  COFF::header Header;
+
+  struct Section {
+    COFF::section Header;
+    std::vector<uint8_t> Data;
+    std::vector<COFF::relocation> Relocations;
+  };
+
+  struct Symbol {
+    COFF::symbol Header;
+    std::vector<uint8_t> AuxSymbols;
+  };
+
+  std::vector<Section> Sections;
+  std::vector<Symbol> Symbols;
+  StringMap<unsigned> StringTableMap;
+  std::string StringTable;
+};
+
+// Take a CP and assign addresses and sizes to everything. Returns false if the
+// layout is not valid to do.
+static bool layoutCOFF(COFFParser &CP) {
+  uint32_t SectionTableStart = 0;
+  uint32_t SectionTableSize  = 0;
+
+  // The section table starts immediately after the header, including the
+  // optional header.
+  SectionTableStart = sizeof(COFF::header) + CP.Header.SizeOfOptionalHeader;
+  SectionTableSize = sizeof(COFF::section) * CP.Sections.size();
+
+  uint32_t CurrentSectionDataOffset = SectionTableStart + SectionTableSize;
+
+  // Assign each section data address consecutively.
+  for (std::vector<COFFParser::Section>::iterator i = CP.Sections.begin(),
+                                                  e = CP.Sections.end();
+                                                  i != e; ++i) {
+    if (!i->Data.empty()) {
+      i->Header.SizeOfRawData = i->Data.size();
+      i->Header.PointerToRawData = CurrentSectionDataOffset;
+      CurrentSectionDataOffset += i->Header.SizeOfRawData;
+      // TODO: Handle alignment.
+    } else {
+      i->Header.SizeOfRawData = 0;
+      i->Header.PointerToRawData = 0;
+    }
+  }
+
+  uint32_t SymbolTableStart = CurrentSectionDataOffset;
+
+  // Calculate number of symbols.
+  uint32_t NumberOfSymbols = 0;
+  for (std::vector<COFFParser::Symbol>::iterator i = CP.Symbols.begin(),
+                                                 e = CP.Symbols.end();
+                                                 i != e; ++i) {
+    if (i->AuxSymbols.size() % COFF::SymbolSize != 0) {
+      errs() << "AuxillaryData size not a multiple of symbol size!\n";
+      return false;
+    }
+    i->Header.NumberOfAuxSymbols = i->AuxSymbols.size() / COFF::SymbolSize;
+    NumberOfSymbols += 1 + i->Header.NumberOfAuxSymbols;
+  }
+
+  // Store all the allocated start addresses in the header.
+  CP.Header.NumberOfSections = CP.Sections.size();
+  CP.Header.NumberOfSymbols = NumberOfSymbols;
+  CP.Header.PointerToSymbolTable = SymbolTableStart;
+
+  *reinterpret_cast<support::ulittle32_t *>(&CP.StringTable[0])
+    = CP.StringTable.size();
+
+  return true;
+}
+
+template <typename value_type>
+struct binary_le_impl {
+  value_type Value;
+  binary_le_impl(value_type V) : Value(V) {}
+};
+
+template <typename value_type>
+raw_ostream &operator <<( raw_ostream &OS
+                        , const binary_le_impl<value_type> &BLE) {
+  char Buffer[sizeof(BLE.Value)];
+  support::endian::write<value_type, support::little, support::unaligned>(
+    Buffer, BLE.Value);
+  OS.write(Buffer, sizeof(BLE.Value));
+  return OS;
+}
+
+template <typename value_type>
+binary_le_impl<value_type> binary_le(value_type V) {
+  return binary_le_impl<value_type>(V);
+}
+
+void writeCOFF(COFFParser &CP, raw_ostream &OS) {
+  OS << binary_le(CP.Header.Machine)
+     << binary_le(CP.Header.NumberOfSections)
+     << binary_le(CP.Header.TimeDateStamp)
+     << binary_le(CP.Header.PointerToSymbolTable)
+     << binary_le(CP.Header.NumberOfSymbols)
+     << binary_le(CP.Header.SizeOfOptionalHeader)
+     << binary_le(CP.Header.Characteristics);
+
+  // Output section table.
+  for (std::vector<COFFParser::Section>::const_iterator i = CP.Sections.begin(),
+                                                        e = CP.Sections.end();
+                                                        i != e; ++i) {
+    OS.write(i->Header.Name, COFF::NameSize);
+    OS << binary_le(i->Header.VirtualSize)
+       << binary_le(i->Header.VirtualAddress)
+       << binary_le(i->Header.SizeOfRawData)
+       << binary_le(i->Header.PointerToRawData)
+       << binary_le(i->Header.PointerToRelocations)
+       << binary_le(i->Header.PointerToLineNumbers)
+       << binary_le(i->Header.NumberOfRelocations)
+       << binary_le(i->Header.NumberOfLineNumbers)
+       << binary_le(i->Header.Characteristics);
+  }
+
+  // Output section data.
+  for (std::vector<COFFParser::Section>::const_iterator i = CP.Sections.begin(),
+                                                        e = CP.Sections.end();
+                                                        i != e; ++i) {
+    if (!i->Data.empty())
+      OS.write(reinterpret_cast<const char*>(&i->Data[0]), i->Data.size());
+  }
+
+  // Output symbol table.
+
+  for (std::vector<COFFParser::Symbol>::const_iterator i = CP.Symbols.begin(),
+                                                       e = CP.Symbols.end();
+                                                       i != e; ++i) {
+    OS.write(i->Header.Name, COFF::NameSize);
+    OS << binary_le(i->Header.Value)
+       << binary_le(i->Header.SectionNumber)
+       << binary_le(i->Header.Type)
+       << binary_le(i->Header.StorageClass)
+       << binary_le(i->Header.NumberOfAuxSymbols);
+    if (!i->AuxSymbols.empty())
+      OS.write( reinterpret_cast<const char*>(&i->AuxSymbols[0])
+              , i->AuxSymbols.size());
+  }
+
+  // Output string table.
+  OS.write(&CP.StringTable[0], CP.StringTable.size());
+}
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(COFF::relocation)
+LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Section)
+LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Symbol)
+
+namespace llvm {
+
+namespace COFF {
+  Characteristics operator|(Characteristics a, Characteristics b) {
+    uint32_t Ret = static_cast<uint32_t>(a) | static_cast<uint32_t>(b);
+    return static_cast<Characteristics>(Ret);
+  }
+
+  SectionCharacteristics
+  operator|(SectionCharacteristics a, SectionCharacteristics b) {
+    uint32_t Ret = static_cast<uint32_t>(a) | static_cast<uint32_t>(b);
+    return static_cast<SectionCharacteristics>(Ret);
+  }
+}
+
+namespace yaml {
+
+#define BCase(X) IO.bitSetCase(Value, #X, COFF::X);
+
+template <>
+struct ScalarBitSetTraits<COFF::SectionCharacteristics> {
+  static void bitset(IO &IO, COFF::SectionCharacteristics &Value) {
+    BCase(IMAGE_SCN_TYPE_NO_PAD);
+    BCase(IMAGE_SCN_CNT_CODE);
+    BCase(IMAGE_SCN_CNT_INITIALIZED_DATA);
+    BCase(IMAGE_SCN_CNT_UNINITIALIZED_DATA);
+    BCase(IMAGE_SCN_LNK_OTHER);
+    BCase(IMAGE_SCN_LNK_INFO);
+    BCase(IMAGE_SCN_LNK_REMOVE);
+    BCase(IMAGE_SCN_LNK_COMDAT);
+    BCase(IMAGE_SCN_GPREL);
+    BCase(IMAGE_SCN_MEM_PURGEABLE);
+    BCase(IMAGE_SCN_MEM_16BIT);
+    BCase(IMAGE_SCN_MEM_LOCKED);
+    BCase(IMAGE_SCN_MEM_PRELOAD);
+    BCase(IMAGE_SCN_ALIGN_1BYTES);
+    BCase(IMAGE_SCN_ALIGN_2BYTES);
+    BCase(IMAGE_SCN_ALIGN_4BYTES);
+    BCase(IMAGE_SCN_ALIGN_8BYTES);
+    BCase(IMAGE_SCN_ALIGN_16BYTES);
+    BCase(IMAGE_SCN_ALIGN_32BYTES);
+    BCase(IMAGE_SCN_ALIGN_64BYTES);
+    BCase(IMAGE_SCN_ALIGN_128BYTES);
+    BCase(IMAGE_SCN_ALIGN_256BYTES);
+    BCase(IMAGE_SCN_ALIGN_512BYTES);
+    BCase(IMAGE_SCN_ALIGN_1024BYTES);
+    BCase(IMAGE_SCN_ALIGN_2048BYTES);
+    BCase(IMAGE_SCN_ALIGN_4096BYTES);
+    BCase(IMAGE_SCN_ALIGN_8192BYTES);
+    BCase(IMAGE_SCN_LNK_NRELOC_OVFL);
+    BCase(IMAGE_SCN_MEM_DISCARDABLE);
+    BCase(IMAGE_SCN_MEM_NOT_CACHED);
+    BCase(IMAGE_SCN_MEM_NOT_PAGED);
+    BCase(IMAGE_SCN_MEM_SHARED);
+    BCase(IMAGE_SCN_MEM_EXECUTE);
+    BCase(IMAGE_SCN_MEM_READ);
+    BCase(IMAGE_SCN_MEM_WRITE);
+  }
+};
+
+template <>
+struct ScalarBitSetTraits<COFF::Characteristics> {
+  static void bitset(IO &IO, COFF::Characteristics &Value) {
+    BCase(IMAGE_FILE_RELOCS_STRIPPED);
+    BCase(IMAGE_FILE_EXECUTABLE_IMAGE);
+    BCase(IMAGE_FILE_LINE_NUMS_STRIPPED);
+    BCase(IMAGE_FILE_LOCAL_SYMS_STRIPPED);
+    BCase(IMAGE_FILE_AGGRESSIVE_WS_TRIM);
+    BCase(IMAGE_FILE_LARGE_ADDRESS_AWARE);
+    BCase(IMAGE_FILE_BYTES_REVERSED_LO);
+    BCase(IMAGE_FILE_32BIT_MACHINE);
+    BCase(IMAGE_FILE_DEBUG_STRIPPED);
+    BCase(IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP);
+    BCase(IMAGE_FILE_NET_RUN_FROM_SWAP);
+    BCase(IMAGE_FILE_SYSTEM);
+    BCase(IMAGE_FILE_DLL);
+    BCase(IMAGE_FILE_UP_SYSTEM_ONLY);
+    BCase(IMAGE_FILE_BYTES_REVERSED_HI);
+  }
+};
+#undef BCase
+
+#define ECase(X) IO.enumCase(Value, #X, COFF::X);
+
+template <>
+struct ScalarEnumerationTraits<COFF::SymbolComplexType> {
+  static void enumeration(IO &IO, COFF::SymbolComplexType &Value) {
+    ECase(IMAGE_SYM_DTYPE_NULL);
+    ECase(IMAGE_SYM_DTYPE_POINTER);
+    ECase(IMAGE_SYM_DTYPE_FUNCTION);
+    ECase(IMAGE_SYM_DTYPE_ARRAY);
+  }
+};
+
+template <>
+struct ScalarEnumerationTraits<COFF::SymbolStorageClass> {
+  static void enumeration(IO &IO, COFF::SymbolStorageClass &Value) {
+    ECase(IMAGE_SYM_CLASS_END_OF_FUNCTION);
+    ECase(IMAGE_SYM_CLASS_NULL);
+    ECase(IMAGE_SYM_CLASS_AUTOMATIC);
+    ECase(IMAGE_SYM_CLASS_EXTERNAL);
+    ECase(IMAGE_SYM_CLASS_STATIC);
+    ECase(IMAGE_SYM_CLASS_REGISTER);
+    ECase(IMAGE_SYM_CLASS_EXTERNAL_DEF);
+    ECase(IMAGE_SYM_CLASS_LABEL);
+    ECase(IMAGE_SYM_CLASS_UNDEFINED_LABEL);
+    ECase(IMAGE_SYM_CLASS_MEMBER_OF_STRUCT);
+    ECase(IMAGE_SYM_CLASS_ARGUMENT);
+    ECase(IMAGE_SYM_CLASS_STRUCT_TAG);
+    ECase(IMAGE_SYM_CLASS_MEMBER_OF_UNION);
+    ECase(IMAGE_SYM_CLASS_UNION_TAG);
+    ECase(IMAGE_SYM_CLASS_TYPE_DEFINITION);
+    ECase(IMAGE_SYM_CLASS_UNDEFINED_STATIC);
+    ECase(IMAGE_SYM_CLASS_ENUM_TAG);
+    ECase(IMAGE_SYM_CLASS_MEMBER_OF_ENUM);
+    ECase(IMAGE_SYM_CLASS_REGISTER_PARAM);
+    ECase(IMAGE_SYM_CLASS_BIT_FIELD);
+    ECase(IMAGE_SYM_CLASS_BLOCK);
+    ECase(IMAGE_SYM_CLASS_FUNCTION);
+    ECase(IMAGE_SYM_CLASS_END_OF_STRUCT);
+    ECase(IMAGE_SYM_CLASS_FILE);
+    ECase(IMAGE_SYM_CLASS_SECTION);
+    ECase(IMAGE_SYM_CLASS_WEAK_EXTERNAL);
+    ECase(IMAGE_SYM_CLASS_CLR_TOKEN);
+  }
+};
+
+template <>
+struct ScalarEnumerationTraits<COFF::SymbolBaseType> {
+  static void enumeration(IO &IO, COFF::SymbolBaseType &Value) {
+    ECase(IMAGE_SYM_TYPE_NULL);
+    ECase(IMAGE_SYM_TYPE_VOID);
+    ECase(IMAGE_SYM_TYPE_CHAR);
+    ECase(IMAGE_SYM_TYPE_SHORT);
+    ECase(IMAGE_SYM_TYPE_INT);
+    ECase(IMAGE_SYM_TYPE_LONG);
+    ECase(IMAGE_SYM_TYPE_FLOAT);
+    ECase(IMAGE_SYM_TYPE_DOUBLE);
+    ECase(IMAGE_SYM_TYPE_STRUCT);
+    ECase(IMAGE_SYM_TYPE_UNION);
+    ECase(IMAGE_SYM_TYPE_ENUM);
+    ECase(IMAGE_SYM_TYPE_MOE);
+    ECase(IMAGE_SYM_TYPE_BYTE);
+    ECase(IMAGE_SYM_TYPE_WORD);
+    ECase(IMAGE_SYM_TYPE_UINT);
+    ECase(IMAGE_SYM_TYPE_DWORD);
+  }
+};
+
+template <>
+struct ScalarEnumerationTraits<COFF::MachineTypes> {
+  static void enumeration(IO &IO, COFF::MachineTypes &Value) {
+    ECase(IMAGE_FILE_MACHINE_UNKNOWN);
+    ECase(IMAGE_FILE_MACHINE_AM33);
+    ECase(IMAGE_FILE_MACHINE_AMD64);
+    ECase(IMAGE_FILE_MACHINE_ARM);
+    ECase(IMAGE_FILE_MACHINE_ARMV7);
+    ECase(IMAGE_FILE_MACHINE_EBC);
+    ECase(IMAGE_FILE_MACHINE_I386);
+    ECase(IMAGE_FILE_MACHINE_IA64);
+    ECase(IMAGE_FILE_MACHINE_M32R);
+    ECase(IMAGE_FILE_MACHINE_MIPS16);
+    ECase(IMAGE_FILE_MACHINE_MIPSFPU);
+    ECase(IMAGE_FILE_MACHINE_MIPSFPU16);
+    ECase(IMAGE_FILE_MACHINE_POWERPC);
+    ECase(IMAGE_FILE_MACHINE_POWERPCFP);
+    ECase(IMAGE_FILE_MACHINE_R4000);
+    ECase(IMAGE_FILE_MACHINE_SH3);
+    ECase(IMAGE_FILE_MACHINE_SH3DSP);
+    ECase(IMAGE_FILE_MACHINE_SH4);
+    ECase(IMAGE_FILE_MACHINE_SH5);
+    ECase(IMAGE_FILE_MACHINE_THUMB);
+    ECase(IMAGE_FILE_MACHINE_WCEMIPSV2);
+  }
+};
+
+template <>
+struct ScalarEnumerationTraits<COFF::RelocationTypeX86> {
+  static void enumeration(IO &IO, COFF::RelocationTypeX86 &Value) {
+    ECase(IMAGE_REL_I386_ABSOLUTE);
+    ECase(IMAGE_REL_I386_DIR16);
+    ECase(IMAGE_REL_I386_REL16);
+    ECase(IMAGE_REL_I386_DIR32);
+    ECase(IMAGE_REL_I386_DIR32NB);
+    ECase(IMAGE_REL_I386_SEG12);
+    ECase(IMAGE_REL_I386_SECTION);
+    ECase(IMAGE_REL_I386_SECREL);
+    ECase(IMAGE_REL_I386_TOKEN);
+    ECase(IMAGE_REL_I386_SECREL7);
+    ECase(IMAGE_REL_I386_REL32);
+    ECase(IMAGE_REL_AMD64_ABSOLUTE);
+    ECase(IMAGE_REL_AMD64_ADDR64);
+    ECase(IMAGE_REL_AMD64_ADDR32);
+    ECase(IMAGE_REL_AMD64_ADDR32NB);
+    ECase(IMAGE_REL_AMD64_REL32);
+    ECase(IMAGE_REL_AMD64_REL32_1);
+    ECase(IMAGE_REL_AMD64_REL32_2);
+    ECase(IMAGE_REL_AMD64_REL32_3);
+    ECase(IMAGE_REL_AMD64_REL32_4);
+    ECase(IMAGE_REL_AMD64_REL32_5);
+    ECase(IMAGE_REL_AMD64_SECTION);
+    ECase(IMAGE_REL_AMD64_SECREL);
+    ECase(IMAGE_REL_AMD64_SECREL7);
+    ECase(IMAGE_REL_AMD64_TOKEN);
+    ECase(IMAGE_REL_AMD64_SREL32);
+    ECase(IMAGE_REL_AMD64_PAIR);
+    ECase(IMAGE_REL_AMD64_SSPAN32);
+  }
+};
+
+#undef ECase
+
+template <>
+struct MappingTraits<COFFYAML::Symbol> {
+  static void mapping(IO &IO, COFFYAML::Symbol &S) {
+    IO.mapRequired("SimpleType", S.SimpleType);
+    IO.mapOptional("NumberOfAuxSymbols", S.NumberOfAuxSymbols);
+    IO.mapRequired("Name", S.Name);
+    IO.mapRequired("StorageClass", S.StorageClass);
+    IO.mapOptional("AuxillaryData", S.AuxillaryData); // FIXME: typo
+    IO.mapRequired("ComplexType", S.ComplexType);
+    IO.mapRequired("Value", S.Value);
+    IO.mapRequired("SectionNumber", S.SectionNumber);
+  }
+};
+
+template <>
+struct MappingTraits<COFF::header> {
+  struct NMachine {
+    NMachine(IO&) : Machine(COFF::MachineTypes(0)) {
+    }
+    NMachine(IO&, uint16_t M) : Machine(COFF::MachineTypes(M)) {
+    }
+    uint16_t denormalize(IO &) {
+      return Machine;
+    }
+    COFF::MachineTypes Machine;
+  };
+
+  struct NCharacteristics {
+    NCharacteristics(IO&) : Characteristics(COFF::Characteristics(0)) {
+    }
+    NCharacteristics(IO&, uint16_t C) :
+      Characteristics(COFF::Characteristics(C)) {
+    }
+    uint16_t denormalize(IO &) {
+      return Characteristics;
+    }
+
+    COFF::Characteristics Characteristics;
+  };
+
+  static void mapping(IO &IO, COFF::header &H) {
+    MappingNormalization<NMachine, uint16_t> NM(IO, H.Machine);
+    MappingNormalization<NCharacteristics, uint16_t> NC(IO, H.Characteristics);
+
+    IO.mapRequired("Machine", NM->Machine);
+    IO.mapOptional("Characteristics", NC->Characteristics);
+  }
+};
+
+template <>
+struct MappingTraits<COFF::relocation> {
+  struct NType {
+    NType(IO &) : Type(COFF::RelocationTypeX86(0)) {
+    }
+    NType(IO &, uint16_t T) : Type(COFF::RelocationTypeX86(T)) {
+    }
+    uint16_t denormalize(IO &) {
+      return Type;
+    }
+    COFF::RelocationTypeX86 Type;
+  };
+
+  static void mapping(IO &IO, COFF::relocation &Rel) {
+    MappingNormalization<NType, uint16_t> NT(IO, Rel.Type);
+
+    IO.mapRequired("Type", NT->Type);
+    IO.mapRequired("VirtualAddress", Rel.VirtualAddress);
+    IO.mapRequired("SymbolTableIndex", Rel.SymbolTableIndex);
+  }
+};
+
+template <>
+struct MappingTraits<COFFYAML::Section> {
+  static void mapping(IO &IO, COFFYAML::Section &Sec) {
+    IO.mapOptional("Relocations", Sec.Relocations);
+    IO.mapRequired("SectionData", Sec.SectionData);
+    IO.mapRequired("Characteristics", Sec.Characteristics);
+    IO.mapRequired("Name", Sec.Name);
+  }
+};
+
+template <>
+struct MappingTraits<COFFYAML::Object> {
+  static void mapping(IO &IO, COFFYAML::Object &Obj) {
+    IO.mapRequired("sections", Obj.Sections);
+    IO.mapRequired("header", Obj.HeaderData);
+    IO.mapRequired("symbols", Obj.Symbols);
+  }
+};
+} // end namespace yaml
+} // end namespace llvm
+
+int main(int argc, char **argv) {
+  cl::ParseCommandLineOptions(argc, argv);
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+
+  OwningPtr<MemoryBuffer> Buf;
+  if (MemoryBuffer::getFileOrSTDIN(Input, Buf))
+    return 1;
+
+  yaml::Input YIn(Buf->getBuffer());
+  COFFYAML::Object Doc;
+  YIn >> Doc;
+  if (YIn.error()) {
+    errs() << "yaml2obj: Failed to parse YAML file!\n";
+    return 1;
+  }
+
+  COFFParser CP(Doc);
+  if (!CP.parse()) {
+    errs() << "yaml2obj: Failed to parse YAML file!\n";
+    return 1;
+  }
+
+  if (!layoutCOFF(CP)) {
+    errs() << "yaml2obj: Failed to layout COFF file!\n";
+    return 1;
+  }
+  writeCOFF(CP, outs());
+}
diff --git a/unittests/ADT/SmallPtrSetTest.cpp b/unittests/ADT/SmallPtrSetTest.cpp
index 9114875e0035..f85d7c941ebd 100644
--- a/unittests/ADT/SmallPtrSetTest.cpp
+++ b/unittests/ADT/SmallPtrSetTest.cpp
@@ -17,6 +17,61 @@
 using namespace llvm;
 
 // SmallPtrSet swapping test.
+TEST(SmallPtrSetTest, GrowthTest) {
+  int i;
+  int buf[8];
+  for(i=0; i<8; ++i) buf[i]=0;
+
+
+  SmallPtrSet<int *, 4> s;
+  typedef SmallPtrSet<int *, 4>::iterator iter;
+  
+  s.insert(&buf[0]);
+  s.insert(&buf[1]);
+  s.insert(&buf[2]);
+  s.insert(&buf[3]);
+  EXPECT_EQ(4U, s.size());
+
+  i = 0;
+  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
+      (**I)++;
+  EXPECT_EQ(4, i);
+  for(i=0; i<8; ++i)
+      EXPECT_EQ(i<4?1:0,buf[i]);
+
+  s.insert(&buf[4]);
+  s.insert(&buf[5]);
+  s.insert(&buf[6]);
+  s.insert(&buf[7]);
+
+  i = 0;
+  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
+      (**I)++;
+  EXPECT_EQ(8, i);
+  s.erase(&buf[4]);
+  s.erase(&buf[5]);
+  s.erase(&buf[6]);
+  s.erase(&buf[7]);
+  EXPECT_EQ(4U, s.size());
+
+  i = 0;
+  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
+      (**I)++;
+  EXPECT_EQ(4, i);
+  for(i=0; i<8; ++i)
+      EXPECT_EQ(i<4?3:1,buf[i]);
+
+  s.clear();
+  for(i=0; i<8; ++i) buf[i]=0;
+  for(i=0; i<128; ++i) s.insert(&buf[i%8]); // test repeated entires
+  EXPECT_EQ(8U, s.size());
+  for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
+      (**I)++;
+  for(i=0; i<8; ++i)
+      EXPECT_EQ(1,buf[i]);
+}
+
+
 TEST(SmallPtrSetTest, SwapTest) {
   int buf[10];
 
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index a3f8bf34e731..4b7e418cd180 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -13,3 +13,4 @@ add_subdirectory(Option)
 add_subdirectory(Support)
 add_subdirectory(Transforms)
 add_subdirectory(IR)
+add_subdirectory(DebugInfo)
diff --git a/unittests/DebugInfo/CMakeLists.txt b/unittests/DebugInfo/CMakeLists.txt
new file mode 100644
index 000000000000..ec580b7f69a4
--- /dev/null
+++ b/unittests/DebugInfo/CMakeLists.txt
@@ -0,0 +1,13 @@
+set(LLVM_LINK_COMPONENTS
+  debuginfo
+  object
+  support
+  )
+
+set(DebugInfoSources
+  DWARFFormValueTest.cpp
+  )
+
+add_llvm_unittest(DebugInfoTests
+  ${DebugInfoSources}
+  )
diff --git a/unittests/DebugInfo/DWARFFormValueTest.cpp b/unittests/DebugInfo/DWARFFormValueTest.cpp
new file mode 100644
index 000000000000..04b859bdb9db
--- /dev/null
+++ b/unittests/DebugInfo/DWARFFormValueTest.cpp
@@ -0,0 +1,31 @@
+//===- llvm/unittest/DebugInfo/DWARFFormValueTest.cpp ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARFFormValue.h"
+#include "llvm/Support/Dwarf.h"
+#include "gtest/gtest.h"
+using namespace llvm;
+using namespace dwarf;
+
+namespace {
+
+TEST(DWARFFormValue, FixedFormSizes) {
+  // Size of DW_FORM_addr and DW_FORM_ref_addr are equal in DWARF2,
+  // DW_FORM_ref_addr is always 4 bytes in DWARF32 starting from DWARF3.
+  const uint8_t *sizes = DWARFFormValue::getFixedFormSizes(4, 2);
+  EXPECT_EQ(sizes[DW_FORM_addr], sizes[DW_FORM_ref_addr]);
+  sizes = DWARFFormValue::getFixedFormSizes(8, 2);
+  EXPECT_EQ(sizes[DW_FORM_addr], sizes[DW_FORM_ref_addr]);
+  sizes = DWARFFormValue::getFixedFormSizes(8, 3);
+  EXPECT_EQ(4, sizes[DW_FORM_ref_addr]);
+  // Check that we don't have fixed form sizes for weird address sizes.
+  EXPECT_EQ(0, DWARFFormValue::getFixedFormSizes(16, 2));
+}
+
+} // end anonymous namespace
diff --git a/unittests/DebugInfo/Makefile b/unittests/DebugInfo/Makefile
new file mode 100644
index 000000000000..999ded90b0f6
--- /dev/null
+++ b/unittests/DebugInfo/Makefile
@@ -0,0 +1,16 @@
+##===- unittests/DebugInfo/Makefile ------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TESTNAME = DebugInfo
+LINK_COMPONENTS := debuginfo object support
+
+include $(LEVEL)/Makefile.config
+
+include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/ExecutionEngine/CMakeLists.txt b/unittests/ExecutionEngine/CMakeLists.txt
index ed7f10a23c8a..4eefc1e3bb1b 100644
--- a/unittests/ExecutionEngine/CMakeLists.txt
+++ b/unittests/ExecutionEngine/CMakeLists.txt
@@ -6,5 +6,9 @@ add_llvm_unittest(ExecutionEngineTests
   ExecutionEngineTest.cpp
   )
 
-add_subdirectory(JIT)
-add_subdirectory(MCJIT)
+# Include JIT/MCJIT tests only if native arch is a JIT target.
+list(FIND LLVM_TARGETS_WITH_JIT "${LLVM_NATIVE_ARCH}" have_jit)
+if (NOT have_jit EQUAL -1 )
+  add_subdirectory(JIT)
+  add_subdirectory(MCJIT)
+endif()
diff --git a/unittests/ExecutionEngine/JIT/CMakeLists.txt b/unittests/ExecutionEngine/JIT/CMakeLists.txt
index 3d33e4cb8351..ef37026dfc9f 100644
--- a/unittests/ExecutionEngine/JIT/CMakeLists.txt
+++ b/unittests/ExecutionEngine/JIT/CMakeLists.txt
@@ -55,3 +55,4 @@ add_llvm_unittest(JITTests
 if(MINGW OR CYGWIN)
   set_property(TARGET JITTests PROPERTY LINK_FLAGS -Wl,--export-all-symbols)
 endif()
+set_target_properties(JITTests PROPERTIES ENABLE_EXPORTS 1)
diff --git a/unittests/ExecutionEngine/Makefile b/unittests/ExecutionEngine/Makefile
index ca1195631a22..38e667ff9888 100644
--- a/unittests/ExecutionEngine/Makefile
+++ b/unittests/ExecutionEngine/Makefile
@@ -10,7 +10,11 @@
 LEVEL = ../..
 TESTNAME = ExecutionEngine
 LINK_COMPONENTS :=interpreter
-PARALLEL_DIRS = JIT MCJIT
 
 include $(LEVEL)/Makefile.config
+
+ifeq ($(TARGET_HAS_JIT),1)
+  PARALLEL_DIRS = JIT MCJIT
+endif
+
 include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/IR/CMakeLists.txt b/unittests/IR/CMakeLists.txt
index aed45979c069..b6098c79282c 100644
--- a/unittests/IR/CMakeLists.txt
+++ b/unittests/IR/CMakeLists.txt
@@ -16,6 +16,7 @@ set(IRSources
   TypeBuilderTest.cpp
   TypesTest.cpp
   ValueMapTest.cpp
+  ValueTest.cpp
   VerifierTest.cpp
   WaymarkTest.cpp
   )
diff --git a/unittests/IR/ValueTest.cpp b/unittests/IR/ValueTest.cpp
new file mode 100644
index 000000000000..52efb1a220aa
--- /dev/null
+++ b/unittests/IR/ValueTest.cpp
@@ -0,0 +1,46 @@
+//===- llvm/unittest/IR/ValueTest.cpp - Value unit tests ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Parser.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
+using namespace llvm;
+
+namespace {
+
+TEST(ValueTest, UsedInBasicBlock) {
+  LLVMContext C;
+
+  const char *ModuleString = "define void @f(i32 %x, i32 %y) {\n"
+                             "bb0:\n"
+                             "  %y1 = add i32 %y, 1\n"
+                             "  %y2 = add i32 %y, 1\n"
+                             "  %y3 = add i32 %y, 1\n"
+                             "  %y4 = add i32 %y, 1\n"
+                             "  %y5 = add i32 %y, 1\n"
+                             "  %y6 = add i32 %y, 1\n"
+                             "  %y7 = add i32 %y, 1\n"
+                             "  %y8 = add i32 %x, 1\n"
+                             "  ret void\n"
+                             "}\n";
+  SMDiagnostic Err;
+  Module *M = ParseAssemblyString(ModuleString, NULL, Err, C);
+
+  Function *F = M->getFunction("f");
+
+  EXPECT_FALSE(F->isUsedInBasicBlock(F->begin()));
+  EXPECT_TRUE((++F->arg_begin())->isUsedInBasicBlock(F->begin()));
+  EXPECT_TRUE(F->arg_begin()->isUsedInBasicBlock(F->begin()));
+}
+
+} // end anonymous namespace
diff --git a/unittests/Makefile b/unittests/Makefile
index 926459ac08f8..61d60611be9c 100644
--- a/unittests/Makefile
+++ b/unittests/Makefile
@@ -9,7 +9,8 @@
 
 LEVEL = ..
 
-PARALLEL_DIRS = ADT ExecutionEngine Support Transforms IR Analysis Bitcode
+PARALLEL_DIRS = ADT ExecutionEngine Support Transforms IR Analysis Bitcode \
+								DebugInfo
 
 include $(LEVEL)/Makefile.common
 
diff --git a/unittests/Support/ManagedStatic.cpp b/unittests/Support/ManagedStatic.cpp
index a4137619f4d7..8ddad38ecf17 100644
--- a/unittests/Support/ManagedStatic.cpp
+++ b/unittests/Support/ManagedStatic.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
 
 namespace {
 
-#ifdef HAVE_PTHREAD_H
+#if defined(HAVE_PTHREAD_H) && !__has_feature(memory_sanitizer)
 namespace test1 {
   llvm::ManagedStatic<int> ms;
   void *helper(void*) {
diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp
index 45112597979c..eec8c62d8c84 100644
--- a/unittests/Support/Path.cpp
+++ b/unittests/Support/Path.cpp
@@ -298,12 +298,19 @@ TEST_F(FileSystemTest, DirectoryIteration) {
   ASSERT_LT(z0, za1);
 }
 
+const char elf[] = {0x7f, 'E', 'L', 'F', 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+
 TEST_F(FileSystemTest, Magic) {
   struct type {
     const char *filename;
     const char *magic_str;
-    size_t      magic_str_len;
-  } types [] = {{"magic.archive", "!<arch>\x0A", 8}};
+    size_t magic_str_len;
+    fs::file_magic magic;
+  } types [] = {
+    {"magic.archive", "!<arch>\x0A", 8, fs::file_magic::archive},
+    {"magic.elf", elf, sizeof(elf),
+     fs::file_magic::elf_relocatable}
+  };
 
   // Create some files filled with magic.
   for (type *i = types, *e = types + (sizeof(types) / sizeof(type)); i != e;
@@ -320,6 +327,7 @@ TEST_F(FileSystemTest, Magic) {
     bool res = false;
     ASSERT_NO_ERROR(fs::has_magic(file_pathname.c_str(), magic, res));
     EXPECT_TRUE(res);
+    EXPECT_EQ(i->magic, fs::identify_magic(magic));
   }
 }
 
diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp
index cd304e720082..e19ae5bc2fb4 100644
--- a/unittests/Transforms/Utils/Cloning.cpp
+++ b/unittests/Transforms/Utils/Cloning.cpp
@@ -7,12 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/Instructions.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Constant.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -143,4 +146,31 @@ TEST_F(CloneInstruction, Exact) {
   EXPECT_TRUE(this->clone(SDiv)->isExact());
 }
 
+TEST_F(CloneInstruction, Attributes) {
+  Type *ArgTy1[] = { Type::getInt32PtrTy(context) };
+  FunctionType *FT1 =  FunctionType::get(Type::getVoidTy(context), ArgTy1, false);
+
+  Function *F1 = Function::Create(FT1, Function::ExternalLinkage);
+  BasicBlock *BB = BasicBlock::Create(context, "", F1);
+  IRBuilder<> Builder(BB);
+  Builder.CreateRetVoid();
+
+  Function *F2 = Function::Create(FT1, Function::ExternalLinkage);
+
+  Attribute::AttrKind AK[] = { Attribute::NoCapture };
+  AttributeSet AS = AttributeSet::get(context, 0, AK);
+  Argument *A = F1->arg_begin();
+  A->addAttr(AS);
+
+  SmallVector<ReturnInst*, 4> Returns;
+  ValueToValueMapTy VMap;
+  VMap[A] = UndefValue::get(A->getType());
+
+  CloneFunctionInto(F2, F1, VMap, false, Returns);
+  EXPECT_FALSE(F2->arg_begin()->hasNoCaptureAttr());
+
+  delete F1;
+  delete F2;
+}
+
 }
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index 563cab0d5dc4..b0ef67ac88ba 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -639,11 +639,11 @@ static bool ReadCheckFile(SourceMgr &SM,
            << ec.message() << '\n';
     return true;
   }
-  MemoryBuffer *F = File.take();
 
   // If we want to canonicalize whitespace, strip excess whitespace from the
   // buffer containing the CHECK lines. Remove DOS style line endings.
-  F = CanonicalizeInputFile(F, NoCanonicalizeWhiteSpace);
+  MemoryBuffer *F =
+    CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace);
 
   SM.AddNewSourceBuffer(F, SMLoc());
 
@@ -803,16 +803,16 @@ int main(int argc, char **argv) {
            << ec.message() << '\n';
     return 2;
   }
-  MemoryBuffer *F = File.take();
 
-  if (F->getBufferSize() == 0) {
+  if (File->getBufferSize() == 0) {
     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
     return 2;
   }
-  
+
   // Remove duplicate spaces in the input file if requested.
   // Remove DOS style line endings.
-  F = CanonicalizeInputFile(F, NoCanonicalizeWhiteSpace);
+  MemoryBuffer *F =
+    CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace);
 
   SM.AddNewSourceBuffer(F, SMLoc());
 
diff --git a/utils/Makefile b/utils/Makefile
index 7a3c17d0325a..ecb30bed7c63 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL = ..
 PARALLEL_DIRS := FileCheck FileUpdate TableGen PerfectShuffle \
-	      count fpcmp llvm-lit not unittest yaml2obj
+	      count fpcmp llvm-lit not unittest
 
 EXTRA_DIST := check-each-file codegen-diff countloc.sh \
               DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 6faf819529d0..65d78593f06a 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -2296,29 +2296,25 @@ static std::string GetAliasRequiredFeatures(Record *R,
   return Result;
 }
 
-/// emitMnemonicAliases - If the target has any MnemonicAlias<> definitions,
-/// emit a function for them and return true, otherwise return false.
-static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info) {
-  // Ignore aliases when match-prefix is set.
-  if (!MatchPrefix.empty())
-    return false;
-
-  std::vector<Record*> Aliases =
-    Info.getRecords().getAllDerivedDefinitions("MnemonicAlias");
-  if (Aliases.empty()) return false;
-
-  OS << "static void applyMnemonicAliases(StringRef &Mnemonic, "
-        "unsigned Features) {\n";
-
+static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info,
+                                     std::vector<Record*> &Aliases,
+                                     unsigned Indent = 0,
+                                  StringRef AsmParserVariantName = StringRef()){
   // Keep track of all the aliases from a mnemonic.  Use an std::map so that the
   // iteration order of the map is stable.
   std::map<std::string, std::vector<Record*> > AliasesFromMnemonic;
 
   for (unsigned i = 0, e = Aliases.size(); i != e; ++i) {
     Record *R = Aliases[i];
+    // FIXME: Allow AssemblerVariantName to be a comma separated list.
+    std::string AsmVariantName = R->getValueAsString("AsmVariantName");
+    if (AsmVariantName != AsmParserVariantName)
+      continue;
     AliasesFromMnemonic[R->getValueAsString("FromMnemonic")].push_back(R);
   }
-
+  if (AliasesFromMnemonic.empty())
+    return;
+    
   // Process each alias a "from" mnemonic at a time, building the code executed
   // by the string remapper.
   std::vector<StringMatcher::StringPair> Cases;
@@ -2370,8 +2366,39 @@ static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info) {
 
     Cases.push_back(std::make_pair(I->first, MatchCode));
   }
+  StringMatcher("Mnemonic", Cases, OS).Emit(Indent);
+}
+
+/// emitMnemonicAliases - If the target has any MnemonicAlias<> definitions,
+/// emit a function for them and return true, otherwise return false.
+static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info,
+                                CodeGenTarget &Target) {
+  // Ignore aliases when match-prefix is set.
+  if (!MatchPrefix.empty())
+    return false;
+
+  std::vector<Record*> Aliases =
+    Info.getRecords().getAllDerivedDefinitions("MnemonicAlias");
+  if (Aliases.empty()) return false;
+
+  OS << "static void applyMnemonicAliases(StringRef &Mnemonic, "
+    "unsigned Features, unsigned VariantID) {\n";
+  OS << "  switch (VariantID) {\n";
+  unsigned VariantCount = Target.getAsmParserVariantCount();
+  for (unsigned VC = 0; VC != VariantCount; ++VC) {
+    Record *AsmVariant = Target.getAsmParserVariant(VC);
+    int AsmParserVariantNo = AsmVariant->getValueAsInt("Variant");
+    std::string AsmParserVariantName = AsmVariant->getValueAsString("Name");
+    OS << "    case " << AsmParserVariantNo << ":\n";
+    emitMnemonicAliasVariant(OS, Info, Aliases, /*Indent=*/2,
+                             AsmParserVariantName);
+    OS << "    break;\n";
+  }
+  OS << "  }\n";
+
+  // Emit aliases that apply to all variants.
+  emitMnemonicAliasVariant(OS, Info, Aliases);
 
-  StringMatcher("Mnemonic", Cases, OS).Emit();
   OS << "}\n\n";
 
   return true;
@@ -2674,7 +2701,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   OS << "#undef GET_MATCHER_IMPLEMENTATION\n\n";
 
   // Generate the function that remaps for mnemonic aliases.
-  bool HasMnemonicAliases = emitMnemonicAliases(OS, Info);
+  bool HasMnemonicAliases = emitMnemonicAliases(OS, Info, Target);
 
   // Generate the convertToMCInst function to convert operands into an MCInst.
   // Also, generate the convertToMapAndConstraints function for MS-style inline
@@ -2832,9 +2859,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
 
   if (HasMnemonicAliases) {
     OS << "  // Process all MnemonicAliases to remap the mnemonic.\n";
-    OS << "  // FIXME : Add an entry in AsmParserVariant to check this.\n";
-    OS << "  if (!VariantID)\n";
-    OS << "    applyMnemonicAliases(Mnemonic, AvailableFeatures);\n\n";
+    OS << "  applyMnemonicAliases(Mnemonic, AvailableFeatures, VariantID);\n\n";
   }
 
   // Emit code to compute the class list for this operand vector.
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index d0f44be69a59..3ee197443131 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -33,4 +33,5 @@ add_tablegen(llvm-tblgen LLVM
   X86DisassemblerTables.cpp
   X86ModRMFilters.cpp
   X86RecognizableInstr.cpp
+  CTagsEmitter.cpp
   )
diff --git a/utils/TableGen/CTagsEmitter.cpp b/utils/TableGen/CTagsEmitter.cpp
new file mode 100644
index 000000000000..8bf777839bdd
--- /dev/null
+++ b/utils/TableGen/CTagsEmitter.cpp
@@ -0,0 +1,99 @@
+//===- CTagsEmitter.cpp - Generate ctags-compatible index ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits an index of definitions in ctags(1) format.
+// A helper script, utils/TableGen/tdtags, provides an easier-to-use
+// interface; run 'tdtags -H' for documentation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ctags-emitter"
+
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+using namespace llvm;
+
+namespace llvm { extern SourceMgr SrcMgr; }
+
+namespace {
+
+class Tag {
+private:
+  const std::string *Id;
+  SMLoc Loc;
+public:
+  Tag(const std::string &Name, const SMLoc Location)
+      : Id(&Name), Loc(Location) {}
+  int operator<(const Tag &B) const { return *Id < *B.Id; }
+  void emit(raw_ostream &OS) const {
+    int BufferID = SrcMgr.FindBufferContainingLoc(Loc);
+    MemoryBuffer *CurMB = SrcMgr.getBufferInfo(BufferID).Buffer;
+    const char *BufferName = CurMB->getBufferIdentifier();
+    std::pair<unsigned, unsigned> LineAndColumn = SrcMgr.getLineAndColumn(Loc);
+    OS << *Id << "\t" << BufferName << "\t" << LineAndColumn.first << "\n";
+  }
+};
+
+class CTagsEmitter {
+private:
+  RecordKeeper &Records;
+public:
+  CTagsEmitter(RecordKeeper &R) : Records(R) {}
+
+  void run(raw_ostream &OS);
+
+private:
+  static SMLoc locate(const Record *R);
+};
+
+} // End anonymous namespace.
+
+SMLoc CTagsEmitter::locate(const Record *R) {
+  ArrayRef<SMLoc> Locs = R->getLoc();
+  if (Locs.empty()) {
+    SMLoc NullLoc;
+    return NullLoc;
+  }
+  return Locs.front();
+}
+
+void CTagsEmitter::run(raw_ostream &OS) {
+  const std::map<std::string, Record *> &Classes = Records.getClasses();
+  const std::map<std::string, Record *> &Defs = Records.getDefs();
+  std::vector<Tag> Tags;
+  // Collect tags.
+  Tags.reserve(Classes.size() + Defs.size());
+  for (std::map<std::string, Record *>::const_iterator I = Classes.begin(),
+                                                       E = Classes.end();
+       I != E; ++I)
+    Tags.push_back(Tag(I->first, locate(I->second)));
+  for (std::map<std::string, Record *>::const_iterator I = Defs.begin(),
+                                                       E = Defs.end();
+       I != E; ++I)
+    Tags.push_back(Tag(I->first, locate(I->second)));
+  // Emit tags.
+  std::sort(Tags.begin(), Tags.end());
+  OS << "!_TAG_FILE_FORMAT\t1\t/original ctags format/\n";
+  OS << "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/\n";
+  for (std::vector<Tag>::const_iterator I = Tags.begin(), E = Tags.end();
+       I != E; ++I)
+    I->emit(OS);
+}
+
+namespace llvm {
+
+void EmitCTags(RecordKeeper &RK, raw_ostream &OS) { CTagsEmitter(RK).run(OS); }
+
+} // End llvm namespace.
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 009070671802..8e5bb7760f65 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -1321,8 +1321,18 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
 /// type which should be applied to it.  This will infer the type of register
 /// references from the register file information, for example.
 ///
+/// When Unnamed is set, return the type of a DAG operand with no name, such as
+/// the F8RC register class argument in:
+///
+///   (COPY_TO_REGCLASS GPR:$src, F8RC)
+///
+/// When Unnamed is false, return the type of a named DAG operand such as the
+/// GPR:$src operand above.
+///
 static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
-                                     bool NotRegisters, TreePattern &TP) {
+                                     bool NotRegisters,
+                                     bool Unnamed,
+                                     TreePattern &TP) {
   // Check to see if this is a register operand.
   if (R->isSubClassOf("RegisterOperand")) {
     assert(ResNo == 0 && "Regoperand ref only has one result!");
@@ -1336,6 +1346,13 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
   // Check to see if this is a register or a register class.
   if (R->isSubClassOf("RegisterClass")) {
     assert(ResNo == 0 && "Regclass ref only has one result!");
+    // An unnamed register class represents itself as an i32 immediate, for
+    // example on a COPY_TO_REGCLASS instruction.
+    if (Unnamed)
+      return EEVT::TypeSet(MVT::i32, TP);
+
+    // In a named operand, the register class provides the possible set of
+    // types.
     if (NotRegisters)
       return EEVT::TypeSet(); // Unknown.
     const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo();
@@ -1361,9 +1378,27 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
     return EEVT::TypeSet();
   }
 
-  if (R->isSubClassOf("ValueType") || R->isSubClassOf("CondCode")) {
+  if (R->isSubClassOf("ValueType")) {
+    assert(ResNo == 0 && "This node only has one result!");
+    // An unnamed VTSDNode represents itself as an MVT::Other immediate.
+    //
+    //   (sext_inreg GPR:$src, i16)
+    //                         ~~~
+    if (Unnamed)
+      return EEVT::TypeSet(MVT::Other, TP);
+    // With a name, the ValueType simply provides the type of the named
+    // variable.
+    //
+    //   (sext_inreg i32:$src, i16)
+    //               ~~~~~~~~
+    if (NotRegisters)
+      return EEVT::TypeSet(); // Unknown.
+    return EEVT::TypeSet(getValueType(R), TP);
+  }
+
+  if (R->isSubClassOf("CondCode")) {
     assert(ResNo == 0 && "This node only has one result!");
-    // Using a VTSDNode or CondCodeSDNode.
+    // Using a CondCodeSDNode.
     return EEVT::TypeSet(MVT::Other, TP);
   }
 
@@ -1469,7 +1504,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
       bool MadeChange = false;
       for (unsigned i = 0, e = Types.size(); i != e; ++i)
         MadeChange |= UpdateNodeType(i, getImplicitType(DI->getDef(), i,
-                                                        NotRegisters, TP), TP);
+                                                        NotRegisters,
+                                                        !hasName(), TP), TP);
       return MadeChange;
     }
 
@@ -1532,25 +1568,6 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
     return MadeChange;
   }
 
-  if (getOperator()->getName() == "COPY_TO_REGCLASS") {
-    bool MadeChange = false;
-    MadeChange |= getChild(0)->ApplyTypeConstraints(TP, NotRegisters);
-    MadeChange |= getChild(1)->ApplyTypeConstraints(TP, NotRegisters);
-
-    assert(getChild(0)->getNumTypes() == 1 &&
-           getChild(1)->getNumTypes() == 1 && "Unhandled case");
-
-    // child #1 of COPY_TO_REGCLASS should be a register class.  We don't care
-    // what type it gets, so if it didn't get a concrete type just give it the
-    // first viable type from the reg class.
-    if (!getChild(1)->hasTypeSet(0) &&
-        !getChild(1)->getExtType(0).isCompletelyUnknown()) {
-      MVT::SimpleValueType RCVT = getChild(1)->getExtType(0).getTypeList()[0];
-      MadeChange |= getChild(1)->UpdateNodeType(0, RCVT, TP);
-    }
-    return MadeChange;
-  }
-
   if (const CodeGenIntrinsic *Int = getIntrinsicInfo(CDP)) {
     bool MadeChange = false;
 
@@ -1654,8 +1671,42 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
 
       TreePatternNode *Child = getChild(ChildNo++);
       unsigned ChildResNo = 0;  // Instructions always use res #0 of their op.
+
+      // If the operand has sub-operands, they may be provided by distinct
+      // child patterns, so attempt to match each sub-operand separately.
+      if (OperandNode->isSubClassOf("Operand")) {
+        DagInit *MIOpInfo = OperandNode->getValueAsDag("MIOperandInfo");
+        if (unsigned NumArgs = MIOpInfo->getNumArgs()) {
+          // But don't do that if the whole operand is being provided by
+          // a single ComplexPattern.
+          const ComplexPattern *AM = Child->getComplexPatternInfo(CDP);
+          if (!AM || AM->getNumOperands() < NumArgs) {
+            // Match first sub-operand against the child we already have.
+            Record *SubRec = cast<DefInit>(MIOpInfo->getArg(0))->getDef();
+            MadeChange |=
+              Child->UpdateNodeTypeFromInst(ChildResNo, SubRec, TP);
+
+            // And the remaining sub-operands against subsequent children.
+            for (unsigned Arg = 1; Arg < NumArgs; ++Arg) {
+              if (ChildNo >= getNumChildren()) {
+                TP.error("Instruction '" + getOperator()->getName() +
+                         "' expects more operands than were provided.");
+                return false;
+              }
+              Child = getChild(ChildNo++);
+
+              SubRec = cast<DefInit>(MIOpInfo->getArg(Arg))->getDef();
+              MadeChange |=
+                Child->UpdateNodeTypeFromInst(ChildResNo, SubRec, TP);
+            }
+            continue;
+          }
+        }
+      }
+
+      // If we didn't match by pieces above, attempt to match the whole
+      // operand now.
       MadeChange |= Child->UpdateNodeTypeFromInst(ChildResNo, OperandNode, TP);
-      MadeChange |= Child->ApplyTypeConstraints(TP, NotRegisters);
     }
 
     if (ChildNo != getNumChildren()) {
@@ -1664,6 +1715,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
       return false;
     }
 
+    for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
+      MadeChange |= getChild(i)->ApplyTypeConstraints(TP, NotRegisters);
     return MadeChange;
   }
 
@@ -1813,6 +1866,16 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
     return Res;
   }
 
+  // ?:$name or just $name.
+  if (TheInit == UnsetInit::get()) {
+    if (OpName.empty())
+      error("'?' argument requires a name to match with operand list");
+    TreePatternNode *Res = new TreePatternNode(TheInit, 1);
+    Args.push_back(OpName);
+    Res->setName(OpName);
+    return Res;
+  }
+
   if (IntInit *II = dyn_cast<IntInit>(TheInit)) {
     if (!OpName.empty())
       error("Constant int argument should not have a name!");
@@ -2379,6 +2442,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
       I->error("set destination should be a register!");
 
     if (Val->getDef()->isSubClassOf("RegisterClass") ||
+        Val->getDef()->isSubClassOf("ValueType") ||
         Val->getDef()->isSubClassOf("RegisterOperand") ||
         Val->getDef()->isSubClassOf("PointerLikeRegClass")) {
       if (Dest->getName().empty())
@@ -2595,6 +2659,25 @@ getInstructionsInTree(TreePatternNode *Tree, SmallVectorImpl<Record*> &Instrs) {
     getInstructionsInTree(Tree->getChild(i), Instrs);
 }
 
+/// Check the class of a pattern leaf node against the instruction operand it
+/// represents.
+static bool checkOperandClass(CGIOperandList::OperandInfo &OI,
+                              Record *Leaf) {
+  if (OI.Rec == Leaf)
+    return true;
+
+  // Allow direct value types to be used in instruction set patterns.
+  // The type will be checked later.
+  if (Leaf->isSubClassOf("ValueType"))
+    return true;
+
+  // Patterns can also be ComplexPattern instances.
+  if (Leaf->isSubClassOf("ComplexPattern"))
+    return true;
+
+  return false;
+}
+
 /// ParseInstructions - Parse all of the instructions, inlining and resolving
 /// any fragments involved.  This populates the Instructions list with fully
 /// resolved instructions.
@@ -2704,7 +2787,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
         I->error("Operand $" + OpName + " should be a set destination: all "
                  "outputs must occur before inputs in operand list!");
 
-      if (CGI.Operands[i].Rec != R)
+      if (!checkOperandClass(CGI.Operands[i], R))
         I->error("Operand $" + OpName + " class mismatch!");
 
       // Remember the return type.
@@ -2743,7 +2826,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
 
       if (InVal->isLeaf() && isa<DefInit>(InVal->getLeafValue())) {
         Record *InRec = static_cast<DefInit*>(InVal->getLeafValue())->getDef();
-        if (Op.Rec != InRec && !InRec->isSubClassOf("ComplexPattern"))
+        if (!checkOperandClass(Op, InRec))
           I->error("Operand $" + OpName + "'s register class disagrees"
                    " between the operand and pattern");
       }
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 6190798814dc..7c2fa3674108 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -334,6 +334,7 @@ class TreePatternNode {
   }
   ~TreePatternNode();
 
+  bool hasName() const { return !Name.empty(); }
   const std::string &getName() const { return Name; }
   void setName(StringRef N) { Name.assign(N.begin(), N.end()); }
 
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index e3a6fe1bf133..c02f0843d651 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -779,14 +779,16 @@ void CodeGenSchedModels::collectProcItins() {
     for (unsigned i = 0, N = ItinRecords.size(); i < N; i++) {
       Record *ItinData = ItinRecords[i];
       Record *ItinDef = ItinData->getValueAsDef("TheClass");
-      SchedClassIter SCI = schedClassBegin(), SCE = schedClassEnd();
-      for( ; SCI != SCE; ++SCI) {
+      bool FoundClass = false;
+      for (SchedClassIter SCI = schedClassBegin(), SCE = schedClassEnd();
+           SCI != SCE; ++SCI) {
+        // Multiple SchedClasses may share an itinerary. Update all of them.
         if (SCI->ItinClassDef == ItinDef) {
           ProcModel.ItinDefList[SCI->Index] = ItinData;
-          break;
+          FoundClass = true;
         }
       }
-      if (SCI == SCE) {
+      if (!FoundClass) {
         DEBUG(dbgs() << ProcModel.ItinsDef->getName()
               << " missing class for itinerary " << ItinDef->getName() << '\n');
       }
@@ -1030,11 +1032,13 @@ static bool hasVariant(ArrayRef<PredTransition> Transitions,
 
 // Populate IntersectingVariants with any variants or aliased sequences of the
 // given SchedRW whose processor indices and predicates are not mutually
-// exclusive with the given transition,
+// exclusive with the given transition.
 void PredTransitions::getIntersectingVariants(
   const CodeGenSchedRW &SchedRW, unsigned TransIdx,
   std::vector<TransVariant> &IntersectingVariants) {
 
+  bool GenericRW = false;
+
   std::vector<TransVariant> Variants;
   if (SchedRW.HasVariants) {
     unsigned VarProcIdx = 0;
@@ -1046,6 +1050,8 @@ void PredTransitions::getIntersectingVariants(
     const RecVec VarDefs = SchedRW.TheDef->getValueAsListOfDefs("Variants");
     for (RecIter RI = VarDefs.begin(), RE = VarDefs.end(); RI != RE; ++RI)
       Variants.push_back(TransVariant(*RI, SchedRW.Index, VarProcIdx, 0));
+    if (VarProcIdx == 0)
+      GenericRW = true;
   }
   for (RecIter AI = SchedRW.Aliases.begin(), AE = SchedRW.Aliases.end();
        AI != AE; ++AI) {
@@ -1069,6 +1075,8 @@ void PredTransitions::getIntersectingVariants(
       Variants.push_back(
         TransVariant(AliasRW.TheDef, SchedRW.Index, AliasProcIdx, 0));
     }
+    if (AliasProcIdx == 0)
+      GenericRW = true;
   }
   for (unsigned VIdx = 0, VEnd = Variants.size(); VIdx != VEnd; ++VIdx) {
     TransVariant &Variant = Variants[VIdx];
@@ -1106,6 +1114,10 @@ void PredTransitions::getIntersectingVariants(
       TransVec.push_back(TransVec[TransIdx]);
     }
   }
+  if (GenericRW && IntersectingVariants.empty()) {
+    PrintFatalError(SchedRW.TheDef->getLoc(), "No variant of this type has "
+                    "a matching predicate on any processor");
+  }
 }
 
 // Push the Reads/Writes selected by this variant onto the PredTransition
@@ -1203,10 +1215,6 @@ void PredTransitions::substituteVariantOperand(
       // This will push a copies of TransVec[TransIdx] on the back of TransVec.
       std::vector<TransVariant> IntersectingVariants;
       getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
-      if (IntersectingVariants.empty())
-        PrintFatalError(SchedRW.TheDef->getLoc(),
-                      "No variant of this type has "
-                      "a matching predicate on any processor");
       // Now expand each variant on top of its copy of the transition.
       for (std::vector<TransVariant>::const_iterator
              IVI = IntersectingVariants.begin(),
@@ -1306,7 +1314,7 @@ void CodeGenSchedModels::inferFromRW(const IdxVec &OperWrites,
                                      const IdxVec &OperReads,
                                      unsigned FromClassIdx,
                                      const IdxVec &ProcIndices) {
-  DEBUG(dbgs() << "INFER RW: ");
+  DEBUG(dbgs() << "INFER RW proc("; dumpIdxVec(ProcIndices); dbgs() << ") ");
 
   // Create a seed transition with an empty PredTerm and the expanded sequences
   // of SchedWrites for the current SchedClass.
@@ -1648,6 +1656,13 @@ void CodeGenSchedClass::dump(const CodeGenSchedModels* SchedModels) const {
     }
   }
   dbgs() << "\n  ProcIdx: "; dumpIdxVec(ProcIndices); dbgs() << '\n';
+  if (!Transitions.empty()) {
+    dbgs() << "\n Transitions for Proc ";
+    for (std::vector<CodeGenSchedTransition>::const_iterator
+           TI = Transitions.begin(), TE = Transitions.end(); TI != TE; ++TI) {
+      dumpIdxVec(TI->ProcIndices);
+    }
+  }
 }
 
 void PredTransitions::dump() const {
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 38ffa30ec851..ed41631456b0 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -211,6 +211,12 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
     return AddMatcher(new CheckIntegerMatcher(II->getValue()));
   }
 
+  // An UnsetInit represents a named node without any constraints.
+  if (N->getLeafValue() == UnsetInit::get()) {
+    assert(N->hasName() && "Unnamed ? leaf");
+    return;
+  }
+
   DefInit *DI = dyn_cast<DefInit>(N->getLeafValue());
   if (DI == 0) {
     errs() << "Unknown leaf kind: " << *N << "\n";
@@ -218,6 +224,17 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
   }
 
   Record *LeafRec = DI->getDef();
+
+  // A ValueType leaf node can represent a register when named, or itself when
+  // unnamed.
+  if (LeafRec->isSubClassOf("ValueType")) {
+    // A named ValueType leaf always matches: (add i32:$a, i32:$b).
+    if (N->hasName())
+      return;
+    // An unnamed ValueType as in (sext_inreg GPR:$foo, i8).
+    return AddMatcher(new CheckValueTypeMatcher(LeafRec->getName()));
+  }
+
   if (// Handle register references.  Nothing to do here, they always match.
       LeafRec->isSubClassOf("RegisterClass") ||
       LeafRec->isSubClassOf("RegisterOperand") ||
@@ -236,9 +253,6 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
     return;
   }
 
-  if (LeafRec->isSubClassOf("ValueType"))
-    return AddMatcher(new CheckValueTypeMatcher(LeafRec->getName()));
-
   if (LeafRec->isSubClassOf("CondCode"))
     return AddMatcher(new CheckCondCodeMatcher(LeafRec->getName()));
 
@@ -734,20 +748,33 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
       continue;
     }
 
-    const TreePatternNode *Child = N->getChild(ChildNo);
-
     // Otherwise this is a normal operand or a predicate operand without
     // 'execute always'; emit it.
-    unsigned BeforeAddingNumOps = InstOps.size();
-    EmitResultOperand(Child, InstOps);
-    assert(InstOps.size() > BeforeAddingNumOps && "Didn't add any operands");
 
-    // If the operand is an instruction and it produced multiple results, just
-    // take the first one.
-    if (!Child->isLeaf() && Child->getOperator()->isSubClassOf("Instruction"))
-      InstOps.resize(BeforeAddingNumOps+1);
+    // For operands with multiple sub-operands we may need to emit
+    // multiple child patterns to cover them all.  However, ComplexPattern
+    // children may themselves emit multiple MI operands.
+    unsigned NumSubOps = 1;
+    if (OperandNode->isSubClassOf("Operand")) {
+      DagInit *MIOpInfo = OperandNode->getValueAsDag("MIOperandInfo");
+      if (unsigned NumArgs = MIOpInfo->getNumArgs())
+        NumSubOps = NumArgs;
+    }
 
-    ++ChildNo;
+    unsigned FinalNumOps = InstOps.size() + NumSubOps;
+    while (InstOps.size() < FinalNumOps) {
+      const TreePatternNode *Child = N->getChild(ChildNo);
+      unsigned BeforeAddingNumOps = InstOps.size();
+      EmitResultOperand(Child, InstOps);
+      assert(InstOps.size() > BeforeAddingNumOps && "Didn't add any operands");
+
+      // If the operand is an instruction and it produced multiple results, just
+      // take the first one.
+      if (!Child->isLeaf() && Child->getOperator()->isSubClassOf("Instruction"))
+        InstOps.resize(BeforeAddingNumOps+1);
+
+      ++ChildNo;
+    }
   }
 
   // If this node has input glue or explicitly specified input physregs, we
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 5fb811eacace..98892e11441b 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -850,7 +850,22 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
     SCDesc.ReadAdvanceIdx = 0;
 
     // A Variant SchedClass has no resources of its own.
-    if (!SCI->Transitions.empty()) {
+    bool HasVariants = false;
+    for (std::vector<CodeGenSchedTransition>::const_iterator
+           TI = SCI->Transitions.begin(), TE = SCI->Transitions.end();
+         TI != TE; ++TI) {
+      if (TI->ProcIndices[0] == 0) {
+        HasVariants = true;
+        break;
+      }
+      IdxIter PIPos = std::find(TI->ProcIndices.begin(),
+                                TI->ProcIndices.end(), ProcModel.Index);
+      if (PIPos != TI->ProcIndices.end()) {
+        HasVariants = true;
+        break;
+      }
+    }
+    if (HasVariants) {
       SCDesc.NumMicroOps = MCSchedClassDesc::VariantNumMicroOps;
       continue;
     }
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 510f2548cd35..b5c3ca760db6 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -40,7 +40,8 @@ enum ActionType {
   GenTgtIntrinsic,
   PrintEnums,
   PrintSets,
-  GenOptParserDefs
+  GenOptParserDefs,
+  GenCTags
 };
 
 namespace {
@@ -82,6 +83,8 @@ namespace {
                                "Print expanded sets for testing DAG exprs"),
                     clEnumValN(GenOptParserDefs, "gen-opt-parser-defs",
                                "Generate option definitions"),
+                    clEnumValN(GenCTags, "gen-ctags",
+                               "Generate ctags-compatible index"),
                     clEnumValEnd));
 
   cl::opt<std::string>
@@ -161,6 +164,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
     }
     break;
   }
+  case GenCTags:
+    EmitCTags(Records, OS);
+    break;
   }
 
   return false;
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index cc9909265844..28b626e17e89 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -75,5 +75,6 @@ void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS);
 void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS);
 void EmitMapTable(RecordKeeper &RK, raw_ostream &OS);
 void EmitOptParser(RecordKeeper &RK, raw_ostream &OS);
+void EmitCTags(RecordKeeper &RK, raw_ostream &OS);
 
 } // End llvm namespace
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index 8471faaaf62e..46f2052b010b 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -29,22 +29,25 @@ using namespace llvm;
   MAP(C4, 36)           \
   MAP(C8, 37)           \
   MAP(C9, 38)           \
-  MAP(E8, 39)           \
-  MAP(F0, 40)           \
-  MAP(F8, 41)           \
-  MAP(F9, 42)           \
-  MAP(D0, 45)           \
-  MAP(D1, 46)           \
-  MAP(D4, 47)           \
-  MAP(D5, 48)           \
-  MAP(D8, 49)           \
-  MAP(D9, 50)           \
-  MAP(DA, 51)           \
-  MAP(DB, 52)           \
-  MAP(DC, 53)           \
-  MAP(DD, 54)           \
-  MAP(DE, 55)           \
-  MAP(DF, 56)
+  MAP(CA, 39)           \
+  MAP(CB, 40)           \
+  MAP(E8, 41)           \
+  MAP(F0, 42)           \
+  MAP(F8, 45)           \
+  MAP(F9, 46)           \
+  MAP(D0, 47)           \
+  MAP(D1, 48)           \
+  MAP(D4, 49)           \
+  MAP(D5, 50)           \
+  MAP(D6, 51)           \
+  MAP(D8, 52)           \
+  MAP(D9, 53)           \
+  MAP(DA, 54)           \
+  MAP(DB, 55)           \
+  MAP(DC, 56)           \
+  MAP(DD, 57)           \
+  MAP(DE, 58)           \
+  MAP(DF, 59)
 
 // A clone of X86 since we can't depend on something that is generated.
 namespace X86Local {
diff --git a/utils/TableGen/tdtags b/utils/TableGen/tdtags
new file mode 100644
index 000000000000..5214485f4ec1
--- /dev/null
+++ b/utils/TableGen/tdtags
@@ -0,0 +1,453 @@
+#!/bin/sh
+#===-- tdtags - TableGen tags wrapper ---------------------------*- sh -*-===#
+# vim:set sts=2 sw=2 et:
+#===----------------------------------------------------------------------===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===#
+#
+# This is a wrapper script to simplify generating ctags(1)-compatible index
+# files for target .td files. Run tdtags -H for more documentation.
+#
+# For portability, this script is intended to conform to IEEE Std 1003.1-2008.
+#
+#===----------------------------------------------------------------------===#
+
+SELF=${0##*/}
+
+usage() {
+cat <<END
+Usage: $SELF [ <options> ] tdfile
+   or: $SELF [ <options> ] -x recipe [arg ...]
+OPTIONS
+  -H          Display further help.
+  -a          Append the tags to an existing tags file.
+  -f <file>   Write tags to the specified file (defaults to 'tags').
+  -I <dir>    Add the directory to the search path for tblgen include files.
+  -x <recipe> Generate tags file(s) for a common use case:
+  -q          Suppress $TBLGEN error messages.
+  -v          Be verbose; report progress.
+END
+  usage_recipes
+}
+
+usage_recipes() {
+cat <<END
+     all      - Generate an index in each directory that contains .td files
+                in the LLVM source tree.
+     here     - Generate an index for all .td files in the current directory.
+     recurse  - Generate an index in each directory that contains .td files
+                in and under the current directory.
+     target [<target> ...]
+              - Generate a tags file for each specified LLVM code generator
+                target, or if none are specified, all targets.
+END
+}
+
+help() {
+cat <<END
+NAME
+  $SELF - generate ctags(1)-compatible index files for tblgen .td source
+
+SYNOPSIS
+  $SELF [ options ] -x recipe [arg ...]
+  $SELF [ options ] [file ...]
+
+DESCRIPTION
+  With the '-x' option, $SELF produces one or more tags files for a
+  particular common use case. See the RECIPES section below for details.
+
+  Without the '-x' option, $SELF provides a ctags(1)-like interface to
+  $TBLGEN.
+
+OPTIONS
+  -a          Append newly generated tags to those already in an existing
+              tags file. Without ths option, any and all existing tags are
+              replaced. NOTE: When building a mixed tags file, using ${SELF}
+              for tblgen tags and ctags(1) for other languages, it is best
+              to run ${SELF} first without '-a', and ctags(1) second with '-a',
+              because ctags(1) handling is more capable.
+  -f <file>   Use the name <file> for the tags file, rather than the default
+              "tags". If the <file> is "-", then the tag index is written to
+              standard output.
+  -H          Display this document.
+  -I <dir>    Add the directory <dir> to the search path for 'include'
+              statements in tblgen source.
+  -x          Run a canned recipe, rather than operate on specified files.
+              When '-x' is present, the first non-option argument is the
+              name of a recipe, and any further arguments are arguments to
+              that recipe. With no arguments, lists the available recipes.
+  -q          Suppress $TBLGEN error messages. Not all .td files are well-
+              formed outside a specific context, so recipes will sometimes
+              produce error messages for certain .td files. These errors
+              do not affect the indices produced for valid files.
+  -v          Be verbose; report progress.
+
+RECIPES
+  $SELF -x all
+              Produce a tags file in every directory in the LLVM source tree
+              that contains any .td files.
+  $SELF -x here
+              Produce a tags file from .td files in the current directory.
+  $SELF -x recurse
+              Produce a tags file in every directory that contains any .td
+              files, in and under the current directory.
+  $SELF -x target [<target> ...]
+              Produce a tags file for each named code generator target, or
+              if none are named, for all code generator targets.
+END
+}
+
+# Temporary file management.
+#
+# Since SUS sh(1) has no arrays, this script makes extensive use of
+# temporary files. The follow are 'global' and used to carry information
+# across functions:
+#   $TMP:D    Include directories.
+#   $TMP:I    Included files.
+#   $TMP:T    Top-level files, that are not included by another.
+#   $TMP:W    Directories in which to generate tags (Worklist).
+# For portability to OS X, names must not differ only in case.
+#
+TMP=${TMPDIR:-/tmp}/$SELF:$$
+trap "rm -f $TMP*" 0
+trap exit 1 2 13 15
+>$TMP:D
+
+td_dump()
+{
+  if [ $OPT_VERBOSE -gt 1 ]
+  then
+    printf '===== %s =====\n' "$1"
+    cat <"$1"
+  fi
+}
+
+# Escape the arguments, taken as a whole.
+e() {
+  printf '%s' "$*" |
+    sed -e "s/'/'\\\\''/g" -e "1s/^/'/" -e "\$s/\$/'/"
+}
+
+# Determine whether the given directory contains at least one .td file.
+dir_has_td() {
+  for i in $1/*.td
+  do
+    [ -f "$i" ] && return 0
+  done
+  return 1
+}
+
+# Partition the supplied list of files, plus any files included from them,
+# into two groups:
+#   $TMP:T    Top-level files, that are not included by another.
+#   $TMP:I    Included files.
+# Add standard directories to the include paths in $TMP:D if this would
+# benefit the any of the included files.
+td_prep() {
+  >$TMP:E
+  >$TMP:J
+  for i in *.td
+  do
+    [ "x$i" = 'x*.td' ] && return 1
+    if [ -f "$i" ]
+    then
+      printf '%s\n' "$i" >>$TMP:E
+      sed -n -e 's/include[[:space:]]"\(.*\)".*/\1/p' <"$i" >>$TMP:J
+    else
+      printf >&2 '%s: "%s" not found.\n' "$SELF" "$i"
+      exit 7
+    fi
+  done
+  sort -u <$TMP:E >$TMP:X
+  sort -u <$TMP:J >$TMP:I
+  # A file that exists but is not included is toplevel.
+  comm -23 $TMP:X $TMP:I >$TMP:T
+  td_dump $TMP:T
+  td_dump $TMP:I
+  # Check include files.
+  while read i
+  do
+    [ -f "$i" ] && continue
+    while read d
+    do
+      [ -f "$d/$i" ] && break
+    done <$TMP:D
+    if [ -z "$d" ]
+    then
+      # See whether this include file can be found in a common location.
+      for d in $LLVM_SRC_ROOT/include \
+               $LLVM_SRC_ROOT/tools/clang/include
+      do
+        if [ -f "$d/$i" ]
+        then
+          printf '%s\n' "$d" >>$TMP:D
+          break
+        fi
+      done
+    fi
+  done <$TMP:I
+  td_dump $TMP:D
+}
+
+# Generate tags for the list of files in $TMP:T.
+td_tag() {
+  # Collect include directories.
+  inc=
+  while read d
+  do
+    inc="${inc}${inc:+ }$(e "-I=$d")"
+  done <$TMP:D
+
+  if [ $OPT_VERBOSE -ne 0 ]
+  then
+    printf >&2 'In "%s",\n' "$PWD"
+  fi
+
+  # Generate tags for each file.
+  n=0
+  while read i
+  do
+    if [ $OPT_VERBOSE -ne 0 ]
+    then
+      printf >&2 '  generating tags from "%s"\n' "$i"
+    fi
+    n=$((n + 1))
+    t=$(printf '%s:A:%05u' "$TMP" $n)
+    eval $TBLGEN --gen-ctags $inc "$i" >$t 2>$TMP:F
+    [ $OPT_NOTBLGENERR -eq 1 ] || cat $TMP:F
+  done <$TMP:T
+
+  # Add existing tags if requested.
+  if [ $OPT_APPEND -eq 1 -a -f "$OPT_TAGSFILE" ]
+  then
+    if [ $OPT_VERBOSE -ne 0 ]
+    then
+      printf >&2 '  and existing tags from "%s"\n' "$OPT_TAGSFILE"
+    fi
+    n=$((n + 1))
+    t=$(printf '%s:A:%05u' "$TMP" $n)
+    sed -e '/^!_TAG_/d' <"$OPT_TAGSFILE" | sort -u >$t
+  fi
+
+  # Merge tags.
+  if [ $n = 1 ]
+  then
+    mv -f "$t" $TMP:M
+  else
+    sort -m -u $TMP:A:* >$TMP:M
+  fi
+
+  # Emit tags.
+  if [ x${OPT_TAGSFILE}x = x-x ]
+  then
+    cat $TMP:M
+  else
+    if [ $OPT_VERBOSE -ne 0 ]
+    then
+      printf >&2 '  into "%s".\n' "$OPT_TAGSFILE"
+    fi
+    mv -f $TMP:M "$OPT_TAGSFILE"
+  fi
+}
+
+# Generate tags for the current directory.
+td_here() {
+  td_prep
+  [ -s $TMP:T ] || return 1
+  td_tag
+}
+
+# Generate tags for the current directory, and report an error if there are
+# no .td files present.
+do_here()
+{
+  if ! td_here
+  then
+    printf >&2 '%s: Nothing to do here.\n' "$SELF"
+    exit 1
+  fi
+}
+
+# Generate tags for all .td files under the current directory.
+do_recurse()
+{
+  td_find "$PWD"
+  td_dirs
+}
+
+# Generate tags for all .td files in LLVM.
+do_all()
+{
+  td_find "$LLVM_SRC_ROOT"
+  td_dirs
+}
+
+# Generate tags for each directory in the worklist $TMP:W.
+td_dirs()
+{
+  while read d
+  do
+    (cd "$d" && td_here)
+  done <$TMP:W
+}
+
+# Find directories containing .td files within the specified directory,
+# and record them in the worklist $TMP:W.
+td_find()
+{
+  find -L "$1" -type f -name '*.td' |
+    sed -e 's:/[^/]*$::' |
+    sort -u >$TMP:W
+  td_dump $TMP:W
+}
+
+# Generate tags for the specified code generator targets, or
+# if there are no arguments, all targets.
+do_targets() {
+  cd $LLVM_SRC_ROOT/lib/Target
+  if [ -z "$*" ]
+  then
+    td_find "$PWD"
+  else
+    # Check that every specified argument is a target directory;
+    # if not, list all target directories.
+    for d
+    do
+      if [ -d "$d" ] && dir_has_td "$d"
+      then
+        printf '%s/%s\n' "$PWD" "$d"
+      else
+        printf >&2 '%s: "%s" is not a target. Targets are:\n' "$SELF" "$d"
+        for d in *
+        do
+          [ -d "$d" ] || continue
+          dir_has_td "$d" && printf >&2 '  %s\n' "$d"
+        done
+        exit 2
+      fi
+    done >$TMP:W
+  fi
+  td_dirs
+}
+
+# Change to the directory at the top of the enclosing LLVM source tree,
+# if possible.
+llvm_src_root() {
+  while [ "$PWD" != / ]
+  do
+    # Use this directory if multiple notable subdirectories are present.
+    [ -d include/llvm -a -d lib/Target ] && return 0
+    cd ..
+  done
+  return 1
+}
+
+# Ensure sort(1) behaves consistently.
+LC_ALL=C
+export LC_ALL
+
+# Globals.
+TBLGEN=llvm-tblgen
+LLVM_SRC_ROOT=
+
+# Command options.
+OPT_TAGSFILE=tags
+OPT_RECIPES=0
+OPT_APPEND=0
+OPT_VERBOSE=0
+OPT_NOTBLGENERR=0
+
+while getopts 'af:hxqvHI:' opt
+do
+  case $opt in
+  a)
+    OPT_APPEND=1
+    ;;
+  f)
+    OPT_TAGSFILE="$OPTARG"
+    ;;
+  x)
+    OPT_RECIPES=1
+    ;;
+  q)
+    OPT_NOTBLGENERR=1
+    ;;
+  v)
+    OPT_VERBOSE=$((OPT_VERBOSE + 1))
+    ;;
+  I)
+    printf '%s\n' "$OPTARG" >>$TMP:D
+    ;;
+  [hH])
+    help
+    exit 0
+    ;;
+  *)
+    usage >&2
+    exit 4
+    ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+# Handle the case where tdtags is a simple ctags(1)-like wrapper for tblgen.
+if [ $OPT_RECIPES -eq 0 ]
+then
+  if [ -z "$*" ]
+  then
+    help >&2
+    exit 5
+  fi
+  for i
+  do
+    printf '%s\n' "$i"
+  done >$TMP:T
+  td_tag
+  exit $?
+fi
+
+# Find the directory at the top of the enclosing LLVM source tree.
+if ! LLVM_SRC_ROOT=$(llvm_src_root && pwd)
+then
+  printf >&2 '%s: Run from within the LLVM source tree.\n' "$SELF"
+  exit 3
+fi
+
+# Select canned actions.
+RECIPE="$1"
+case "$RECIPE" in
+all)
+  shift
+  do_all
+  ;;
+.|cwd|here)
+  shift
+  do_here
+  ;;
+recurse)
+  shift
+  do_recurse
+  ;;
+target)
+  shift
+  do_targets "$@"
+  ;;
+*)
+  if [ -n "$RECIPE" ]
+  then
+    shift
+    printf >&2 '%s: Unknown recipe "-x %s". ' "$SELF" "$RECIPE"
+  fi
+  printf >&2 'Recipes:\n'
+  usage_recipes >&2
+  printf >&2 'Run "%s -H" for help.\n' "$SELF"
+  exit 6
+  ;;
+esac
+
+exit $?
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index 07fb43f840d7..84176996a8c8 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -277,7 +277,10 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
         script += '.bat'
 
     # Write script file
-    f = open(script,'w')
+    mode = 'w'
+    if litConfig.isWindows and not isWin32CMDEXE:
+      mode += 'b'  # Avoid CRLFs when writing bash scripts.
+    f = open(script, mode)
     if isWin32CMDEXE:
         f.write('\nif %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands))
     else:
diff --git a/utils/lit/lit/discovery.py b/utils/lit/lit/discovery.py
index c869a671ef72..64a9510f955b 100644
--- a/utils/lit/lit/discovery.py
+++ b/utils/lit/lit/discovery.py
@@ -137,7 +137,7 @@ def getTestsInSuite(ts, path_in_suite, litConfig,
     # Search subdirectories.
     for filename in os.listdir(source_path):
         # FIXME: This doesn't belong here?
-        if filename in ('Output', '.svn') or filename in lc.excludes:
+        if filename in ('Output', '.svn', '.git') or filename in lc.excludes:
             continue
 
         # Ignore non-directories.
@@ -147,20 +147,31 @@ def getTestsInSuite(ts, path_in_suite, litConfig,
 
         # Check for nested test suites, first in the execpath in case there is a
         # site configuration and then in the source path.
-        file_execpath = ts.getExecPath(path_in_suite + (filename,))
+        subpath = path_in_suite + (filename,)
+        file_execpath = ts.getExecPath(subpath)
         if dirContainsTestSuite(file_execpath, litConfig):
-            sub_ts, subiter = getTests(file_execpath, litConfig,
-                                       testSuiteCache, localConfigCache)
+            sub_ts, subpath_in_suite = getTestSuite(file_execpath, litConfig,
+                                                    testSuiteCache)
         elif dirContainsTestSuite(file_sourcepath, litConfig):
-            sub_ts, subiter = getTests(file_sourcepath, litConfig,
-                                       testSuiteCache, localConfigCache)
+            sub_ts, subpath_in_suite = getTestSuite(file_sourcepath, litConfig,
+                                                    testSuiteCache)
         else:
-            # Otherwise, continue loading from inside this test suite.
-            subiter = getTestsInSuite(ts, path_in_suite + (filename,),
-                                      litConfig, testSuiteCache,
-                                      localConfigCache)
             sub_ts = None
 
+        # If the this directory recursively maps back to the current test suite,
+        # disregard it (this can happen if the exec root is located inside the
+        # current test suite, for example).
+        if sub_ts is ts:
+            continue
+
+        # Otherwise, load from the nested test suite, if present.
+        if sub_ts is not None:
+            subiter = getTestsInSuite(sub_ts, subpath_in_suite, litConfig,
+                                      testSuiteCache, localConfigCache)
+        else:
+            subiter = getTestsInSuite(ts, subpath, litConfig, testSuiteCache,
+                                      localConfigCache)
+
         N = 0
         for res in subiter:
             N += 1
diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
index da961eeedc63..de97a8e1aaf6 100755
--- a/utils/lit/lit/main.py
+++ b/utils/lit/lit/main.py
@@ -219,7 +219,7 @@ def main(builtinParameters = {}):
     group.add_option("", "--shuffle", dest="shuffle",
                      help="Run tests in random order",
                      action="store_true", default=False)
-    group.add_option("", "--filter", dest="filter", metavar="EXPRESSION",
+    group.add_option("", "--filter", dest="filter", metavar="REGEX",
                      help=("Only run tests with paths matching the given "
                            "regular expression"),
                      action="store", default=None)
diff --git a/utils/lit/tests/Inputs/discovery/lit.cfg b/utils/lit/tests/Inputs/discovery/lit.cfg
index 3513bfffd173..4049ab16f9cc 100644
--- a/utils/lit/tests/Inputs/discovery/lit.cfg
+++ b/utils/lit/tests/Inputs/discovery/lit.cfg
@@ -1,5 +1,10 @@
 config.name = 'top-level-suite'
 config.suffixes = ['.txt']
 config.test_format = lit.formats.ShTest()
-config.test_source_root = None
-config.test_exec_root = None
+
+# We intentionally don't set the source root or exec root directories here,
+# because this suite gets reused for testing the exec root behavior (in
+# ../exec-discovery).
+#
+#config.test_source_root = None
+#config.test_exec_root = None
diff --git a/utils/lit/tests/Inputs/exec-discovery-in-tree/lit.cfg b/utils/lit/tests/Inputs/exec-discovery-in-tree/lit.cfg
new file mode 100644
index 000000000000..342b2fdd3c89
--- /dev/null
+++ b/utils/lit/tests/Inputs/exec-discovery-in-tree/lit.cfg
@@ -0,0 +1,7 @@
+# Verify that the site configuration was loaded.
+if config.test_source_root is None or config.test_exec_root is None:
+    lit.fatal("No site specific configuration")
+
+config.name = 'exec-discovery-in-tree-suite'
+config.suffixes = ['.txt']
+config.test_format = lit.formats.ShTest()
diff --git a/utils/lit/tests/Inputs/exec-discovery-in-tree/obj/lit.site.cfg b/utils/lit/tests/Inputs/exec-discovery-in-tree/obj/lit.site.cfg
new file mode 100644
index 000000000000..de9a3d0c6dfd
--- /dev/null
+++ b/utils/lit/tests/Inputs/exec-discovery-in-tree/obj/lit.site.cfg
@@ -0,0 +1,4 @@
+import os
+config.test_exec_root = os.path.dirname(__file__)
+config.test_source_root = os.path.dirname(config.test_exec_root)
+lit.load_config(config, os.path.join(config.test_source_root, "lit.cfg"))
\ No newline at end of file
diff --git a/utils/lit/tests/Inputs/exec-discovery-in-tree/test-one.txt b/utils/lit/tests/Inputs/exec-discovery-in-tree/test-one.txt
new file mode 100644
index 000000000000..b80b60b7a279
--- /dev/null
+++ b/utils/lit/tests/Inputs/exec-discovery-in-tree/test-one.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/utils/lit/tests/Inputs/exec-discovery/lit.site.cfg b/utils/lit/tests/Inputs/exec-discovery/lit.site.cfg
new file mode 100644
index 000000000000..796569a301b8
--- /dev/null
+++ b/utils/lit/tests/Inputs/exec-discovery/lit.site.cfg
@@ -0,0 +1,5 @@
+# Load the discovery suite, but with a separate exec root.
+import os
+config.test_exec_root = os.path.dirname(__file__)
+config.test_source_root = os.path.join(os.path.dirname(config.test_exec_root), "discovery")
+lit.load_config(config, os.path.join(config.test_source_root, "lit.cfg"))
diff --git a/utils/lit/tests/Inputs/progress-bar/lit.cfg b/utils/lit/tests/Inputs/progress-bar/lit.cfg
new file mode 100644
index 000000000000..4878b6560968
--- /dev/null
+++ b/utils/lit/tests/Inputs/progress-bar/lit.cfg
@@ -0,0 +1,5 @@
+config.name = 'shtest-shell'
+config.suffixes = ['.txt']
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None
diff --git a/utils/lit/tests/Inputs/progress-bar/test-1.txt b/utils/lit/tests/Inputs/progress-bar/test-1.txt
new file mode 100644
index 000000000000..49932c3006e1
--- /dev/null
+++ b/utils/lit/tests/Inputs/progress-bar/test-1.txt
@@ -0,0 +1 @@
+# RUN: false
diff --git a/utils/lit/tests/Inputs/progress-bar/test-2.txt b/utils/lit/tests/Inputs/progress-bar/test-2.txt
new file mode 100644
index 000000000000..49932c3006e1
--- /dev/null
+++ b/utils/lit/tests/Inputs/progress-bar/test-2.txt
@@ -0,0 +1 @@
+# RUN: false
diff --git a/utils/lit/tests/Inputs/progress-bar/test-3.txt b/utils/lit/tests/Inputs/progress-bar/test-3.txt
new file mode 100644
index 000000000000..49932c3006e1
--- /dev/null
+++ b/utils/lit/tests/Inputs/progress-bar/test-3.txt
@@ -0,0 +1 @@
+# RUN: false
diff --git a/utils/lit/tests/Inputs/progress-bar/test-4.txt b/utils/lit/tests/Inputs/progress-bar/test-4.txt
new file mode 100644
index 000000000000..49932c3006e1
--- /dev/null
+++ b/utils/lit/tests/Inputs/progress-bar/test-4.txt
@@ -0,0 +1 @@
+# RUN: false
diff --git a/utils/lit/tests/discovery.py b/utils/lit/tests/discovery.py
index 54b99d39458c..56d9dd07e841 100644
--- a/utils/lit/tests/discovery.py
+++ b/utils/lit/tests/discovery.py
@@ -5,17 +5,17 @@
 # RUN: FileCheck --check-prefix=CHECK-BASIC-OUT < %t.out %s
 # RUN: FileCheck --check-prefix=CHECK-BASIC-ERR < %t.err %s
 #
-# CHECK-BASIC-ERR: loading suite config '{{.*}}/tests/Inputs/discovery/lit.cfg'
-# CHECK-BASIC-ERR: loading local config '{{.*}}/tests/Inputs/discovery/subdir/lit.local.cfg'
-# CHECK-BASIC-ERR: loading suite config '{{.*}}/tests/Inputs/discovery/subsuite/lit.cfg'
+# CHECK-BASIC-ERR: loading suite config '{{.*}}/discovery/lit.cfg'
+# CHECK-BASIC-ERR: loading local config '{{.*}}/discovery/subdir/lit.local.cfg'
+# CHECK-BASIC-ERR: loading suite config '{{.*}}/discovery/subsuite/lit.cfg'
 #
 # CHECK-BASIC-OUT: -- Test Suites --
 # CHECK-BASIC-OUT:   sub-suite - 2 tests
-# CHECK-BASIC-OUT:     Source Root:
-# CHECK-BASIC-OUT:     Exec Root  :
+# CHECK-BASIC-OUT:     Source Root: {{.*/discovery/subsuite$}}
+# CHECK-BASIC-OUT:     Exec Root  : {{.*/discovery/subsuite$}}
 # CHECK-BASIC-OUT:   top-level-suite - 3 tests
-# CHECK-BASIC-OUT:     Source Root:
-# CHECK-BASIC-OUT:     Exec Root  :
+# CHECK-BASIC-OUT:     Source Root: {{.*/discovery$}}
+# CHECK-BASIC-OUT:     Exec Root  : {{.*/discovery$}}
 #
 # CHECK-BASIC-OUT: -- Testing: 5 tests, 1 threads --
 # CHECK-BASIC-OUT: PASS: sub-suite :: test-one
@@ -23,3 +23,74 @@
 # CHECK-BASIC-OUT: PASS: top-level-suite :: subdir/test-three
 # CHECK-BASIC-OUT: PASS: top-level-suite :: test-one
 # CHECK-BASIC-OUT: PASS: top-level-suite :: test-two
+
+
+# Check discovery when exact test names are given.
+#
+# RUN: %{lit} \
+# RUN:     %{inputs}/discovery/subdir/test-three.py \
+# RUN:     %{inputs}/discovery/subsuite/test-one.txt \
+# RUN:   -j 1 --no-execute --show-suites -v > %t.out
+# RUN: FileCheck --check-prefix=CHECK-EXACT-TEST < %t.out %s
+#
+# CHECK-EXACT-TEST: -- Testing: 2 tests, 1 threads --
+# CHECK-EXACT-TEST: PASS: sub-suite :: test-one
+# CHECK-EXACT-TEST: PASS: top-level-suite :: subdir/test-three
+
+
+# Check discovery when using an exec path.
+#
+# RUN: %{lit} %{inputs}/exec-discovery \
+# RUN:   -j 1 --debug --no-execute --show-suites -v > %t.out 2> %t.err
+# RUN: FileCheck --check-prefix=CHECK-ASEXEC-OUT < %t.out %s
+# RUN: FileCheck --check-prefix=CHECK-ASEXEC-ERR < %t.err %s
+#
+# CHECK-ASEXEC-ERR: loading suite config '{{.*}}/exec-discovery/lit.site.cfg'
+# CHECK-ASEXEC-ERR: load_config from '{{.*}}/discovery/lit.cfg'
+# CHECK-ASEXEC-ERR: loaded config '{{.*}}/discovery/lit.cfg'
+# CHECK-ASEXEC-ERR: loaded config '{{.*}}/exec-discovery/lit.site.cfg'
+# CHECK-ASEXEC-ERR: loading local config '{{.*}}/discovery/subdir/lit.local.cfg'
+# CHECK-ASEXEC-ERR: loading suite config '{{.*}}/discovery/subsuite/lit.cfg'
+#
+# CHECK-ASEXEC-OUT: -- Test Suites --
+# CHECK-ASEXEC-OUT:   sub-suite - 2 tests
+# CHECK-ASEXEC-OUT:     Source Root: {{.*/discovery/subsuite$}}
+# CHECK-ASEXEC-OUT:     Exec Root  : {{.*/discovery/subsuite$}}
+# CHECK-ASEXEC-OUT:   top-level-suite - 3 tests
+# CHECK-ASEXEC-OUT:     Source Root: {{.*/discovery$}}
+# CHECK-ASEXEC-OUT:     Exec Root  : {{.*/exec-discovery$}}
+#
+# CHECK-ASEXEC-OUT: -- Testing: 5 tests, 1 threads --
+# CHECK-ASEXEC-OUT: PASS: sub-suite :: test-one
+# CHECK-ASEXEC-OUT: PASS: sub-suite :: test-two
+# CHECK-ASEXEC-OUT: PASS: top-level-suite :: subdir/test-three
+# CHECK-ASEXEC-OUT: PASS: top-level-suite :: test-one
+# CHECK-ASEXEC-OUT: PASS: top-level-suite :: test-two
+
+
+# Check discovery when exact test names are given.
+#
+# FIXME: Note that using a path into a subsuite doesn't work correctly here.
+#
+# RUN: %{lit} \
+# RUN:     %{inputs}/exec-discovery/subdir/test-three.py \
+# RUN:   -j 1 --no-execute --show-suites -v > %t.out
+# RUN: FileCheck --check-prefix=CHECK-ASEXEC-EXACT-TEST < %t.out %s
+#
+# CHECK-ASEXEC-EXACT-TEST: -- Testing: 1 tests, 1 threads --
+# CHECK-ASEXEC-EXACT-TEST: PASS: top-level-suite :: subdir/test-three
+
+
+# Check that we don't recurse infinitely when loading an site specific test
+# suite located inside the test source root.
+#
+# RUN: %{lit} \
+# RUN:     %{inputs}/exec-discovery-in-tree/obj/ \
+# RUN:   -j 1 --no-execute --show-suites -v > %t.out
+# RUN: FileCheck --check-prefix=CHECK-ASEXEC-INTREE < %t.out %s
+#
+#      CHECK-ASEXEC-INTREE:   exec-discovery-in-tree-suite - 1 tests
+# CHECK-ASEXEC-INTREE-NEXT:     Source Root: {{.*/exec-discovery-in-tree$}}
+# CHECK-ASEXEC-INTREE-NEXT:     Exec Root  : {{.*/exec-discovery-in-tree/obj$}}
+# CHECK-ASEXEC-INTREE-NEXT: -- Testing: 1 tests, 1 threads --
+# CHECK-ASEXEC-INTREE-NEXT: PASS: exec-discovery-in-tree-suite :: test-one
diff --git a/utils/lit/tests/progress-bar.py b/utils/lit/tests/progress-bar.py
new file mode 100644
index 000000000000..d0467485082a
--- /dev/null
+++ b/utils/lit/tests/progress-bar.py
@@ -0,0 +1,13 @@
+# Check the simple progress bar.
+#
+# RUN: not %{lit} -j 1 -s %{inputs}/progress-bar > %t.out
+# RUN: FileCheck < %t.out %s
+#
+# CHECK: Testing: 0 .. 10.. 20
+# CHECK: FAIL: shtest-shell :: test-1.txt (1 of 4)
+# CHECK: Testing: 0 .. 10.. 20.. 30.. 40.. 
+# CHECK: FAIL: shtest-shell :: test-2.txt (2 of 4)
+# CHECK: Testing: 0 .. 10.. 20.. 30.. 40.. 50.. 60.. 70
+# CHECK: FAIL: shtest-shell :: test-3.txt (3 of 4)
+# CHECK: Testing: 0 .. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90.. 
+# CHECK: FAIL: shtest-shell :: test-4.txt (4 of 4)
diff --git a/utils/vim/llvm.vim b/utils/vim/llvm.vim
index 830476f158b6..fb8f654fb939 100644
--- a/utils/vim/llvm.vim
+++ b/utils/vim/llvm.vim
@@ -41,18 +41,18 @@ syn keyword llvmKeyword arm_aapcscc arm_apcscc asm atomic available_externally
 syn keyword llvmKeyword blockaddress byval c catch cc ccc cleanup coldcc common
 syn keyword llvmKeyword constant datalayout declare default define deplibs
 syn keyword llvmKeyword dllexport dllimport except extern_weak external fastcc
-syn keyword llvmKeyword filter gc global hidden initialexec inlinehint inreg
-syn keyword llvmKeyword intel_ocl_bicc inteldialect internal linker_private
-syn keyword llvmKeyword linker_private_weak linker_private_weak_def_auto
-syn keyword llvmKeyword linkonce linkonce_odr linkonce_odr_auto_hide
-syn keyword llvmKeyword localdynamic localexec minsize module monotonic
-syn keyword llvmKeyword msp430_intrcc naked nest noalias nocapture
-syn keyword llvmKeyword noimplicitfloat noinline nonlazybind noredzone noreturn
-syn keyword llvmKeyword nounwind optsize personality private protected
-syn keyword llvmKeyword ptx_device ptx_kernel readnone readonly release
-syn keyword llvmKeyword returns_twice section seq_cst sideeffect signext
-syn keyword llvmKeyword singlethread spir_func spir_kernel sret ssp sspreq
-syn keyword llvmKeyword sspstrong tail target thread_local to triple
+syn keyword llvmKeyword filter fixedstacksegment gc global hidden initialexec
+syn keyword llvmKeyword inlinehint inreg intel_ocl_bicc inteldialect internal
+syn keyword llvmKeyword linker_private linker_private_weak
+syn keyword llvmKeyword linker_private_weak_def_auto linkonce linkonce_odr
+syn keyword llvmKeyword linkonce_odr_auto_hide localdynamic localexec minsize
+syn keyword llvmKeyword module monotonic msp430_intrcc naked nest noalias
+syn keyword llvmKeyword nocapture noimplicitfloat noinline nonlazybind
+syn keyword llvmKeyword noredzone noreturn nounwind optsize personality private
+syn keyword llvmKeyword protected ptx_device ptx_kernel readnone readonly
+syn keyword llvmKeyword release returns_twice section seq_cst sideeffect
+syn keyword llvmKeyword signext singlethread spir_func spir_kernel sret ssp
+syn keyword llvmKeyword sspreq sspstrong tail target thread_local to triple
 syn keyword llvmKeyword unnamed_addr unordered uwtable volatile weak weak_odr
 syn keyword llvmKeyword x86_fastcallcc x86_stdcallcc x86_thiscallcc zeroext
 syn keyword llvmKeyword sanitize_thread sanitize_memory
diff --git a/utils/yaml2obj/yaml2obj.cpp b/utils/yaml2obj/yaml2obj.cpp
deleted file mode 100644
index 5a8ec1d09661..000000000000
--- a/utils/yaml2obj/yaml2obj.cpp
+++ /dev/null
@@ -1,879 +0,0 @@
-//===- yaml2obj - Convert YAML to a binary object file --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This program takes a YAML description of an object file and outputs the
-// binary equivalent.
-//
-// This is used for writing tests that require binary files.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Signals.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/YAMLParser.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-#include <vector>
-
-using namespace llvm;
-
-static cl::opt<std::string>
-  Input(cl::Positional, cl::desc("<input>"), cl::init("-"));
-
-template<class T>
-typename llvm::enable_if_c<std::numeric_limits<T>::is_integer, bool>::type
-getAs(const llvm::yaml::ScalarNode *SN, T &Result) {
-  SmallString<4> Storage;
-  StringRef Value = SN->getValue(Storage);
-  if (Value.getAsInteger(0, Result))
-    return false;
-  return true;
-}
-
-// Given a container with begin and end with ::value_type of a character type.
-// Iterate through pairs of characters in the the set of [a-fA-F0-9] ignoring
-// all other characters.
-struct hex_pair_iterator {
-  StringRef::const_iterator Current, End;
-  typedef SmallVector<char, 2> value_type;
-  value_type Pair;
-  bool IsDone;
-
-  hex_pair_iterator(StringRef C)
-    : Current(C.begin()), End(C.end()), IsDone(false) {
-    // Initalize Pair.
-    ++*this;
-  }
-
-  // End iterator.
-  hex_pair_iterator() : Current(), End(), IsDone(true) {}
-
-  value_type operator *() const {
-    return Pair;
-  }
-
-  hex_pair_iterator operator ++() {
-    // We're at the end of the input.
-    if (Current == End) {
-      IsDone = true;
-      return *this;
-    }
-    Pair = value_type();
-    for (; Current != End && Pair.size() != 2; ++Current) {
-      // Is a valid hex digit.
-      if ((*Current >= '0' && *Current <= '9') ||
-          (*Current >= 'a' && *Current <= 'f') ||
-          (*Current >= 'A' && *Current <= 'F'))
-        Pair.push_back(*Current);
-    }
-    // Hit the end without getting 2 hex digits. Pair is invalid.
-    if (Pair.size() != 2)
-      IsDone = true;
-    return *this;
-  }
-
-  bool operator ==(const hex_pair_iterator Other) {
-    return (IsDone == Other.IsDone) ||
-           (Current == Other.Current && End == Other.End);
-  }
-
-  bool operator !=(const hex_pair_iterator Other) {
-    return !(*this == Other);
-  }
-};
-
-template <class ContainerOut>
-static bool hexStringToByteArray(StringRef Str, ContainerOut &Out) {
-  for (hex_pair_iterator I(Str), E; I != E; ++I) {
-    typename hex_pair_iterator::value_type Pair = *I;
-    typename ContainerOut::value_type Byte;
-    if (StringRef(Pair.data(), 2).getAsInteger(16, Byte))
-      return false;
-    Out.push_back(Byte);
-  }
-  return true;
-}
-
-/// This parses a yaml stream that represents a COFF object file.
-/// See docs/yaml2obj for the yaml scheema.
-struct COFFParser {
-  COFFParser(yaml::Stream &Input) : YS(Input) {
-    std::memset(&Header, 0, sizeof(Header));
-    // A COFF string table always starts with a 4 byte size field. Offsets into
-    // it include this size, so allocate it now.
-    StringTable.append(4, 0);
-  }
-
-  bool parseHeader(yaml::Node *HeaderN) {
-    yaml::MappingNode *MN = dyn_cast<yaml::MappingNode>(HeaderN);
-    if (!MN) {
-      YS.printError(HeaderN, "header's value must be a mapping node");
-      return false;
-    }
-    for (yaml::MappingNode::iterator i = MN->begin(), e = MN->end();
-                                     i != e; ++i) {
-      yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(i->getKey());
-      if (!Key) {
-        YS.printError(i->getKey(), "Keys must be scalar values");
-        return false;
-      }
-      SmallString<32> Storage;
-      StringRef KeyValue = Key->getValue(Storage);
-      if (KeyValue == "Characteristics") {
-        if (!parseHeaderCharacteristics(i->getValue()))
-          return false;
-      } else {
-        yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(i->getValue());
-        if (!Value) {
-          YS.printError(Value,
-            Twine(KeyValue) + " must be a scalar value");
-          return false;
-        }
-        if (KeyValue == "Machine") {
-          uint16_t Machine = COFF::MT_Invalid;
-          if (!getAs(Value, Machine)) {
-            // It's not a raw number, try matching the string.
-            StringRef ValueValue = Value->getValue(Storage);
-            Machine = StringSwitch<COFF::MachineTypes>(ValueValue)
-              .Case( "IMAGE_FILE_MACHINE_UNKNOWN"
-                   , COFF::IMAGE_FILE_MACHINE_UNKNOWN)
-              .Case( "IMAGE_FILE_MACHINE_AM33"
-                   , COFF::IMAGE_FILE_MACHINE_AM33)
-              .Case( "IMAGE_FILE_MACHINE_AMD64"
-                   , COFF::IMAGE_FILE_MACHINE_AMD64)
-              .Case( "IMAGE_FILE_MACHINE_ARM"
-                   , COFF::IMAGE_FILE_MACHINE_ARM)
-              .Case( "IMAGE_FILE_MACHINE_ARMV7"
-                   , COFF::IMAGE_FILE_MACHINE_ARMV7)
-              .Case( "IMAGE_FILE_MACHINE_EBC"
-                   , COFF::IMAGE_FILE_MACHINE_EBC)
-              .Case( "IMAGE_FILE_MACHINE_I386"
-                   , COFF::IMAGE_FILE_MACHINE_I386)
-              .Case( "IMAGE_FILE_MACHINE_IA64"
-                   , COFF::IMAGE_FILE_MACHINE_IA64)
-              .Case( "IMAGE_FILE_MACHINE_M32R"
-                   , COFF::IMAGE_FILE_MACHINE_M32R)
-              .Case( "IMAGE_FILE_MACHINE_MIPS16"
-                   , COFF::IMAGE_FILE_MACHINE_MIPS16)
-              .Case( "IMAGE_FILE_MACHINE_MIPSFPU"
-                   , COFF::IMAGE_FILE_MACHINE_MIPSFPU)
-              .Case( "IMAGE_FILE_MACHINE_MIPSFPU16"
-                   , COFF::IMAGE_FILE_MACHINE_MIPSFPU16)
-              .Case( "IMAGE_FILE_MACHINE_POWERPC"
-                   , COFF::IMAGE_FILE_MACHINE_POWERPC)
-              .Case( "IMAGE_FILE_MACHINE_POWERPCFP"
-                   , COFF::IMAGE_FILE_MACHINE_POWERPCFP)
-              .Case( "IMAGE_FILE_MACHINE_R4000"
-                   , COFF::IMAGE_FILE_MACHINE_R4000)
-              .Case( "IMAGE_FILE_MACHINE_SH3"
-                   , COFF::IMAGE_FILE_MACHINE_SH3)
-              .Case( "IMAGE_FILE_MACHINE_SH3DSP"
-                   , COFF::IMAGE_FILE_MACHINE_SH3DSP)
-              .Case( "IMAGE_FILE_MACHINE_SH4"
-                   , COFF::IMAGE_FILE_MACHINE_SH4)
-              .Case( "IMAGE_FILE_MACHINE_SH5"
-                   , COFF::IMAGE_FILE_MACHINE_SH5)
-              .Case( "IMAGE_FILE_MACHINE_THUMB"
-                   , COFF::IMAGE_FILE_MACHINE_THUMB)
-              .Case( "IMAGE_FILE_MACHINE_WCEMIPSV2"
-                   , COFF::IMAGE_FILE_MACHINE_WCEMIPSV2)
-              .Default(COFF::MT_Invalid);
-            if (Machine == COFF::MT_Invalid) {
-              YS.printError(Value, "Invalid value for Machine");
-              return false;
-            }
-          }
-          Header.Machine = Machine;
-        } else if (KeyValue == "NumberOfSections") {
-          if (!getAs(Value, Header.NumberOfSections)) {
-              YS.printError(Value, "Invalid value for NumberOfSections");
-              return false;
-          }
-        } else if (KeyValue == "TimeDateStamp") {
-          if (!getAs(Value, Header.TimeDateStamp)) {
-              YS.printError(Value, "Invalid value for TimeDateStamp");
-              return false;
-          }
-        } else if (KeyValue == "PointerToSymbolTable") {
-          if (!getAs(Value, Header.PointerToSymbolTable)) {
-              YS.printError(Value, "Invalid value for PointerToSymbolTable");
-              return false;
-          }
-        } else if (KeyValue == "NumberOfSymbols") {
-          if (!getAs(Value, Header.NumberOfSymbols)) {
-              YS.printError(Value, "Invalid value for NumberOfSymbols");
-              return false;
-          }
-        } else if (KeyValue == "SizeOfOptionalHeader") {
-          if (!getAs(Value, Header.SizeOfOptionalHeader)) {
-              YS.printError(Value, "Invalid value for SizeOfOptionalHeader");
-              return false;
-          }
-        } else {
-          YS.printError(Key, "Unrecognized key in header");
-          return false;
-        }
-      }
-    }
-    return true;
-  }
-
-  bool parseHeaderCharacteristics(yaml::Node *Characteristics) {
-    yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(Characteristics);
-    yaml::SequenceNode *SeqValue
-      = dyn_cast<yaml::SequenceNode>(Characteristics);
-    if (!Value && !SeqValue) {
-      YS.printError(Characteristics,
-        "Characteristics must either be a number or sequence");
-      return false;
-    }
-    if (Value) {
-      if (!getAs(Value, Header.Characteristics)) {
-        YS.printError(Value, "Invalid value for Characteristics");
-        return false;
-      }
-    } else {
-      for (yaml::SequenceNode::iterator ci = SeqValue->begin(),
-                                        ce = SeqValue->end();
-                                        ci != ce; ++ci) {
-        yaml::ScalarNode *CharValue = dyn_cast<yaml::ScalarNode>(&*ci);
-        if (!CharValue) {
-          YS.printError(CharValue,
-            "Characteristics must be scalar values");
-          return false;
-        }
-        SmallString<32> Storage;
-        StringRef Char = CharValue->getValue(Storage);
-        uint16_t Characteristic = StringSwitch<COFF::Characteristics>(Char)
-          .Case( "IMAGE_FILE_RELOCS_STRIPPED"
-                , COFF::IMAGE_FILE_RELOCS_STRIPPED)
-          .Case( "IMAGE_FILE_EXECUTABLE_IMAGE"
-                , COFF::IMAGE_FILE_EXECUTABLE_IMAGE)
-          .Case( "IMAGE_FILE_LINE_NUMS_STRIPPED"
-                , COFF::IMAGE_FILE_LINE_NUMS_STRIPPED)
-          .Case( "IMAGE_FILE_LOCAL_SYMS_STRIPPED"
-                , COFF::IMAGE_FILE_LOCAL_SYMS_STRIPPED)
-          .Case( "IMAGE_FILE_AGGRESSIVE_WS_TRIM"
-                , COFF::IMAGE_FILE_AGGRESSIVE_WS_TRIM)
-          .Case( "IMAGE_FILE_LARGE_ADDRESS_AWARE"
-                , COFF::IMAGE_FILE_LARGE_ADDRESS_AWARE)
-          .Case( "IMAGE_FILE_BYTES_REVERSED_LO"
-                , COFF::IMAGE_FILE_BYTES_REVERSED_LO)
-          .Case( "IMAGE_FILE_32BIT_MACHINE"
-                , COFF::IMAGE_FILE_32BIT_MACHINE)
-          .Case( "IMAGE_FILE_DEBUG_STRIPPED"
-                , COFF::IMAGE_FILE_DEBUG_STRIPPED)
-          .Case( "IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP"
-                , COFF::IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP)
-          .Case( "IMAGE_FILE_SYSTEM"
-                , COFF::IMAGE_FILE_SYSTEM)
-          .Case( "IMAGE_FILE_DLL"
-                , COFF::IMAGE_FILE_DLL)
-          .Case( "IMAGE_FILE_UP_SYSTEM_ONLY"
-                , COFF::IMAGE_FILE_UP_SYSTEM_ONLY)
-          .Default(COFF::C_Invalid);
-        if (Characteristic == COFF::C_Invalid) {
-          // TODO: Typo-correct.
-          YS.printError(CharValue,
-            "Invalid value for Characteristic");
-          return false;
-        }
-        Header.Characteristics |= Characteristic;
-      }
-    }
-    return true;
-  }
-
-  bool parseSections(yaml::Node *SectionsN) {
-    yaml::SequenceNode *SN = dyn_cast<yaml::SequenceNode>(SectionsN);
-    if (!SN) {
-      YS.printError(SectionsN, "Sections must be a sequence");
-      return false;
-    }
-    for (yaml::SequenceNode::iterator i = SN->begin(), e = SN->end();
-                                      i != e; ++i) {
-      Section Sec;
-      std::memset(&Sec.Header, 0, sizeof(Sec.Header));
-      yaml::MappingNode *SecMap = dyn_cast<yaml::MappingNode>(&*i);
-      if (!SecMap) {
-        YS.printError(&*i, "Section entry must be a map");
-        return false;
-      }
-      for (yaml::MappingNode::iterator si = SecMap->begin(), se = SecMap->end();
-                                       si != se; ++si) {
-        yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(si->getKey());
-        if (!Key) {
-          YS.printError(si->getKey(), "Keys must be scalar values");
-          return false;
-        }
-        SmallString<32> Storage;
-        StringRef KeyValue = Key->getValue(Storage);
-
-        yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
-        if (KeyValue == "Name") {
-          // If the name is less than 8 bytes, store it in place, otherwise
-          // store it in the string table.
-          StringRef Name = Value->getValue(Storage);
-          std::fill_n(Sec.Header.Name, unsigned(COFF::NameSize), 0);
-          if (Name.size() <= COFF::NameSize) {
-            std::copy(Name.begin(), Name.end(), Sec.Header.Name);
-          } else {
-            // Add string to the string table and format the index for output.
-            unsigned Index = getStringIndex(Name);
-            std::string str = utostr(Index);
-            if (str.size() > 7) {
-              YS.printError(Value, "String table got too large");
-              return false;
-            }
-            Sec.Header.Name[0] = '/';
-            std::copy(str.begin(), str.end(), Sec.Header.Name + 1);
-          }
-        } else if (KeyValue == "VirtualSize") {
-          if (!getAs(Value, Sec.Header.VirtualSize)) {
-            YS.printError(Value, "Invalid value for VirtualSize");
-            return false;
-          }
-        } else if (KeyValue == "VirtualAddress") {
-          if (!getAs(Value, Sec.Header.VirtualAddress)) {
-            YS.printError(Value, "Invalid value for VirtualAddress");
-            return false;
-          }
-        } else if (KeyValue == "SizeOfRawData") {
-          if (!getAs(Value, Sec.Header.SizeOfRawData)) {
-            YS.printError(Value, "Invalid value for SizeOfRawData");
-            return false;
-          }
-        } else if (KeyValue == "PointerToRawData") {
-          if (!getAs(Value, Sec.Header.PointerToRawData)) {
-            YS.printError(Value, "Invalid value for PointerToRawData");
-            return false;
-          }
-        } else if (KeyValue == "PointerToRelocations") {
-          if (!getAs(Value, Sec.Header.PointerToRelocations)) {
-            YS.printError(Value, "Invalid value for PointerToRelocations");
-            return false;
-          }
-        } else if (KeyValue == "PointerToLineNumbers") {
-          if (!getAs(Value, Sec.Header.PointerToLineNumbers)) {
-            YS.printError(Value, "Invalid value for PointerToLineNumbers");
-            return false;
-          }
-        } else if (KeyValue == "NumberOfRelocations") {
-          if (!getAs(Value, Sec.Header.NumberOfRelocations)) {
-            YS.printError(Value, "Invalid value for NumberOfRelocations");
-            return false;
-          }
-        } else if (KeyValue == "NumberOfLineNumbers") {
-          if (!getAs(Value, Sec.Header.NumberOfLineNumbers)) {
-            YS.printError(Value, "Invalid value for NumberOfLineNumbers");
-            return false;
-          }
-        } else if (KeyValue == "Characteristics") {
-          yaml::SequenceNode *SeqValue
-            = dyn_cast<yaml::SequenceNode>(si->getValue());
-          if (!Value && !SeqValue) {
-            YS.printError(si->getValue(),
-              "Characteristics must either be a number or sequence");
-            return false;
-          }
-          if (Value) {
-            if (!getAs(Value, Sec.Header.Characteristics)) {
-              YS.printError(Value, "Invalid value for Characteristics");
-              return false;
-            }
-          } else {
-            for (yaml::SequenceNode::iterator ci = SeqValue->begin(),
-                                              ce = SeqValue->end();
-                                              ci != ce; ++ci) {
-              yaml::ScalarNode *CharValue = dyn_cast<yaml::ScalarNode>(&*ci);
-              if (!CharValue) {
-                YS.printError(CharValue, "Invalid value for Characteristics");
-                return false;
-              }
-              StringRef Char = CharValue->getValue(Storage);
-              uint32_t Characteristic =
-                StringSwitch<COFF::SectionCharacteristics>(Char)
-                .Case( "IMAGE_SCN_TYPE_NO_PAD"
-                     , COFF::IMAGE_SCN_TYPE_NO_PAD)
-                .Case( "IMAGE_SCN_CNT_CODE"
-                     , COFF::IMAGE_SCN_CNT_CODE)
-                .Case( "IMAGE_SCN_CNT_INITIALIZED_DATA"
-                     , COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
-                .Case( "IMAGE_SCN_CNT_UNINITIALIZED_DATA"
-                     , COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
-                .Case( "IMAGE_SCN_LNK_OTHER"
-                     , COFF::IMAGE_SCN_LNK_OTHER)
-                .Case( "IMAGE_SCN_LNK_INFO"
-                     , COFF::IMAGE_SCN_LNK_INFO)
-                .Case( "IMAGE_SCN_LNK_REMOVE"
-                     , COFF::IMAGE_SCN_LNK_REMOVE)
-                .Case( "IMAGE_SCN_LNK_COMDAT"
-                     , COFF::IMAGE_SCN_LNK_COMDAT)
-                .Case( "IMAGE_SCN_GPREL"
-                     , COFF::IMAGE_SCN_GPREL)
-                .Case( "IMAGE_SCN_MEM_PURGEABLE"
-                     , COFF::IMAGE_SCN_MEM_PURGEABLE)
-                .Case( "IMAGE_SCN_MEM_16BIT"
-                     , COFF::IMAGE_SCN_MEM_16BIT)
-                .Case( "IMAGE_SCN_MEM_LOCKED"
-                     , COFF::IMAGE_SCN_MEM_LOCKED)
-                .Case( "IMAGE_SCN_MEM_PRELOAD"
-                     , COFF::IMAGE_SCN_MEM_PRELOAD)
-                .Case( "IMAGE_SCN_ALIGN_1BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_1BYTES)
-                .Case( "IMAGE_SCN_ALIGN_2BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_2BYTES)
-                .Case( "IMAGE_SCN_ALIGN_4BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_4BYTES)
-                .Case( "IMAGE_SCN_ALIGN_8BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_8BYTES)
-                .Case( "IMAGE_SCN_ALIGN_16BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_16BYTES)
-                .Case( "IMAGE_SCN_ALIGN_32BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_32BYTES)
-                .Case( "IMAGE_SCN_ALIGN_64BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_64BYTES)
-                .Case( "IMAGE_SCN_ALIGN_128BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_128BYTES)
-                .Case( "IMAGE_SCN_ALIGN_256BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_256BYTES)
-                .Case( "IMAGE_SCN_ALIGN_512BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_512BYTES)
-                .Case( "IMAGE_SCN_ALIGN_1024BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_1024BYTES)
-                .Case( "IMAGE_SCN_ALIGN_2048BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_2048BYTES)
-                .Case( "IMAGE_SCN_ALIGN_4096BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_4096BYTES)
-                .Case( "IMAGE_SCN_ALIGN_8192BYTES"
-                     , COFF::IMAGE_SCN_ALIGN_8192BYTES)
-                .Case( "IMAGE_SCN_LNK_NRELOC_OVFL"
-                     , COFF::IMAGE_SCN_LNK_NRELOC_OVFL)
-                .Case( "IMAGE_SCN_MEM_DISCARDABLE"
-                     , COFF::IMAGE_SCN_MEM_DISCARDABLE)
-                .Case( "IMAGE_SCN_MEM_NOT_CACHED"
-                     , COFF::IMAGE_SCN_MEM_NOT_CACHED)
-                .Case( "IMAGE_SCN_MEM_NOT_PAGED"
-                     , COFF::IMAGE_SCN_MEM_NOT_PAGED)
-                .Case( "IMAGE_SCN_MEM_SHARED"
-                     , COFF::IMAGE_SCN_MEM_SHARED)
-                .Case( "IMAGE_SCN_MEM_EXECUTE"
-                     , COFF::IMAGE_SCN_MEM_EXECUTE)
-                .Case( "IMAGE_SCN_MEM_READ"
-                     , COFF::IMAGE_SCN_MEM_READ)
-                .Case( "IMAGE_SCN_MEM_WRITE"
-                     , COFF::IMAGE_SCN_MEM_WRITE)
-                .Default(COFF::SC_Invalid);
-              if (Characteristic == COFF::SC_Invalid) {
-                YS.printError(CharValue, "Invalid value for Characteristic");
-                return false;
-              }
-              Sec.Header.Characteristics |= Characteristic;
-            }
-          }
-        } else if (KeyValue == "SectionData") {
-          yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
-          SmallString<32> Storage;
-          StringRef Data = Value->getValue(Storage);
-          if (!hexStringToByteArray(Data, Sec.Data)) {
-            YS.printError(Value, "SectionData must be a collection of pairs of"
-                                 "hex bytes");
-            return false;
-          }
-        } else
-          si->skip();
-      }
-      Sections.push_back(Sec);
-    }
-    return true;
-  }
-
-  bool parseSymbols(yaml::Node *SymbolsN) {
-    yaml::SequenceNode *SN = dyn_cast<yaml::SequenceNode>(SymbolsN);
-    if (!SN) {
-      YS.printError(SymbolsN, "Symbols must be a sequence");
-      return false;
-    }
-    for (yaml::SequenceNode::iterator i = SN->begin(), e = SN->end();
-                                      i != e; ++i) {
-      Symbol Sym;
-      std::memset(&Sym.Header, 0, sizeof(Sym.Header));
-      yaml::MappingNode *SymMap = dyn_cast<yaml::MappingNode>(&*i);
-      if (!SymMap) {
-        YS.printError(&*i, "Symbol must be a map");
-        return false;
-      }
-      for (yaml::MappingNode::iterator si = SymMap->begin(), se = SymMap->end();
-                                       si != se; ++si) {
-        yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(si->getKey());
-        if (!Key) {
-          YS.printError(si->getKey(), "Keys must be scalar values");
-          return false;
-        }
-        SmallString<32> Storage;
-        StringRef KeyValue = Key->getValue(Storage);
-
-        yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
-        if (!Value) {
-          YS.printError(si->getValue(), "Must be a scalar value");
-          return false;
-        }
-        if (KeyValue == "Name") {
-          // If the name is less than 8 bytes, store it in place, otherwise
-          // store it in the string table.
-          StringRef Name = Value->getValue(Storage);
-          std::fill_n(Sym.Header.Name, unsigned(COFF::NameSize), 0);
-          if (Name.size() <= COFF::NameSize) {
-            std::copy(Name.begin(), Name.end(), Sym.Header.Name);
-          } else {
-            // Add string to the string table and format the index for output.
-            unsigned Index = getStringIndex(Name);
-            *reinterpret_cast<support::aligned_ulittle32_t*>(
-              Sym.Header.Name + 4) = Index;
-          }
-        } else if (KeyValue == "Value") {
-          if (!getAs(Value, Sym.Header.Value)) {
-            YS.printError(Value, "Invalid value for Value");
-            return false;
-          }
-        } else if (KeyValue == "SimpleType") {
-          Sym.Header.Type |= StringSwitch<COFF::SymbolBaseType>(
-            Value->getValue(Storage))
-            .Case("IMAGE_SYM_TYPE_NULL", COFF::IMAGE_SYM_TYPE_NULL)
-            .Case("IMAGE_SYM_TYPE_VOID", COFF::IMAGE_SYM_TYPE_VOID)
-            .Case("IMAGE_SYM_TYPE_CHAR", COFF::IMAGE_SYM_TYPE_CHAR)
-            .Case("IMAGE_SYM_TYPE_SHORT", COFF::IMAGE_SYM_TYPE_SHORT)
-            .Case("IMAGE_SYM_TYPE_INT", COFF::IMAGE_SYM_TYPE_INT)
-            .Case("IMAGE_SYM_TYPE_LONG", COFF::IMAGE_SYM_TYPE_LONG)
-            .Case("IMAGE_SYM_TYPE_FLOAT", COFF::IMAGE_SYM_TYPE_FLOAT)
-            .Case("IMAGE_SYM_TYPE_DOUBLE", COFF::IMAGE_SYM_TYPE_DOUBLE)
-            .Case("IMAGE_SYM_TYPE_STRUCT", COFF::IMAGE_SYM_TYPE_STRUCT)
-            .Case("IMAGE_SYM_TYPE_UNION", COFF::IMAGE_SYM_TYPE_UNION)
-            .Case("IMAGE_SYM_TYPE_ENUM", COFF::IMAGE_SYM_TYPE_ENUM)
-            .Case("IMAGE_SYM_TYPE_MOE", COFF::IMAGE_SYM_TYPE_MOE)
-            .Case("IMAGE_SYM_TYPE_BYTE", COFF::IMAGE_SYM_TYPE_BYTE)
-            .Case("IMAGE_SYM_TYPE_WORD", COFF::IMAGE_SYM_TYPE_WORD)
-            .Case("IMAGE_SYM_TYPE_UINT", COFF::IMAGE_SYM_TYPE_UINT)
-            .Case("IMAGE_SYM_TYPE_DWORD", COFF::IMAGE_SYM_TYPE_DWORD)
-            .Default(COFF::IMAGE_SYM_TYPE_NULL);
-        } else if (KeyValue == "ComplexType") {
-          Sym.Header.Type |= StringSwitch<COFF::SymbolComplexType>(
-            Value->getValue(Storage))
-            .Case("IMAGE_SYM_DTYPE_NULL", COFF::IMAGE_SYM_DTYPE_NULL)
-            .Case("IMAGE_SYM_DTYPE_POINTER", COFF::IMAGE_SYM_DTYPE_POINTER)
-            .Case("IMAGE_SYM_DTYPE_FUNCTION", COFF::IMAGE_SYM_DTYPE_FUNCTION)
-            .Case("IMAGE_SYM_DTYPE_ARRAY", COFF::IMAGE_SYM_DTYPE_ARRAY)
-            .Default(COFF::IMAGE_SYM_DTYPE_NULL)
-            << COFF::SCT_COMPLEX_TYPE_SHIFT;
-        } else if (KeyValue == "StorageClass") {
-          Sym.Header.StorageClass = StringSwitch<COFF::SymbolStorageClass>(
-            Value->getValue(Storage))
-            .Case( "IMAGE_SYM_CLASS_END_OF_FUNCTION"
-                 , COFF::IMAGE_SYM_CLASS_END_OF_FUNCTION)
-            .Case( "IMAGE_SYM_CLASS_NULL"
-                 , COFF::IMAGE_SYM_CLASS_NULL)
-            .Case( "IMAGE_SYM_CLASS_AUTOMATIC"
-                 , COFF::IMAGE_SYM_CLASS_AUTOMATIC)
-            .Case( "IMAGE_SYM_CLASS_EXTERNAL"
-                 , COFF::IMAGE_SYM_CLASS_EXTERNAL)
-            .Case( "IMAGE_SYM_CLASS_STATIC"
-                 , COFF::IMAGE_SYM_CLASS_STATIC)
-            .Case( "IMAGE_SYM_CLASS_REGISTER"
-                 , COFF::IMAGE_SYM_CLASS_REGISTER)
-            .Case( "IMAGE_SYM_CLASS_EXTERNAL_DEF"
-                 , COFF::IMAGE_SYM_CLASS_EXTERNAL_DEF)
-            .Case( "IMAGE_SYM_CLASS_LABEL"
-                 , COFF::IMAGE_SYM_CLASS_LABEL)
-            .Case( "IMAGE_SYM_CLASS_UNDEFINED_LABEL"
-                 , COFF::IMAGE_SYM_CLASS_UNDEFINED_LABEL)
-            .Case( "IMAGE_SYM_CLASS_MEMBER_OF_STRUCT"
-                 , COFF::IMAGE_SYM_CLASS_MEMBER_OF_STRUCT)
-            .Case( "IMAGE_SYM_CLASS_ARGUMENT"
-                 , COFF::IMAGE_SYM_CLASS_ARGUMENT)
-            .Case( "IMAGE_SYM_CLASS_STRUCT_TAG"
-                 , COFF::IMAGE_SYM_CLASS_STRUCT_TAG)
-            .Case( "IMAGE_SYM_CLASS_MEMBER_OF_UNION"
-                 , COFF::IMAGE_SYM_CLASS_MEMBER_OF_UNION)
-            .Case( "IMAGE_SYM_CLASS_UNION_TAG"
-                 , COFF::IMAGE_SYM_CLASS_UNION_TAG)
-            .Case( "IMAGE_SYM_CLASS_TYPE_DEFINITION"
-                 , COFF::IMAGE_SYM_CLASS_TYPE_DEFINITION)
-            .Case( "IMAGE_SYM_CLASS_UNDEFINED_STATIC"
-                 , COFF::IMAGE_SYM_CLASS_UNDEFINED_STATIC)
-            .Case( "IMAGE_SYM_CLASS_ENUM_TAG"
-                 , COFF::IMAGE_SYM_CLASS_ENUM_TAG)
-            .Case( "IMAGE_SYM_CLASS_MEMBER_OF_ENUM"
-                 , COFF::IMAGE_SYM_CLASS_MEMBER_OF_ENUM)
-            .Case( "IMAGE_SYM_CLASS_REGISTER_PARAM"
-                 , COFF::IMAGE_SYM_CLASS_REGISTER_PARAM)
-            .Case( "IMAGE_SYM_CLASS_BIT_FIELD"
-                 , COFF::IMAGE_SYM_CLASS_BIT_FIELD)
-            .Case( "IMAGE_SYM_CLASS_BLOCK"
-                 , COFF::IMAGE_SYM_CLASS_BLOCK)
-            .Case( "IMAGE_SYM_CLASS_FUNCTION"
-                 , COFF::IMAGE_SYM_CLASS_FUNCTION)
-            .Case( "IMAGE_SYM_CLASS_END_OF_STRUCT"
-                 , COFF::IMAGE_SYM_CLASS_END_OF_STRUCT)
-            .Case( "IMAGE_SYM_CLASS_FILE"
-                 , COFF::IMAGE_SYM_CLASS_FILE)
-            .Case( "IMAGE_SYM_CLASS_SECTION"
-                 , COFF::IMAGE_SYM_CLASS_SECTION)
-            .Case( "IMAGE_SYM_CLASS_WEAK_EXTERNAL"
-                 , COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
-            .Case( "IMAGE_SYM_CLASS_CLR_TOKEN"
-                 , COFF::IMAGE_SYM_CLASS_CLR_TOKEN)
-            .Default(COFF::SSC_Invalid);
-          if (Sym.Header.StorageClass == COFF::SSC_Invalid) {
-            YS.printError(Value, "Invalid value for StorageClass");
-            return false;
-          }
-        } else if (KeyValue == "SectionNumber") {
-          if (!getAs(Value, Sym.Header.SectionNumber)) {
-              YS.printError(Value, "Invalid value for SectionNumber");
-              return false;
-          }
-        } else if (KeyValue == "AuxillaryData") {
-          StringRef Data = Value->getValue(Storage);
-          if (!hexStringToByteArray(Data, Sym.AuxSymbols)) {
-            YS.printError(Value, "AuxillaryData must be a collection of pairs"
-                                 "of hex bytes");
-            return false;
-          }
-        } else
-          si->skip();
-      }
-      Symbols.push_back(Sym);
-    }
-    return true;
-  }
-
-  bool parse() {
-    yaml::Document &D = *YS.begin();
-    yaml::MappingNode *Root = dyn_cast<yaml::MappingNode>(D.getRoot());
-    if (!Root) {
-      YS.printError(D.getRoot(), "Root node must be a map");
-      return false;
-    }
-    for (yaml::MappingNode::iterator i = Root->begin(), e = Root->end();
-                                     i != e; ++i) {
-      yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(i->getKey());
-      if (!Key) {
-        YS.printError(i->getKey(), "Keys must be scalar values");
-        return false;
-      }
-      SmallString<32> Storage;
-      StringRef KeyValue = Key->getValue(Storage);
-      if (KeyValue == "header") {
-        if (!parseHeader(i->getValue()))
-          return false;
-      } else if (KeyValue == "sections") {
-        if (!parseSections(i->getValue()))
-          return false;
-      } else if (KeyValue == "symbols") {
-        if (!parseSymbols(i->getValue()))
-          return false;
-      }
-    }
-    return !YS.failed();
-  }
-
-  unsigned getStringIndex(StringRef Str) {
-    StringMap<unsigned>::iterator i = StringTableMap.find(Str);
-    if (i == StringTableMap.end()) {
-      unsigned Index = StringTable.size();
-      StringTable.append(Str.begin(), Str.end());
-      StringTable.push_back(0);
-      StringTableMap[Str] = Index;
-      return Index;
-    }
-    return i->second;
-  }
-
-  yaml::Stream &YS;
-  COFF::header Header;
-
-  struct Section {
-    COFF::section Header;
-    std::vector<uint8_t> Data;
-    std::vector<COFF::relocation> Relocations;
-  };
-
-  struct Symbol {
-    COFF::symbol Header;
-    std::vector<uint8_t> AuxSymbols;
-  };
-
-  std::vector<Section> Sections;
-  std::vector<Symbol> Symbols;
-  StringMap<unsigned> StringTableMap;
-  std::string StringTable;
-};
-
-// Take a CP and assign addresses and sizes to everything. Returns false if the
-// layout is not valid to do.
-static bool layoutCOFF(COFFParser &CP) {
-  uint32_t SectionTableStart = 0;
-  uint32_t SectionTableSize  = 0;
-
-  // The section table starts immediately after the header, including the
-  // optional header.
-  SectionTableStart = sizeof(COFF::header) + CP.Header.SizeOfOptionalHeader;
-  SectionTableSize = sizeof(COFF::section) * CP.Sections.size();
-
-  uint32_t CurrentSectionDataOffset = SectionTableStart + SectionTableSize;
-
-  // Assign each section data address consecutively.
-  for (std::vector<COFFParser::Section>::iterator i = CP.Sections.begin(),
-                                                  e = CP.Sections.end();
-                                                  i != e; ++i) {
-    if (!i->Data.empty()) {
-      i->Header.SizeOfRawData = i->Data.size();
-      i->Header.PointerToRawData = CurrentSectionDataOffset;
-      CurrentSectionDataOffset += i->Header.SizeOfRawData;
-      // TODO: Handle alignment.
-    } else {
-      i->Header.SizeOfRawData = 0;
-      i->Header.PointerToRawData = 0;
-    }
-  }
-
-  uint32_t SymbolTableStart = CurrentSectionDataOffset;
-
-  // Calculate number of symbols.
-  uint32_t NumberOfSymbols = 0;
-  for (std::vector<COFFParser::Symbol>::iterator i = CP.Symbols.begin(),
-                                                 e = CP.Symbols.end();
-                                                 i != e; ++i) {
-    if (i->AuxSymbols.size() % COFF::SymbolSize != 0) {
-      errs() << "AuxillaryData size not a multiple of symbol size!\n";
-      return false;
-    }
-    i->Header.NumberOfAuxSymbols = i->AuxSymbols.size() / COFF::SymbolSize;
-    NumberOfSymbols += 1 + i->Header.NumberOfAuxSymbols;
-  }
-
-  // Store all the allocated start addresses in the header.
-  CP.Header.NumberOfSections = CP.Sections.size();
-  CP.Header.NumberOfSymbols = NumberOfSymbols;
-  CP.Header.PointerToSymbolTable = SymbolTableStart;
-
-  *reinterpret_cast<support::ulittle32_t *>(&CP.StringTable[0])
-    = CP.StringTable.size();
-
-  return true;
-}
-
-template <typename value_type>
-struct binary_le_impl {
-  value_type Value;
-  binary_le_impl(value_type V) : Value(V) {}
-};
-
-template <typename value_type>
-raw_ostream &operator <<( raw_ostream &OS
-                        , const binary_le_impl<value_type> &BLE) {
-  char Buffer[sizeof(BLE.Value)];
-  support::endian::write<value_type, support::little, support::unaligned>(
-    Buffer, BLE.Value);
-  OS.write(Buffer, sizeof(BLE.Value));
-  return OS;
-}
-
-template <typename value_type>
-binary_le_impl<value_type> binary_le(value_type V) {
-  return binary_le_impl<value_type>(V);
-}
-
-void writeCOFF(COFFParser &CP, raw_ostream &OS) {
-  OS << binary_le(CP.Header.Machine)
-     << binary_le(CP.Header.NumberOfSections)
-     << binary_le(CP.Header.TimeDateStamp)
-     << binary_le(CP.Header.PointerToSymbolTable)
-     << binary_le(CP.Header.NumberOfSymbols)
-     << binary_le(CP.Header.SizeOfOptionalHeader)
-     << binary_le(CP.Header.Characteristics);
-
-  // Output section table.
-  for (std::vector<COFFParser::Section>::const_iterator i = CP.Sections.begin(),
-                                                        e = CP.Sections.end();
-                                                        i != e; ++i) {
-    OS.write(i->Header.Name, COFF::NameSize);
-    OS << binary_le(i->Header.VirtualSize)
-       << binary_le(i->Header.VirtualAddress)
-       << binary_le(i->Header.SizeOfRawData)
-       << binary_le(i->Header.PointerToRawData)
-       << binary_le(i->Header.PointerToRelocations)
-       << binary_le(i->Header.PointerToLineNumbers)
-       << binary_le(i->Header.NumberOfRelocations)
-       << binary_le(i->Header.NumberOfLineNumbers)
-       << binary_le(i->Header.Characteristics);
-  }
-
-  // Output section data.
-  for (std::vector<COFFParser::Section>::const_iterator i = CP.Sections.begin(),
-                                                        e = CP.Sections.end();
-                                                        i != e; ++i) {
-    if (!i->Data.empty())
-      OS.write(reinterpret_cast<const char*>(&i->Data[0]), i->Data.size());
-  }
-
-  // Output symbol table.
-
-  for (std::vector<COFFParser::Symbol>::const_iterator i = CP.Symbols.begin(),
-                                                       e = CP.Symbols.end();
-                                                       i != e; ++i) {
-    OS.write(i->Header.Name, COFF::NameSize);
-    OS << binary_le(i->Header.Value)
-       << binary_le(i->Header.SectionNumber)
-       << binary_le(i->Header.Type)
-       << binary_le(i->Header.StorageClass)
-       << binary_le(i->Header.NumberOfAuxSymbols);
-    if (!i->AuxSymbols.empty())
-      OS.write( reinterpret_cast<const char*>(&i->AuxSymbols[0])
-              , i->AuxSymbols.size());
-  }
-
-  // Output string table.
-  OS.write(&CP.StringTable[0], CP.StringTable.size());
-}
-
-int main(int argc, char **argv) {
-  cl::ParseCommandLineOptions(argc, argv);
-  sys::PrintStackTraceOnErrorSignal();
-  PrettyStackTraceProgram X(argc, argv);
-  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
-
-  OwningPtr<MemoryBuffer> Buf;
-  if (MemoryBuffer::getFileOrSTDIN(Input, Buf))
-    return 1;
-
-  SourceMgr SM;
-  yaml::Stream S(Buf->getBuffer(), SM);
-  COFFParser CP(S);
-  if (!CP.parse()) {
-    errs() << "yaml2obj: Failed to parse YAML file!\n";
-    return 1;
-  }
-  if (!layoutCOFF(CP)) {
-    errs() << "yaml2obj: Failed to layout COFF file!\n";
-    return 1;
-  }
-  writeCOFF(CP, outs());
-}