diff --git a/CMakeLists.txt b/CMakeLists.txt
index c8677ae2ae9..cb3825ecd15 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -439,7 +439,6 @@ option (ENABLE_UNWIND "Enable libunwind (better stacktraces)" ON)
 include (libs/libdaemon/cmake/find_unwind.cmake)
 
 # Need to process before "contrib" dir:
-include (libs/libcommon/cmake/find_gperftools.cmake)
 include (libs/libcommon/cmake/find_jemalloc.cmake)
 include (libs/libcommon/cmake/find_mimalloc.cmake)
 include (libs/libcommon/cmake/find_cctz.cmake)
diff --git a/NOTICE b/NOTICE
index 3fbb523f72b..34da926905b 100644
--- a/NOTICE
+++ b/NOTICE
@@ -123,11 +123,6 @@ Revision: 21d37dbaa45742c0bd23cc1e5a70b52cbc27f809
 Address: https://github.com/ClickHouse/ClickHouse/tree/b13313eecca9455f4fdc923f597fe863df409742/contrib/libsparsehash
 License: https://github.com/ClickHouse/ClickHouse/blob/b13313eecca9455f4fdc923f597fe863df409742/contrib/libsparsehash/COPYING
 --------------------------------------
-tcmalloc
-Revision: dde32f8bbc95312379f9f5a651799815bb6327c5
-Address: https://github.com/ClickHouse/ClickHouse/tree/b13313eecca9455f4fdc923f597fe863df409742/contrib/libtcmalloc
-License: https://github.com/ClickHouse/ClickHouse/blob/b13313eecca9455f4fdc923f597fe863df409742/contrib/libtcmalloc/COPYING
---------------------------------------
 libunwind
 Revision: 19a2c01b1e8ac20871ea09d20f596d425ba53aed
 Address: https://github.com/libunwind/libunwind
diff --git a/cmake/Modules/FindGperftools.cmake b/cmake/Modules/FindGperftools.cmake
deleted file mode 100644
index dabc13d1611..00000000000
--- a/cmake/Modules/FindGperftools.cmake
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright 2022 PingCAP, Ltd.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# https://github.com/vast-io/vast/blob/master/cmake/FindGperftools.cmake
-
-# Tries to find Gperftools.
-#
-# Usage of this module as follows:
-#
-#     find_package(Gperftools)
-#
-# Variables used by this module, they can change the default behaviour and need
-# to be set before calling find_package:
-#
-#  Gperftools_ROOT_DIR  Set this variable to the root installation of
-#                       Gperftools if the module has problems finding
-#                       the proper installation path.
-#
-# Variables defined by this module:
-#
-#  GPERFTOOLS_FOUND        System has Gperftools libs/headers
-#  GPERFTOOLS_LIBRARIES    The Gperftools libraries (tcmalloc & profiler)
-#  GPERFTOOLS_INCLUDE_DIR  The location of Gperftools headers
-
-find_library(GPERFTOOLS_TCMALLOC
-  NAMES tcmalloc
-  HINTS ${Gperftools_ROOT_DIR}/lib)
-
-find_library(GPERFTOOLS_TCMALLOC_MINIMAL
-  NAMES tcmalloc_minimal
-  HINTS ${Gperftools_ROOT_DIR}/lib)
-
-find_library(GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG
-  NAMES tcmalloc_minimal_debug
-  HINTS ${Gperftools_ROOT_DIR}/lib)
-
-find_library(GPERFTOOLS_PROFILER
-  NAMES profiler
-  HINTS ${Gperftools_ROOT_DIR}/lib)
-
-find_library(GPERFTOOLS_TCMALLOC_AND_PROFILER
-  NAMES tcmalloc_and_profiler
-  HINTS ${Gperftools_ROOT_DIR}/lib)
-
-find_path(GPERFTOOLS_INCLUDE_DIR
-  NAMES gperftools/heap-profiler.h
-  HINTS ${Gperftools_ROOT_DIR}/include)
-
-set(GPERFTOOLS_LIBRARIES ${GPERFTOOLS_TCMALLOC_AND_PROFILER})
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(
-  Gperftools
-  DEFAULT_MSG
-  GPERFTOOLS_LIBRARIES
-  GPERFTOOLS_INCLUDE_DIR)
-
-mark_as_advanced(
-  Gperftools_ROOT_DIR
-  GPERFTOOLS_TCMALLOC
-  GPERFTOOLS_PROFILER
-  GPERFTOOLS_TCMALLOC_AND_PROFILER
-  GPERFTOOLS_LIBRARIES
-  GPERFTOOLS_INCLUDE_DIR)
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 4520d1cb176..eca6338104f 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -123,11 +123,6 @@ if (USE_INTERNAL_TIFLASH_PROXY)
     add_subdirectory (tiflash-proxy-cmake)
 endif ()
 
-# TODO: remove tcmalloc
-if (ENABLE_TCMALLOC AND USE_INTERNAL_GPERFTOOLS_LIBRARY)
-    add_subdirectory (libtcmalloc)
-endif ()
-
 if (ENABLE_JEMALLOC AND USE_INTERNAL_JEMALLOC_LIBRARY)
     add_subdirectory (jemalloc-cmake)
 endif ()
@@ -164,7 +159,9 @@ SET (BENCHMARK_ENABLE_GTEST_TESTS OFF CACHE BOOL "Disable google-benchmark testing" FORCE)
 add_subdirectory(benchmark)
 
 set (BUILD_TESTING OFF CACHE BOOL "Disable cpu-features testing" FORCE)
-add_subdirectory(cpu_features)
+if ((NOT APPLE) AND (NOT ARCH_AARCH64))
+    add_subdirectory(cpu_features)
+endif()
 
 if (ARCH_AARCH64 AND ARCH_LINUX)
     add_subdirectory(arm-optimized-routines-cmake)
diff --git a/contrib/libtcmalloc/AUTHORS b/contrib/libtcmalloc/AUTHORS
deleted file mode 100644
index 3995ed4cf57..00000000000
--- a/contrib/libtcmalloc/AUTHORS
+++ /dev/null
@@ -1,2 +0,0 @@
-google-perftools@googlegroups.com
-
diff --git a/contrib/libtcmalloc/CMakeLists.txt b/contrib/libtcmalloc/CMakeLists.txt
deleted file mode 100644
index 5bf61f98bea..00000000000
--- a/contrib/libtcmalloc/CMakeLists.txt
+++ /dev/null
@@ -1,81 +0,0 @@
-
-message (STATUS "Building: tcmalloc_minimal_internal")
-
-add_library (tcmalloc_minimal_internal
-./src/malloc_hook.cc
-./src/base/spinlock_internal.cc
-./src/base/spinlock.cc
-./src/base/dynamic_annotations.c
-./src/base/linuxthreads.cc
-./src/base/elf_mem_image.cc
-./src/base/vdso_support.cc
-./src/base/sysinfo.cc
-./src/base/low_level_alloc.cc
-./src/base/thread_lister.c
-./src/base/logging.cc
-./src/base/atomicops-internals-x86.cc
-./src/memfs_malloc.cc
-./src/tcmalloc.cc
-./src/malloc_extension.cc
-./src/thread_cache.cc
-./src/symbolize.cc
-./src/page_heap.cc
-./src/maybe_threads.cc
-./src/central_freelist.cc
-./src/static_vars.cc
-./src/sampler.cc
-./src/internal_logging.cc
-./src/system-alloc.cc
-./src/span.cc
-./src/common.cc
-./src/stacktrace.cc
-./src/stack_trace_table.cc
-./src/heap-checker.cc
-./src/heap-checker-bcad.cc
-./src/heap-profile-table.cc
-./src/raw_printer.cc
-./src/memory_region_map.cc
-)
-
-
-target_compile_options (tcmalloc_minimal_internal
-    PRIVATE
-    -DNO_TCMALLOC_SAMPLES
-    -DNDEBUG
-    -DNO_FRAME_POINTER
-    -Wwrite-strings
-    -Wno-sign-compare
-    -Wno-unused-result
-    -Wno-deprecated-declarations
-    -Wno-unused-function
-    -Wno-unused-private-field
-
-    PUBLIC
-    -fno-builtin-malloc
-    -fno-builtin-free
-    -fno-builtin-realloc
-    -fno-builtin-calloc
-    -fno-builtin-cfree
-    -fno-builtin-memalign
-    -fno-builtin-posix_memalign
-    -fno-builtin-valloc
-    -fno-builtin-pvalloc
-)
-
-if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.9.1)
-    target_compile_options(tcmalloc_minimal_internal PUBLIC -Wno-dynamic-exception-spec )
-endif ()
-
-if (CMAKE_SYSTEM MATCHES "FreeBSD" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-    target_compile_options(tcmalloc_minimal_internal PUBLIC -Wno-unused-but-set-variable)
-endif ()
-
-if (CMAKE_SYSTEM MATCHES "FreeBSD")
-    target_compile_definitions(tcmalloc_minimal_internal PUBLIC _GNU_SOURCE)
-endif ()
-
-target_include_directories (tcmalloc_minimal_internal PUBLIC include)
-target_include_directories (tcmalloc_minimal_internal PRIVATE src)
-
-find_package (Threads)
-target_link_libraries (tcmalloc_minimal_internal ${CMAKE_THREAD_LIBS_INIT})
diff --git a/contrib/libtcmalloc/COPYING b/contrib/libtcmalloc/COPYING
deleted file mode 100644
index e4956cfd9fd..00000000000
--- a/contrib/libtcmalloc/COPYING
+++ /dev/null
@@ -1,28 +0,0 @@
-Copyright (c) 2005, Google Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-    * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
-    * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/contrib/libtcmalloc/README b/contrib/libtcmalloc/README
deleted file mode 100644
index 8f7377c0b3b..00000000000
--- a/contrib/libtcmalloc/README
+++ /dev/null
@@ -1,8 +0,0 @@
-https://github.com/gperftools/gperftools/commit/dde32f8bbc95312379f9f5a651799815bb6327c5
-
-Several modifications:
-1. Disabled TCMALLOC_AGGRESSIVE_DECOMMIT by default. It is important.
-2. Using only files for tcmalloc_minimal build (./configure --enable-minimal).
-3. Using some compiler flags from project.
-4. Removed warning about unused variable when build with NDEBUG (by default).
-5. Including config.h with relative path.
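For reviewers who want context on modification 1 in the README above, here is a minimal sketch (illustration only, not part of this patch) of how the decommit behavior could be inspected and toggled at runtime through the MallocExtension API that this diff deletes. It assumes a binary still linked against gperftools; the "tcmalloc.aggressive_memory_decommit" property name is taken from upstream gperftools and should be treated as an assumption, not something this repository defines.

// Illustration only -- assumes gperftools is still linked into the binary.
#include <cstddef>
#include <cstdio>

#include <gperftools/malloc_extension.h>

int main() {
    size_t enabled = 0;
    // Property name from upstream gperftools; the bundled copy removed here
    // disabled the equivalent TCMALLOC_AGGRESSIVE_DECOMMIT default.
    if (MallocExtension::instance()->GetNumericProperty(
            "tcmalloc.aggressive_memory_decommit", &enabled)) {
        std::printf("aggressive decommit: %zu\n", enabled);
    }
    // Opt back in for this process only.
    MallocExtension::instance()->SetNumericProperty(
        "tcmalloc.aggressive_memory_decommit", 1);
    return 0;
}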
diff --git a/contrib/libtcmalloc/include/gperftools/heap-checker.h b/contrib/libtcmalloc/include/gperftools/heap-checker.h
deleted file mode 100644
index 5a87d8da7f7..00000000000
--- a/contrib/libtcmalloc/include/gperftools/heap-checker.h
+++ /dev/null
@@ -1,422 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Maxim Lifantsev (with design ideas by Sanjay Ghemawat)
-//
-//
-// Module for detecting heap (memory) leaks.
-//
-// For full(er) information, see doc/heap_checker.html
-//
-// This module can be linked into programs with
-// no slowdown caused by this unless you activate the leak-checker:
-//
-//    1. Set the environment variable HEAPCHECK to _type_ before
-//       running the program.
-//
-// _type_ is usually "normal" but can also be "minimal", "strict", or
-// "draconian".  (See the html file for other options, like 'local'.)
-//
-// After that, just run your binary.  If the heap-checker detects
-// a memory leak at program-exit, it will print instructions on how
-// to track down the leak.
-
-#ifndef BASE_HEAP_CHECKER_H_
-#define BASE_HEAP_CHECKER_H_
-
-#include <stddef.h>  // for size_t
-// I can't #include config.h in this public API file, but I should
-// really use configure (and make malloc_extension.h a .in file) to
-// figure out if the system has stdint.h or not.  But I'm lazy, so
-// for now I'm assuming it's a problem only with MSVC.
-#ifndef _MSC_VER
-#include <stdint.h>  // for uintptr_t
-#endif
-#include <stdarg.h>  // for va_list
-#include <vector>
-
-// Annoying stuff for windows -- makes sure clients can import these functions
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-
-// The class is thread-safe with respect to all the provided static methods,
-// as well as HeapLeakChecker objects: they can be accessed by multiple threads.
-class PERFTOOLS_DLL_DECL HeapLeakChecker {
- public:
-
-  // ----------------------------------------------------------------------- //
-  // Static functions for working with (whole-program) leak checking.
-
-  // If heap leak checking is currently active in some mode
-  // e.g. if leak checking was started (and is still active now)
-  // due to HEAPCHECK=... defined in the environment.
-  // The return value reflects whether HeapLeakChecker objects manually
-  // constructed right now will be doing leak checking or nothing.
-  // Note that we can go from active to inactive state during InitGoogle()
-  // if FLAGS_heap_check gets set to "" by some code before/during InitGoogle().
-  static bool IsActive();
-
-  // Return pointer to the whole-program checker if it has been created
-  // and NULL otherwise.
-  // Once GlobalChecker() returns non-NULL that object will not disappear and
-  // will be returned by all later GlobalChecker calls.
-  // This is mainly to access BytesLeaked() and ObjectsLeaked() (see below)
-  // for the whole-program checker after one calls NoGlobalLeaks()
-  // or similar and gets false.
-  static HeapLeakChecker* GlobalChecker();
-
-  // Do whole-program leak check now (if it was activated for this binary);
-  // return false only if it was activated and has failed.
-  // The mode of the check is controlled by the command-line flags.
-  // This method can be called repeatedly.
-  // Things like GlobalChecker()->SameHeap() can also be called explicitly
-  // to do the desired flavor of the check.
-  static bool NoGlobalLeaks();
-
-  // If the whole-program checker is active,
-  // cancel its automatic execution after main() exits.
-  // This requires that some leak check (e.g. NoGlobalLeaks())
-  // has been called at least once on the whole-program checker.
-  static void CancelGlobalCheck();
-
-  // ----------------------------------------------------------------------- //
-  // Non-static functions for starting and doing leak checking.
-
-  // Start checking and name the leak check performed.
-  // The name is used in naming dumped profiles
-  // and needs to be unique only within your binary.
-  // It must also be a string that can be a part of a file name,
-  // in particular not contain path expressions.
-  explicit HeapLeakChecker(const char *name);
-
-  // Destructor (verifies that some *NoLeaks or *SameHeap method
-  // has been called at least once).
-  ~HeapLeakChecker();
-
-  // These used to be different but are all the same now: they return
-  // true iff all memory allocated since this HeapLeakChecker object
-  // was constructed is still reachable from global state.
-  //
-  // Because we fork to convert addresses to symbol-names, and forking
-  // is not thread-safe, and we may be called in a threaded context,
-  // we do not try to symbolize addresses when called manually.
-  bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); }
-
-  // These forms are obsolete; use NoLeaks() instead.
-  // TODO(csilvers): mark as DEPRECATED.
-  bool QuickNoLeaks()  { return NoLeaks(); }
-  bool BriefNoLeaks()  { return NoLeaks(); }
-  bool SameHeap()      { return NoLeaks(); }
-  bool QuickSameHeap() { return NoLeaks(); }
-  bool BriefSameHeap() { return NoLeaks(); }
-
-  // Detailed information about the number of leaked bytes and objects
-  // (both of these can be negative as well).
-  // These are available only after a *SameHeap or *NoLeaks
-  // method has been called.
-  // Note that it's possible for both of these to be zero
-  // while SameHeap() or NoLeaks() returned false in case
-  // of a heap state change that is significant
-  // but preserves the byte and object counts.
-  ssize_t BytesLeaked() const;
-  ssize_t ObjectsLeaked() const;
-
-  // ----------------------------------------------------------------------- //
-  // Static helpers to make us ignore certain leaks.
-
-  // Scoped helper class.  Should be allocated on the stack inside a
-  // block of code.  Any heap allocations done in the code block
-  // covered by the scoped object (including in nested function calls
-  // done by the code block) will not be reported as leaks.  This is
-  // the recommended replacement for the GetDisableChecksStart() and
-  // DisableChecksToHereFrom() routines below.
-  //
-  // Example:
-  //   void Foo() {
-  //     HeapLeakChecker::Disabler disabler;
-  //     ... code that allocates objects whose leaks should be ignored ...
-  //   }
-  //
-  // REQUIRES: Destructor runs in same thread as constructor
-  class Disabler {
-   public:
-    Disabler();
-    ~Disabler();
-   private:
-    Disabler(const Disabler&);        // disallow copy
-    void operator=(const Disabler&);  // and assign
-  };
-
-  // Ignore an object located at 'ptr' (can go at the start or into the object)
-  // as well as all heap objects (transitively) referenced from it for the
-  // purposes of heap leak checking. Returns 'ptr' so that one can write
-  //   static T* obj = IgnoreObject(new T(...));
-  //
-  // If 'ptr' does not point to an active allocated object at the time of this
-  // call, it is ignored; but if it does, the object must not get deleted from
-  // the heap later on.
-  //
-  // See also HiddenPointer, below, if you need to prevent a pointer from
-  // being traversed by the heap checker but do not wish to transitively
-  // whitelist objects referenced through it.
-  template <typename T>
-  static T* IgnoreObject(T* ptr) {
-    DoIgnoreObject(static_cast<const void*>(const_cast<const T*>(ptr)));
-    return ptr;
-  }
-
-  // Undo what an earlier IgnoreObject() call promised and asked to do.
-  // At the time of this call 'ptr' must point at or inside of an active
-  // allocated object which was previously registered with IgnoreObject().
-  static void UnIgnoreObject(const void* ptr);
-
-  // ----------------------------------------------------------------------- //
-  // Internal types defined in .cc
-
-  class Allocator;
-  struct RangeValue;
-
- private:
-
-  // ----------------------------------------------------------------------- //
-  // Various helpers
-
-  // Create the name of the heap profile file.
-  // Should be deleted via Allocator::Free().
-  char* MakeProfileNameLocked();
-
-  // Helper for constructors
-  void Create(const char *name, bool make_start_snapshot);
-
-  enum ShouldSymbolize { SYMBOLIZE, DO_NOT_SYMBOLIZE };
-
-  // Helper for *NoLeaks and *SameHeap
-  bool DoNoLeaks(ShouldSymbolize should_symbolize);
-
-  // Helper for NoGlobalLeaks, also called by the global destructor.
-  static bool NoGlobalLeaksMaybeSymbolize(ShouldSymbolize should_symbolize);
-
-  // These used to be public, but they are now deprecated.
-  // Will remove entirely when all internal uses are fixed.
-  // In the meantime, use friendship so the unittest can still test them.
-  static void* GetDisableChecksStart();
-  static void DisableChecksToHereFrom(const void* start_address);
-  static void DisableChecksIn(const char* pattern);
-  friend void RangeDisabledLeaks();
-  friend void NamedTwoDisabledLeaks();
-  friend void* RunNamedDisabledLeaks(void*);
-  friend void TestHeapLeakCheckerNamedDisabling();
-
-  // Actually implements IgnoreObject().
-  static void DoIgnoreObject(const void* ptr);
-
-  // Disable checks based on stack trace entry at a depth <=
-  // max_depth.  Used to hide allocations done inside some special
-  // libraries.
-  static void DisableChecksFromToLocked(const void* start_address,
-                                        const void* end_address,
-                                        int max_depth);
-
-  // Helper for DoNoLeaks to ignore all objects reachable from all live data
-  static void IgnoreAllLiveObjectsLocked(const void* self_stack_top);
-
-  // Callback we pass to TCMalloc_ListAllProcessThreads (see thread_lister.h)
-  // that is invoked when all threads of our process are found and stopped.
-  // The callback does the things needed to ignore live data reachable from
-  // thread stacks and registers for all our threads
-  // as well as do other global-live-data ignoring
-  // (via IgnoreNonThreadLiveObjectsLocked)
-  // during the quiet state of all threads being stopped.
-  // For the argument meaning see the comment by TCMalloc_ListAllProcessThreads.
-  // Here we only use num_threads and thread_pids, that TCMalloc_ListAllProcessThreads
-  // fills for us with the number and pids of all the threads of our process
-  // it found and attached to.
-  static int IgnoreLiveThreadsLocked(void* parameter,
-                                     int num_threads,
-                                     pid_t* thread_pids,
-                                     va_list ap);
-
-  // Helper for IgnoreAllLiveObjectsLocked and IgnoreLiveThreadsLocked
-  // that we prefer to execute from IgnoreLiveThreadsLocked
-  // while all threads are stopped.
-  // This helper does live object discovery and ignoring
-  // for all objects that are reachable from everything
-  // not related to thread stacks and registers.
-  static void IgnoreNonThreadLiveObjectsLocked();
-
-  // Helper for IgnoreNonThreadLiveObjectsLocked and IgnoreLiveThreadsLocked
-  // to discover and ignore all heap objects
-  // reachable from currently considered live objects
-  // (live_objects static global variable in our .cc file).
-  // "name", "name2" are two strings that we print one after another
-  // in a debug message to describe what kind of live object sources
-  // are being used.
-  static void IgnoreLiveObjectsLocked(const char* name, const char* name2);
-
-  // Do the overall whole-program heap leak check if needed;
-  // returns true when did the leak check.
-  static bool DoMainHeapCheck();
-
-  // Type of task for UseProcMapsLocked
-  enum ProcMapsTask {
-    RECORD_GLOBAL_DATA,
-    DISABLE_LIBRARY_ALLOCS
-  };
-
-  // Success/Error Return codes for UseProcMapsLocked.
-  enum ProcMapsResult {
-    PROC_MAPS_USED,
-    CANT_OPEN_PROC_MAPS,
-    NO_SHARED_LIBS_IN_PROC_MAPS
-  };
-
-  // Read /proc/self/maps, parse it, and do the 'proc_maps_task' for each line.
-  static ProcMapsResult UseProcMapsLocked(ProcMapsTask proc_maps_task);
-
-  // A ProcMapsTask to disable allocations from 'library'
-  // that is mapped to [start_address..end_address)
-  // (only if library is a certain system library).
-  static void DisableLibraryAllocsLocked(const char* library,
-                                         uintptr_t start_address,
-                                         uintptr_t end_address);
-
-  // Return true iff "*ptr" points to a heap object
-  // ("*ptr" can point at the start or inside of a heap object
-  //  so that this works e.g. for pointers to C++ arrays, C++ strings,
-  //  multiple-inherited objects, or pointers to members).
-  // We also fill *object_size for this object then
-  // and we move "*ptr" to point to the very start of the heap object.
-  static inline bool HaveOnHeapLocked(const void** ptr, size_t* object_size);
-
-  // Helper to shutdown heap leak checker when it's not needed
-  // or can't function properly.
-  static void TurnItselfOffLocked();
-
-  // Internally-used c-tor to start whole-executable checking.
-  HeapLeakChecker();
-
-  // ----------------------------------------------------------------------- //
-  // Friends and externally accessed helpers.
-
-  // Helper for VerifyHeapProfileTableStackGet in the unittest
-  // to get the recorded allocation caller for ptr,
-  // which must be a heap object.
-  static const void* GetAllocCaller(void* ptr);
-  friend void VerifyHeapProfileTableStackGet();
-
-  // This gets to execute before constructors for all global objects
-  static void BeforeConstructorsLocked();
-  friend void HeapLeakChecker_BeforeConstructors();
-
-  // This gets to execute after destructors for all global objects
-  friend void HeapLeakChecker_AfterDestructors();
-
-  // Full starting of recommended whole-program checking.
-  friend void HeapLeakChecker_InternalInitStart();
-
-  // Runs REGISTER_HEAPCHECK_CLEANUP cleanups and potentially
-  // calls DoMainHeapCheck
-  friend void HeapLeakChecker_RunHeapCleanups();
-
-  // ----------------------------------------------------------------------- //
-  // Member data.
-
-  class SpinLock* lock_;  // to make HeapLeakChecker objects thread-safe
-  const char* name_;  // our remembered name (we own it)
-                      // NULL means this leak checker is a noop
-
-  // Snapshot taken when the checker was created.  May be NULL
-  // for the global heap checker object.  We use void* instead of
-  // HeapProfileTable::Snapshot* to avoid including heap-profile-table.h.
-  void* start_snapshot_;
-
-  bool has_checked_;  // if we have done the leak check, so these are ready:
-  ssize_t inuse_bytes_increase_;   // bytes-in-use increase for this checker
-  ssize_t inuse_allocs_increase_;  // allocations-in-use increase
-                                   // for this checker
-  bool keep_profiles_;  // iff we should keep the heap profiles we've made
-
-  // ----------------------------------------------------------------------- //
-
-  // Disallow "evil" constructors.
-  HeapLeakChecker(const HeapLeakChecker&);
-  void operator=(const HeapLeakChecker&);
-};
-
-
-// Holds a pointer that will not be traversed by the heap checker.
-// Contrast with HeapLeakChecker::IgnoreObject(o), in which o and
-// all objects reachable from o are ignored by the heap checker.
-template <typename T>
-class HiddenPointer {
- public:
-  explicit HiddenPointer(T* t)
-      : masked_t_(reinterpret_cast<uintptr_t>(t) ^ kHideMask) {
-  }
-  // Returns unhidden pointer.  Be careful where you save the result.
-  T* get() const { return reinterpret_cast<T*>(masked_t_ ^ kHideMask); }
-
- private:
-  // Arbitrary value, but not such that xor'ing with it is likely
-  // to map one valid pointer to another valid pointer:
-  static const uintptr_t kHideMask =
-      static_cast<uintptr_t>(0xF03A5F7BF03A5F7Bll);
-  uintptr_t masked_t_;
-};
-
-// A class that exists solely to run its destructor.  This class should not be
-// used directly, but instead by the REGISTER_HEAPCHECK_CLEANUP macro below.
-class PERFTOOLS_DLL_DECL HeapCleaner {
- public:
-  typedef void (*void_function)(void);
-  HeapCleaner(void_function f);
-  static void RunHeapCleanups();
- private:
-  static std::vector<void_function>* heap_cleanups_;
-};
-
-// A macro to declare module heap check cleanup tasks
-// (they run only if we are doing heap leak checking.)
-// 'body' should be the cleanup code to run.  'name' doesn't matter,
-// but must be unique amongst all REGISTER_HEAPCHECK_CLEANUP calls.
-#define REGISTER_HEAPCHECK_CLEANUP(name, body) \
-  namespace { \
-  void heapcheck_cleanup_##name() { body; } \
-  static HeapCleaner heapcheck_cleaner_##name(&heapcheck_cleanup_##name); \
-  }
-
-#endif  // BASE_HEAP_CHECKER_H_
diff --git a/contrib/libtcmalloc/include/gperftools/heap-profiler.h b/contrib/libtcmalloc/include/gperftools/heap-profiler.h
deleted file mode 100644
index 9b673645747..00000000000
--- a/contrib/libtcmalloc/include/gperftools/heap-profiler.h
+++ /dev/null
@@ -1,105 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2005, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- *
- * Module for heap-profiling.
- *
- * For full(er) information, see doc/heapprofile.html
- *
- * This module can be linked into your program with
- * no slowdown caused by this unless you activate the profiler
- * using one of the following methods:
- *
- *    1. Before starting the program, set the environment variable
- *       "HEAPPROFILE" to be the name of the file to which the profile
- *       data should be written.
- *
- *    2. Programmatically, start and stop the profiler using the
- *       routines "HeapProfilerStart(filename)" and "HeapProfilerStop()".
- *
- */
-
-#ifndef BASE_HEAP_PROFILER_H_
-#define BASE_HEAP_PROFILER_H_
-
-#include <stddef.h>
-
-/* Annoying stuff for windows; makes sure clients can import these functions */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-/* All this code should be usable from within C apps. */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Start profiling and arrange to write profile data to file names
- * of the form: "prefix.0000", "prefix.0001", ...
- */
-PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix);
-
-/* Returns non-zero if we are currently profiling the heap.  (Returns
- * an int rather than a bool so it's usable from C.)  This is true
- * between calls to HeapProfilerStart() and HeapProfilerStop(), and
- * also if the program has been run with HEAPPROFILER, or some other
- * way to turn on whole-program profiling.
- */
-int IsHeapProfilerRunning();
-
-/* Stop heap profiling.  Can be restarted again with HeapProfilerStart(),
- * but the currently accumulated profiling information will be cleared.
- */
-PERFTOOLS_DLL_DECL void HeapProfilerStop();
-
-/* Dump a profile now - can be used for dumping at a hopefully
- * quiescent state in your program, in order to more easily track down
- * memory leaks.  Will include the reason in the logged message
- */
-PERFTOOLS_DLL_DECL void HeapProfilerDump(const char *reason);
-
-/* Generate current heap profiling information.
- * Returns an empty string when heap profiling is not active.
- * The returned pointer is a '\0'-terminated string allocated using malloc()
- * and should be free()-ed as soon as the caller does not need it anymore.
- */
-PERFTOOLS_DLL_DECL char* GetHeapProfile();
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* BASE_HEAP_PROFILER_H_ */
diff --git a/contrib/libtcmalloc/include/gperftools/malloc_extension.h b/contrib/libtcmalloc/include/gperftools/malloc_extension.h
deleted file mode 100644
index 689b5f17cef..00000000000
--- a/contrib/libtcmalloc/include/gperftools/malloc_extension.h
+++ /dev/null
@@ -1,434 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// Extra extensions exported by some malloc implementations.  These
-// extensions are accessed through a virtual base class so an
-// application can link against a malloc that does not implement these
-// extensions, and it will get default versions that do nothing.
-//
-// NOTE FOR C USERS: If you wish to use this functionality from within
-// a C program, see malloc_extension_c.h.
-
-#ifndef BASE_MALLOC_EXTENSION_H_
-#define BASE_MALLOC_EXTENSION_H_
-
-#include <stddef.h>
-// I can't #include config.h in this public API file, but I should
-// really use configure (and make malloc_extension.h a .in file) to
-// figure out if the system has stdint.h or not.  But I'm lazy, so
-// for now I'm assuming it's a problem only with MSVC.
-#ifndef _MSC_VER
-#include <stdint.h>
-#endif
-#include <string>
-#include <vector>
-
-// Annoying stuff for windows -- makes sure clients can import these functions
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-static const int kMallocHistogramSize = 64;
-
-// One day, we could support other types of writers (perhaps for C?)
-typedef std::string MallocExtensionWriter;
-
-namespace base {
-struct MallocRange;
-}
-
-// Interface to a pluggable system allocator.
-class PERFTOOLS_DLL_DECL SysAllocator {
- public:
-  SysAllocator() {
-  }
-  virtual ~SysAllocator();
-
-  // Allocates "size"-byte of memory from system aligned with "alignment".
-  // Returns NULL if failed. Otherwise, the returned pointer p up to and
-  // including (p + actual_size - 1) have been allocated.
-  virtual void* Alloc(size_t size, size_t *actual_size, size_t alignment) = 0;
-};
-
-// The default implementations of the following routines do nothing.
-// All implementations should be thread-safe; the current one
-// (TCMallocImplementation) is.
-class PERFTOOLS_DLL_DECL MallocExtension {
- public:
-  virtual ~MallocExtension();
-
-  // Call this very early in the program execution -- say, in a global
-  // constructor -- to set up parameters and state needed by all
-  // instrumented malloc implementations.  One example: this routine
-  // sets environment variables to tell STL to use libc's malloc()
-  // instead of doing its own memory management.  This is safe to call
-  // multiple times, as long as each time is before threads start up.
-  static void Initialize();
-
-  // See "verify_memory.h" to see what these routines do
-  virtual bool VerifyAllMemory();
-  virtual bool VerifyNewMemory(const void* p);
-  virtual bool VerifyArrayNewMemory(const void* p);
-  virtual bool VerifyMallocMemory(const void* p);
-  virtual bool MallocMemoryStats(int* blocks, size_t* total,
-                                 int histogram[kMallocHistogramSize]);
-
-  // Get a human readable description of the following malloc data structures.
-  // - Total inuse memory by application.
-  // - Free memory (thread, central and page heap),
-  // - Freelist of central cache, each class.
-  // - Page heap freelist.
-  // The state is stored as a null-terminated string
-  // in a prefix of "buffer[0,buffer_length-1]".
-  // REQUIRES: buffer_length > 0.
-  virtual void GetStats(char* buffer, int buffer_length);
-
-  // Outputs to "writer" a sample of live objects and the stack traces
-  // that allocated these objects.  The format of the returned output
-  // is equivalent to the output of the heap profiler and can
-  // therefore be passed to "pprof". This function is equivalent to
-  // ReadStackTraces. The main difference is that this function returns
-  // serialized data appropriately formatted for use by the pprof tool.
-  // NOTE: by default, tcmalloc does not do any heap sampling, and this
-  //       function will always return an empty sample.  To get useful
-  //       data from GetHeapSample, you must also set the environment
-  //       variable TCMALLOC_SAMPLE_PARAMETER to a value such as 524288.
-  virtual void GetHeapSample(MallocExtensionWriter* writer);
-
-  // Outputs to "writer" the stack traces that caused growth in the
-  // address space size.  The format of the returned output is
-  // equivalent to the output of the heap profiler and can therefore
-  // be passed to "pprof". This function is equivalent to
-  // ReadHeapGrowthStackTraces. The main difference is that this function
-  // returns serialized data appropriately formatted for use by the
-  // pprof tool.  (This does not depend on, or require,
-  // TCMALLOC_SAMPLE_PARAMETER.)
-  virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer);
-
-  // Invokes func(arg, range) for every controlled memory
-  // range.  *range is filled in with information about the range.
-  //
-  // This is a best-effort interface useful only for performance
-  // analysis.  The implementation may not call func at all.
-  typedef void (RangeFunction)(void*, const base::MallocRange*);
-  virtual void Ranges(void* arg, RangeFunction func);
-
-  // -------------------------------------------------------------------
-  // Control operations for getting and setting malloc implementation
-  // specific parameters.  Some currently useful properties:
-  //
-  // generic
-  // -------
-  // "generic.current_allocated_bytes"
-  //      Number of bytes currently allocated by application
-  //      This property is not writable.
-  //
-  // "generic.heap_size"
-  //      Number of bytes in the heap ==
-  //      current_allocated_bytes +
-  //      fragmentation +
-  //      freed memory regions
-  //      This property is not writable.
-  //
-  // tcmalloc
-  // --------
-  // "tcmalloc.max_total_thread_cache_bytes"
-  //      Upper limit on total number of bytes stored across all
-  //      per-thread caches.  Default: 16MB.
-  //
-  // "tcmalloc.current_total_thread_cache_bytes"
-  //      Number of bytes used across all thread caches.
-  //      This property is not writable.
-  //
-  // "tcmalloc.central_cache_free_bytes"
-  //      Number of free bytes in the central cache that have been
-  //      assigned to size classes.  They always count towards virtual
-  //      memory usage, and unless the underlying memory is swapped out
-  //      by the OS, they also count towards physical memory usage.
-  //      This property is not writable.
-  //
-  // "tcmalloc.transfer_cache_free_bytes"
-  //      Number of free bytes that are waiting to be transferred between
-  //      the central cache and a thread cache.  They always count
-  //      towards virtual memory usage, and unless the underlying memory
-  //      is swapped out by the OS, they also count towards physical
-  //      memory usage.  This property is not writable.
-  //
-  // "tcmalloc.thread_cache_free_bytes"
-  //      Number of free bytes in thread caches.  They always count
-  //      towards virtual memory usage, and unless the underlying memory
-  //      is swapped out by the OS, they also count towards physical
-  //      memory usage.  This property is not writable.
-  //
-  // "tcmalloc.pageheap_free_bytes"
-  //      Number of bytes in free, mapped pages in page heap.  These
-  //      bytes can be used to fulfill allocation requests.  They
-  //      always count towards virtual memory usage, and unless the
-  //      underlying memory is swapped out by the OS, they also count
-  //      towards physical memory usage.  This property is not writable.
-  //
-  // "tcmalloc.pageheap_unmapped_bytes"
-  //      Number of bytes in free, unmapped pages in page heap.
-  //      These are bytes that have been released back to the OS,
-  //      possibly by one of the MallocExtension "Release" calls.
-  //      They can be used to fulfill allocation requests, but
-  //      typically incur a page fault.  They always count towards
-  //      virtual memory usage, and depending on the OS, typically
-  //      do not count towards physical memory usage.  This property
-  //      is not writable.
-  // -------------------------------------------------------------------
-
-  // Get the named "property"'s value.  Returns true if the property
-  // is known.  Returns false if the property is not a valid property
-  // name for the current malloc implementation.
-  // REQUIRES: property != NULL; value != NULL
-  virtual bool GetNumericProperty(const char* property, size_t* value);
-
-  // Set the named "property"'s value.  Returns true if the property
-  // is known and writable.  Returns false if the property is not a
-  // valid property name for the current malloc implementation, or
-  // is not writable.
-  // REQUIRES: property != NULL
-  virtual bool SetNumericProperty(const char* property, size_t value);
-
-  // Mark the current thread as "idle".  This routine may optionally
-  // be called by threads as a hint to the malloc implementation that
-  // any thread-specific resources should be released.  Note: this may
-  // be an expensive routine, so it should not be called too often.
-  //
-  // Also, if the code that calls this routine will go to sleep for
-  // a while, it should take care to not allocate anything between
-  // the call to this routine and the beginning of the sleep.
-  //
-  // Most malloc implementations ignore this routine.
-  virtual void MarkThreadIdle();
-
-  // Mark the current thread as "busy".  This routine should be
-  // called after MarkThreadIdle() if the thread will now do more
-  // work.  If this method is not called, performance may suffer.
-  //
-  // Most malloc implementations ignore this routine.
-  virtual void MarkThreadBusy();
-
-  // Gets the system allocator used by the malloc extension instance. Returns
-  // NULL for malloc implementations that do not support pluggable system
-  // allocators.
-  virtual SysAllocator* GetSystemAllocator();
-
-  // Sets the system allocator to the specified one.
-  //
-  // Users could register their own system allocators for malloc
-  // implementations that support pluggable system allocators, such as
-  // TCMalloc, by doing:
-  //   alloc = new MyOwnSysAllocator();
-  //   MallocExtension::instance()->SetSystemAllocator(alloc);
-  // It's up to users whether to fall back (recommended) to the default
-  // system allocator (use GetSystemAllocator() above) or not.  The caller is
-  // responsible for any necessary locking.
-  // See tcmalloc/system-alloc.h for the interface and
-  //     tcmalloc/memfs_malloc.cc for the examples.
-  //
-  // It's a no-op for malloc implementations that do not support pluggable
-  // system allocators.
-  virtual void SetSystemAllocator(SysAllocator *a);
-
-  // Try to release num_bytes of free memory back to the operating
-  // system for reuse.  Use this extension with caution -- to get this
-  // memory back may require faulting pages back in by the OS, and
-  // that may be slow.  (Currently only implemented in tcmalloc.)
-  virtual void ReleaseToSystem(size_t num_bytes);
-
-  // Same as ReleaseToSystem() but release as much memory as possible.
-  virtual void ReleaseFreeMemory();
-
-  // Sets the rate at which we release unused memory to the system.
-  // Zero means we never release memory back to the system.  Increase
-  // this flag to return memory faster; decrease it to return memory
-  // slower.  Reasonable rates are in the range [0,10].  (Currently
-  // only implemented in tcmalloc).
-  virtual void SetMemoryReleaseRate(double rate);
-
-  // Gets the release rate.  Returns a value < 0 if unknown.
-  virtual double GetMemoryReleaseRate();
-
-  // Returns the estimated number of bytes that will be allocated for
-  // a request of "size" bytes.  This is an estimate: an allocation of
-  // SIZE bytes may reserve more bytes, but will never reserve less.
-  // (Currently only implemented in tcmalloc, other implementations
-  // always return SIZE.)
-  // This is equivalent to malloc_good_size() in OS X.
-  virtual size_t GetEstimatedAllocatedSize(size_t size);
-
-  // Returns the actual number N of bytes reserved by tcmalloc for the
-  // pointer p.  The client is allowed to use the range of bytes
-  // [p, p+N) in any way it wishes (i.e. N is the "usable size" of this
-  // allocation).  This number may be equal to or greater than the number
-  // of bytes requested when p was allocated.
-  // p must have been allocated by this malloc implementation,
-  // must not be an interior pointer -- that is, must be exactly
-  // the pointer returned by malloc() et al., not some offset
-  // from that -- and should not have been freed yet.  p may be NULL.
-  // (Currently only implemented in tcmalloc; other implementations
-  // will return 0.)
-  // This is equivalent to malloc_size() in OS X, malloc_usable_size()
-  // in glibc, and _msize() for windows.
-  virtual size_t GetAllocatedSize(const void* p);
-
-  // Returns kOwned if this malloc implementation allocated the memory
-  // pointed to by p, or kNotOwned if some other malloc implementation
-  // allocated it or p is NULL.  May also return kUnknownOwnership if
-  // the malloc implementation does not keep track of ownership.
-  // REQUIRES: p must be a value returned from a previous call to
-  // malloc(), calloc(), realloc(), memalign(), posix_memalign(),
-  // valloc(), pvalloc(), new, or new[], and must refer to memory that
-  // is currently allocated (so, for instance, you should not pass in
-  // a pointer after having called free() on it).
-  enum Ownership {
-    // NOTE: Enum values MUST be kept in sync with the version in
-    //       malloc_extension_c.h
-    kUnknownOwnership = 0,
-    kOwned,
-    kNotOwned
-  };
-  virtual Ownership GetOwnership(const void* p);
-
-  // The current malloc implementation.  Always non-NULL.
-  static MallocExtension* instance();
-
-  // Change the malloc implementation.  Typically called by the
-  // malloc implementation during initialization.
-  static void Register(MallocExtension* implementation);
-
-  // Returns detailed information about malloc's freelists.  For each list,
-  // return a FreeListInfo:
-  struct FreeListInfo {
-    size_t min_object_size;
-    size_t max_object_size;
-    size_t total_bytes_free;
-    const char* type;
-  };
-  // Each item in the vector refers to a different freelist.  The lists
-  // are identified by the range of allocations that objects in the
-  // list can satisfy ([min_object_size, max_object_size]) and the
-  // type of freelist (see below).  The current size of the list is
-  // returned in total_bytes_free (which count against a process's
-  // resident and virtual size).
-  //
-  // Currently supported types are:
-  //
-  // "tcmalloc.page{_unmapped}" - tcmalloc's page heap.  An entry for each size
-  //     class in the page heap is returned.  Bytes in "page_unmapped"
-  //     are no longer backed by physical memory and do not count against
-  //     the resident size of a process.
-  //
-  // "tcmalloc.large{_unmapped}" - tcmalloc's list of objects larger
-  //     than the largest page heap size class.  Only one "large"
-  //     entry is returned.  There is no upper-bound on the size
-  //     of objects in the large free list; this call returns
-  //     kint64max for max_object_size.  Bytes in
-  //     "large_unmapped" are no longer backed by physical memory
-  //     and do not count against the resident size of a process.
-  //
-  // "tcmalloc.central" - tcmalloc's central free-list.  One entry per
-  //     size-class is returned.  Never unmapped.
-  //
-  // "debug.free_queue" - free objects queued by the debug allocator
-  //     and not returned to tcmalloc.
-  //
-  // "tcmalloc.thread" - tcmalloc's per-thread caches.  Never unmapped.
-  virtual void GetFreeListSizes(std::vector<FreeListInfo>* v);
-
-  // Get a list of stack traces of sampled allocation points.  Returns
-  // a pointer to a "new[]-ed" result array, and stores the sample
-  // period in "sample_period".
-  //
-  // The state is stored as a sequence of adjacent entries
-  // in the returned array.  Each entry has the following form:
-  //    uintptr_t count;        // Number of objects with following trace
-  //    uintptr_t size;         // Total size of objects with following trace
-  //    uintptr_t depth;        // Number of PC values in stack trace
-  //    void*     stack[depth]; // PC values that form the stack trace
-  //
-  // The list of entries is terminated by a "count" of 0.
-  //
-  // It is the responsibility of the caller to "delete[]" the returned array.
-  //
-  // May return NULL to indicate no results.
-  //
-  // This is an internal extension.  Callers should use the more
-  // convenient "GetHeapSample(string*)" method defined above.
-  virtual void** ReadStackTraces(int* sample_period);
-
-  // Like ReadStackTraces(), but returns stack traces that caused growth
-  // in the address space size.
-  virtual void** ReadHeapGrowthStackTraces();
-
-  // Returns the size in bytes of the calling thread's cache.
-  virtual size_t GetThreadCacheSize();
-
-  // Like MarkThreadIdle, but does not destroy the internal data
-  // structures of the thread cache.  When the thread resumes, it will
-  // have an empty cache but will not need to pay to reconstruct the
-  // cache data structures.
-  virtual void MarkThreadTemporarilyIdle();
-};
-
-namespace base {
-
-// Information passed per range.  More fields may be added later.
-struct MallocRange {
-  enum Type {
-    INUSE,                // Application is using this range
-    FREE,                 // Range is currently free
-    UNMAPPED,             // Backing physical memory has been returned to the OS
-    UNKNOWN
-    // More enum values may be added in the future
-  };
-
-  uintptr_t address;    // Address of range
-  size_t length;        // Byte length of range
-  Type type;            // Type of this range
-  double fraction;      // Fraction of range that is being used (0 if !INUSE)
-
-  // Perhaps add the following:
-  // - stack trace if this range was sampled
-  // - heap growth stack trace if applicable to this range
-  // - age when allocated (for inuse) or freed (if not in use)
-};
-
-} // namespace base
-
-#endif  // BASE_MALLOC_EXTENSION_H_
diff --git a/contrib/libtcmalloc/include/gperftools/malloc_extension_c.h b/contrib/libtcmalloc/include/gperftools/malloc_extension_c.h
deleted file mode 100644
index 70ff6868ecf..00000000000
--- a/contrib/libtcmalloc/include/gperftools/malloc_extension_c.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Copyright (c) 2008, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * --
- * Author: Craig Silverstein
- *
- * C shims for the C++ malloc_extension.h.  See malloc_extension.h for
- * details.  Note these C shims always work on
- * MallocExtension::instance(); it is not possible to have more than
- * one MallocExtension object in C applications.
- */
-
-#ifndef _MALLOC_EXTENSION_C_H_
-#define _MALLOC_EXTENSION_C_H_
-
-#include <stddef.h>
-#include <sys/types.h>
-
-/* Annoying stuff for windows -- makes sure clients can import these fns */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define kMallocExtensionHistogramSize 64
-
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyAllMemory(void);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyNewMemory(const void* p);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyArrayNewMemory(const void* p);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyMallocMemory(const void* p);
-PERFTOOLS_DLL_DECL int MallocExtension_MallocMemoryStats(int* blocks, size_t* total,
-                                                         int histogram[kMallocExtensionHistogramSize]);
-PERFTOOLS_DLL_DECL void MallocExtension_GetStats(char* buffer, int buffer_length);
-
-/* TODO(csilvers): write a C version of these routines, that perhaps
- * takes a function ptr and a void *.
- */
-/* void MallocExtension_GetHeapSample(string* result); */
-/* void MallocExtension_GetHeapGrowthStacks(string* result); */
-
-PERFTOOLS_DLL_DECL int MallocExtension_GetNumericProperty(const char* property, size_t* value);
-PERFTOOLS_DLL_DECL int MallocExtension_SetNumericProperty(const char* property, size_t value);
-PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadIdle(void);
-PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void);
-PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(size_t num_bytes);
-PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void);
-PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size);
-PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(const void* p);
-PERFTOOLS_DLL_DECL size_t MallocExtension_GetThreadCacheSize(void);
-PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadTemporarilyIdle(void);
-
-/*
- * NOTE: These enum values MUST be kept in sync with the version in
- *       malloc_extension.h
- */
-typedef enum {
-  MallocExtension_kUnknownOwnership = 0,
-  MallocExtension_kOwned,
-  MallocExtension_kNotOwned
-} MallocExtension_Ownership;
-
-PERFTOOLS_DLL_DECL MallocExtension_Ownership MallocExtension_GetOwnership(const void* p);
-
-#ifdef __cplusplus
-}   /* extern "C" */
-#endif
-
-#endif /* _MALLOC_EXTENSION_C_H_ */
diff --git a/contrib/libtcmalloc/include/gperftools/malloc_hook.h b/contrib/libtcmalloc/include/gperftools/malloc_hook.h
deleted file mode 100644
index b76411fb590..00000000000
--- a/contrib/libtcmalloc/include/gperftools/malloc_hook.h
+++ /dev/null
@@ -1,359 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// Some of our malloc implementations can invoke the following hooks whenever
-// memory is allocated or deallocated.  MallocHook is thread-safe, and things
-// you do before calling AddFooHook(MyHook) are visible to any resulting calls
-// to MyHook.  Hooks must be thread-safe.  If you write:
-//
-//   CHECK(MallocHook::AddNewHook(&MyNewHook));
-//
-// MyNewHook will be invoked in subsequent calls in the current thread, but
-// there are no guarantees on when it might be invoked in other threads.
-//
-// There are a limited number of slots available for each hook type.  Add*Hook
-// will return false if there are no slots available.  Remove*Hook will return
-// false if the given hook was not already installed.
-//
-// The order in which individual hooks are called in Invoke*Hook is undefined.
-//
-// It is safe for a hook to remove itself within Invoke*Hook and add other
-// hooks.  Any hooks added inside a hook invocation (for the same hook type)
-// will not be invoked for the current invocation.
-//
-// One important user of these hooks is the heap profiler.
-//
-// CAVEAT: If you add new MallocHook::Invoke* calls then those calls must be
-// directly in the code of the (de)allocation function that is provided to the
-// user and that function must have an ATTRIBUTE_SECTION(malloc_hook) attribute.
-//
-// Note: the Invoke*Hook() functions are defined in malloc_hook-inl.h.  If you
-// need to invoke a hook (which you shouldn't unless you're part of tcmalloc),
-// be sure to #include malloc_hook-inl.h in addition to malloc_hook.h.
-//
-// NOTE FOR C USERS: If you want to use malloc_hook functionality from
-// a C program, #include malloc_hook_c.h instead of this file.
-
-#ifndef _MALLOC_HOOK_H_
-#define _MALLOC_HOOK_H_
-
-#include <stddef.h>
-#include <sys/types.h>
-extern "C" {
-#include "malloc_hook_c.h"  // a C version of the malloc_hook interface
-}
-
-// Annoying stuff for windows -- makes sure clients can import these functions
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-// The C++ methods below call the C version (MallocHook_*), and thus
-// convert between an int and a bool.  Windows complains about this
-// (a "performance warning") which we don't care about, so we suppress.
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable:4800)
-#endif
-
-// Note: malloc_hook_c.h defines MallocHook_*Hook and
-// MallocHook_{Add,Remove}*Hook.  The version of these inside the MallocHook
-// class are defined in terms of the malloc_hook_c version.  See malloc_hook_c.h
-// for details of these types/functions.
- -class PERFTOOLS_DLL_DECL MallocHook { - public: - // The NewHook is invoked whenever an object is allocated. - // It may be passed NULL if the allocator returned NULL. - typedef MallocHook_NewHook NewHook; - inline static bool AddNewHook(NewHook hook) { - return MallocHook_AddNewHook(hook); - } - inline static bool RemoveNewHook(NewHook hook) { - return MallocHook_RemoveNewHook(hook); - } - inline static void InvokeNewHook(const void* p, size_t s); - - // The DeleteHook is invoked whenever an object is deallocated. - // It may be passed NULL if the caller is trying to delete NULL. - typedef MallocHook_DeleteHook DeleteHook; - inline static bool AddDeleteHook(DeleteHook hook) { - return MallocHook_AddDeleteHook(hook); - } - inline static bool RemoveDeleteHook(DeleteHook hook) { - return MallocHook_RemoveDeleteHook(hook); - } - inline static void InvokeDeleteHook(const void* p); - - // The PreMmapHook is invoked with mmap or mmap64 arguments just - // before the call is actually made. Such a hook may be useful - // in memory limited contexts, to catch allocations that will exceed - // a memory limit, and take outside actions to increase that limit. - typedef MallocHook_PreMmapHook PreMmapHook; - inline static bool AddPreMmapHook(PreMmapHook hook) { - return MallocHook_AddPreMmapHook(hook); - } - inline static bool RemovePreMmapHook(PreMmapHook hook) { - return MallocHook_RemovePreMmapHook(hook); - } - inline static void InvokePreMmapHook(const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset); - - // The MmapReplacement is invoked after the PreMmapHook but before - // the call is actually made. The MmapReplacement should return true - // if it handled the call, or false if it is still necessary to - // call mmap/mmap64. - // This should be used only by experts, and users must be be - // extremely careful to avoid recursive calls to mmap. The replacement - // should be async signal safe. - // Only one MmapReplacement is supported. After setting an MmapReplacement - // you must call RemoveMmapReplacement before calling SetMmapReplacement - // again. - typedef MallocHook_MmapReplacement MmapReplacement; - inline static bool SetMmapReplacement(MmapReplacement hook) { - return MallocHook_SetMmapReplacement(hook); - } - inline static bool RemoveMmapReplacement(MmapReplacement hook) { - return MallocHook_RemoveMmapReplacement(hook); - } - inline static bool InvokeMmapReplacement(const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset, - void** result); - - - // The MmapHook is invoked whenever a region of memory is mapped. - // It may be passed MAP_FAILED if the mmap failed. - typedef MallocHook_MmapHook MmapHook; - inline static bool AddMmapHook(MmapHook hook) { - return MallocHook_AddMmapHook(hook); - } - inline static bool RemoveMmapHook(MmapHook hook) { - return MallocHook_RemoveMmapHook(hook); - } - inline static void InvokeMmapHook(const void* result, - const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset); - - // The MunmapReplacement is invoked with munmap arguments just before - // the call is actually made. The MunmapReplacement should return true - // if it handled the call, or false if it is still necessary to - // call munmap. - // This should be used only by experts. The replacement should be - // async signal safe. - // Only one MunmapReplacement is supported. 
After setting an - // MunmapReplacement you must call RemoveMunmapReplacement before - // calling SetMunmapReplacement again. - typedef MallocHook_MunmapReplacement MunmapReplacement; - inline static bool SetMunmapReplacement(MunmapReplacement hook) { - return MallocHook_SetMunmapReplacement(hook); - } - inline static bool RemoveMunmapReplacement(MunmapReplacement hook) { - return MallocHook_RemoveMunmapReplacement(hook); - } - inline static bool InvokeMunmapReplacement(const void* p, - size_t size, - int* result); - - // The MunmapHook is invoked whenever a region of memory is unmapped. - typedef MallocHook_MunmapHook MunmapHook; - inline static bool AddMunmapHook(MunmapHook hook) { - return MallocHook_AddMunmapHook(hook); - } - inline static bool RemoveMunmapHook(MunmapHook hook) { - return MallocHook_RemoveMunmapHook(hook); - } - inline static void InvokeMunmapHook(const void* p, size_t size); - - // The MremapHook is invoked whenever a region of memory is remapped. - typedef MallocHook_MremapHook MremapHook; - inline static bool AddMremapHook(MremapHook hook) { - return MallocHook_AddMremapHook(hook); - } - inline static bool RemoveMremapHook(MremapHook hook) { - return MallocHook_RemoveMremapHook(hook); - } - inline static void InvokeMremapHook(const void* result, - const void* old_addr, - size_t old_size, - size_t new_size, - int flags, - const void* new_addr); - - // The PreSbrkHook is invoked just before sbrk is called -- except when - // the increment is 0. This is because sbrk(0) is often called - // to get the top of the memory stack, and is not actually a - // memory-allocation call. It may be useful in memory-limited contexts, - // to catch allocations that will exceed the limit and take outside - // actions to increase such a limit. - typedef MallocHook_PreSbrkHook PreSbrkHook; - inline static bool AddPreSbrkHook(PreSbrkHook hook) { - return MallocHook_AddPreSbrkHook(hook); - } - inline static bool RemovePreSbrkHook(PreSbrkHook hook) { - return MallocHook_RemovePreSbrkHook(hook); - } - inline static void InvokePreSbrkHook(ptrdiff_t increment); - - // The SbrkHook is invoked whenever sbrk is called -- except when - // the increment is 0. This is because sbrk(0) is often called - // to get the top of the memory stack, and is not actually a - // memory-allocation call. - typedef MallocHook_SbrkHook SbrkHook; - inline static bool AddSbrkHook(SbrkHook hook) { - return MallocHook_AddSbrkHook(hook); - } - inline static bool RemoveSbrkHook(SbrkHook hook) { - return MallocHook_RemoveSbrkHook(hook); - } - inline static void InvokeSbrkHook(const void* result, ptrdiff_t increment); - - // Get the current stack trace. Try to skip all routines up to and - // and including the caller of MallocHook::Invoke*. - // Use "skip_count" (similarly to GetStackTrace from stacktrace.h) - // as a hint about how many routines to skip if better information - // is not available. - inline static int GetCallerStackTrace(void** result, int max_depth, - int skip_count) { - return MallocHook_GetCallerStackTrace(result, max_depth, skip_count); - } - - // Unhooked versions of mmap() and munmap(). These should be used - // only by experts, since they bypass heapchecking, etc. - // Note: These do not run hooks, but they still use the MmapReplacement - // and MunmapReplacement. - static void* UnhookedMMap(void *start, size_t length, int prot, int flags, - int fd, off_t offset); - static int UnhookedMUnmap(void *start, size_t length); - - // The following are DEPRECATED. 
- inline static NewHook GetNewHook(); - inline static NewHook SetNewHook(NewHook hook) { - return MallocHook_SetNewHook(hook); - } - - inline static DeleteHook GetDeleteHook(); - inline static DeleteHook SetDeleteHook(DeleteHook hook) { - return MallocHook_SetDeleteHook(hook); - } - - inline static PreMmapHook GetPreMmapHook(); - inline static PreMmapHook SetPreMmapHook(PreMmapHook hook) { - return MallocHook_SetPreMmapHook(hook); - } - - inline static MmapHook GetMmapHook(); - inline static MmapHook SetMmapHook(MmapHook hook) { - return MallocHook_SetMmapHook(hook); - } - - inline static MunmapHook GetMunmapHook(); - inline static MunmapHook SetMunmapHook(MunmapHook hook) { - return MallocHook_SetMunmapHook(hook); - } - - inline static MremapHook GetMremapHook(); - inline static MremapHook SetMremapHook(MremapHook hook) { - return MallocHook_SetMremapHook(hook); - } - - inline static PreSbrkHook GetPreSbrkHook(); - inline static PreSbrkHook SetPreSbrkHook(PreSbrkHook hook) { - return MallocHook_SetPreSbrkHook(hook); - } - - inline static SbrkHook GetSbrkHook(); - inline static SbrkHook SetSbrkHook(SbrkHook hook) { - return MallocHook_SetSbrkHook(hook); - } - // End of DEPRECATED methods. - - private: - // Slow path versions of Invoke*Hook. - static void InvokeNewHookSlow(const void* p, size_t s); - static void InvokeDeleteHookSlow(const void* p); - static void InvokePreMmapHookSlow(const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset); - static void InvokeMmapHookSlow(const void* result, - const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset); - static bool InvokeMmapReplacementSlow(const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset, - void** result); - static void InvokeMunmapHookSlow(const void* p, size_t size); - static bool InvokeMunmapReplacementSlow(const void* p, - size_t size, - int* result); - static void InvokeMremapHookSlow(const void* result, - const void* old_addr, - size_t old_size, - size_t new_size, - int flags, - const void* new_addr); - static void InvokePreSbrkHookSlow(ptrdiff_t increment); - static void InvokeSbrkHookSlow(const void* result, ptrdiff_t increment); -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - - -#endif /* _MALLOC_HOOK_H_ */ diff --git a/contrib/libtcmalloc/include/gperftools/malloc_hook_c.h b/contrib/libtcmalloc/include/gperftools/malloc_hook_c.h deleted file mode 100644 index 56337e15e83..00000000000 --- a/contrib/libtcmalloc/include/gperftools/malloc_hook_c.h +++ /dev/null @@ -1,173 +0,0 @@ -/* Copyright (c) 2008, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * --
- * Author: Craig Silverstein
- *
- * C shims for the C++ malloc_hook.h.  See malloc_hook.h for details
- * on how to use these.
- */
-
-#ifndef _MALLOC_HOOK_C_H_
-#define _MALLOC_HOOK_C_H_
-
-#include <stddef.h>
-#include <sys/types.h>
-
-/* Annoying stuff for windows; makes sure clients can import these functions */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Get the current stack trace.  Try to skip all routines up to
- * and including the caller of MallocHook::Invoke*.
- * Use "skip_count" (similarly to GetStackTrace from stacktrace.h)
- * as a hint about how many routines to skip if better information
- * is not available.
- */
-PERFTOOLS_DLL_DECL
-int MallocHook_GetCallerStackTrace(void** result, int max_depth,
-                                   int skip_count);
-
-/* The MallocHook_{Add,Remove}*Hook functions return 1 on success and 0 on
- * failure.
- */ - -typedef void (*MallocHook_NewHook)(const void* ptr, size_t size); -PERFTOOLS_DLL_DECL -int MallocHook_AddNewHook(MallocHook_NewHook hook); -PERFTOOLS_DLL_DECL -int MallocHook_RemoveNewHook(MallocHook_NewHook hook); - -typedef void (*MallocHook_DeleteHook)(const void* ptr); -PERFTOOLS_DLL_DECL -int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook); -PERFTOOLS_DLL_DECL -int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook); - -typedef void (*MallocHook_PreMmapHook)(const void *start, - size_t size, - int protection, - int flags, - int fd, - off_t offset); -PERFTOOLS_DLL_DECL -int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook); -PERFTOOLS_DLL_DECL -int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook); - -typedef void (*MallocHook_MmapHook)(const void* result, - const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset); -PERFTOOLS_DLL_DECL -int MallocHook_AddMmapHook(MallocHook_MmapHook hook); -PERFTOOLS_DLL_DECL -int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook); - -typedef int (*MallocHook_MmapReplacement)(const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset, - void** result); -int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook); -int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook); - -typedef void (*MallocHook_MunmapHook)(const void* ptr, size_t size); -PERFTOOLS_DLL_DECL -int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook); -PERFTOOLS_DLL_DECL -int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook); - -typedef int (*MallocHook_MunmapReplacement)(const void* ptr, - size_t size, - int* result); -int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook); -int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook); - -typedef void (*MallocHook_MremapHook)(const void* result, - const void* old_addr, - size_t old_size, - size_t new_size, - int flags, - const void* new_addr); -PERFTOOLS_DLL_DECL -int MallocHook_AddMremapHook(MallocHook_MremapHook hook); -PERFTOOLS_DLL_DECL -int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook); - -typedef void (*MallocHook_PreSbrkHook)(ptrdiff_t increment); -PERFTOOLS_DLL_DECL -int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook); -PERFTOOLS_DLL_DECL -int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook); - -typedef void (*MallocHook_SbrkHook)(const void* result, ptrdiff_t increment); -PERFTOOLS_DLL_DECL -int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook); -PERFTOOLS_DLL_DECL -int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook); - -/* The following are DEPRECATED. */ -PERFTOOLS_DLL_DECL -MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook); -PERFTOOLS_DLL_DECL -MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook); -PERFTOOLS_DLL_DECL -MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook); -PERFTOOLS_DLL_DECL -MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook); -PERFTOOLS_DLL_DECL -MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook); -PERFTOOLS_DLL_DECL -MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook); -PERFTOOLS_DLL_DECL -MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook); -PERFTOOLS_DLL_DECL -MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook); -/* End of DEPRECATED functions. 
 */
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif /* _MALLOC_HOOK_C_H_ */
diff --git a/contrib/libtcmalloc/include/gperftools/profiler.h b/contrib/libtcmalloc/include/gperftools/profiler.h
deleted file mode 100644
index 2d272d616a9..00000000000
--- a/contrib/libtcmalloc/include/gperftools/profiler.h
+++ /dev/null
@@ -1,169 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2005, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- *
- * Module for CPU profiling based on periodic pc-sampling.
- *
- * For full(er) information, see doc/cpuprofile.html
- *
- * This module is linked into your program with
- * no slowdown caused by this unless you activate the profiler
- * using one of the following methods:
- *
- *    1. Before starting the program, set the environment variable
- *       "CPUPROFILE" to be the name of the file to which the profile
- *       data should be written.
- *
- *    2. Programmatically, start and stop the profiler using the
- *       routines "ProfilerStart(filename)" and "ProfilerStop()".
- *
- *
- * (Note: if using linux 2.4 or earlier, only the main thread may be
- * profiled.)
- *
- * Use pprof to view the resulting profile output.
- *    % pprof <path_to_binary> <profile_file_name>
- *    % pprof --gv <path_to_binary> <profile_file_name>
- *
- * These functions are thread-safe.
- */
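A minimal sketch of the programmatic variant described above (illustrative only, since this PR deletes the profiler header; the output path and RunWorkload() are placeholders):

    #include <gperftools/profiler.h>

    void RunWorkload();  // placeholder for the code being profiled

    int main() {
      // Equivalent to launching with CPUPROFILE=/tmp/myprog.prof set,
      // but scoped to exactly the region between Start and Stop.
      if (!ProfilerStart("/tmp/myprog.prof")) {
        return 1;  // zero return means profiling could not be started
      }
      RunWorkload();
      ProfilerStop();  // flush and close the profile for pprof to read
      return 0;
    }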
-
-#ifndef BASE_PROFILER_H_
-#define BASE_PROFILER_H_
-
-#include <time.h>       /* For time_t */
-
-/* Annoying stuff for windows; makes sure clients can import these functions */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-/* All this code should be usable from within C apps. */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Profiler options, for use with ProfilerStartWithOptions.  To use:
- *
- *   struct ProfilerOptions options;
- *   memset(&options, 0, sizeof options);
- *
- * then fill in fields as needed.
- *
- * This structure is intended to be usable from C code, so no constructor
- * is provided to initialize it.  (Use memset as described above).
- */
-struct ProfilerOptions {
-  /* Filter function and argument.
-   *
-   * If filter_in_thread is not NULL, when a profiling tick is delivered
-   * the profiler will call:
-   *
-   *   (*filter_in_thread)(filter_in_thread_arg)
-   *
-   * If it returns nonzero, the sample will be included in the profile.
-   * Note that filter_in_thread runs in a signal handler, so must be
-   * async-signal-safe.
-   *
-   * A typical use would be to set up filter results for each thread
-   * in the system before starting the profiler, then to make
-   * filter_in_thread be a very simple function which retrieves those
-   * results in an async-signal-safe way.  Retrieval could be done
-   * using thread-specific data, or using a shared data structure that
-   * supports async-signal-safe lookups.
-   */
-  int (*filter_in_thread)(void *arg);
-  void *filter_in_thread_arg;
-};
-
-/* Start profiling and write profile info into fname, discarding any
- * existing profiling data in that file.
- *
- * This is equivalent to calling ProfilerStartWithOptions(fname, NULL).
- */
-PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname);
-
-/* Start profiling and write profile into fname, discarding any
- * existing profiling data in that file.
- *
- * The profiler is configured using the options given by 'options'.
- * Options which are not specified are given default values.
- *
- * 'options' may be NULL, in which case all are given default values.
- *
- * Returns nonzero if profiling was started successfully, or zero otherwise.
- */
-PERFTOOLS_DLL_DECL int ProfilerStartWithOptions(
-    const char *fname, const struct ProfilerOptions *options);
-
-/* Stop profiling. Can be started again with ProfilerStart(), but
- * the currently accumulated profiling data will be cleared.
- */
-PERFTOOLS_DLL_DECL void ProfilerStop(void);
-
-/* Flush any currently buffered profiling state to the profile file.
- * Has no effect if the profiler has not been started.
- */
-PERFTOOLS_DLL_DECL void ProfilerFlush(void);
-
-
-/* DEPRECATED: these functions were used to enable/disable profiling
- * in the current thread, but no longer do anything.
- */
-PERFTOOLS_DLL_DECL void ProfilerEnable(void);
-PERFTOOLS_DLL_DECL void ProfilerDisable(void);
-
-/* Returns nonzero if profile is currently enabled, zero if it's not. */
-PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads(void);
-
-/* Routine for registering new threads with the profiler.
- */
-PERFTOOLS_DLL_DECL void ProfilerRegisterThread(void);
-
-/* Stores state about profiler's current status into "*state". */
-struct ProfilerState {
-  int    enabled;             /* Is profiling currently enabled? */
-  time_t start_time;          /* If enabled, when was profiling started? */
-  char   profile_name[1024];  /* Name of profile file being written, or '\0' */
-  int    samples_gathered;    /* Number of samples gathered so far (or 0) */
-};
-PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(struct ProfilerState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif /* BASE_PROFILER_H_ */
diff --git a/contrib/libtcmalloc/include/gperftools/stacktrace.h b/contrib/libtcmalloc/include/gperftools/stacktrace.h
deleted file mode 100644
index 2b9c5a13209..00000000000
--- a/contrib/libtcmalloc/include/gperftools/stacktrace.h
+++ /dev/null
@@ -1,117 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat -// -// Routines to extract the current stack trace. These functions are -// thread-safe. - -#ifndef GOOGLE_STACKTRACE_H_ -#define GOOGLE_STACKTRACE_H_ - -// Annoying stuff for windows -- makes sure clients can import these functions -#ifndef PERFTOOLS_DLL_DECL -# ifdef _WIN32 -# define PERFTOOLS_DLL_DECL __declspec(dllimport) -# else -# define PERFTOOLS_DLL_DECL -# endif -#endif - - -// Skips the most recent "skip_count" stack frames (also skips the -// frame generated for the "GetStackFrames" routine itself), and then -// records the pc values for up to the next "max_depth" frames in -// "result", and the corresponding stack frame sizes in "sizes". -// Returns the number of values recorded in "result"/"sizes". -// -// Example: -// main() { foo(); } -// foo() { bar(); } -// bar() { -// void* result[10]; -// int sizes[10]; -// int depth = GetStackFrames(result, sizes, 10, 1); -// } -// -// The GetStackFrames call will skip the frame for "bar". It will -// return 2 and will produce pc values that map to the following -// procedures: -// result[0] foo -// result[1] main -// (Actually, there may be a few more entries after "main" to account for -// startup procedures.) -// And corresponding stack frame sizes will also be recorded: -// sizes[0] 16 -// sizes[1] 16 -// (Stack frame sizes of 16 above are just for illustration purposes.) -// Stack frame sizes of 0 or less indicate that those frame sizes couldn't -// be identified. -// -// This routine may return fewer stack frame entries than are -// available. Also note that "result" and "sizes" must both be non-NULL. -extern PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth, - int skip_count); - -// Same as above, but to be used from a signal handler. The "uc" parameter -// should be the pointer to ucontext_t which was passed as the 3rd parameter -// to sa_sigaction signal handler. It may help the unwinder to get a -// better stack trace under certain conditions. The "uc" may safely be NULL. 
-extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth,
-                                                        int skip_count, const void *uc);
-
-// This is similar to the GetStackFrames routine, except that it returns
-// the stack trace only, and not the stack frame sizes as well.
-// Example:
-//      main() { foo(); }
-//      foo() { bar(); }
-//      bar() {
-//        void* result[10];
-//        int depth = GetStackTrace(result, 10, 1);
-//      }
-//
-// This produces:
-//      result[0]       foo
-//      result[1]       main
-//           ....       ...
-//
-// "result" must not be NULL.
-extern PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth,
-                                            int skip_count);
-
-// Same as above, but to be used from a signal handler. The "uc" parameter
-// should be the pointer to ucontext_t which was passed as the 3rd parameter
-// to sa_sigaction signal handler. It may help the unwinder to get a
-// better stack trace under certain conditions. The "uc" may safely be NULL.
-extern PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth,
-                                                       int skip_count, const void *uc);
-
-#endif /* GOOGLE_STACKTRACE_H_ */
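A short sketch of calling the deleted stack-trace API directly (illustrative only; symbolizing the raw program counters is left to pprof or addr2line):

    #include <cstdio>
    #include <gperftools/stacktrace.h>

    void ReportCallers() {
      void* frames[32];
      // skip_count = 1 drops the frame for ReportCallers itself.
      int depth = GetStackTrace(frames, 32, 1);
      for (int i = 0; i < depth; ++i) {
        std::printf("#%d %p\n", i, frames[i]);  // raw pcs; symbolize offline
      }
    }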
diff --git a/contrib/libtcmalloc/include/gperftools/tcmalloc.h b/contrib/libtcmalloc/include/gperftools/tcmalloc.h
deleted file mode 100644
index a5b39dbffbe..00000000000
--- a/contrib/libtcmalloc/include/gperftools/tcmalloc.h
+++ /dev/null
@@ -1,160 +0,0 @@
-// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2003, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- *         .h file by Craig Silverstein
- */
-
-#ifndef TCMALLOC_TCMALLOC_H_
-#define TCMALLOC_TCMALLOC_H_
-
-#include <stddef.h>                     /* for size_t */
-
-/* Define the version number so folks can check against it */
-#define TC_VERSION_MAJOR  2
-#define TC_VERSION_MINOR  5
-#define TC_VERSION_PATCH  ""
-#define TC_VERSION_STRING "gperftools 2.5"
-
-/* For struct mallinfo, if it's defined. */
-#if !defined(__APPLE__) && !defined(__FreeBSD__)
-# include <malloc.h>
-#else
-struct mallinfo {
-  size_t arena;    /* non-mmapped space allocated from system */
-  size_t ordblks;  /* number of free chunks */
-  size_t smblks;   /* always 0 */
-  size_t hblks;    /* always 0 */
-  size_t hblkhd;   /* space in mmapped regions */
-  size_t usmblks;  /* maximum total allocated space */
-  size_t fsmblks;  /* always 0 */
-  size_t uordblks; /* total allocated space */
-  size_t fordblks; /* total free space */
-  size_t keepcost; /* releasable (via malloc_trim) space */
-};
-#endif
-
-#ifdef __cplusplus
-#define PERFTOOLS_THROW throw()
-#else
-# ifdef __GNUC__
-#  define PERFTOOLS_THROW __attribute__((__nothrow__))
-# else
-#  define PERFTOOLS_THROW
-# endif
-#endif
-
-#ifndef PERFTOOLS_DLL_DECL
-#define PERFTOOLS_DLL_DECL_DEFINED
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-#ifdef __cplusplus
-namespace std {
-struct nothrow_t;
-}
-
-extern "C" {
-#endif
-  /*
-   * Returns a human-readable version string.  If major, minor,
-   * and/or patch are not NULL, they are set to the major version,
-   * minor version, and patch-code (a string, usually "").
-   */
-  PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor,
-                                            const char** patch) PERFTOOLS_THROW;
-
-  PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW;
-
-  PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment,
-                                       size_t __size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr,
-                                           size_t align, size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_THROW;
-
-  PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW;
-#if 1
-  PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW;
-#endif
-
-  /*
-   * This is an alias for MallocExtension::instance()->GetAllocatedSize().
- * It is equivalent to - * OS X: malloc_size() - * glibc: malloc_usable_size() - * Windows: _msize() - */ - PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW; - -#ifdef __cplusplus - PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_THROW; - PERFTOOLS_DLL_DECL void* tc_new(size_t size); - PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, - const std::nothrow_t&) PERFTOOLS_THROW; - PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW; - PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) throw(); - PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, - const std::nothrow_t&) PERFTOOLS_THROW; - PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); - PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, - const std::nothrow_t&) PERFTOOLS_THROW; - PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW; - PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) throw(); - PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, - const std::nothrow_t&) PERFTOOLS_THROW; -} -#endif - -/* We're only un-defining those for public */ -#if !defined(GPERFTOOLS_CONFIG_H_) - -#undef PERFTOOLS_THROW - -#ifdef PERFTOOLS_DLL_DECL_DEFINED -#undef PERFTOOLS_DLL_DECL -#undef PERFTOOLS_DLL_DECL_DEFINED -#endif - -#endif /* GPERFTOOLS_CONFIG_H_ */ - -#endif /* #ifndef TCMALLOC_TCMALLOC_H_ */ diff --git a/contrib/libtcmalloc/src/addressmap-inl.h b/contrib/libtcmalloc/src/addressmap-inl.h deleted file mode 100644 index fd1dc5b6ffe..00000000000 --- a/contrib/libtcmalloc/src/addressmap-inl.h +++ /dev/null @@ -1,422 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat -// -// A fast map from addresses to values. Assumes that addresses are -// clustered. The main use is intended to be for heap-profiling. -// May be too memory-hungry for other uses. 
-//
-// We use a user-defined allocator/de-allocator so that we can use
-// this data structure during heap-profiling.
-//
-// IMPLEMENTATION DETAIL:
-//
-// Some default definitions/parameters:
-//  * Block -- aligned 128-byte region of the address space
-//  * Cluster -- aligned 1-MB region of the address space
-//  * Block-ID -- block-number within a cluster
-//  * Cluster-ID -- Starting address of cluster divided by cluster size
-//
-// We use a three-level map to represent the state:
-//  1. A hash-table maps from a cluster-ID to the data for that cluster.
-//  2. For each non-empty cluster we keep an array indexed by
-//     block-ID that points to the first entry in the linked-list
-//     for the block.
-//  3. At the bottom, we keep a singly-linked list of all
-//     entries in a block (for non-empty blocks).
-//
-//    hash table
-//  +-------------+
-//  | id->cluster |---> ...
-//  |     ...     |
-//  | id->cluster |--->  Cluster
-//  +-------------+     +-------+    Data for one block
-//                      |  nil  |   +------------------------------------+
-//                      |  ----+---|->[addr/value]-->[addr/value]-->...  |
-//                      |  nil  |   +------------------------------------+
-//                      |  ----+--> ...
-//                      |  nil  |
-//                      |  ...  |
-//                      +-------+
-//
-// Note that we require zero-bytes of overhead for completely empty
-// clusters.  The minimum space requirement for a cluster is the size
-// of the hash-table entry plus a pointer value for each block in
-// the cluster.  Empty blocks impose no extra space requirement.
-//
-// The cost of a lookup is:
-//      a. A hash-table lookup to find the cluster
-//      b. An array access in the cluster structure
-//      c. A traversal over the linked-list for a block
-
-#ifndef BASE_ADDRESSMAP_INL_H_
-#define BASE_ADDRESSMAP_INL_H_
-
-#include "config.h"
-#include <stddef.h>
-#include <string.h>
-#if defined HAVE_STDINT_H
-#include <stdint.h>             // to get uint16_t (ISO naming madness)
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>           // another place uint16_t might be defined
-#else
-#include <sys/types.h>          // our last best hope
-#endif
-
-// This class is thread-unsafe -- that is, instances of this class can
-// not be accessed concurrently by multiple threads -- because the
-// callback function for Iterate() may mutate contained values.  If the
-// callback functions you pass do not mutate their Value* argument,
-// AddressMap can be treated as thread-compatible -- that is, it's
-// safe for multiple threads to call "const" methods on this class,
-// but not safe for one thread to call const methods on this class
-// while another thread is calling non-const methods on the class.
-template <class Value>
-class AddressMap {
- public:
-  typedef void* (*Allocator)(size_t size);
-  typedef void (*DeAllocator)(void* ptr);
-  typedef const void* Key;
-
-  // Create an AddressMap that uses the specified allocator/deallocator.
-  // The allocator/deallocator should behave like malloc/free.
-  // For instance, the allocator does not need to return initialized memory.
-  AddressMap(Allocator alloc, DeAllocator dealloc);
-  ~AddressMap();
-
-  // If the map contains an entry for "key", return it. Else return NULL.
-  inline const Value* Find(Key key) const;
-  inline Value* FindMutable(Key key);
-
-  // Insert into the map.  Any old value associated
-  // with key is forgotten.
-  void Insert(Key key, Value value);
-
-  // Remove any entry for key in the map.  If an entry was found
-  // and removed, stores the associated value in "*removed_value"
-  // and returns true.  Else returns false.
-  bool FindAndRemove(Key key, Value* removed_value);
-
-  // Similar to Find but we assume that keys are addresses of non-overlapping
-  // memory ranges whose sizes are given by size_func.
-  // If the map contains a range into which "key" points
-  // (at its start or inside of it, but not at the end),
-  // return the address of the associated value
-  // and store its key in "*res_key".
-  // Else return NULL.
-  // max_size specifies largest range size possibly in existence now.
-  typedef size_t (*ValueSizeFunc)(const Value& v);
-  const Value* FindInside(ValueSizeFunc size_func, size_t max_size,
-                          Key key, Key* res_key);
-
-  // Iterate over the address map calling 'callback'
-  // for all stored key-value pairs and passing 'arg' to it.
-  // We don't use full Closure/Callback machinery not to add
-  // unnecessary dependencies to this class with low-level uses.
-  template <class Type>
-  inline void Iterate(void (*callback)(Key, Value*, Type), Type arg) const;
-
- private:
-  typedef uintptr_t Number;
-
-  // The implementation assumes that addresses inserted into the map
-  // will be clustered.  We take advantage of this fact by splitting
-  // up the address-space into blocks and using a linked-list entry
-  // for each block.
-
-  // Size of each block.  There is one linked-list for each block, so
-  // do not make the block-size too big.  Otherwise, a lot of time
-  // will be spent traversing linked lists.
-  static const int kBlockBits = 7;
-  static const int kBlockSize = 1 << kBlockBits;
-
-  // Entry kept in per-block linked-list
-  struct Entry {
-    Entry* next;
-    Key    key;
-    Value  value;
-  };
-
-  // We further group a sequence of consecutive blocks into a cluster.
-  // The data for a cluster is represented as a dense array of
-  // linked-lists, one list per contained block.
-  static const int kClusterBits = 13;
-  static const Number kClusterSize = 1 << (kBlockBits + kClusterBits);
-  static const int kClusterBlocks = 1 << kClusterBits;
-
-  // We use a simple chaining hash-table to represent the clusters.
-  struct Cluster {
-    Cluster* next;                      // Next cluster in hash table chain
-    Number   id;                        // Cluster ID
-    Entry*   blocks[kClusterBlocks];    // Per-block linked-lists
-  };
-
-  // Number of hash-table entries.  With the block-size/cluster-size
-  // defined above, each cluster covers 1 MB, so a 4K-entry
-  // hash-table will give an average hash-chain length of 1 for 4GB of
-  // in-use memory.
-  static const int kHashBits = 12;
-  static const int kHashSize = 1 << 12;
-
-  // Number of entry objects allocated at a time
-  static const int ALLOC_COUNT = 64;
-
-  Cluster** hashtable_;              // The hash-table
-  Entry*    free_;                   // Free list of unused Entry objects
-
-  // Multiplicative hash function:
-  // The value "kHashMultiplier" is the bottom 32 bits of
-  //   int((sqrt(5)-1)/2 * 2^32)
-  // This is a good multiplier as suggested in CLR, Knuth.  The hash
-  // value is taken to be the top "k" bits of the bottom 32 bits
-  // of the multiplied value.
-  static const uint32_t kHashMultiplier = 2654435769u;
-  static int HashInt(Number x) {
-    // Multiply by a constant and take the top bits of the result.
-    const uint32_t m = static_cast<uint32_t>(x) * kHashMultiplier;
-    return static_cast<int>(m >> (32 - kHashBits));
-  }
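As a quick editorial check of the constant above (not part of the removed source): (sqrt(5)-1)/2 is about 0.6180339887, and 0.6180339887 * 2^32 is about 2654435769.5, whose integer part is 2654435769 (0x9E3779B9), matching kHashMultiplier. A tiny verification snippet:

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // floor(((sqrt(5) - 1) / 2) * 2^32) == 2654435769 == 0x9E3779B9
      uint32_t k = static_cast<uint32_t>(
          ((std::sqrt(5.0) - 1.0) / 2.0) * 4294967296.0);
      std::printf("%u\n", k);  // prints 2654435769
      return 0;
    }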
-  // Find cluster object for specified address.  If not found
-  // and "create" is true, create the object.  If not found
-  // and "create" is false, return NULL.
-  //
-  // This method is bitwise-const if create is false.
-  Cluster* FindCluster(Number address, bool create) {
-    // Look in hashtable
-    const Number cluster_id = address >> (kBlockBits + kClusterBits);
-    const int h = HashInt(cluster_id);
-    for (Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
-      if (c->id == cluster_id) {
-        return c;
-      }
-    }
-
-    // Create cluster if necessary
-    if (create) {
-      Cluster* c = New<Cluster>(1);
-      c->id = cluster_id;
-      c->next = hashtable_[h];
-      hashtable_[h] = c;
-      return c;
-    }
-    return NULL;
-  }
-
-  // Return the block ID for an address within its cluster
-  static int BlockID(Number address) {
-    return (address >> kBlockBits) & (kClusterBlocks - 1);
-  }
-
-  //--------------------------------------------------------------
-  // Memory management -- we keep all objects we allocate linked
-  // together in a singly linked list so we can get rid of them
-  // when we are all done.  Furthermore, we allow the client to
-  // pass in custom memory allocator/deallocator routines.
-  //--------------------------------------------------------------
-  struct Object {
-    Object* next;
-    // The real data starts here
-  };
-
-  Allocator    alloc_;               // The allocator
-  DeAllocator  dealloc_;             // The deallocator
-  Object*      allocated_;           // List of allocated objects
-
-  // Allocates a zeroed array of T with length "num".  Also inserts
-  // the allocated block into a linked list so it can be deallocated
-  // when we are all done.
-  template <class T> T* New(int num) {
-    void* ptr = (*alloc_)(sizeof(Object) + num*sizeof(T));
-    memset(ptr, 0, sizeof(Object) + num*sizeof(T));
-    Object* obj = reinterpret_cast<Object*>(ptr);
-    obj->next = allocated_;
-    allocated_ = obj;
-    return reinterpret_cast<T*>(reinterpret_cast<Object*>(ptr) + 1);
-  }
-};
-
-// More implementation details follow:
-
-template <class Value>
-AddressMap<Value>::AddressMap(Allocator alloc, DeAllocator dealloc)
-  : free_(NULL),
-    alloc_(alloc),
-    dealloc_(dealloc),
-    allocated_(NULL) {
-  hashtable_ = New<Cluster*>(kHashSize);
-}
-
-template <class Value>
-AddressMap<Value>::~AddressMap() {
-  // De-allocate all of the objects we allocated
-  for (Object* obj = allocated_; obj != NULL; /**/) {
-    Object* next = obj->next;
-    (*dealloc_)(obj);
-    obj = next;
-  }
-}
-
-template <class Value>
-inline const Value* AddressMap<Value>::Find(Key key) const {
-  return const_cast<AddressMap*>(this)->FindMutable(key);
-}
-
-template <class Value>
-inline Value* AddressMap<Value>::FindMutable(Key key) {
-  const Number num = reinterpret_cast<Number>(key);
-  const Cluster* const c = FindCluster(num, false/*do not create*/);
-  if (c != NULL) {
-    for (Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) {
-      if (e->key == key) {
-        return &e->value;
-      }
-    }
-  }
-  return NULL;
-}
-
-template <class Value>
-void AddressMap<Value>::Insert(Key key, Value value) {
-  const Number num = reinterpret_cast<Number>(key);
-  Cluster* const c = FindCluster(num, true/*create*/);
-
-  // Look in linked-list for this block
-  const int block = BlockID(num);
-  for (Entry* e = c->blocks[block]; e != NULL; e = e->next) {
-    if (e->key == key) {
-      e->value = value;
-      return;
-    }
-  }
-
-  // Create entry
-  if (free_ == NULL) {
-    // Allocate a new batch of entries and add to free-list
-    Entry* array = New<Entry>(ALLOC_COUNT);
-    for (int i = 0; i < ALLOC_COUNT-1; i++) {
-      array[i].next = &array[i+1];
-    }
-    array[ALLOC_COUNT-1].next = free_;
-    free_ = &array[0];
-  }
-  Entry* e = free_;
-  free_ = e->next;
-  e->key = key;
-  e->value = value;
-  e->next = c->blocks[block];
-  c->blocks[block] = e;
-}
-
-template <class Value>
-bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) {
-  const Number num = reinterpret_cast<Number>(key);
-  Cluster* const c = FindCluster(num, false/*do not create*/);
-  if (c != NULL) {
-    for (Entry** p = &c->blocks[BlockID(num)]; *p != NULL; p = &(*p)->next) {
-      Entry* e = *p;
-      if (e->key == key) {
-        *removed_value = e->value;
-        *p = e->next;         // Remove e from linked-list
-        e->next = free_;      // Add e to free-list
-        free_ = e;
-        return true;
-      }
-    }
-  }
-  return false;
-}
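A minimal usage sketch of the class implemented above (editorial illustration; the wrapper functions stand in for whatever malloc/free-style callbacks a client supplies):

    #include <cstddef>
    #include <cstdlib>
    #include "addressmap-inl.h"

    static void* MyAlloc(std::size_t n) { return std::malloc(n); }  // malloc-style
    static void MyDealloc(void* p) { std::free(p); }                // free-style

    int main() {
      AddressMap<int> map(&MyAlloc, &MyDealloc);
      int object;                              // any address can serve as a key
      map.Insert(&object, 42);
      const int* found = map.Find(&object);    // non-NULL, *found == 42
      int removed = 0;
      bool ok = map.FindAndRemove(&object, &removed);  // true, removed == 42
      (void)found; (void)ok;
      return 0;
    }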
-
-template <class Value>
-const Value* AddressMap<Value>::FindInside(ValueSizeFunc size_func,
-                                           size_t max_size,
-                                           Key key,
-                                           Key* res_key) {
-  const Number key_num = reinterpret_cast<Number>(key);
-  Number num = key_num;  // we'll move this to move back through the clusters
-  while (1) {
-    const Cluster* c = FindCluster(num, false/*do not create*/);
-    if (c != NULL) {
-      while (1) {
-        const int block = BlockID(num);
-        bool had_smaller_key = false;
-        for (const Entry* e = c->blocks[block]; e != NULL; e = e->next) {
-          const Number e_num = reinterpret_cast<Number>(e->key);
-          if (e_num <= key_num) {
-            if (e_num == key_num  ||      // to handle 0-sized ranges
-                key_num < e_num + (*size_func)(e->value)) {
-              *res_key = e->key;
-              return &e->value;
-            }
-            had_smaller_key = true;
-          }
-        }
-        if (had_smaller_key) return NULL;  // got a range before 'key'
-                                           // and it did not contain 'key'
-        if (block == 0) break;
-        // try address-wise previous block
-        num |= kBlockSize - 1;  // start at the last addr of prev block
-        num -= kBlockSize;
-        if (key_num - num > max_size) return NULL;
-      }
-    }
-    if (num < kClusterSize) return NULL;  // first cluster
-    // go to address-wise previous cluster to try
-    num |= kClusterSize - 1;  // start at the last block of previous cluster
-    num -= kClusterSize;
-    if (key_num - num > max_size) return NULL;
-      // Having max_size to limit the search is crucial: else
-      // we have to traverse a lot of empty clusters (or blocks).
-      // We can avoid needing max_size if we put clusters into
-      // a search tree, but performance suffers considerably
-      // if we use this approach by using stl::set.
-  }
-}
-
-template <class Value>
-template <class Type>
-inline void AddressMap<Value>::Iterate(void (*callback)(Key, Value*, Type),
-                                       Type arg) const {
-  // We could optimize this by traversing only non-empty clusters and/or blocks
-  // but it does not speed up heap-checker noticeably.
-  for (int h = 0; h < kHashSize; ++h) {
-    for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
-      for (int b = 0; b < kClusterBlocks; ++b) {
-        for (Entry* e = c->blocks[b]; e != NULL; e = e->next) {
-          callback(e->key, &e->value, arg);
-        }
-      }
-    }
-  }
-}
-
-#endif  // BASE_ADDRESSMAP_INL_H_
diff --git a/contrib/libtcmalloc/src/base/arm_instruction_set_select.h b/contrib/libtcmalloc/src/base/arm_instruction_set_select.h
deleted file mode 100644
index 6fde685272c..00000000000
--- a/contrib/libtcmalloc/src/base/arm_instruction_set_select.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright (c) 2011, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: Alexander Levitskiy -// -// Generalizes the plethora of ARM flavors available to an easier to manage set -// Defs reference is at https://wiki.edubuntu.org/ARM/Thumb2PortingHowto - -#ifndef ARM_INSTRUCTION_SET_SELECT_H_ -#define ARM_INSTRUCTION_SET_SELECT_H_ - -#if defined(__ARM_ARCH_8A__) -# define ARMV8 1 -#endif - -#if defined(ARMV8) || \ - defined(__ARM_ARCH_7__) || \ - defined(__ARM_ARCH_7R__) || \ - defined(__ARM_ARCH_7A__) -# define ARMV7 1 -#endif - -#if defined(ARMV7) || \ - defined(__ARM_ARCH_6__) || \ - defined(__ARM_ARCH_6J__) || \ - defined(__ARM_ARCH_6K__) || \ - defined(__ARM_ARCH_6Z__) || \ - defined(__ARM_ARCH_6T2__) || \ - defined(__ARM_ARCH_6ZK__) -# define ARMV6 1 -#endif - -#if defined(ARMV6) || \ - defined(__ARM_ARCH_5T__) || \ - defined(__ARM_ARCH_5E__) || \ - defined(__ARM_ARCH_5TE__) || \ - defined(__ARM_ARCH_5TEJ__) -# define ARMV5 1 -#endif - -#if defined(ARMV5) || \ - defined(__ARM_ARCH_4__) || \ - defined(__ARM_ARCH_4T__) -# define ARMV4 1 -#endif - -#if defined(ARMV4) || \ - defined(__ARM_ARCH_3__) || \ - defined(__ARM_ARCH_3M__) -# define ARMV3 1 -#endif - -#if defined(ARMV3) || \ - defined(__ARM_ARCH_2__) -# define ARMV2 1 -#endif - -#endif // ARM_INSTRUCTION_SET_SELECT_H_ diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-arm-generic.h b/contrib/libtcmalloc/src/base/atomicops-internals-arm-generic.h deleted file mode 100644 index d0f941309bb..00000000000 --- a/contrib/libtcmalloc/src/base/atomicops-internals-arm-generic.h +++ /dev/null @@ -1,228 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2003, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// ---
-//
-// Author: Lei Zhang, Sasha Levitskiy
-//
-// This file is an internal atomic implementation, use base/atomicops.h instead.
-//
-// LinuxKernelCmpxchg is from Google Gears.
-
-#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_
-#define BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "base/basictypes.h"
-
-typedef int32_t Atomic32;
-
-namespace base {
-namespace subtle {
-
-typedef int64_t Atomic64;
-
-// 0xffff0fc0 is the hard coded address of a function provided by
-// the kernel which implements an atomic compare-exchange. On older
-// ARM architecture revisions (pre-v6) this may be implemented using
-// a syscall. This address is stable, and in active use (hard coded)
-// by at least glibc-2.7 and the Android C library.
-// pLinuxKernelCmpxchg has both acquire and release barrier semantics.
-typedef Atomic32 (*LinuxKernelCmpxchgFunc)(Atomic32 old_value,
-                                           Atomic32 new_value,
-                                           volatile Atomic32* ptr);
-LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg ATTRIBUTE_WEAK =
-    (LinuxKernelCmpxchgFunc) 0xffff0fc0;
-
-typedef void (*LinuxKernelMemoryBarrierFunc)(void);
-LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier ATTRIBUTE_WEAK =
-    (LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
-
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 prev_value = *ptr;
-  do {
-    if (!pLinuxKernelCmpxchg(old_value, new_value,
-                             const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (pLinuxKernelCmpxchg(old_value, new_value,
-                               const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
- return NoBarrier_AtomicExchange(ptr, new_value); -} - -inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, - Atomic32 new_value) { - return NoBarrier_CompareAndSwap(ptr, old_value, new_value); -} - -inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, - Atomic32 new_value) { - return NoBarrier_CompareAndSwap(ptr, old_value, new_value); -} - -inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { - *ptr = value; -} - -inline void MemoryBarrier() { - pLinuxKernelMemoryBarrier(); -} - -inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { - *ptr = value; - MemoryBarrier(); -} - -inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { - MemoryBarrier(); - *ptr = value; -} - -inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { - return *ptr; -} - -inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { - Atomic32 value = *ptr; - MemoryBarrier(); - return value; -} - -inline Atomic32 Release_Load(volatile const Atomic32* ptr) { - MemoryBarrier(); - return *ptr; -} - - -// 64-bit versions are not implemented yet. - -inline void NotImplementedFatalError(const char *function_name) { - fprintf(stderr, "64-bit %s() not implemented on this platform\n", - function_name); - abort(); -} - -inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { - NotImplementedFatalError("NoBarrier_CompareAndSwap"); - return 0; -} - -inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - NotImplementedFatalError("NoBarrier_AtomicExchange"); - return 0; -} - -inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - // pLinuxKernelCmpxchg already has acquire and release barrier semantics. - return NoBarrier_AtomicExchange(ptr, new_value); -} - -inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - // pLinuxKernelCmpxchg already has acquire and release barrier semantics. 
- return NoBarrier_AtomicExchange(ptr, new_value); -} - -inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { - NotImplementedFatalError("NoBarrier_Store"); -} - -inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { - NotImplementedFatalError("Acquire_Store64"); -} - -inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { - NotImplementedFatalError("Release_Store"); -} - -inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - NotImplementedFatalError("NoBarrier_Load"); - return 0; -} - -inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { - NotImplementedFatalError("Atomic64 Acquire_Load"); - return 0; -} - -inline Atomic64 Release_Load(volatile const Atomic64* ptr) { - NotImplementedFatalError("Atomic64 Release_Load"); - return 0; -} - -inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { - NotImplementedFatalError("Atomic64 Acquire_CompareAndSwap"); - return 0; -} - -inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { - NotImplementedFatalError("Atomic64 Release_CompareAndSwap"); - return 0; -} - -} // namespace base::subtle -} // namespace base - -#endif // BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_ diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-arm-v6plus.h b/contrib/libtcmalloc/src/base/atomicops-internals-arm-v6plus.h deleted file mode 100644 index 35f10481b04..00000000000 --- a/contrib/libtcmalloc/src/base/atomicops-internals-arm-v6plus.h +++ /dev/null @@ -1,330 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// --- -// -// Author: Sasha Levitskiy -// based on atomicops-internals by Sanjay Ghemawat -// -// This file is an internal atomic implementation, use base/atomicops.h instead. -// -// This code implements ARM atomics for architectures V6 and newer. 
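
[Editor's note] The arm-generic file removed above funnels every atomic through the kernel-provided cmpxchg helper at 0xffff0fc0, which reports success by returning zero. On any toolchain recent enough to build this tree, the same "return the previous value" contract is expressible portably; the sketch below is illustrative only (the helper name is ours, not part of the removed code), assuming C++11 `std::atomic`:

```cpp
#include <atomic>
#include <cstdint>

// Relaxed CAS returning the previously stored value, mirroring the
// deleted NoBarrier_CompareAndSwap built on pLinuxKernelCmpxchg.
inline int32_t relaxed_compare_and_swap(std::atomic<int32_t>* ptr,
                                        int32_t old_value,
                                        int32_t new_value) {
  int32_t observed = old_value;
  // On failure, compare_exchange_strong writes the value actually seen
  // into `observed`, which is exactly what the hand-rolled loop returned.
  ptr->compare_exchange_strong(observed, new_value,
                               std::memory_order_relaxed,
                               std::memory_order_relaxed);
  return observed;
}
```
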
-
-#ifndef BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_
-#define BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "base/basictypes.h"  // For COMPILE_ASSERT
-
-// The LDREXD and STREXD instructions are available in all ARM v7 variants
-// or above. In v6, only some variants support them. For simplicity, we only
-// use exclusive 64-bit load/store in V7 or above.
-#if defined(ARMV7)
-# define BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
-#endif
-
-typedef int32_t Atomic32;
-
-namespace base {
-namespace subtle {
-
-typedef int64_t Atomic64;
-
-// 32-bit low-level ops
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 oldval, res;
-  do {
-    __asm__ __volatile__(
-        "ldrex   %1, [%3]\n"
-        "mov     %0, #0\n"
-        "teq     %1, %4\n"
-        // The following IT (if-then) instruction is needed for the subsequent
-        // conditional instruction STREXEQ when compiling in THUMB mode.
-        // In ARM mode, the compiler/assembler will not generate any code for it.
-        "it      eq\n"
-        "strexeq %0, %5, [%3]\n"
-        : "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
-        : "r" (ptr), "Ir" (old_value), "r" (new_value)
-        : "cc");
-  } while (res);
-  return oldval;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value) {
-  Atomic32 tmp, old;
-  __asm__ __volatile__(
-      "1:\n"
-      "ldrex %1, [%2]\n"
-      "strex %0, %3, [%2]\n"
-      "teq %0, #0\n"
-      "bne 1b"
-      : "=&r" (tmp), "=&r" (old)
-      : "r" (ptr), "r" (new_value)
-      : "cc", "memory");
-  return old;
-}
-
-inline void MemoryBarrier() {
-#if !defined(ARMV7)
-  uint32_t dest = 0;
-  __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" :"=&r"(dest) : : "memory");
-#else
-  __asm__ __volatile__("dmb" : : : "memory");
-#endif
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value);
-  MemoryBarrier();
-  return old_value;
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  MemoryBarrier();
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  MemoryBarrier();
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
-  MemoryBarrier();
-  *ptr = value;
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  Atomic32 value = *ptr;
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-// 64-bit versions are only available if LDREXD and STREXD instructions
-// are available.
-#ifdef BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD - -#define BASE_HAS_ATOMIC64 1 - -inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { - Atomic64 oldval, res; - do { - __asm__ __volatile__( - "ldrexd %1, [%3]\n" - "mov %0, #0\n" - "teq %Q1, %Q4\n" - // The following IT (if-then) instructions are needed for the subsequent - // conditional instructions when compiling in THUMB mode. - // In ARM mode, the compiler/assembler will not generate any code for it. - "it eq\n" - "teqeq %R1, %R4\n" - "it eq\n" - "strexdeq %0, %5, [%3]\n" - : "=&r" (res), "=&r" (oldval), "+Q" (*ptr) - : "r" (ptr), "Ir" (old_value), "r" (new_value) - : "cc"); - } while (res); - return oldval; -} - -inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - int store_failed; - Atomic64 old; - __asm__ __volatile__( - "1:\n" - "ldrexd %1, [%2]\n" - "strexd %0, %3, [%2]\n" - "teq %0, #0\n" - "bne 1b" - : "=&r" (store_failed), "=&r" (old) - : "r" (ptr), "r" (new_value) - : "cc", "memory"); - return old; -} - -inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value); - MemoryBarrier(); - return old_value; -} - -inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - MemoryBarrier(); - return NoBarrier_AtomicExchange(ptr, new_value); -} - -inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { - int store_failed; - Atomic64 dummy; - __asm__ __volatile__( - "1:\n" - // Dummy load to lock cache line. - "ldrexd %1, [%3]\n" - "strexd %0, %2, [%3]\n" - "teq %0, #0\n" - "bne 1b" - : "=&r" (store_failed), "=&r"(dummy) - : "r"(value), "r" (ptr) - : "cc", "memory"); -} - -inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - Atomic64 res; - __asm__ __volatile__( - "ldrexd %0, [%1]\n" - "clrex\n" - : "=r" (res) - : "r"(ptr), "Q"(*ptr)); - return res; -} - -#else // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD - -inline void NotImplementedFatalError(const char *function_name) { - fprintf(stderr, "64-bit %s() not implemented on this platform\n", - function_name); - abort(); -} - -inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { - NotImplementedFatalError("NoBarrier_CompareAndSwap"); - return 0; -} - -inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - NotImplementedFatalError("NoBarrier_AtomicExchange"); - return 0; -} - -inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - NotImplementedFatalError("Acquire_AtomicExchange"); - return 0; -} - -inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - NotImplementedFatalError("Release_AtomicExchange"); - return 0; -} - -inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { - NotImplementedFatalError("NoBarrier_Store"); -} - -inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - NotImplementedFatalError("NoBarrier_Load"); - return 0; -} - -#endif // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD - -inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { - NoBarrier_Store(ptr, value); - MemoryBarrier(); -} - -inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { - MemoryBarrier(); - NoBarrier_Store(ptr, value); -} - -inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { - Atomic64 value = NoBarrier_Load(ptr); - MemoryBarrier(); - return value; -} - 
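
[Editor's note] The exclusive-monitor loops above (LDREX/STREX, and LDREXD/STREXD for 64-bit) are the same retry pattern that `compare_exchange_weak` lowers to on ARM, including the spurious failure when the store-exclusive loses the reservation. A hedged sketch of the deleted NoBarrier_AtomicExchange in those portable terms (the function name is ours):

```cpp
#include <atomic>
#include <cstdint>

inline int32_t relaxed_exchange(std::atomic<int32_t>* ptr, int32_t new_value) {
  int32_t old = ptr->load(std::memory_order_relaxed);
  // A failed STREX surfaces as a spurious compare_exchange_weak failure,
  // so we loop exactly like the "bne 1b" in the inline assembly above.
  while (!ptr->compare_exchange_weak(old, new_value,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed)) {
  }
  return old;
}
```
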
-inline Atomic64 Release_Load(volatile const Atomic64* ptr) { - MemoryBarrier(); - return NoBarrier_Load(ptr); -} - -inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { - Atomic64 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value); - MemoryBarrier(); - return value; -} - -inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { - MemoryBarrier(); - return NoBarrier_CompareAndSwap(ptr, old_value, new_value); -} - -} // namespace subtle ends -} // namespace base ends - -#endif // BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_ diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-gcc.h b/contrib/libtcmalloc/src/base/atomicops-internals-gcc.h deleted file mode 100644 index f8d27863cb7..00000000000 --- a/contrib/libtcmalloc/src/base/atomicops-internals-gcc.h +++ /dev/null @@ -1,203 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2014, Linaro -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// ---
-//
-// Author: Riku Voipio, riku.voipio@linaro.org
-//
-// atomic primitives implemented with gcc atomic intrinsics:
-// http://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
-//
-
-#ifndef BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_
-#define BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "base/basictypes.h"
-
-typedef int32_t Atomic32;
-
-namespace base {
-namespace subtle {
-
-typedef int64_t Atomic64;
-
-inline void MemoryBarrier() {
-  __sync_synchronize();
-}
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 prev_value = old_value;
-  __atomic_compare_exchange_n(ptr, &prev_value, new_value,
-                              0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-  return prev_value;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_RELAXED);
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_ACQUIRE);
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_RELEASE);
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 prev_value = old_value;
-  __atomic_compare_exchange_n(ptr, &prev_value, new_value,
-                              0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
-  return prev_value;
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 prev_value = old_value;
-  __atomic_compare_exchange_n(ptr, &prev_value, new_value,
-                              0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
-  return prev_value;
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
-  MemoryBarrier();
-  *ptr = value;
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  Atomic32 value = *ptr;
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-// 64-bit versions
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  Atomic64 prev_value = old_value;
-  __atomic_compare_exchange_n(ptr, &prev_value, new_value,
-                              0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-  return prev_value;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_RELAXED);
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_ACQUIRE);
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_RELEASE);
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  Atomic64 prev_value = old_value;
-  __atomic_compare_exchange_n(ptr, &prev_value, new_value,
-                              0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
-  return prev_value;
-}
-
-inline Atomic64
Release_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { - Atomic64 prev_value = old_value; - __atomic_compare_exchange_n(ptr, &prev_value, new_value, - 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); - return prev_value; -} - -inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { - *ptr = value; -} - -inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { - *ptr = value; - MemoryBarrier(); -} - -inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { - MemoryBarrier(); - *ptr = value; -} - -inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - return *ptr; -} - -inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { - Atomic64 value = *ptr; - MemoryBarrier(); - return value; -} - -inline Atomic64 Release_Load(volatile const Atomic64* ptr) { - MemoryBarrier(); - return *ptr; -} - -} // namespace base::subtle -} // namespace base - -#endif // BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_ diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-linuxppc.h b/contrib/libtcmalloc/src/base/atomicops-internals-linuxppc.h deleted file mode 100644 index b52fdf0d1ec..00000000000 --- a/contrib/libtcmalloc/src/base/atomicops-internals-linuxppc.h +++ /dev/null @@ -1,437 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -/* Copyright (c) 2008, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - */ - -// Implementation of atomic operations for ppc-linux. This file should not -// be included directly. Clients should instead include -// "base/atomicops.h". 
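
[Editor's note] The gcc-generic port whose removal just completed is the thinnest of these files precisely because the `__atomic_*` builtins take an explicit memory order. A small self-contained demonstration of the pattern it wrapped (gcc/clang only; the `main` harness is ours):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  int64_t v = 1;
  int64_t expected = 1;
  // Acquire CAS, the same call the deleted Acquire_CompareAndSwap made:
  // on success v becomes 2; on failure `expected` is rewritten to the
  // value actually observed.
  bool ok = __atomic_compare_exchange_n(&v, &expected, (int64_t)2,
                                        0 /* strong */,
                                        __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
  std::printf("ok=%d v=%lld\n", (int)ok, (long long)v);
  return 0;
}
```
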
- -#ifndef BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ -#define BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ - -typedef int32_t Atomic32; - -#ifdef __PPC64__ -#define BASE_HAS_ATOMIC64 1 -#endif - -namespace base { -namespace subtle { - -static inline void _sync(void) { - __asm__ __volatile__("sync": : : "memory"); -} - -static inline void _lwsync(void) { - // gcc defines __NO_LWSYNC__ when appropriate; see - // http://gcc.gnu.org/ml/gcc-patches/2006-11/msg01238.html -#ifdef __NO_LWSYNC__ - __asm__ __volatile__("msync": : : "memory"); -#else - __asm__ __volatile__("lwsync": : : "memory"); -#endif -} - -static inline void _isync(void) { - __asm__ __volatile__("isync": : : "memory"); -} - -static inline Atomic32 OSAtomicAdd32(Atomic32 amount, Atomic32 *value) { - Atomic32 t; - __asm__ __volatile__( -"1: lwarx %0,0,%3\n\ - add %0,%2,%0\n\ - stwcx. %0,0,%3 \n\ - bne- 1b" - : "=&r" (t), "+m" (*value) - : "r" (amount), "r" (value) - : "cc"); - return t; -} - -static inline Atomic32 OSAtomicAdd32Barrier(Atomic32 amount, Atomic32 *value) { - Atomic32 t; - _lwsync(); - t = OSAtomicAdd32(amount, value); - // This is based on the code snippet in the architecture manual (Vol - // 2, Appendix B). It's a little tricky: correctness depends on the - // fact that the code right before this (in OSAtomicAdd32) has a - // conditional branch with a data dependency on the update. - // Otherwise, we'd have to use sync. - _isync(); - return t; -} - -static inline bool OSAtomicCompareAndSwap32(Atomic32 old_value, - Atomic32 new_value, - Atomic32 *value) { - Atomic32 prev; - __asm__ __volatile__( -"1: lwarx %0,0,%2\n\ - cmpw 0,%0,%3\n\ - bne- 2f\n\ - stwcx. %4,0,%2\n\ - bne- 1b\n\ -2:" - : "=&r" (prev), "+m" (*value) - : "r" (value), "r" (old_value), "r" (new_value) - : "cc"); - return prev == old_value; -} - -static inline Atomic32 OSAtomicCompareAndSwap32Acquire(Atomic32 old_value, - Atomic32 new_value, - Atomic32 *value) { - Atomic32 t; - t = OSAtomicCompareAndSwap32(old_value, new_value, value); - // This is based on the code snippet in the architecture manual (Vol - // 2, Appendix B). It's a little tricky: correctness depends on the - // fact that the code right before this (in - // OSAtomicCompareAndSwap32) has a conditional branch with a data - // dependency on the update. Otherwise, we'd have to use sync. - _isync(); - return t; -} - -static inline Atomic32 OSAtomicCompareAndSwap32Release(Atomic32 old_value, - Atomic32 new_value, - Atomic32 *value) { - _lwsync(); - return OSAtomicCompareAndSwap32(old_value, new_value, value); -} - -typedef int64_t Atomic64; - -inline void MemoryBarrier() { - // This can't be _lwsync(); we need to order the immediately - // preceding stores against any load that may follow, but lwsync - // doesn't guarantee that. - _sync(); -} - -// 32-bit Versions. 
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap32(old_value, new_value,
-                                 const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr,
-                                         Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap32(old_value, new_value,
-                                     const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
-                                       Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap32Acquire(old_value, new_value,
-                                            const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
-                                       Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap32Release(old_value, new_value,
-                                            const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap32Acquire(old_value, new_value,
-                                        const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap32Release(old_value, new_value,
-                                        const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-#ifdef __PPC64__
-
-// 64-bit Versions.
-
-static inline Atomic64 OSAtomicAdd64(Atomic64 amount, Atomic64 *value) {
-  Atomic64 t;
-  __asm__ __volatile__(
-"1:   ldarx %0,0,%3\n\
-      add %0,%2,%0\n\
-      stdcx. %0,0,%3 \n\
-      bne- 1b"
-  : "=&r" (t), "+m" (*value)
-  : "r" (amount), "r" (value)
-  : "cc");
-  return t;
-}
-
-static inline Atomic64 OSAtomicAdd64Barrier(Atomic64 amount, Atomic64 *value) {
-  Atomic64 t;
-  _lwsync();
-  t = OSAtomicAdd64(amount, value);
-  // This is based on the code snippet in the architecture manual (Vol
-  // 2, Appendix B).  It's a little tricky: correctness depends on the
-  // fact that the code right before this (in OSAtomicAdd64) has a
-  // conditional branch with a data dependency on the update.
-  // Otherwise, we'd have to use sync.
-  _isync();
-  return t;
-}
-
-static inline bool OSAtomicCompareAndSwap64(Atomic64 old_value,
-                                            Atomic64 new_value,
-                                            Atomic64 *value) {
-  Atomic64 prev;
-  __asm__ __volatile__(
-"1:   ldarx %0,0,%2\n\
-      cmpd 0,%0,%3\n\
-      bne- 2f\n\
-      stdcx. %4,0,%2\n\
-      bne- 1b\n\
-2:"
-  : "=&r" (prev), "+m" (*value)
-  : "r" (value), "r" (old_value), "r" (new_value)
-  : "cc");
-  return prev == old_value;
-}
-
-static inline Atomic64 OSAtomicCompareAndSwap64Acquire(Atomic64 old_value,
-                                                       Atomic64 new_value,
-                                                       Atomic64 *value) {
-  Atomic64 t;
-  t = OSAtomicCompareAndSwap64(old_value, new_value, value);
-  // This is based on the code snippet in the architecture manual (Vol
-  // 2, Appendix B).  It's a little tricky: correctness depends on the
-  // fact that the code right before this (in
-  // OSAtomicCompareAndSwap64) has a conditional branch with a data
-  // dependency on the update.  Otherwise, we'd have to use sync.
-  _isync();
-  return t;
-}
-
-static inline Atomic64 OSAtomicCompareAndSwap64Release(Atomic64 old_value,
-                                                       Atomic64 new_value,
-                                                       Atomic64 *value) {
-  _lwsync();
-  return OSAtomicCompareAndSwap64(old_value, new_value, value);
-}
-
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  Atomic64 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap64(old_value, new_value,
-                                 const_cast<Atomic64*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr,
-                                         Atomic64 new_value) {
-  Atomic64 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap64(old_value, new_value,
-                                     const_cast<Atomic64*>(ptr)));
-  return old_value;
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
-                                       Atomic64 new_value) {
-  Atomic64 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap64Acquire(old_value, new_value,
-                                            const_cast<Atomic64*>(ptr)));
-  return old_value;
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
-                                       Atomic64 new_value) {
-  Atomic64 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap64Release(old_value, new_value,
-                                            const_cast<Atomic64*>(ptr)));
-  return old_value;
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  Atomic64 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap64Acquire(old_value, new_value,
-                                        const_cast<Atomic64*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  Atomic64 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap64Release(old_value, new_value,
-                                        const_cast<Atomic64*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-#endif
-
-inline void NoBarrier_Store(volatile Atomic32 *ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) {
-  *ptr = value;
-  // This can't be _lwsync(); we need to order the immediately
-  // preceding stores against any load that may follow, but lwsync
-  // doesn't guarantee that.
-  _sync();
-}
-
-inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) {
-  _lwsync();
-  *ptr = value;
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32 *ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) {
-  Atomic32 value = *ptr;
-  _lwsync();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32 *ptr) {
-  // This can't be _lwsync(); we need to order the immediately
-  // preceding stores against any load that may follow, but lwsync
-  // doesn't guarantee that.
-  _sync();
-  return *ptr;
-}
-
-#ifdef __PPC64__
-
-// 64-bit Versions.
-
-inline void NoBarrier_Store(volatile Atomic64 *ptr, Atomic64 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) {
-  *ptr = value;
-  // This can't be _lwsync(); we need to order the immediately
-  // preceding stores against any load that may follow, but lwsync
-  // doesn't guarantee that.
- _sync(); -} - -inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { - _lwsync(); - *ptr = value; -} - -inline Atomic64 NoBarrier_Load(volatile const Atomic64 *ptr) { - return *ptr; -} - -inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { - Atomic64 value = *ptr; - _lwsync(); - return value; -} - -inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { - // This can't be _lwsync(); we need to order the immediately - // preceding stores against any load that may follow, but lwsync - // doesn't guarantee that. - _sync(); - return *ptr; -} - -#endif - -} // namespace base::subtle -} // namespace base - -#endif // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-macosx.h b/contrib/libtcmalloc/src/base/atomicops-internals-macosx.h deleted file mode 100644 index b5130d4f4d7..00000000000 --- a/contrib/libtcmalloc/src/base/atomicops-internals-macosx.h +++ /dev/null @@ -1,370 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -/* Copyright (c) 2006, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// Implementation of atomic operations for Mac OS X. This file should not -// be included directly. Clients should instead include -// "base/atomicops.h". - -#ifndef BASE_ATOMICOPS_INTERNALS_MACOSX_H_ -#define BASE_ATOMICOPS_INTERNALS_MACOSX_H_ - -typedef int32_t Atomic32; - -// MacOS uses long for intptr_t, AtomicWord and Atomic32 are always different -// on the Mac, even when they are the same size. Similarly, on __ppc64__, -// AtomicWord and Atomic64 are always different. Thus, we need explicit -// casting. 
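
[Editor's note] The lwsync/sync placement in the PPC file removed above is the hand-written form of acquire/release ordering. A hedged sketch of the same contract in portable C++ (a toy producer/consumer of our own devising; on PPC a compiler typically emits lwsync around these accesses, much as the deleted Release_Store and Acquire_Load did):

```cpp
#include <atomic>
#include <cstdint>

std::atomic<int32_t> ready{0};
int32_t payload = 0;

void producer() {
  payload = 42;                              // plain store
  // Release store: all earlier writes are ordered before it.
  ready.store(1, std::memory_order_release);
}

int32_t consumer() {
  // Acquire load: ordered before the payload read that follows.
  while (ready.load(std::memory_order_acquire) == 0) {
  }
  return payload;  // guaranteed to observe 42
}
```
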
-#ifdef __LP64__
-#define AtomicWordCastType base::subtle::Atomic64
-#else
-#define AtomicWordCastType Atomic32
-#endif
-
-#if defined(__LP64__) || defined(__i386__)
-#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
-#endif
-
-#include <libkern/OSAtomic.h>
-
-namespace base {
-namespace subtle {
-
-#if !defined(__LP64__) && defined(__ppc__)
-
-// The Mac 64-bit OSAtomic implementations are not available for 32-bit PowerPC,
-// while the underlying assembly instructions are available on only some
-// implementations of PowerPC.
-
-// The following inline functions will fail with the error message at compile
-// time ONLY IF they are called.  So it is safe to use this header if user
-// code only calls AtomicWord and Atomic32 operations.
-//
-// NOTE(vchen): Implementation notes to implement the atomic ops below may
-// be found in "PowerPC Virtual Environment Architecture, Book II,
-// Version 2.02", January 28, 2005, Appendix B, page 46.  Unfortunately,
-// extra care must be taken to ensure data are properly 8-byte aligned, and
-// that data are returned correctly according to Mac OS X ABI specs.
-
-inline int64_t OSAtomicCompareAndSwap64(
-    int64_t oldValue, int64_t newValue, int64_t *theValue) {
-  __asm__ __volatile__(
-      "_OSAtomicCompareAndSwap64_not_supported_for_32_bit_ppc\n\t");
-  return 0;
-}
-
-inline int64_t OSAtomicAdd64(int64_t theAmount, int64_t *theValue) {
-  __asm__ __volatile__(
-      "_OSAtomicAdd64_not_supported_for_32_bit_ppc\n\t");
-  return 0;
-}
-
-inline int64_t OSAtomicCompareAndSwap64Barrier(
-    int64_t oldValue, int64_t newValue, int64_t *theValue) {
-  int64_t prev = OSAtomicCompareAndSwap64(oldValue, newValue, theValue);
-  OSMemoryBarrier();
-  return prev;
-}
-
-inline int64_t OSAtomicAdd64Barrier(
-    int64_t theAmount, int64_t *theValue) {
-  int64_t new_val = OSAtomicAdd64(theAmount, theValue);
-  OSMemoryBarrier();
-  return new_val;
-}
-#endif
-
-typedef int64_t Atomic64;
-
-inline void MemoryBarrier() {
-  OSMemoryBarrier();
-}
-
-// 32-bit Versions.
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap32(old_value, new_value,
-                                 const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr,
-                                         Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap32(old_value, new_value,
-                                     const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
-                                       Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap32Barrier(old_value, new_value,
-                                            const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
-                                       Atomic32 new_value) {
-  return Acquire_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap32Barrier(old_value, new_value,
-                                        const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return Acquire_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) {
-  MemoryBarrier();
-  *ptr = value;
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) {
-  Atomic32 value = *ptr;
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32 *ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-// 64-bit version
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  Atomic64 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap64(old_value, new_value,
-                                 const_cast<Atomic64*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr,
-                                         Atomic64 new_value) {
-  Atomic64 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap64(old_value, new_value,
-                                     const_cast<Atomic64*>(ptr)));
-  return old_value;
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
-                                       Atomic64 new_value) {
-  Atomic64 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap64Barrier(old_value, new_value,
-                                            const_cast<Atomic64*>(ptr)));
-  return old_value;
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
-                                       Atomic64 new_value) {
-  return Acquire_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  Atomic64 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap64Barrier(old_value, new_value,
-                                        const_cast<Atomic64*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  // The lib kern interface does not
distinguish between - // Acquire and Release memory barriers; they are equivalent. - return Acquire_CompareAndSwap(ptr, old_value, new_value); -} - -#ifdef __LP64__ - -// 64-bit implementation on 64-bit platform - -inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { - *ptr = value; -} - -inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { - *ptr = value; - MemoryBarrier(); -} - -inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { - MemoryBarrier(); - *ptr = value; -} - -inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - return *ptr; -} - -inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { - Atomic64 value = *ptr; - MemoryBarrier(); - return value; -} - -inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { - MemoryBarrier(); - return *ptr; -} - -#else - -// 64-bit implementation on 32-bit platform - -#if defined(__ppc__) - -inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { - __asm__ __volatile__( - "_NoBarrier_Store_not_supported_for_32_bit_ppc\n\t"); -} - -inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - __asm__ __volatile__( - "_NoBarrier_Load_not_supported_for_32_bit_ppc\n\t"); - return 0; -} - -#elif defined(__i386__) - -inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { - __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic - "movq %%mm0, %0\n\t" // moves (ptr could be read-only) - "emms\n\t" // Reset FP registers - : "=m" (*ptr) - : "m" (value) - : // mark the FP stack and mmx registers as clobbered - "st", "st(1)", "st(2)", "st(3)", "st(4)", - "st(5)", "st(6)", "st(7)", "mm0", "mm1", - "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); - -} - -inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - Atomic64 value; - __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic - "movq %%mm0, %0\n\t" // moves (ptr could be read-only) - "emms\n\t" // Reset FP registers - : "=m" (value) - : "m" (*ptr) - : // mark the FP stack and mmx registers as clobbered - "st", "st(1)", "st(2)", "st(3)", "st(4)", - "st(5)", "st(6)", "st(7)", "mm0", "mm1", - "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); - - return value; -} -#endif - - -inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { - NoBarrier_Store(ptr, value); - MemoryBarrier(); -} - -inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { - MemoryBarrier(); - NoBarrier_Store(ptr, value); -} - -inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { - Atomic64 value = NoBarrier_Load(ptr); - MemoryBarrier(); - return value; -} - -inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { - MemoryBarrier(); - return NoBarrier_Load(ptr); -} -#endif // __LP64__ - -} // namespace base::subtle -} // namespace base - -#endif // BASE_ATOMICOPS_INTERNALS_MACOSX_H_ diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-mips.h b/contrib/libtcmalloc/src/base/atomicops-internals-mips.h deleted file mode 100644 index 4bfd7f6c70d..00000000000 --- a/contrib/libtcmalloc/src/base/atomicops-internals-mips.h +++ /dev/null @@ -1,323 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -/* Copyright (c) 2013, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// Author: Jovan Zelincevic -// based on atomicops-internals by Sanjay Ghemawat - -// This file is an internal atomic implementation, use base/atomicops.h instead. -// -// This code implements MIPS atomics. - -#ifndef BASE_ATOMICOPS_INTERNALS_MIPS_H_ -#define BASE_ATOMICOPS_INTERNALS_MIPS_H_ - -#if (_MIPS_ISA == _MIPS_ISA_MIPS64) -#define BASE_HAS_ATOMIC64 1 -#endif - -typedef int32_t Atomic32; - -namespace base { -namespace subtle { - -// Atomically execute: -// result = *ptr; -// if (*ptr == old_value) -// *ptr = new_value; -// return result; -// -// I.e., replace "*ptr" with "new_value" if "*ptr" used to be "old_value". -// Always return the old value of "*ptr" -// -// This routine implies no memory barriers. -inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, - Atomic32 new_value) -{ - Atomic32 prev, tmp; - __asm__ volatile( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "ll %0, %5 \n" // prev = *ptr - "bne %0, %3, 2f \n" // if (prev != old_value) goto 2 - " move %2, %4 \n" // tmp = new_value - "sc %2, %1 \n" // *ptr = tmp (with atomic check) - "beqz %2, 1b \n" // start again on atomic error - " nop \n" // delay slot nop - "2: \n" - - ".set pop \n" - : "=&r" (prev), "=m" (*ptr), - "=&r" (tmp) - : "Ir" (old_value), "r" (new_value), - "m" (*ptr) - : "memory" - ); - return prev; -} - -// Atomically store new_value into *ptr, returning the previous value held in -// *ptr. This routine implies no memory barriers. -inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, - Atomic32 new_value) -{ - Atomic32 temp, old; - __asm__ volatile( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "ll %1, %2 \n" // old = *ptr - "move %0, %3 \n" // temp = new_value - "sc %0, %2 \n" // *ptr = temp (with atomic check) - "beqz %0, 1b \n" // start again on atomic error - " nop \n" // delay slot nop - - ".set pop \n" - : "=&r" (temp), "=&r" (old), - "=m" (*ptr) - : "r" (new_value), "m" (*ptr) - : "memory" - ); - return old; -} - -inline void MemoryBarrier() -{ - __asm__ volatile("sync" : : : "memory"); -} - -// "Acquire" operations -// ensure that no later memory access can be reordered ahead of the operation. -// "Release" operations ensure that no previous memory access can be reordered -// after the operation. 
"Barrier" operations have both "Acquire" and "Release" -// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory -// access. -inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, - Atomic32 new_value) -{ - Atomic32 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); - MemoryBarrier(); - return res; -} - -inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, - Atomic32 new_value) -{ - MemoryBarrier(); - Atomic32 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); - return res; -} - -inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) -{ - *ptr = value; -} - -inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, - Atomic32 new_value) -{ - Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value); - MemoryBarrier(); - return old_value; -} - -inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, - Atomic32 new_value) -{ - MemoryBarrier(); - return NoBarrier_AtomicExchange(ptr, new_value); -} - -inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) -{ - *ptr = value; - MemoryBarrier(); -} - -inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) -{ - MemoryBarrier(); - *ptr = value; -} - -inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) -{ - return *ptr; -} - -inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) -{ - Atomic32 value = *ptr; - MemoryBarrier(); - return value; -} - -inline Atomic32 Release_Load(volatile const Atomic32* ptr) -{ - MemoryBarrier(); - return *ptr; -} - -#if (_MIPS_ISA == _MIPS_ISA_MIPS64) || (_MIPS_SIM == _MIPS_SIM_ABI64) - -typedef int64_t Atomic64; - -inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) -{ - Atomic64 prev, tmp; - __asm__ volatile( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "lld %0, %5 \n" // prev = *ptr - "bne %0, %3, 2f \n" // if (prev != old_value) goto 2 - " move %2, %4 \n" // tmp = new_value - "scd %2, %1 \n" // *ptr = tmp (with atomic check) - "beqz %2, 1b \n" // start again on atomic error - " nop \n" // delay slot nop - "2: \n" - - ".set pop \n" - : "=&r" (prev), "=m" (*ptr), - "=&r" (tmp) - : "Ir" (old_value), "r" (new_value), - "m" (*ptr) - : "memory" - ); - return prev; -} - -inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) -{ - Atomic64 temp, old; - __asm__ volatile( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "lld %1, %2 \n" // old = *ptr - "move %0, %3 \n" // temp = new_value - "scd %0, %2 \n" // *ptr = temp (with atomic check) - "beqz %0, 1b \n" // start again on atomic error - " nop \n" // delay slot nop - - ".set pop \n" - : "=&r" (temp), "=&r" (old), - "=m" (*ptr) - : "r" (new_value), "m" (*ptr) - : "memory" - ); - return old; -} - -inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) -{ - Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value); - MemoryBarrier(); - return old_value; -} - -inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) -{ - Atomic64 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); - MemoryBarrier(); - return res; -} - -inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) -{ - MemoryBarrier(); - Atomic64 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); - return res; -} - -inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) -{ - *ptr = value; -} - 
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value)
-{
-    MemoryBarrier();
-    return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value)
-{
-    *ptr = value;
-    MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value)
-{
-    MemoryBarrier();
-    *ptr = value;
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr)
-{
-    return *ptr;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64* ptr)
-{
-    Atomic64 value = *ptr;
-    MemoryBarrier();
-    return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64* ptr)
-{
-    MemoryBarrier();
-    return *ptr;
-}
-
-#endif
-
-}   // namespace base::subtle
-}   // namespace base
-
-#endif  // BASE_ATOMICOPS_INTERNALS_MIPS_H_
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-windows.h b/contrib/libtcmalloc/src/base/atomicops-internals-windows.h
deleted file mode 100644
index 93ced8770d4..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-windows.h
+++ /dev/null
@@ -1,457 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- */
-
-// Implementation of atomic operations using Windows API
-// functions.  This file should not be included directly.  Clients
-// should instead include "base/atomicops.h".
-
-#ifndef BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
-#define BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "base/basictypes.h"  // For COMPILE_ASSERT
-
-typedef int32 Atomic32;
-
-#if defined(_WIN64)
-#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
-#endif
-
-namespace base {
-namespace subtle {
-
-typedef int64 Atomic64;
-
-// 32-bit low-level operations on any platform
-
-extern "C" {
-// We use windows intrinsics when we can (they seem to be supported
-// well on MSVC 8.0 and above).  Unfortunately, in some
-// environments, <windows.h> and <intrin.h> have conflicting
-// declarations of some other intrinsics, breaking compilation:
-//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
-// Therefore, we simply declare the relevant intrinsics ourself.
-
-// MinGW has a bug in the header files where it doesn't indicate the
-// first argument is volatile -- they're not up to date.  See
-//   http://readlist.com/lists/lists.sourceforge.net/mingw-users/0/3861.html
-// We have to const_cast away the volatile to avoid compiler warnings.
-// TODO(csilvers): remove this once MinGW has updated MinGW/include/winbase.h
-#if defined(__MINGW32__)
-inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
-                                           LONG newval, LONG oldval) {
-  return ::InterlockedCompareExchange(const_cast<LONG*>(ptr), newval, oldval);
-}
-inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
-  return ::InterlockedExchange(const_cast<LONG*>(ptr), newval);
-}
-inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
-  return ::InterlockedExchangeAdd(const_cast<LONG*>(ptr), increment);
-}
-
-#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
-// Unfortunately, in some environments, <windows.h> and <intrin.h>
-// have conflicting declarations of some intrinsics, breaking
-// compilation.  So we declare the intrinsics we need ourselves.  See
-//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
-LONG _InterlockedCompareExchange(volatile LONG* ptr, LONG newval, LONG oldval);
-#pragma intrinsic(_InterlockedCompareExchange)
-inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
-                                           LONG newval, LONG oldval) {
-  return _InterlockedCompareExchange(ptr, newval, oldval);
-}
-
-LONG _InterlockedExchange(volatile LONG* ptr, LONG newval);
-#pragma intrinsic(_InterlockedExchange)
-inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
-  return _InterlockedExchange(ptr, newval);
-}
-
-LONG _InterlockedExchangeAdd(volatile LONG* ptr, LONG increment);
-#pragma intrinsic(_InterlockedExchangeAdd)
-inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
-  return _InterlockedExchangeAdd(ptr, increment);
-}
-
-#else
-inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
-                                           LONG newval, LONG oldval) {
-  return ::InterlockedCompareExchange(ptr, newval, oldval);
-}
-inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
-  return ::InterlockedExchange(ptr, newval);
-}
-inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
-  return ::InterlockedExchangeAdd(ptr, increment);
-}
-
-#endif  // ifdef __MINGW32__
-}  // extern "C"
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  LONG result = FastInterlockedCompareExchange(
-      reinterpret_cast<volatile LONG*>(ptr),
-      static_cast<LONG>(new_value),
-      static_cast<LONG>(old_value));
-  return static_cast<Atomic32>(result);
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value) {
-  LONG result = FastInterlockedExchange(
-      reinterpret_cast<volatile LONG*>(ptr),
-      static_cast<LONG>(new_value));
-  return static_cast<Atomic32>(result);
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  // FastInterlockedExchange has both acquire and release memory barriers.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  // FastInterlockedExchange has both acquire and release memory barriers.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-}  // namespace base::subtle
-}  // namespace base
-
-
-// In msvc8/vs2005, winnt.h already contains a definition for
-// MemoryBarrier in the global namespace.  Add it there for earlier
-// versions and forward to it from within the namespace.
-#if !(defined(_MSC_VER) && _MSC_VER >= 1400)
-inline void MemoryBarrier() {
-  Atomic32 value = 0;
-  base::subtle::NoBarrier_AtomicExchange(&value, 0);
-  // actually acts as a barrier in this implementation
-}
-#endif
-
-namespace base {
-namespace subtle {
-
-inline void MemoryBarrier() {
-  ::MemoryBarrier();
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  Acquire_AtomicExchange(ptr, value);
-}
-
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;  // works w/o barrier for current Intel chips as of June 2005
-  // See comments in Atomic64 version of Release_Store() below.
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  Atomic32 value = *ptr;
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-// 64-bit operations
-
-#if defined(_WIN64) || defined(__MINGW64__)
-
-// 64-bit low-level operations on 64-bit platform.
-
-COMPILE_ASSERT(sizeof(Atomic64) == sizeof(PVOID), atomic_word_is_atomic);
-
-// These are the intrinsics needed for 64-bit operations.  Similar to the
-// 32-bit case above.
-
-extern "C" {
-#if defined(__MINGW64__)
-inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
-                                                   PVOID newval, PVOID oldval) {
-  return ::InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
-                                             newval, oldval);
-}
-inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
-  return ::InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
-}
-inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
-                                             LONGLONG increment) {
-  return ::InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
-}
-
-#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
-// Like above, we need to declare the intrinsics ourselves.
-PVOID _InterlockedCompareExchangePointer(volatile PVOID* ptr,
-                                         PVOID newval, PVOID oldval);
-#pragma intrinsic(_InterlockedCompareExchangePointer)
-inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
-                                                   PVOID newval, PVOID oldval) {
-  return _InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
-                                            newval, oldval);
-}
-
-PVOID _InterlockedExchangePointer(volatile PVOID* ptr, PVOID newval);
-#pragma intrinsic(_InterlockedExchangePointer)
-inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
-  return _InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
-}
-
-LONGLONG _InterlockedExchangeAdd64(volatile LONGLONG* ptr, LONGLONG increment);
-#pragma intrinsic(_InterlockedExchangeAdd64)
-inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
-                                             LONGLONG increment) {
-  return _InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
-}
-
-#else
-inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
-                                                   PVOID newval, PVOID oldval) {
-  return ::InterlockedCompareExchangePointer(ptr, newval, oldval);
-}
-inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
-  return ::InterlockedExchangePointer(ptr, newval);
-}
-inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
-                                             LONGLONG increment) {
-  return ::InterlockedExchangeAdd64(ptr, increment);
-}
-
-#endif  // ifdef __MINGW64__
-}  // extern "C"
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  PVOID result = FastInterlockedCompareExchangePointer(
-      reinterpret_cast<volatile PVOID*>(ptr),
-      reinterpret_cast<PVOID>(new_value), reinterpret_cast<PVOID>(old_value));
-  return reinterpret_cast<Atomic64>(result);
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value) {
-  PVOID result = FastInterlockedExchangePointer(
-      reinterpret_cast<volatile PVOID*>(ptr),
-      reinterpret_cast<PVOID>(new_value));
-  return reinterpret_cast<Atomic64>(result);
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NoBarrier_AtomicExchange(ptr, value);
-  // acts as a barrier in this implementation
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
-  *ptr = value; // works w/o barrier for current Intel chips as of June 2005
-
-  // When new chips come out, check:
-  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
-  //  System Programming Guide, Chapter 7: Multiple-processor management,
-  //  Section 7.2, Memory Ordering.
-  // Last seen at:
-  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  return *ptr;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
-  Atomic64 value = *ptr;
-  return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-#else  // defined(_WIN64) || defined(__MINGW64__)
-
-// 64-bit low-level operations on 32-bit platform
-
-// TODO(vchen): The GNU assembly below must be converted to MSVC inline
-// assembly.  Then the file should be renamed to ...-x86-msvc.h, probably.
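(Aside, not part of the original tcmalloc sources: C++11's `<atomic>` eventually subsumed all of these hand-rolled shims. A minimal sketch of the two 64-bit primitives above, expressed against the standard library only:)

```cpp
#include <atomic>
#include <cstdint>

// Same contract as NoBarrier_CompareAndSwap above: always return the value
// *ptr held before the call, whether or not the swap happened.
inline std::int64_t NoBarrierCompareAndSwapCxx11(std::atomic<std::int64_t>* ptr,
                                                 std::int64_t old_value,
                                                 std::int64_t new_value) {
  // On failure, compare_exchange_strong writes the observed value back into
  // old_value, so old_value holds the previous *ptr either way.
  ptr->compare_exchange_strong(old_value, new_value,
                               std::memory_order_relaxed);
  return old_value;
}

inline std::int64_t NoBarrierAtomicExchangeCxx11(std::atomic<std::int64_t>* ptr,
                                                 std::int64_t new_value) {
  return ptr->exchange(new_value, std::memory_order_relaxed);
}
```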
- -inline void NotImplementedFatalError(const char *function_name) { - fprintf(stderr, "64-bit %s() not implemented on this platform\n", - function_name); - abort(); -} - -inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { -#if 0 // Not implemented - Atomic64 prev; - __asm__ __volatile__("movl (%3), %%ebx\n\t" // Move 64-bit new_value into - "movl 4(%3), %%ecx\n\t" // ecx:ebx - "lock; cmpxchg8b %1\n\t" // If edx:eax (old_value) same - : "=A" (prev) // as contents of ptr: - : "m" (*ptr), // ecx:ebx => ptr - "0" (old_value), // else: - "r" (&new_value) // old *ptr => edx:eax - : "memory", "%ebx", "%ecx"); - return prev; -#else - NotImplementedFatalError("NoBarrier_CompareAndSwap"); - return 0; -#endif -} - -inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { -#if 0 // Not implemented - __asm__ __volatile__( - "movl (%2), %%ebx\n\t" // Move 64-bit new_value into - "movl 4(%2), %%ecx\n\t" // ecx:ebx - "0:\n\t" - "movl %1, %%eax\n\t" // Read contents of ptr into - "movl 4%1, %%edx\n\t" // edx:eax - "lock; cmpxchg8b %1\n\t" // Attempt cmpxchg; if *ptr - "jnz 0b\n\t" // is no longer edx:eax, loop - : "=A" (new_value) - : "m" (*ptr), - "r" (&new_value) - : "memory", "%ebx", "%ecx"); - return new_value; // Now it's the previous value. -#else - NotImplementedFatalError("NoBarrier_AtomicExchange"); - return 0; -#endif -} - -inline void NoBarrier_Store(volatile Atomic64* ptrValue, Atomic64 value) -{ - __asm { - movq mm0, value; // Use mmx reg for 64-bit atomic moves - mov eax, ptrValue; - movq [eax], mm0; - emms; // Empty mmx state to enable FP registers - } -} - -inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { - NoBarrier_AtomicExchange(ptr, value); - // acts as a barrier in this implementation -} - -inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { - NoBarrier_Store(ptr, value); -} - -inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptrValue) -{ - Atomic64 value; - __asm { - mov eax, ptrValue; - movq mm0, [eax]; // Use mmx reg for 64-bit atomic moves - movq value, mm0; - emms; // Empty mmx state to enable FP registers - } - return value; -} - -inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { - Atomic64 value = NoBarrier_Load(ptr); - return value; -} - -inline Atomic64 Release_Load(volatile const Atomic64* ptr) { - MemoryBarrier(); - return NoBarrier_Load(ptr); -} - -#endif // defined(_WIN64) || defined(__MINGW64__) - - -inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - // FastInterlockedExchange has both acquire and release memory barriers. - return NoBarrier_AtomicExchange(ptr, new_value); -} - -inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - // FastInterlockedExchange has both acquire and release memory barriers. 
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-}  // namespace base::subtle
-}  // namespace base
-
-#endif  // BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-x86.cc b/contrib/libtcmalloc/src/base/atomicops-internals-x86.cc
deleted file mode 100644
index c3391e78234..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-x86.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2007, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * This module gets enough CPU information to optimize the
- * atomicops module on x86.
- */
-
-#include "base/atomicops.h"
-#include "base/basictypes.h"
-#include "base/googleinit.h"
-#include "base/logging.h"
-#include <string.h>
-
-// This file only makes sense with atomicops-internals-x86.h -- it
-// depends on structs that are defined in that file.  If atomicops.h
-// doesn't sub-include that file, then we aren't needed, and shouldn't
-// try to do anything.
-#ifdef BASE_ATOMICOPS_INTERNALS_X86_H_
-
-// Inline cpuid instruction.  In PIC compilations, %ebx contains the address
-// of the global offset table.  To avoid breaking such executables, this code
-// must preserve that register's value across cpuid instructions.
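(Aside, not from the original sources: modern GCC and Clang ship `<cpuid.h>`, whose `__get_cpuid()` helper performs this PIC-safe `%ebx` bookkeeping internally. A sketch of the same SSE2 probe against that helper:)

```cpp
#include <cpuid.h>

static bool HasSSE2() {
  unsigned int eax, ebx, ecx, edx;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    return false;            // CPUID leaf 1 not supported
  return (edx >> 26) & 1;    // EDX bit 26 == SSE2, as probed below
}
```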
-#if defined(__i386__)
-#define cpuid(a, b, c, d, inp) \
-  asm ("mov %%ebx, %%edi\n"    \
-       "cpuid\n"               \
-       "xchg %%edi, %%ebx\n"   \
-       : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
-#elif defined (__x86_64__)
-#define cpuid(a, b, c, d, inp) \
-  asm ("mov %%rbx, %%rdi\n"    \
-       "cpuid\n"               \
-       "xchg %%rdi, %%rbx\n"   \
-       : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
-#endif
-
-#if defined(cpuid)        // initialize the struct only on x86
-
-// Set the flags so that code will run correctly and conservatively
-// until InitGoogle() is called.
-struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures = {
-  false,          // no SSE2
-  false           // no cmpxchg16b
-};
-
-// Initialize the AtomicOps_Internalx86CPUFeatures struct.
-static void AtomicOps_Internalx86CPUFeaturesInit() {
-  uint32 eax;
-  uint32 ebx;
-  uint32 ecx;
-  uint32 edx;
-
-  // Get vendor string (issue CPUID with eax = 0)
-  cpuid(eax, ebx, ecx, edx, 0);
-  char vendor[13];
-  memcpy(vendor, &ebx, 4);
-  memcpy(vendor + 4, &edx, 4);
-  memcpy(vendor + 8, &ecx, 4);
-  vendor[12] = 0;
-
-  // get feature flags in ecx/edx, and family/model in eax
-  cpuid(eax, ebx, ecx, edx, 1);
-
-  int family = (eax >> 8) & 0xf;        // family and model fields
-  int model = (eax >> 4) & 0xf;
-  if (family == 0xf) {                  // use extended family and model fields
-    family += (eax >> 20) & 0xff;
-    model += ((eax >> 16) & 0xf) << 4;
-  }
-
-  // edx bit 26 is SSE2 which we use to tell us whether we can use mfence
-  AtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1);
-
-  // ecx bit 13 indicates whether the cmpxchg16b instruction is supported
-  AtomicOps_Internalx86CPUFeatures.has_cmpxchg16b = ((ecx >> 13) & 1);
-}
-
-REGISTER_MODULE_INITIALIZER(atomicops_x86, {
-  AtomicOps_Internalx86CPUFeaturesInit();
-});
-
-#endif
-
-#endif  /* ifdef BASE_ATOMICOPS_INTERNALS_X86_H_ */
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-x86.h b/contrib/libtcmalloc/src/base/atomicops-internals-x86.h
deleted file mode 100644
index e441ac7e673..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-x86.h
+++ /dev/null
@@ -1,391 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- */
-
-// Implementation of atomic operations for x86.  This file should not
-// be included directly.  Clients should instead include
-// "base/atomicops.h".
-
-#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
-#define BASE_ATOMICOPS_INTERNALS_X86_H_
-#include "base/basictypes.h"
-
-typedef int32_t Atomic32;
-#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
-
-
-// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
-// already matches Atomic32 or Atomic64, depending on the platform.
-
-
-// This struct is not part of the public API of this module; clients may not
-// use it.
-// Features of this x86.  Values may not be correct before main() is run,
-// but are set conservatively.
-struct AtomicOps_x86CPUFeatureStruct {
-  bool has_sse2;            // Processor has SSE2.
-  bool has_cmpxchg16b;      // Processor supports cmpxchg16b instruction.
-};
-
-ATTRIBUTE_VISIBILITY_HIDDEN
-extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
-
-
-#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
-
-
-namespace base {
-namespace subtle {
-
-typedef int64_t Atomic64;
-
-// 32-bit low-level operations on any platform.
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 prev;
-  __asm__ __volatile__("lock; cmpxchgl %1,%2"
-                       : "=a" (prev)
-                       : "q" (new_value), "m" (*ptr), "0" (old_value)
-                       : "memory");
-  return prev;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value) {
-  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
-                       : "=r" (new_value)
-                       : "m" (*ptr), "0" (new_value)
-                       : "memory");
-  return new_value;  // Now it's the previous value.
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
-  return old_val;
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  // xchgl already has release memory barrier semantics.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-  return x;
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-#if defined(__x86_64__)
-
-// 64-bit implementations of memory barrier can be simpler, because the
-// "mfence" instruction is guaranteed to exist.
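(Aside, not from the original file: in C++11 the equivalent full barrier is a one-liner, shown only for comparison:)

```cpp
#include <atomic>

inline void FullBarrierCxx11() {
  std::atomic_thread_fence(std::memory_order_seq_cst);  // mfence on x86-64
}
```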
-inline void MemoryBarrier() {
-  __asm__ __volatile__("mfence" : : : "memory");
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-#else
-
-inline void MemoryBarrier() {
-  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
-    __asm__ __volatile__("mfence" : : : "memory");
-  } else {  // mfence is faster but not present on PIII
-    Atomic32 x = 0;
-    Acquire_AtomicExchange(&x, 0);
-  }
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
-    *ptr = value;
-    __asm__ __volatile__("mfence" : : : "memory");
-  } else {
-    Acquire_AtomicExchange(ptr, value);
-  }
-}
-#endif
-
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
-  ATOMICOPS_COMPILER_BARRIER();
-  *ptr = value;  // An x86 store acts as a release barrier.
-  // See comments in Atomic64 version of Release_Store(), below.
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
-  // See comments in Atomic64 version of Release_Store(), below.
-  ATOMICOPS_COMPILER_BARRIER();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-#if defined(__x86_64__)
-
-// 64-bit low-level operations on 64-bit platform.
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  Atomic64 prev;
-  __asm__ __volatile__("lock; cmpxchgq %1,%2"
-                       : "=a" (prev)
-                       : "q" (new_value), "m" (*ptr), "0" (old_value)
-                       : "memory");
-  return prev;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value) {
-  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
-                       : "=r" (new_value)
-                       : "m" (*ptr), "0" (new_value)
-                       : "memory");
-  return new_value;  // Now it's the previous value.
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
-  return old_val;
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  // xchgq already has release memory barrier semantics.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
-  ATOMICOPS_COMPILER_BARRIER();
-
-  *ptr = value;  // An x86 store acts as a release barrier
-                 // for current AMD/Intel chips as of Jan 2008.
-                 // See also Acquire_Load(), below.
-
-  // When new chips come out, check:
-  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
-  //  System Programming Guide, Chapter 7: Multiple-processor management,
-  //  Section 7.2, Memory Ordering.
-  // Last seen at:
-  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
-  //
-  // x86 stores/loads fail to act as barriers for a few instructions (clflush
-  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
-  // not generated by the compiler, and are rare.  Users of these instructions
-  // need to know about cache behaviour in any case since all of these involve
-  // either flushing cache lines or non-temporal cache hints.
-} - -inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - return *ptr; -} - -inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { - Atomic64 value = *ptr; // An x86 load acts as a acquire barrier, - // for current AMD/Intel chips as of Jan 2008. - // See also Release_Store(), above. - ATOMICOPS_COMPILER_BARRIER(); - return value; -} - -inline Atomic64 Release_Load(volatile const Atomic64* ptr) { - MemoryBarrier(); - return *ptr; -} - -#else // defined(__x86_64__) - -// 64-bit low-level operations on 32-bit platform. - -#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) -// For compilers older than gcc 4.1, we use inline asm. -// -// Potential pitfalls: -// -// 1. %ebx points to Global offset table (GOT) with -fPIC. -// We need to preserve this register. -// 2. When explicit registers are used in inline asm, the -// compiler may not be aware of it and might try to reuse -// the same register for another argument which has constraints -// that allow it ("r" for example). - -inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { - Atomic64 prev; - __asm__ __volatile__("push %%ebx\n\t" - "movl (%3), %%ebx\n\t" // Move 64-bit new_value into - "movl 4(%3), %%ecx\n\t" // ecx:ebx - "lock; cmpxchg8b (%1)\n\t"// If edx:eax (old_value) same - "pop %%ebx\n\t" - : "=A" (prev) // as contents of ptr: - : "D" (ptr), // ecx:ebx => ptr - "0" (old_value), // else: - "S" (&new_value) // old *ptr => edx:eax - : "memory", "%ecx"); - return prev; -} -#endif // Compiler < gcc-4.1 - -inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_val, - Atomic64 new_val) { - return __sync_val_compare_and_swap(ptr, old_val, new_val); -} - -inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_val) { - Atomic64 old_val; - - do { - old_val = *ptr; - } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); - - return old_val; -} - -inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_val) { - Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val); - return old_val; -} - -inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_val) { - return NoBarrier_AtomicExchange(ptr, new_val); -} - -inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { - __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic - "movq %%mm0, %0\n\t" // moves (ptr could be read-only) - "emms\n\t" // Empty mmx state/Reset FP regs - : "=m" (*ptr) - : "m" (value) - : // mark the FP stack and mmx registers as clobbered - "st", "st(1)", "st(2)", "st(3)", "st(4)", - "st(5)", "st(6)", "st(7)", "mm0", "mm1", - "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); -} - -inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { - NoBarrier_Store(ptr, value); - MemoryBarrier(); -} - -inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { - ATOMICOPS_COMPILER_BARRIER(); - NoBarrier_Store(ptr, value); -} - -inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - Atomic64 value; - __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic - "movq %%mm0, %0\n\t" // moves (ptr could be read-only) - "emms\n\t" // Empty mmx state/Reset FP regs - : "=m" (value) - : "m" (*ptr) - : // mark the FP stack and mmx registers as clobbered - "st", "st(1)", "st(2)", "st(3)", "st(4)", - "st(5)", "st(6)", "st(7)", "mm0", "mm1", - "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); - return value; -} - -inline Atomic64 
Acquire_Load(volatile const Atomic64* ptr) { - Atomic64 value = NoBarrier_Load(ptr); - ATOMICOPS_COMPILER_BARRIER(); - return value; -} - -inline Atomic64 Release_Load(volatile const Atomic64* ptr) { - MemoryBarrier(); - return NoBarrier_Load(ptr); -} - -#endif // defined(__x86_64__) - -inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { - Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); - return x; -} - -inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value) { - return NoBarrier_CompareAndSwap(ptr, old_value, new_value); -} - -} // namespace base::subtle -} // namespace base - -#undef ATOMICOPS_COMPILER_BARRIER - -#endif // BASE_ATOMICOPS_INTERNALS_X86_H_ diff --git a/contrib/libtcmalloc/src/base/atomicops.h b/contrib/libtcmalloc/src/base/atomicops.h deleted file mode 100644 index 46a4b9bb7a2..00000000000 --- a/contrib/libtcmalloc/src/base/atomicops.h +++ /dev/null @@ -1,399 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -/* Copyright (c) 2006, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - * Author: Sanjay Ghemawat - */ - -// For atomic operations on statistics counters, see atomic_stats_counter.h. -// For atomic operations on sequence numbers, see atomic_sequence_num.h. -// For atomic operations on reference counts, see atomic_refcount.h. - -// Some fast atomic operations -- typically with machine-dependent -// implementations. This file may need editing as Google code is -// ported to different architectures. - -// The routines exported by this module are subtle. If you use them, even if -// you get the code right, it will depend on careful reasoning about atomicity -// and memory ordering; it will be less readable, and harder to maintain. If -// you plan to use these routines, you should have a good reason, such as solid -// evidence that performance would otherwise suffer, or there being no -// alternative. 
You should assume only properties explicitly guaranteed by the
-// specifications in this file.  You are almost certainly _not_ writing code
-// just for the x86; if you assume x86 semantics, x86 hardware bugs and
-// implementations on other architectures will cause your code to break.  If you
-// do not know what you are doing, avoid these routines, and use a Mutex.
-//
-// These following lower-level operations are typically useful only to people
-// implementing higher-level synchronization operations like spinlocks,
-// mutexes, and condition-variables.  They combine CompareAndSwap(), a load, or
-// a store with appropriate memory-ordering instructions.  "Acquire" operations
-// ensure that no later memory access can be reordered ahead of the operation.
-// "Release" operations ensure that no previous memory access can be reordered
-// after the operation.  "Barrier" operations have both "Acquire" and "Release"
-// semantics.  A MemoryBarrier() has "Barrier" semantics, but does no memory
-// access.
-//
-// It is incorrect to make direct assignments to/from an atomic variable.
-// You should use one of the Load or Store routines.  The NoBarrier
-// versions are provided when no barriers are needed:
-//   NoBarrier_Store()
-//   NoBarrier_Load()
-// Although there is currently no compiler enforcement, you are encouraged
-// to use these.  Moreover, if you choose to use base::subtle::Atomic64 type,
-// you MUST use one of the Load or Store routines to get correct behavior
-// on 32-bit platforms.
-//
-// The intent is eventually to put all of these routines in namespace
-// base::subtle
-
-#ifndef THREAD_ATOMICOPS_H_
-#define THREAD_ATOMICOPS_H_
-
-#include "../config.h"
-#ifdef HAVE_STDINT_H
-#include <stdint.h>
-#endif
-
-// ------------------------------------------------------------------------
-// Include the platform specific implementations of the types
-// and operations listed below.  Implementations are to provide Atomic32
-// and Atomic64 operations.  If there is a mismatch between intptr_t and
-// the Atomic32 or Atomic64 types for a platform, the platform-specific header
-// should define the macro, AtomicWordCastType in a clause similar to the
-// following:
-// #if ...pointers are 64 bits...
-// # define AtomicWordCastType base::subtle::Atomic64
-// #else
-// # define AtomicWordCastType Atomic32
-// #endif
-// TODO(csilvers): figure out ARCH_PIII/ARCH_K8 (perhaps via ./configure?)
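(Aside, not part of the original tree: the documentation above names spinlocks as the intended consumer of these primitives. A minimal sketch of that pattern, using only the operations declared in this header; the class name is hypothetical:)

```cpp
#include "base/atomicops.h"

class SpinLockSketch {
 public:
  void Lock() {
    // Acquire semantics: no later memory access may be reordered above
    // the successful 0 -> 1 transition.
    while (base::subtle::Acquire_CompareAndSwap(&lockword_, 0, 1) != 0) {
      // spin until the previous holder releases
    }
  }
  void Unlock() {
    // Release semantics: no earlier memory access may be reordered below
    // the store that publishes the unlocked state.
    base::subtle::Release_Store(&lockword_, 0);
  }

 private:
  volatile Atomic32 lockword_ = 0;
};
```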
-// ------------------------------------------------------------------------
-
-#include "base/arm_instruction_set_select.h"
-#define GCC_VERSION (__GNUC__ * 10000        \
-                     + __GNUC_MINOR__ * 100  \
-                     + __GNUC_PATCHLEVEL__)
-
-#define CLANG_VERSION (__clang_major__ * 10000   \
-                       + __clang_minor__ * 100   \
-                       + __clang_patchlevel__)
-
-#if defined(TCMALLOC_PREFER_GCC_ATOMICS) && defined(__GNUC__) && GCC_VERSION >= 40700
-#include "base/atomicops-internals-gcc.h"
-#elif defined(TCMALLOC_PREFER_GCC_ATOMICS) && defined(__clang__) && CLANG_VERSION >= 30400
-#include "base/atomicops-internals-gcc.h"
-#elif defined(__MACH__) && defined(__APPLE__)
-#include "base/atomicops-internals-macosx.h"
-#elif defined(__GNUC__) && defined(ARMV6)
-#include "base/atomicops-internals-arm-v6plus.h"
-#elif defined(ARMV3)
-#include "base/atomicops-internals-arm-generic.h"
-#elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__))
-#include "base/atomicops-internals-x86.h"
-#elif defined(_WIN32)
-#include "base/atomicops-internals-windows.h"
-#elif defined(__linux__) && defined(__PPC__)
-#include "base/atomicops-internals-linuxppc.h"
-#elif defined(__GNUC__) && defined(__mips__)
-#include "base/atomicops-internals-mips.h"
-#elif defined(__GNUC__) && GCC_VERSION >= 40700
-#include "base/atomicops-internals-gcc.h"
-#elif defined(__clang__) && CLANG_VERSION >= 30400
-#include "base/atomicops-internals-gcc.h"
-#else
-#error You need to implement atomic operations for this architecture
-#endif
-
-// Signed type that can hold a pointer and supports the atomic ops below, as
-// well as atomic loads and stores.  Instances must be naturally-aligned.
-typedef intptr_t AtomicWord;
-
-#ifdef AtomicWordCastType
-// ------------------------------------------------------------------------
-// This section is needed only when explicit type casting is required to
-// cast AtomicWord to one of the basic atomic types (Atomic64 or Atomic32).
-// It also serves to document the AtomicWord interface.
-// ------------------------------------------------------------------------
-
-namespace base {
-namespace subtle {
-
-// Atomically execute:
-//      result = *ptr;
-//      if (*ptr == old_value)
-//        *ptr = new_value;
-//      return result;
-//
-// I.e., replace "*ptr" with "new_value" if "*ptr" used to be "old_value".
-// Always return the old value of "*ptr"
-//
-// This routine implies no memory barriers.
-inline AtomicWord NoBarrier_CompareAndSwap(volatile AtomicWord* ptr,
-                                           AtomicWord old_value,
-                                           AtomicWord new_value) {
-  return NoBarrier_CompareAndSwap(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
-      old_value, new_value);
-}
-
-// Atomically store new_value into *ptr, returning the previous value held in
-// *ptr.  This routine implies no memory barriers.
-inline AtomicWord NoBarrier_AtomicExchange(volatile AtomicWord* ptr,
-                                           AtomicWord new_value) {
-  return NoBarrier_AtomicExchange(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
-}
-
-inline AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr,
-                                         AtomicWord new_value) {
-  return Acquire_AtomicExchange(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
-}
-
-inline AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr,
-                                         AtomicWord new_value) {
-  return Release_AtomicExchange(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
-}
-
-inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
-                                         AtomicWord old_value,
-                                         AtomicWord new_value) {
-  return base::subtle::Acquire_CompareAndSwap(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
-      old_value, new_value);
-}
-
-inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr,
-                                         AtomicWord old_value,
-                                         AtomicWord new_value) {
-  return base::subtle::Release_CompareAndSwap(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
-      old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile AtomicWord *ptr, AtomicWord value) {
-  NoBarrier_Store(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
-}
-
-inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) {
-  return base::subtle::Acquire_Store(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
-}
-
-inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
-  return base::subtle::Release_Store(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
-}
-
-inline AtomicWord NoBarrier_Load(volatile const AtomicWord *ptr) {
-  return NoBarrier_Load(
-      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
-}
-
-inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) {
-  return base::subtle::Acquire_Load(
-      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
-}
-
-inline AtomicWord Release_Load(volatile const AtomicWord* ptr) {
-  return base::subtle::Release_Load(
-      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
-}
-
-}  // namespace base::subtle
-}  // namespace base
-#endif  // AtomicWordCastType
-
-// ------------------------------------------------------------------------
-// Commented out type definitions and method declarations for documentation
-// of the interface provided by this module.
-// ------------------------------------------------------------------------
-
-#if 0
-
-// Signed 32-bit type that supports the atomic ops below, as well as atomic
-// loads and stores.  Instances must be naturally aligned.  This type differs
-// from AtomicWord in 64-bit binaries where AtomicWord is 64-bits.
-typedef int32_t Atomic32;
-
-// Corresponding operations on Atomic32
-namespace base {
-namespace subtle {
-
-// Signed 64-bit type that supports the atomic ops below, as well as atomic
-// loads and stores.  Instances must be naturally aligned.  This type differs
-// from AtomicWord in 32-bit binaries where AtomicWord is 32-bits.
-typedef int64_t Atomic64; - -Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, - Atomic32 new_value); -Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); -Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); -Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); -Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, - Atomic32 new_value); -Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, - Atomic32 new_value); -void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value); -void Acquire_Store(volatile Atomic32* ptr, Atomic32 value); -void Release_Store(volatile Atomic32* ptr, Atomic32 value); -Atomic32 NoBarrier_Load(volatile const Atomic32* ptr); -Atomic32 Acquire_Load(volatile const Atomic32* ptr); -Atomic32 Release_Load(volatile const Atomic32* ptr); - -// Corresponding operations on Atomic64 -Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value); -Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); -Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); -Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); - -Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value); -Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, - Atomic64 new_value); -void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value); -void Acquire_Store(volatile Atomic64* ptr, Atomic64 value); -void Release_Store(volatile Atomic64* ptr, Atomic64 value); -Atomic64 NoBarrier_Load(volatile const Atomic64* ptr); -Atomic64 Acquire_Load(volatile const Atomic64* ptr); -Atomic64 Release_Load(volatile const Atomic64* ptr); -} // namespace base::subtle -} // namespace base - -void MemoryBarrier(); - -#endif // 0 - - -// ------------------------------------------------------------------------ -// The following are to be deprecated when all uses have been changed to -// use the base::subtle namespace. -// ------------------------------------------------------------------------ - -#ifdef AtomicWordCastType -// AtomicWord versions to be deprecated -inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, - AtomicWord old_value, - AtomicWord new_value) { - return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); -} - -inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, - AtomicWord old_value, - AtomicWord new_value) { - return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); -} - -inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) { - return base::subtle::Acquire_Store(ptr, value); -} - -inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) { - return base::subtle::Release_Store(ptr, value); -} - -inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) { - return base::subtle::Acquire_Load(ptr); -} - -inline AtomicWord Release_Load(volatile const AtomicWord* ptr) { - return base::subtle::Release_Load(ptr); -} -#endif // AtomicWordCastType - -// 32-bit Acquire/Release operations to be deprecated. 
- -inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, - Atomic32 new_value) { - return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); -} -inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, - Atomic32 new_value) { - return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); -} -inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { - base::subtle::Acquire_Store(ptr, value); -} -inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { - return base::subtle::Release_Store(ptr, value); -} -inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { - return base::subtle::Acquire_Load(ptr); -} -inline Atomic32 Release_Load(volatile const Atomic32* ptr) { - return base::subtle::Release_Load(ptr); -} - -#ifdef BASE_HAS_ATOMIC64 - -// 64-bit Acquire/Release operations to be deprecated. - -inline base::subtle::Atomic64 Acquire_CompareAndSwap( - volatile base::subtle::Atomic64* ptr, - base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { - return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); -} -inline base::subtle::Atomic64 Release_CompareAndSwap( - volatile base::subtle::Atomic64* ptr, - base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { - return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); -} -inline void Acquire_Store( - volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { - base::subtle::Acquire_Store(ptr, value); -} -inline void Release_Store( - volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { - return base::subtle::Release_Store(ptr, value); -} -inline base::subtle::Atomic64 Acquire_Load( - volatile const base::subtle::Atomic64* ptr) { - return base::subtle::Acquire_Load(ptr); -} -inline base::subtle::Atomic64 Release_Load( - volatile const base::subtle::Atomic64* ptr) { - return base::subtle::Release_Load(ptr); -} - -#endif // BASE_HAS_ATOMIC64 - -#endif // THREAD_ATOMICOPS_H_ diff --git a/contrib/libtcmalloc/src/base/basictypes.h b/contrib/libtcmalloc/src/base/basictypes.h deleted file mode 100644 index a81d0466c27..00000000000 --- a/contrib/libtcmalloc/src/base/basictypes.h +++ /dev/null @@ -1,408 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef _BASICTYPES_H_
-#define _BASICTYPES_H_
-
-#include "../config.h"
-#include <string.h>         // for memcpy()
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>       // gets us PRId64, etc
-#endif
-
-// To use this in an autoconf setting, make sure you run the following
-// autoconf macros:
-//    AC_HEADER_STDC              /* for stdint_h and inttypes_h */
-//    AC_CHECK_TYPES([__int64])   /* defined in some windows platforms */
-
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>       // uint16_t might be here; PRId64 too.
-#endif
-#ifdef HAVE_STDINT_H
-#include <stdint.h>         // to get uint16_t (ISO naming madness)
-#endif
-#include <sys/types.h>      // our last best hope for uint16_t
-
-// Standard typedefs
-// All Google code is compiled with -funsigned-char to make "char"
-// unsigned.  Google code therefore doesn't need a "uchar" type.
-// TODO(csilvers): how do we make sure unsigned-char works on non-gcc systems?
-typedef signed char schar;
-typedef int8_t int8;
-typedef int16_t int16;
-typedef int32_t int32;
-typedef int64_t int64;
-
-// NOTE: unsigned types are DANGEROUS in loops and other arithmetical
-// places.  Use the signed types unless your variable represents a bit
-// pattern (eg a hash value) or you really need the extra bit.  Do NOT
-// use 'unsigned' to express "this value should always be positive";
-// use assertions for this.
-
-typedef uint8_t uint8;
-typedef uint16_t uint16;
-typedef uint32_t uint32;
-typedef uint64_t uint64;
-
-const uint16 kuint16max = ( (uint16) 0xFFFF);
-const uint32 kuint32max = ( (uint32) 0xFFFFFFFF);
-const uint64 kuint64max = ( (((uint64) kuint32max) << 32) | kuint32max );
-
-const int8  kint8max  = ( ( int8) 0x7F);
-const int16 kint16max = ( ( int16) 0x7FFF);
-const int32 kint32max = ( ( int32) 0x7FFFFFFF);
-const int64 kint64max = ( ((( int64) kint32max) << 32) | kuint32max );
-
-const int8  kint8min  = ( ( int8) 0x80);
-const int16 kint16min = ( ( int16) 0x8000);
-const int32 kint32min = ( ( int32) 0x80000000);
-const int64 kint64min = ( (((uint64) kint32min) << 32) | 0 );
-
-// Define the "portable" printf and scanf macros, if they're not
-// already there (via the inttypes.h we #included above, hopefully).
-// Mostly it's old systems that don't support inttypes.h, so we assume
-// they're 32 bit.
-#ifndef PRIx64
-#define PRIx64 "llx"
-#endif
-#ifndef SCNx64
-#define SCNx64 "llx"
-#endif
-#ifndef PRId64
-#define PRId64 "lld"
-#endif
-#ifndef SCNd64
-#define SCNd64 "lld"
-#endif
-#ifndef PRIu64
-#define PRIu64 "llu"
-#endif
-#ifndef PRIxPTR
-#define PRIxPTR "lx"
-#endif
-
-// Also allow for printing of a pthread_t.
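(Aside, not from the original file: a usage sketch for the printing macros defined just below; `LogSelf` is a hypothetical helper:)

```cpp
#include <pthread.h>
#include <stdio.h>

void LogSelf() {
  // GPRIuPTHREAD supplies the printf length modifier; PRINTABLE_PTHREAD
  // converts opaque pthread_t representations into something printable.
  fprintf(stderr, "thread %" GPRIuPTHREAD "\n",
          PRINTABLE_PTHREAD(pthread_self()));
}
```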
-#define GPRIuPTHREAD "lu"
-#define GPRIxPTHREAD "lx"
-#if defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__APPLE__) || defined(__FreeBSD__)
-#define PRINTABLE_PTHREAD(pthreadt) reinterpret_cast<uintptr_t>(pthreadt)
-#else
-#define PRINTABLE_PTHREAD(pthreadt) pthreadt
-#endif
-
-// A macro to disallow the evil copy constructor and operator= functions
-// This should be used in the private: declarations for a class
-#define DISALLOW_EVIL_CONSTRUCTORS(TypeName)    \
-  TypeName(const TypeName&);                    \
-  void operator=(const TypeName&)
-
-// An alternate name that leaves out the moral judgment... :-)
-#define DISALLOW_COPY_AND_ASSIGN(TypeName) DISALLOW_EVIL_CONSTRUCTORS(TypeName)
-
-// The COMPILE_ASSERT macro can be used to verify that a compile time
-// expression is true. For example, you could use it to verify the
-// size of a static array:
-//
-//   COMPILE_ASSERT(sizeof(num_content_type_names) == sizeof(int),
-//                  content_type_names_incorrect_size);
-//
-// or to make sure a struct is smaller than a certain size:
-//
-//   COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
-//
-// The second argument to the macro is the name of the variable. If
-// the expression is false, most compilers will issue a warning/error
-// containing the name of the variable.
-//
-// Implementation details of COMPILE_ASSERT:
-//
-// - COMPILE_ASSERT works by defining an array type that has -1
-//   elements (and thus is invalid) when the expression is false.
-//
-// - The simpler definition
-//
-//     #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
-//
-//   does not work, as gcc supports variable-length arrays whose sizes
-//   are determined at run-time (this is gcc's extension and not part
-//   of the C++ standard).  As a result, gcc fails to reject the
-//   following code with the simple definition:
-//
-//     int foo;
-//     COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
-//                               // not a compile-time constant.
-//
-// - By using the type CompileAssert<(bool(expr))>, we ensure that
-//   expr is a compile-time constant.  (Template arguments must be
-//   determined at compile-time.)
-//
-// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
-//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
-//
-//     CompileAssert<bool(expr)>
-//
-//   instead, these compilers will refuse to compile
-//
-//     COMPILE_ASSERT(5 > 0, some_message);
-//
-//   (They seem to think the ">" in "5 > 0" marks the end of the
-//   template argument list.)
-//
-// - The array size is (bool(expr) ? 1 : -1), instead of simply
-//
-//     ((expr) ? 1 : -1).
-//
-//   This is to avoid running into a bug in MS VC 7.1, which
-//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
-
-template <bool>
-struct CompileAssert {
-};
-
-#ifdef HAVE___ATTRIBUTE__
-# define ATTRIBUTE_UNUSED __attribute__((unused))
-#else
-# define ATTRIBUTE_UNUSED
-#endif
-
-#if defined(HAVE___ATTRIBUTE__) && defined(HAVE_TLS)
-#define ATTR_INITIAL_EXEC __attribute__ ((tls_model ("initial-exec")))
-#else
-#define ATTR_INITIAL_EXEC
-#endif
-
-#define COMPILE_ASSERT(expr, msg) \
-  typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] ATTRIBUTE_UNUSED
-
-#define arraysize(a) (sizeof(a) / sizeof(*(a)))
-
-#define OFFSETOF_MEMBER(strct, field)                                   \
-  (reinterpret_cast<char*>(&reinterpret_cast<strct*>(16)->field) -      \
-   reinterpret_cast<char*>(16))
-
-// bit_cast<Dest,Source> implements the equivalent of
-// "*reinterpret_cast<Dest*>(&source)".
-//
-// The reinterpret_cast method would produce undefined behavior
-// according to ISO C++ specification section 3.10 -15-.
-// bit_cast<> calls memcpy() which is blessed by the standard,
-// especially by the example in section 3.9.
-//
-// Fortunately memcpy() is very fast.  In optimized mode, with a
-// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
-// code with the minimal amount of data movement.  On a 32-bit system,
-// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
-// compiles to two loads and two stores.
-
-template <class Dest, class Source>
-inline Dest bit_cast(const Source& source) {
-  COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), bitcasting_unequal_sizes);
-  Dest dest;
-  memcpy(&dest, &source, sizeof(dest));
-  return dest;
-}
-
-// bit_store<Dest,Source> implements the equivalent of
-// "dest = *reinterpret_cast<const Dest*>(&source)".
-//
-// This prevents undefined behavior when the dest pointer is unaligned.
-template <class Dest, class Source>
-inline void bit_store(Dest *dest, const Source *source) {
-  COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), bitcasting_unequal_sizes);
-  memcpy(dest, source, sizeof(Dest));
-}
-
-#ifdef HAVE___ATTRIBUTE__
-# define ATTRIBUTE_WEAK __attribute__((weak))
-# define ATTRIBUTE_NOINLINE __attribute__((noinline))
-#else
-# define ATTRIBUTE_WEAK
-# define ATTRIBUTE_NOINLINE
-#endif
-
-#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__)
-# define ATTRIBUTE_VISIBILITY_HIDDEN __attribute__((visibility("hidden")))
-#else
-# define ATTRIBUTE_VISIBILITY_HIDDEN
-#endif
-
-// Section attributes are supported for both ELF and Mach-O, but in
-// very different ways.  Here's the API we provide:
-// 1) ATTRIBUTE_SECTION: put this with the declaration of all functions
-//    you want to be in the same linker section
-// 2) DEFINE_ATTRIBUTE_SECTION_VARS: must be called once per unique
-//    name.  You want to make sure this is executed before any
-//    DECLARE_ATTRIBUTE_SECTION_VARS; the easiest way is to put them
-//    in the same .cc file.  Put this call at the global level.
-// 3) INIT_ATTRIBUTE_SECTION_VARS: you can scatter calls to this in
-//    multiple places to help ensure execution before any
-//    DECLARE_ATTRIBUTE_SECTION_VARS.  You must have at least one
-//    DEFINE, but you can have many INITs.  Put each in its own scope.
-// 4) DECLARE_ATTRIBUTE_SECTION_VARS: must be called before using
-//    ATTRIBUTE_SECTION_START or ATTRIBUTE_SECTION_STOP on a name.
-//    Put this call at the global level.
-// 5) ATTRIBUTE_SECTION_START/ATTRIBUTE_SECTION_STOP: call this to say
-//    where in memory a given section is.  All functions declared with
-//    ATTRIBUTE_SECTION are guaranteed to be between START and STOP.
-
-#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__)
-# define ATTRIBUTE_SECTION(name) __attribute__ ((section (#name)))
-
-  // Weak section declaration to be used as a global declaration
-  // for ATTRIBUTE_SECTION_START|STOP(name) to compile and link
-  // even without functions with ATTRIBUTE_SECTION(name).
-# define DECLARE_ATTRIBUTE_SECTION_VARS(name) \
-    extern char __start_##name[] ATTRIBUTE_WEAK; \
-    extern char __stop_##name[] ATTRIBUTE_WEAK
-# define INIT_ATTRIBUTE_SECTION_VARS(name)    // no-op for ELF
-# define DEFINE_ATTRIBUTE_SECTION_VARS(name)  // no-op for ELF
-
-  // Return void* pointers to start/end of a section of code with functions
-  // having ATTRIBUTE_SECTION(name), or 0 if no such function exists.
-  // One must DECLARE_ATTRIBUTE_SECTION(name) for this to compile and link.
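(Aside, not from the original file: a sketch of how the five section macros above combine, with a hypothetical section name `my_hooks`:)

```cpp
DEFINE_ATTRIBUTE_SECTION_VARS(my_hooks);     // once, at global scope
DECLARE_ATTRIBUTE_SECTION_VARS(my_hooks);    // before using START/STOP

void MyHook() ATTRIBUTE_SECTION(my_hooks);   // tag the function
void MyHook() { /* ... */ }

// True iff pc points into code tagged with ATTRIBUTE_SECTION(my_hooks).
bool PcIsHook(const void* pc) {
  return pc >= ATTRIBUTE_SECTION_START(my_hooks) &&
         pc <  ATTRIBUTE_SECTION_STOP(my_hooks);
}
```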
-# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name))
-# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name))
-# define HAVE_ATTRIBUTE_SECTION_START 1
-
-#elif defined(HAVE___ATTRIBUTE__) && defined(__MACH__)
-# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__TEXT, " #name)))
-
-#include <mach-o/getsect.h>
-#include <mach-o/dyld.h>
-class AssignAttributeStartEnd {
- public:
-  AssignAttributeStartEnd(const char* name, char** pstart, char** pend) {
-    // Find out what dynamic library name is defined in
-    if (_dyld_present()) {
-      for (int i = _dyld_image_count() - 1; i >= 0; --i) {
-        const mach_header* hdr = _dyld_get_image_header(i);
-#ifdef MH_MAGIC_64
-        if (hdr->magic == MH_MAGIC_64) {
-          uint64_t len;
-          *pstart = getsectdatafromheader_64((mach_header_64*)hdr,
-                                             "__TEXT", name, &len);
-          if (*pstart) {   // NULL if not defined in this dynamic library
-            *pstart += _dyld_get_image_vmaddr_slide(i);   // correct for reloc
-            *pend = *pstart + len;
-            return;
-          }
-        }
-#endif
-        if (hdr->magic == MH_MAGIC) {
-          uint32_t len;
-          *pstart = getsectdatafromheader(hdr, "__TEXT", name, &len);
-          if (*pstart) {   // NULL if not defined in this dynamic library
-            *pstart += _dyld_get_image_vmaddr_slide(i);   // correct for reloc
-            *pend = *pstart + len;
-            return;
-          }
-        }
-      }
-    }
-    // If we get here, not defined in a dll at all.  See if defined statically.
-    unsigned long len;  // don't ask me why this type isn't uint32_t too...
-    *pstart = getsectdata("__TEXT", name, &len);
-    *pend = *pstart + len;
-  }
-};
-
-#define DECLARE_ATTRIBUTE_SECTION_VARS(name) \
-  extern char* __start_##name; \
-  extern char* __stop_##name
-
-#define INIT_ATTRIBUTE_SECTION_VARS(name) \
-  DECLARE_ATTRIBUTE_SECTION_VARS(name); \
-  static const AssignAttributeStartEnd __assign_##name( \
-    #name, &__start_##name, &__stop_##name)
-
-#define DEFINE_ATTRIBUTE_SECTION_VARS(name) \
-  char* __start_##name, *__stop_##name; \
-  INIT_ATTRIBUTE_SECTION_VARS(name)
-
-# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name))
-# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name))
-# define HAVE_ATTRIBUTE_SECTION_START 1
-
-#else  // not HAVE___ATTRIBUTE__ && __ELF__, nor HAVE___ATTRIBUTE__ && __MACH__
-# define ATTRIBUTE_SECTION(name)
-# define DECLARE_ATTRIBUTE_SECTION_VARS(name)
-# define INIT_ATTRIBUTE_SECTION_VARS(name)
-# define DEFINE_ATTRIBUTE_SECTION_VARS(name)
-# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(0))
-# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(0))
-
-#endif  // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__
-
-#if defined(HAVE___ATTRIBUTE__)
-# if (defined(__i386__) || defined(__x86_64__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
-# elif (defined(__PPC__) || defined(__PPC64__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(16)))
-# elif (defined(__arm__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
-    // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned
-# elif (defined(__mips__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(128)))
-# elif (defined(__aarch64__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
-    // implementation specific, Cortex-A53 and 57 should have 64 bytes
-# elif (defined(__s390__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(256)))
-# else
-#   error Could not determine cache line length - unknown architecture
-# endif
-#else
-# define CACHELINE_ALIGNED
-#endif  // defined(HAVE___ATTRIBUTE__) && (__i386__ || __x86_64__)
-
-// Structure for discovering alignment
-union MemoryAligner {
-  void*  p;
-  double d;
-  size_t s;
-} CACHELINE_ALIGNED;
-
-// The following enum should be used only as a constructor argument to indicate
-// that the variable has static storage class, and that the constructor should
-// do nothing to its state.  It indicates to the reader that it is legal to
-// declare a static instance of the class, provided the constructor is given
-// the base::LINKER_INITIALIZED argument.  Normally, it is unsafe to declare a
-// static variable that has a constructor or a destructor because invocation
-// order is undefined.  However, IF the type can be initialized by filling with
-// zeroes (which the loader does for static variables), AND the destructor also
-// does nothing to the storage, then a constructor declared as
-//       explicit MyClass(base::LinkerInitialized x) {}
-// and invoked as
-//       static MyClass my_variable_name(base::LINKER_INITIALIZED);
-namespace base {
-enum LinkerInitialized { LINKER_INITIALIZED };
-}
-
-#endif  // _BASICTYPES_H_
diff --git a/contrib/libtcmalloc/src/base/commandlineflags.h b/contrib/libtcmalloc/src/base/commandlineflags.h
deleted file mode 100644
index e940edd3791..00000000000
--- a/contrib/libtcmalloc/src/base/commandlineflags.h
+++ /dev/null
@@ -1,166 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// This file is a compatibility layer that defines Google's version of
-// command line flags that are used for configuration.
-//
-// We put flags into their own namespace.  It is purposefully
-// named in an opaque way that people should have trouble typing
-// directly.  The idea is that DEFINE puts the flag in the weird
-// namespace, and DECLARE imports the flag from there into the
-// current namespace.  The net result is to force people to use
-// DECLARE to get access to a flag, rather than saying
-//   extern bool FLAGS_logtostderr;
-// or some such instead.  We want this so we can put extra
diff --git a/contrib/libtcmalloc/src/base/commandlineflags.h b/contrib/libtcmalloc/src/base/commandlineflags.h
deleted file mode 100644
index e940edd3791..00000000000
--- a/contrib/libtcmalloc/src/base/commandlineflags.h
+++ /dev/null
@@ -1,166 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// This file is a compatibility layer that defines Google's version of
-// command line flags that are used for configuration.
-//
-// We put flags into their own namespace.  It is purposefully
-// named in an opaque way that people should have trouble typing
-// directly.  The idea is that DEFINE puts the flag in the weird
-// namespace, and DECLARE imports the flag from there into the
-// current namespace.  The net result is to force people to use
-// DECLARE to get access to a flag, rather than saying
-//   extern bool FLAGS_logtostderr;
-// or some such instead.  We want this so we can put extra
-// functionality (like sanity-checking) in DECLARE if we want,
-// and make sure it is picked up everywhere.
-//
-// We also put the type of the variable in the namespace, so that
-// people can't DECLARE_int32 something that they DEFINE_bool'd
-// elsewhere.
-#ifndef BASE_COMMANDLINEFLAGS_H_
-#define BASE_COMMANDLINEFLAGS_H_
-
-#include "../config.h"
-#include <string>
-#include <string.h>               // for memchr
-#include <stdlib.h>               // for getenv
-#include "base/basictypes.h"
-
-#define DECLARE_VARIABLE(type, name) \
-  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \
-  extern PERFTOOLS_DLL_DECL type FLAGS_##name; \
-  } \
-  using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
-
-#define DEFINE_VARIABLE(type, name, value, meaning) \
-  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \
-  PERFTOOLS_DLL_DECL type FLAGS_##name(value); \
-  char FLAGS_no##name; \
-  } \
-  using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
-
-// bool specialization
-#define DECLARE_bool(name) \
-  DECLARE_VARIABLE(bool, name)
-#define DEFINE_bool(name, value, meaning) \
-  DEFINE_VARIABLE(bool, name, value, meaning)
-
-// int32 specialization
-#define DECLARE_int32(name) \
-  DECLARE_VARIABLE(int32, name)
-#define DEFINE_int32(name, value, meaning) \
-  DEFINE_VARIABLE(int32, name, value, meaning)
-
-// int64 specialization
-#define DECLARE_int64(name) \
-  DECLARE_VARIABLE(int64, name)
-#define DEFINE_int64(name, value, meaning) \
-  DEFINE_VARIABLE(int64, name, value, meaning)
-
-#define DECLARE_uint64(name) \
-  DECLARE_VARIABLE(uint64, name)
-#define DEFINE_uint64(name, value, meaning) \
-  DEFINE_VARIABLE(uint64, name, value, meaning)
-
-// double specialization
-#define DECLARE_double(name) \
-  DECLARE_VARIABLE(double, name)
-#define DEFINE_double(name, value, meaning) \
-  DEFINE_VARIABLE(double, name, value, meaning)
-
-// Special case for string, because we have to specify the namespace
-// std::string, which doesn't play nicely with our FLAG__namespace hackery.
-#define DECLARE_string(name) \
-  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead { \
-  extern std::string FLAGS_##name; \
-  } \
-  using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name
-#define DEFINE_string(name, value, meaning) \
-  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead { \
-  std::string FLAGS_##name(value); \
-  char FLAGS_no##name; \
-  } \
-  using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name
-
-// implemented in sysinfo.cc
-namespace tcmalloc {
-  namespace commandlineflags {
-
-    inline bool StringToBool(const char *value, bool def) {
-      if (!value) {
-        return def;
-      }
-      return memchr("tTyY1\0", value[0], 6) != NULL;
-    }
-
-    inline int StringToInt(const char *value, int def) {
-      if (!value) {
-        return def;
-      }
-      return strtol(value, NULL, 10);
-    }
-
-    inline long long StringToLongLong(const char *value, long long def) {
-      if (!value) {
-        return def;
-      }
-      return strtoll(value, NULL, 10);
-    }
-
-    inline double StringToDouble(const char *value, double def) {
-      if (!value) {
-        return def;
-      }
-      return strtod(value, NULL);
-    }
-  }
-}
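One subtlety worth spelling out: the 6-byte search set in StringToBool above deliberately includes the terminating NUL, so an environment variable that is set but empty (e.g. `TCMALLOC_SAMPLE=`) parses as true, while an unset variable falls back to the default. A self-contained restatement of that semantics (assumption: it matches the deleted helper exactly):

#include <cassert>
#include <cstring>

static bool StringToBool(const char* value, bool def) {
    if (!value) return def;
    // "tTyY1\0" is 6 bytes: t, T, y, Y, 1 and an explicit NUL, so an empty
    // string (value[0] == '\0') is found in the set and counts as true.
    return std::memchr("tTyY1\0", value[0], 6) != nullptr;
}

int main() {
    assert(StringToBool(nullptr, false) == false);  // unset -> default
    assert(StringToBool("", false) == true);        // set but empty -> true
    assert(StringToBool("yes", false) == true);     // leading 'y' -> true
    assert(StringToBool("0", true) == false);       // '0' not in set -> false
    return 0;
}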
-
-// These macros (could be functions, but I don't want to bother with a .cc
-// file), make it easier to initialize flags from the environment.
-
-#define EnvToString(envname, dflt) \
-  (!getenv(envname) ? (dflt) : getenv(envname))
-
-#define EnvToBool(envname, dflt) \
-  tcmalloc::commandlineflags::StringToBool(getenv(envname), dflt)
-
-#define EnvToInt(envname, dflt) \
-  tcmalloc::commandlineflags::StringToInt(getenv(envname), dflt)
-
-#define EnvToInt64(envname, dflt) \
-  tcmalloc::commandlineflags::StringToLongLong(getenv(envname), dflt)
-
-#define EnvToDouble(envname, dflt) \
-  tcmalloc::commandlineflags::StringToDouble(getenv(envname), dflt)
-
-#endif  // BASE_COMMANDLINEFLAGS_H_
diff --git a/contrib/libtcmalloc/src/base/dynamic_annotations.c b/contrib/libtcmalloc/src/base/dynamic_annotations.c
deleted file mode 100644
index 87bd2ecde97..00000000000
--- a/contrib/libtcmalloc/src/base/dynamic_annotations.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/* Copyright (c) 2008-2009, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Kostya Serebryany
- */
-
-#ifdef __cplusplus
-# error "This file should be built as pure C to avoid name mangling"
-#endif
-
-#include "config.h"
-#include <stdlib.h>
-#include <string.h>
-
-#include "base/dynamic_annotations.h"
-#include "getenv_safe.h" // for TCMallocGetenvSafe
-
-#ifdef __GNUC__
-/* valgrind.h uses gcc extensions so it won't build with other compilers */
-# ifdef HAVE_VALGRIND_H   /* prefer the user's copy if they have it */
-#  include <valgrind.h>
-# else                    /* otherwise just use the copy that we have */
-#  include "third_party/valgrind.h"
-# endif
-#endif
-
-/* Compiler-based ThreadSanitizer defines
-   DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL = 1
-   and provides its own definitions of the functions. */
-
-#ifndef DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL
-# define DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL 0
-#endif
-
-/* Each function is empty and called (via a macro) only in debug mode.
-   The arguments are captured by dynamic tools at runtime.
*/ - -#if DYNAMIC_ANNOTATIONS_ENABLED == 1 \ - && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 - -void AnnotateRWLockCreate(const char *file, int line, - const volatile void *lock){} -void AnnotateRWLockDestroy(const char *file, int line, - const volatile void *lock){} -void AnnotateRWLockAcquired(const char *file, int line, - const volatile void *lock, long is_w){} -void AnnotateRWLockReleased(const char *file, int line, - const volatile void *lock, long is_w){} -void AnnotateBarrierInit(const char *file, int line, - const volatile void *barrier, long count, - long reinitialization_allowed) {} -void AnnotateBarrierWaitBefore(const char *file, int line, - const volatile void *barrier) {} -void AnnotateBarrierWaitAfter(const char *file, int line, - const volatile void *barrier) {} -void AnnotateBarrierDestroy(const char *file, int line, - const volatile void *barrier) {} - -void AnnotateCondVarWait(const char *file, int line, - const volatile void *cv, - const volatile void *lock){} -void AnnotateCondVarSignal(const char *file, int line, - const volatile void *cv){} -void AnnotateCondVarSignalAll(const char *file, int line, - const volatile void *cv){} -void AnnotatePublishMemoryRange(const char *file, int line, - const volatile void *address, - long size){} -void AnnotateUnpublishMemoryRange(const char *file, int line, - const volatile void *address, - long size){} -void AnnotatePCQCreate(const char *file, int line, - const volatile void *pcq){} -void AnnotatePCQDestroy(const char *file, int line, - const volatile void *pcq){} -void AnnotatePCQPut(const char *file, int line, - const volatile void *pcq){} -void AnnotatePCQGet(const char *file, int line, - const volatile void *pcq){} -void AnnotateNewMemory(const char *file, int line, - const volatile void *mem, - long size){} -void AnnotateExpectRace(const char *file, int line, - const volatile void *mem, - const char *description){} -void AnnotateBenignRace(const char *file, int line, - const volatile void *mem, - const char *description){} -void AnnotateBenignRaceSized(const char *file, int line, - const volatile void *mem, - long size, - const char *description) {} -void AnnotateMutexIsUsedAsCondVar(const char *file, int line, - const volatile void *mu){} -void AnnotateTraceMemory(const char *file, int line, - const volatile void *arg){} -void AnnotateThreadName(const char *file, int line, - const char *name){} -void AnnotateIgnoreReadsBegin(const char *file, int line){} -void AnnotateIgnoreReadsEnd(const char *file, int line){} -void AnnotateIgnoreWritesBegin(const char *file, int line){} -void AnnotateIgnoreWritesEnd(const char *file, int line){} -void AnnotateEnableRaceDetection(const char *file, int line, int enable){} -void AnnotateNoOp(const char *file, int line, - const volatile void *arg){} -void AnnotateFlushState(const char *file, int line){} - -#endif /* DYNAMIC_ANNOTATIONS_ENABLED == 1 - && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */ - -#if DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 - -static int GetRunningOnValgrind(void) { -#ifdef RUNNING_ON_VALGRIND - if (RUNNING_ON_VALGRIND) return 1; -#endif - const char *running_on_valgrind_str = TCMallocGetenvSafe("RUNNING_ON_VALGRIND"); - if (running_on_valgrind_str) { - return strcmp(running_on_valgrind_str, "0") != 0; - } - return 0; -} - -/* See the comments in dynamic_annotations.h */ -int RunningOnValgrind(void) { - static volatile int running_on_valgrind = -1; - int local_running_on_valgrind = running_on_valgrind; - /* C doesn't have thread-safe initialization of statics, and we - don't 
want to depend on pthread_once here, so hack it. */ - ANNOTATE_BENIGN_RACE(&running_on_valgrind, "safe hack"); - if (local_running_on_valgrind == -1) - running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind(); - return local_running_on_valgrind; -} - -#endif /* DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */ - -/* See the comments in dynamic_annotations.h */ -double ValgrindSlowdown(void) { - /* Same initialization hack as in RunningOnValgrind(). */ - static volatile double slowdown = 0.0; - double local_slowdown = slowdown; - ANNOTATE_BENIGN_RACE(&slowdown, "safe hack"); - if (RunningOnValgrind() == 0) { - return 1.0; - } - if (local_slowdown == 0.0) { - char *env = getenv("VALGRIND_SLOWDOWN"); - slowdown = local_slowdown = env ? atof(env) : 50.0; - } - return local_slowdown; -} diff --git a/contrib/libtcmalloc/src/base/dynamic_annotations.h b/contrib/libtcmalloc/src/base/dynamic_annotations.h deleted file mode 100644 index 4669315ced3..00000000000 --- a/contrib/libtcmalloc/src/base/dynamic_annotations.h +++ /dev/null @@ -1,627 +0,0 @@ -/* Copyright (c) 2008, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - * Author: Kostya Serebryany - */ - -/* This file defines dynamic annotations for use with dynamic analysis - tool such as valgrind, PIN, etc. - - Dynamic annotation is a source code annotation that affects - the generated code (that is, the annotation is not a comment). - Each such annotation is attached to a particular - instruction and/or to a particular object (address) in the program. - - The annotations that should be used by users are macros in all upper-case - (e.g., ANNOTATE_NEW_MEMORY). - - Actual implementation of these macros may differ depending on the - dynamic analysis tool being used. - - See http://code.google.com/p/data-race-test/ for more information. - - This file supports the following dynamic analysis tools: - - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero). - Macros are defined empty. - - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1). 
- Macros are defined as calls to non-inlinable empty functions - that are intercepted by Valgrind. */ - -#ifndef BASE_DYNAMIC_ANNOTATIONS_H_ -#define BASE_DYNAMIC_ANNOTATIONS_H_ - -#ifndef DYNAMIC_ANNOTATIONS_ENABLED -# define DYNAMIC_ANNOTATIONS_ENABLED 0 -#endif - -#if DYNAMIC_ANNOTATIONS_ENABLED != 0 - - /* ------------------------------------------------------------- - Annotations useful when implementing condition variables such as CondVar, - using conditional critical sections (Await/LockWhen) and when constructing - user-defined synchronization mechanisms. - - The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can - be used to define happens-before arcs in user-defined synchronization - mechanisms: the race detector will infer an arc from the former to the - latter when they share the same argument pointer. - - Example 1 (reference counting): - - void Unref() { - ANNOTATE_HAPPENS_BEFORE(&refcount_); - if (AtomicDecrementByOne(&refcount_) == 0) { - ANNOTATE_HAPPENS_AFTER(&refcount_); - delete this; - } - } - - Example 2 (message queue): - - void MyQueue::Put(Type *e) { - MutexLock lock(&mu_); - ANNOTATE_HAPPENS_BEFORE(e); - PutElementIntoMyQueue(e); - } - - Type *MyQueue::Get() { - MutexLock lock(&mu_); - Type *e = GetElementFromMyQueue(); - ANNOTATE_HAPPENS_AFTER(e); - return e; - } - - Note: when possible, please use the existing reference counting and message - queue implementations instead of inventing new ones. */ - - /* Report that wait on the condition variable at address "cv" has succeeded - and the lock at address "lock" is held. */ - #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ - AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) - - /* Report that wait on the condition variable at "cv" has succeeded. Variant - w/o lock. */ - #define ANNOTATE_CONDVAR_WAIT(cv) \ - AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) - - /* Report that we are about to signal on the condition variable at address - "cv". */ - #define ANNOTATE_CONDVAR_SIGNAL(cv) \ - AnnotateCondVarSignal(__FILE__, __LINE__, cv) - - /* Report that we are about to signal_all on the condition variable at "cv". */ - #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ - AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) - - /* Annotations for user-defined synchronization mechanisms. */ - #define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj) - #define ANNOTATE_HAPPENS_AFTER(obj) ANNOTATE_CONDVAR_WAIT(obj) - - /* Report that the bytes in the range [pointer, pointer+size) are about - to be published safely. The race checker will create a happens-before - arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to - subsequent accesses to this memory. - Note: this annotation may not work properly if the race detector uses - sampling, i.e. does not observe all memory accesses. - */ - #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ - AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size) - - /* DEPRECATED. Don't use it. */ - #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \ - AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size) - - /* DEPRECATED. Don't use it. */ - #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) \ - do { \ - ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \ - ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size); \ - } while (0) - - /* Instruct the tool to create a happens-before arc between mu->Unlock() and - mu->Lock(). This annotation may slow down the race detector and hide real - races. 
Normally it is used only when it would be difficult to annotate each - of the mutex's critical sections individually using the annotations above. - This annotation makes sense only for hybrid race detectors. For pure - happens-before detectors this is a no-op. For more details see - http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */ - #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ - AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - - /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */ - #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ - AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - - /* ------------------------------------------------------------- - Annotations useful when defining memory allocators, or when memory that - was protected in one way starts to be protected in another. */ - - /* Report that a new memory at "address" of size "size" has been allocated. - This might be used when the memory has been retrieved from a free list and - is about to be reused, or when a the locking discipline for a variable - changes. */ - #define ANNOTATE_NEW_MEMORY(address, size) \ - AnnotateNewMemory(__FILE__, __LINE__, address, size) - - /* ------------------------------------------------------------- - Annotations useful when defining FIFO queues that transfer data between - threads. */ - - /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at - address "pcq" has been created. The ANNOTATE_PCQ_* annotations - should be used only for FIFO queues. For non-FIFO queues use - ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */ - #define ANNOTATE_PCQ_CREATE(pcq) \ - AnnotatePCQCreate(__FILE__, __LINE__, pcq) - - /* Report that the queue at address "pcq" is about to be destroyed. */ - #define ANNOTATE_PCQ_DESTROY(pcq) \ - AnnotatePCQDestroy(__FILE__, __LINE__, pcq) - - /* Report that we are about to put an element into a FIFO queue at address - "pcq". */ - #define ANNOTATE_PCQ_PUT(pcq) \ - AnnotatePCQPut(__FILE__, __LINE__, pcq) - - /* Report that we've just got an element from a FIFO queue at address "pcq". */ - #define ANNOTATE_PCQ_GET(pcq) \ - AnnotatePCQGet(__FILE__, __LINE__, pcq) - - /* ------------------------------------------------------------- - Annotations that suppress errors. It is usually better to express the - program's synchronization using the other annotations, but these can - be used when all else fails. */ - - /* Report that we may have a benign race at "pointer", with size - "sizeof(*(pointer))". "pointer" must be a non-void* pointer. Insert at the - point where "pointer" has been allocated, preferably close to the point - where the race happens. See also ANNOTATE_BENIGN_RACE_STATIC. */ - #define ANNOTATE_BENIGN_RACE(pointer, description) \ - AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \ - sizeof(*(pointer)), description) - - /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to - the memory range [address, address+size). */ - #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \ - AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description) - - /* Request the analysis tool to ignore all reads in the current thread - until ANNOTATE_IGNORE_READS_END is called. - Useful to ignore intentional racey reads, while still checking - other reads and all writes. - See also ANNOTATE_UNPROTECTED_READ. */ - #define ANNOTATE_IGNORE_READS_BEGIN() \ - AnnotateIgnoreReadsBegin(__FILE__, __LINE__) - - /* Stop ignoring reads. 
*/ - #define ANNOTATE_IGNORE_READS_END() \ - AnnotateIgnoreReadsEnd(__FILE__, __LINE__) - - /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */ - #define ANNOTATE_IGNORE_WRITES_BEGIN() \ - AnnotateIgnoreWritesBegin(__FILE__, __LINE__) - - /* Stop ignoring writes. */ - #define ANNOTATE_IGNORE_WRITES_END() \ - AnnotateIgnoreWritesEnd(__FILE__, __LINE__) - - /* Start ignoring all memory accesses (reads and writes). */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ - do {\ - ANNOTATE_IGNORE_READS_BEGIN();\ - ANNOTATE_IGNORE_WRITES_BEGIN();\ - }while(0)\ - - /* Stop ignoring all memory accesses. */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ - do {\ - ANNOTATE_IGNORE_WRITES_END();\ - ANNOTATE_IGNORE_READS_END();\ - }while(0)\ - - /* Enable (enable!=0) or disable (enable==0) race detection for all threads. - This annotation could be useful if you want to skip expensive race analysis - during some period of program execution, e.g. during initialization. */ - #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \ - AnnotateEnableRaceDetection(__FILE__, __LINE__, enable) - - /* ------------------------------------------------------------- - Annotations useful for debugging. */ - - /* Request to trace every access to "address". */ - #define ANNOTATE_TRACE_MEMORY(address) \ - AnnotateTraceMemory(__FILE__, __LINE__, address) - - /* Report the current thread name to a race detector. */ - #define ANNOTATE_THREAD_NAME(name) \ - AnnotateThreadName(__FILE__, __LINE__, name) - - /* ------------------------------------------------------------- - Annotations useful when implementing locks. They are not - normally needed by modules that merely use locks. - The "lock" argument is a pointer to the lock object. */ - - /* Report that a lock has been created at address "lock". */ - #define ANNOTATE_RWLOCK_CREATE(lock) \ - AnnotateRWLockCreate(__FILE__, __LINE__, lock) - - /* Report that the lock at address "lock" is about to be destroyed. */ - #define ANNOTATE_RWLOCK_DESTROY(lock) \ - AnnotateRWLockDestroy(__FILE__, __LINE__, lock) - - /* Report that the lock at address "lock" has been acquired. - is_w=1 for writer lock, is_w=0 for reader lock. */ - #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ - AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) - - /* Report that the lock at address "lock" is about to be released. */ - #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ - AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) - - /* ------------------------------------------------------------- - Annotations useful when implementing barriers. They are not - normally needed by modules that merely use barriers. - The "barrier" argument is a pointer to the barrier object. */ - - /* Report that the "barrier" has been initialized with initial "count". - If 'reinitialization_allowed' is true, initialization is allowed to happen - multiple times w/o calling barrier_destroy() */ - #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \ - AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \ - reinitialization_allowed) - - /* Report that we are about to enter barrier_wait("barrier"). */ - #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \ - AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier) - - /* Report that we just exited barrier_wait("barrier"). */ - #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \ - AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier) - - /* Report that the "barrier" has been destroyed. 
*/ - #define ANNOTATE_BARRIER_DESTROY(barrier) \ - AnnotateBarrierDestroy(__FILE__, __LINE__, barrier) - - /* ------------------------------------------------------------- - Annotations useful for testing race detectors. */ - - /* Report that we expect a race on the variable at "address". - Use only in unit tests for a race detector. */ - #define ANNOTATE_EXPECT_RACE(address, description) \ - AnnotateExpectRace(__FILE__, __LINE__, address, description) - - /* A no-op. Insert where you like to test the interceptors. */ - #define ANNOTATE_NO_OP(arg) \ - AnnotateNoOp(__FILE__, __LINE__, arg) - - /* Force the race detector to flush its state. The actual effect depends on - * the implementation of the detector. */ - #define ANNOTATE_FLUSH_STATE() \ - AnnotateFlushState(__FILE__, __LINE__) - - -#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ - - #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */ - #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */ - #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */ - #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */ - #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */ - #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */ - #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */ - #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */ - #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */ - #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */ - #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */ - #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */ - #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */ - #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */ - #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_PCQ_CREATE(pcq) /* empty */ - #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */ - #define ANNOTATE_PCQ_PUT(pcq) /* empty */ - #define ANNOTATE_PCQ_GET(pcq) /* empty */ - #define ANNOTATE_NEW_MEMORY(address, size) /* empty */ - #define ANNOTATE_EXPECT_RACE(address, description) /* empty */ - #define ANNOTATE_BENIGN_RACE(address, description) /* empty */ - #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */ - #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */ - #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */ - #define ANNOTATE_TRACE_MEMORY(arg) /* empty */ - #define ANNOTATE_THREAD_NAME(name) /* empty */ - #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_READS_END() /* empty */ - #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_WRITES_END() /* empty */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */ - #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */ - #define ANNOTATE_NO_OP(arg) /* empty */ - #define ANNOTATE_FLUSH_STATE() /* empty */ - -#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ - -/* Macro definitions for GCC attributes that allow static thread safety - analysis to recognize and use some of the dynamic annotations as - escape hatches. - TODO(lcwu): remove the check for __SUPPORT_DYN_ANNOTATION__ once the - default crosstool/GCC supports these GCC attributes. 
*/ - -#define ANNOTALYSIS_STATIC_INLINE -#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY ; -#define ANNOTALYSIS_IGNORE_READS_BEGIN -#define ANNOTALYSIS_IGNORE_READS_END -#define ANNOTALYSIS_IGNORE_WRITES_BEGIN -#define ANNOTALYSIS_IGNORE_WRITES_END -#define ANNOTALYSIS_UNPROTECTED_READ - -#if defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__)) && \ - defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__) - -#if DYNAMIC_ANNOTATIONS_ENABLED == 0 -#define ANNOTALYSIS_ONLY 1 -#undef ANNOTALYSIS_STATIC_INLINE -#define ANNOTALYSIS_STATIC_INLINE static inline -#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY -#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY { (void)file; (void)line; } -#endif - -/* Only emit attributes when annotalysis is enabled. */ -#if defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__) -#undef ANNOTALYSIS_IGNORE_READS_BEGIN -#define ANNOTALYSIS_IGNORE_READS_BEGIN __attribute__ ((ignore_reads_begin)) -#undef ANNOTALYSIS_IGNORE_READS_END -#define ANNOTALYSIS_IGNORE_READS_END __attribute__ ((ignore_reads_end)) -#undef ANNOTALYSIS_IGNORE_WRITES_BEGIN -#define ANNOTALYSIS_IGNORE_WRITES_BEGIN __attribute__ ((ignore_writes_begin)) -#undef ANNOTALYSIS_IGNORE_WRITES_END -#define ANNOTALYSIS_IGNORE_WRITES_END __attribute__ ((ignore_writes_end)) -#undef ANNOTALYSIS_UNPROTECTED_READ -#define ANNOTALYSIS_UNPROTECTED_READ __attribute__ ((unprotected_read)) -#endif - -#endif // defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__)) - -/* Use the macros above rather than using these functions directly. */ -#ifdef __cplusplus -extern "C" { -#endif -void AnnotateRWLockCreate(const char *file, int line, - const volatile void *lock); -void AnnotateRWLockDestroy(const char *file, int line, - const volatile void *lock); -void AnnotateRWLockAcquired(const char *file, int line, - const volatile void *lock, long is_w); -void AnnotateRWLockReleased(const char *file, int line, - const volatile void *lock, long is_w); -void AnnotateBarrierInit(const char *file, int line, - const volatile void *barrier, long count, - long reinitialization_allowed); -void AnnotateBarrierWaitBefore(const char *file, int line, - const volatile void *barrier); -void AnnotateBarrierWaitAfter(const char *file, int line, - const volatile void *barrier); -void AnnotateBarrierDestroy(const char *file, int line, - const volatile void *barrier); -void AnnotateCondVarWait(const char *file, int line, - const volatile void *cv, - const volatile void *lock); -void AnnotateCondVarSignal(const char *file, int line, - const volatile void *cv); -void AnnotateCondVarSignalAll(const char *file, int line, - const volatile void *cv); -void AnnotatePublishMemoryRange(const char *file, int line, - const volatile void *address, - long size); -void AnnotateUnpublishMemoryRange(const char *file, int line, - const volatile void *address, - long size); -void AnnotatePCQCreate(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQDestroy(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQPut(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQGet(const char *file, int line, - const volatile void *pcq); -void AnnotateNewMemory(const char *file, int line, - const volatile void *address, - long size); -void AnnotateExpectRace(const char *file, int line, - const volatile void *address, - const char *description); -void AnnotateBenignRace(const char *file, int line, - const volatile void *address, - const char *description); -void 
AnnotateBenignRaceSized(const char *file, int line, - const volatile void *address, - long size, - const char *description); -void AnnotateMutexIsUsedAsCondVar(const char *file, int line, - const volatile void *mu); -void AnnotateTraceMemory(const char *file, int line, - const volatile void *arg); -void AnnotateThreadName(const char *file, int line, - const char *name); -ANNOTALYSIS_STATIC_INLINE -void AnnotateIgnoreReadsBegin(const char *file, int line) - ANNOTALYSIS_IGNORE_READS_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY -ANNOTALYSIS_STATIC_INLINE -void AnnotateIgnoreReadsEnd(const char *file, int line) - ANNOTALYSIS_IGNORE_READS_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY -ANNOTALYSIS_STATIC_INLINE -void AnnotateIgnoreWritesBegin(const char *file, int line) - ANNOTALYSIS_IGNORE_WRITES_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY -ANNOTALYSIS_STATIC_INLINE -void AnnotateIgnoreWritesEnd(const char *file, int line) - ANNOTALYSIS_IGNORE_WRITES_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY -void AnnotateEnableRaceDetection(const char *file, int line, int enable); -void AnnotateNoOp(const char *file, int line, - const volatile void *arg); -void AnnotateFlushState(const char *file, int line); - -/* Return non-zero value if running under valgrind. - - If "valgrind.h" is included into dynamic_annotations.c, - the regular valgrind mechanism will be used. - See http://valgrind.org/docs/manual/manual-core-adv.html about - RUNNING_ON_VALGRIND and other valgrind "client requests". - The file "valgrind.h" may be obtained by doing - svn co svn://svn.valgrind.org/valgrind/trunk/include - - If for some reason you can't use "valgrind.h" or want to fake valgrind, - there are two ways to make this function return non-zero: - - Use environment variable: export RUNNING_ON_VALGRIND=1 - - Make your tool intercept the function RunningOnValgrind() and - change its return value. - */ -int RunningOnValgrind(void); - -/* ValgrindSlowdown returns: - * 1.0, if (RunningOnValgrind() == 0) - * 50.0, if (RunningOnValgrind() != 0 && getenv("VALGRIND_SLOWDOWN") == NULL) - * atof(getenv("VALGRIND_SLOWDOWN")) otherwise - This function can be used to scale timeout values: - EXAMPLE: - for (;;) { - DoExpensiveBackgroundTask(); - SleepForSeconds(5 * ValgrindSlowdown()); - } - */ -double ValgrindSlowdown(void); - -#ifdef __cplusplus -} -#endif - -#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus) - - /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. - - Instead of doing - ANNOTATE_IGNORE_READS_BEGIN(); - ... = x; - ANNOTATE_IGNORE_READS_END(); - one can use - ... = ANNOTATE_UNPROTECTED_READ(x); */ - template - inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) - ANNOTALYSIS_UNPROTECTED_READ { - ANNOTATE_IGNORE_READS_BEGIN(); - T res = x; - ANNOTATE_IGNORE_READS_END(); - return res; - } - /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. 
*/ - #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ - namespace { \ - class static_var ## _annotator { \ - public: \ - static_var ## _annotator() { \ - ANNOTATE_BENIGN_RACE_SIZED(&static_var, \ - sizeof(static_var), \ - # static_var ": " description); \ - } \ - }; \ - static static_var ## _annotator the ## static_var ## _annotator;\ - } -#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ - - #define ANNOTATE_UNPROTECTED_READ(x) (x) - #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) /* empty */ - -#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ - -/* Annotalysis, a GCC based static analyzer, is able to understand and use - some of the dynamic annotations defined in this file. However, dynamic - annotations are usually disabled in the opt mode (to avoid additional - runtime overheads) while Annotalysis only works in the opt mode. - In order for Annotalysis to use these dynamic annotations when they - are disabled, we re-define these annotations here. Note that unlike the - original macro definitions above, these macros are expanded to calls to - static inline functions so that the compiler will be able to remove the - calls after the analysis. */ - -#ifdef ANNOTALYSIS_ONLY - - #undef ANNOTALYSIS_ONLY - - /* Undefine and re-define the macros that the static analyzer understands. */ - #undef ANNOTATE_IGNORE_READS_BEGIN - #define ANNOTATE_IGNORE_READS_BEGIN() \ - AnnotateIgnoreReadsBegin(__FILE__, __LINE__) - - #undef ANNOTATE_IGNORE_READS_END - #define ANNOTATE_IGNORE_READS_END() \ - AnnotateIgnoreReadsEnd(__FILE__, __LINE__) - - #undef ANNOTATE_IGNORE_WRITES_BEGIN - #define ANNOTATE_IGNORE_WRITES_BEGIN() \ - AnnotateIgnoreWritesBegin(__FILE__, __LINE__) - - #undef ANNOTATE_IGNORE_WRITES_END - #define ANNOTATE_IGNORE_WRITES_END() \ - AnnotateIgnoreWritesEnd(__FILE__, __LINE__) - - #undef ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN - #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ - do { \ - ANNOTATE_IGNORE_READS_BEGIN(); \ - ANNOTATE_IGNORE_WRITES_BEGIN(); \ - }while(0) \ - - #undef ANNOTATE_IGNORE_READS_AND_WRITES_END - #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ - do { \ - ANNOTATE_IGNORE_WRITES_END(); \ - ANNOTATE_IGNORE_READS_END(); \ - }while(0) \ - - #if defined(__cplusplus) - #undef ANNOTATE_UNPROTECTED_READ - template - inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) - ANNOTALYSIS_UNPROTECTED_READ { - ANNOTATE_IGNORE_READS_BEGIN(); - T res = x; - ANNOTATE_IGNORE_READS_END(); - return res; - } - #endif /* __cplusplus */ - -#endif /* ANNOTALYSIS_ONLY */ - -/* Undefine the macros intended only in this file. */ -#undef ANNOTALYSIS_STATIC_INLINE -#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY - -#endif /* BASE_DYNAMIC_ANNOTATIONS_H_ */ diff --git a/contrib/libtcmalloc/src/base/elf_mem_image.cc b/contrib/libtcmalloc/src/base/elf_mem_image.cc deleted file mode 100644 index d9605609e3a..00000000000 --- a/contrib/libtcmalloc/src/base/elf_mem_image.cc +++ /dev/null @@ -1,443 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Pluzhnikov
-//
-// Allow dynamic symbol lookup in an in-memory Elf image.
-//
-
-#include "base/elf_mem_image.h"
-
-#ifdef HAVE_ELF_MEM_IMAGE  // defined in elf_mem_image.h
-
-#include <stddef.h>   // for size_t, ptrdiff_t
-#include "base/logging.h"
-
-// From binutils/include/elf/common.h (this doesn't appear to be documented
-// anywhere else).
-//
-//   /* This flag appears in a Versym structure.  It means that the symbol
-//      is hidden, and is only visible with an explicit version number.
-//      This is a GNU extension.  */
-// #define VERSYM_HIDDEN           0x8000
-//
-//   /* This is the mask for the rest of the Versym information.  */
-// #define VERSYM_VERSION          0x7fff
-
-#define VERSYM_VERSION 0x7fff
-
-#if __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-const-variable"
-#endif
-
-namespace base {
-
-namespace {
-template <int N> class ElfClass {
- public:
-  static const int kElfClass = -1;
-  static int ElfBind(const ElfW(Sym) *) {
-    CHECK(false); // << "Unexpected word size";
-    return 0;
-  }
-  static int ElfType(const ElfW(Sym) *) {
-    CHECK(false); // << "Unexpected word size";
-    return 0;
-  }
-};
-
-template <> class ElfClass<32> {
- public:
-  static const int kElfClass = ELFCLASS32;
-  static int ElfBind(const ElfW(Sym) *symbol) {
-    return ELF32_ST_BIND(symbol->st_info);
-  }
-  static int ElfType(const ElfW(Sym) *symbol) {
-    return ELF32_ST_TYPE(symbol->st_info);
-  }
-};
-
-template <> class ElfClass<64> {
- public:
-  static const int kElfClass = ELFCLASS64;
-  static int ElfBind(const ElfW(Sym) *symbol) {
-    return ELF64_ST_BIND(symbol->st_info);
-  }
-  static int ElfType(const ElfW(Sym) *symbol) {
-    return ELF64_ST_TYPE(symbol->st_info);
-  }
-};
-
-typedef ElfClass<__WORDSIZE> CurrentElfClass;
-
-// Extract an element from one of the ELF tables, cast it to desired type.
-// This is just a simple arithmetic and a glorified cast.
-// Callers are responsible for bounds checking.
-template <typename T>
-const T* GetTableElement(const ElfW(Ehdr) *ehdr,
-                         ElfW(Off) table_offset,
-                         ElfW(Word) element_size,
-                         size_t index) {
-  return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr)
-                                    + table_offset
-                                    + index * element_size);
-}
-}  // namespace
-
-const void *const ElfMemImage::kInvalidBase =
-    reinterpret_cast<const void *>(~0L);
-
-ElfMemImage::ElfMemImage(const void *base) {
-  CHECK(base != kInvalidBase);
-  Init(base);
-}
-
-int ElfMemImage::GetNumSymbols() const {
-  if (!hash_) {
-    return 0;
-  }
-  // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash
-  return hash_[1];
-}
-
-const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const {
-  CHECK_LT(index, GetNumSymbols());
-  return dynsym_ + index;
-}
-
-const ElfW(Versym) *ElfMemImage::GetVersym(int index) const {
-  CHECK_LT(index, GetNumSymbols());
-  return versym_ + index;
-}
-
-const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
-  CHECK_LT(index, ehdr_->e_phnum);
-  return GetTableElement<ElfW(Phdr)>(ehdr_,
-                                     ehdr_->e_phoff,
-                                     ehdr_->e_phentsize,
-                                     index);
-}
-
-const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
-  CHECK_LT(offset, strsize_);
-  return dynstr_ + offset;
-}
-
-const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
-  if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
-    // Symbol corresponds to "special" (e.g. SHN_ABS) section.
-    return reinterpret_cast<const void *>(sym->st_value);
-  }
-  CHECK_LT(link_base_, sym->st_value);
-  return GetTableElement<char>(ehdr_, 0, 1, sym->st_value) - link_base_;
-}
-
-const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
-  CHECK_LE(index, verdefnum_);
-  const ElfW(Verdef) *version_definition = verdef_;
-  while (version_definition->vd_ndx < index && version_definition->vd_next) {
-    const char *const version_definition_as_char =
-        reinterpret_cast<const char *>(version_definition);
-    version_definition =
-        reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
-                                               version_definition->vd_next);
-  }
-  return version_definition->vd_ndx == index ? version_definition : NULL;
-}
-
-const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
-    const ElfW(Verdef) *verdef) const {
-  return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
-}
-
-const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
-  CHECK_LT(offset, strsize_);
-  return dynstr_ + offset;
-}
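GetNumSymbols() above leans on the layout of the DT_HASH section: word 0 is nbucket, word 1 is nchain, and nchain equals the number of .dynsym entries. A small standalone sketch of that convention, using a fabricated table purely for illustration:

#include <cstdint>
#include <cstdio>

// DT_HASH layout: [nbucket, nchain, bucket[0..nbucket-1], chain[0..nchain-1]].
// nchain equals the .dynsym entry count, which is what hash_[1] exploits.
static uint32_t NumDynsymEntries(const uint32_t* dt_hash) {
    return dt_hash ? dt_hash[1] : 0;
}

int main() {
    // A made-up 2-bucket / 3-symbol hash table.
    const uint32_t fake_hash[] = {2, 3, /*buckets*/ 0, 1, /*chains*/ 0, 0, 2};
    std::printf("%u symbols\n", static_cast<unsigned>(NumDynsymEntries(fake_hash)));  // prints 3
    return 0;
}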
-
-void ElfMemImage::Init(const void *base) {
-  ehdr_      = NULL;
-  dynsym_    = NULL;
-  dynstr_    = NULL;
-  versym_    = NULL;
-  verdef_    = NULL;
-  hash_      = NULL;
-  strsize_   = 0;
-  verdefnum_ = 0;
-  link_base_ = ~0L;  // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
-  if (!base) {
-    return;
-  }
-  const intptr_t base_as_uintptr_t = reinterpret_cast<intptr_t>(base);
-  // Fake VDSO has low bit set.
-  const bool fake_vdso = ((base_as_uintptr_t & 1) != 0);
-  base = reinterpret_cast<const void *>(base_as_uintptr_t & ~1);
-  const char *const base_as_char = reinterpret_cast<const char *>(base);
-  if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
-      base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
-    RAW_DCHECK(false, "no ELF magic"); // at %p", base);
-    return;
-  }
-  int elf_class = base_as_char[EI_CLASS];
-  if (elf_class != CurrentElfClass::kElfClass) {
-    DCHECK_EQ(elf_class, CurrentElfClass::kElfClass);
-    return;
-  }
-  switch (base_as_char[EI_DATA]) {
-    case ELFDATA2LSB: {
-      if (__LITTLE_ENDIAN != __BYTE_ORDER) {
-        DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
-        return;
-      }
-      break;
-    }
-    case ELFDATA2MSB: {
-      if (__BIG_ENDIAN != __BYTE_ORDER) {
-        DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
-        return;
-      }
-      break;
-    }
-    default: {
-      RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA];
-      return;
-    }
-  }
-
-  ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
-  const ElfW(Phdr) *dynamic_program_header = NULL;
-  for (int i = 0; i < ehdr_->e_phnum; ++i) {
-    const ElfW(Phdr) *const program_header = GetPhdr(i);
-    switch (program_header->p_type) {
-      case PT_LOAD:
-        if (link_base_ == ~0L) {
-          link_base_ = program_header->p_vaddr;
-        }
-        break;
-      case PT_DYNAMIC:
-        dynamic_program_header = program_header;
-        break;
-    }
-  }
-  if (link_base_ == ~0L || !dynamic_program_header) {
-    RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO");
-    RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO");
-    // Mark this image as not present. Can not recur infinitely.
-    Init(0);
-    return;
-  }
-  ptrdiff_t relocation =
-      base_as_char - reinterpret_cast<const char *>(link_base_);
-  ElfW(Dyn) *dynamic_entry =
-      reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr +
-                                    relocation);
-  for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
-    ElfW(Xword) value = dynamic_entry->d_un.d_val;
-    if (fake_vdso) {
-      // A complication: in the real VDSO, dynamic entries are not relocated
-      // (it wasn't loaded by a dynamic loader). But when testing with a
-      // "fake" dlopen()ed vdso library, the loader relocates some (but
-      // not all!) of them before we get here.
-      if (dynamic_entry->d_tag == DT_VERDEF) {
-        // The only dynamic entry (of the ones we care about) libc-2.3.6
-        // loader doesn't relocate.
-        value += relocation;
-      }
-    } else {
-      // Real VDSO. Everything needs to be relocated.
-      value += relocation;
-    }
-    switch (dynamic_entry->d_tag) {
-      case DT_HASH:
-        hash_ = reinterpret_cast<ElfW(Word) *>(value);
-        break;
-      case DT_SYMTAB:
-        dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
-        break;
-      case DT_STRTAB:
-        dynstr_ = reinterpret_cast<const char *>(value);
-        break;
-      case DT_VERSYM:
-        versym_ = reinterpret_cast<ElfW(Versym) *>(value);
-        break;
-      case DT_VERDEF:
-        verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
-        break;
-      case DT_VERDEFNUM:
-        verdefnum_ = dynamic_entry->d_un.d_val;
-        break;
-      case DT_STRSZ:
-        strsize_ = dynamic_entry->d_un.d_val;
-        break;
-      default:
-        // Unrecognized entries explicitly ignored.
-        break;
-    }
-  }
-  if (!hash_ || !dynsym_ || !dynstr_ || !versym_ ||
-      !verdef_ || !verdefnum_ || !strsize_) {
-    RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)");
-    RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)");
-    RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)");
-    RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)");
-    RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)");
-    RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)");
-    RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)");
-    // Mark this image as not present. Can not recur infinitely.
-    Init(0);
-    return;
-  }
-}
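The relocation arithmetic in Init() above is the usual link-address-to-load-address shift: every p_vaddr-relative value becomes a run-time pointer by adding (load base - link base). A worked toy example, with made-up addresses and assuming a 64-bit platform:

#include <cstdint>
#include <cstdio>

int main() {
    const uintptr_t link_base = 0x1000;          // p_vaddr of the first PT_LOAD
    const uintptr_t load_base = 0x7f0000000000;  // where the kernel mapped the image
    const intptr_t relocation = static_cast<intptr_t>(load_base - link_base);

    const uintptr_t dt_strtab_vaddr = 0x1480;    // link-time address of .dynstr
    // Run-time address = link-time address + relocation.
    std::printf("run-time .dynstr at %#lx\n",
                static_cast<unsigned long>(dt_strtab_vaddr + relocation));
    return 0;
}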
-
-bool ElfMemImage::LookupSymbol(const char *name,
-                               const char *version,
-                               int type,
-                               SymbolInfo *info) const {
-  for (SymbolIterator it = begin(); it != end(); ++it) {
-    if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 &&
-        CurrentElfClass::ElfType(it->symbol) == type) {
-      if (info) {
-        *info = *it;
-      }
-      return true;
-    }
-  }
-  return false;
-}
-
-bool ElfMemImage::LookupSymbolByAddress(const void *address,
-                                        SymbolInfo *info_out) const {
-  for (SymbolIterator it = begin(); it != end(); ++it) {
-    const char *const symbol_start =
-        reinterpret_cast<const char *>(it->address);
-    const char *const symbol_end = symbol_start + it->symbol->st_size;
-    if (symbol_start <= address && address < symbol_end) {
-      if (info_out) {
-        // Client wants to know details for that symbol (the usual case).
-        if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) {
-          // Strong symbol; just return it.
-          *info_out = *it;
-          return true;
-        } else {
-          // Weak or local. Record it, but keep looking for a strong one.
-          *info_out = *it;
-        }
-      } else {
-        // Client only cares if there is an overlapping symbol.
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
-    : index_(index), image_(image) {
-}
-
-const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
-  return &info_;
-}
-
-const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
-  return info_;
-}
-
-bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
-  return this->image_ == rhs.image_ && this->index_ == rhs.index_;
-}
-
-bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
-  return !(*this == rhs);
-}
-
-ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
-  this->Update(1);
-  return *this;
-}
-
-ElfMemImage::SymbolIterator ElfMemImage::begin() const {
-  SymbolIterator it(this, 0);
-  it.Update(0);
-  return it;
-}
-
-ElfMemImage::SymbolIterator ElfMemImage::end() const {
-  return SymbolIterator(this, GetNumSymbols());
-}
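A hypothetical usage sketch of the begin()/end() iterator pair above, dumping every versioned symbol of the running process's VDSO. It assumes Linux glibc (getauxval) and that this header were still present in the tree, so it is illustrative only:

#include <sys/auxv.h>
#include <cstdio>
#include "base/elf_mem_image.h"  // the header being deleted below

int main() {
    // The kernel hands the VDSO base to every process via the aux vector.
    const void* vdso = reinterpret_cast<const void*>(getauxval(AT_SYSINFO_EHDR));
    if (!vdso) return 1;
    base::ElfMemImage image(vdso);
    for (base::ElfMemImage::SymbolIterator it = image.begin(); it != image.end(); ++it)
        std::printf("%s@%s -> %p\n", it->name, it->version, it->address);
    return 0;
}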
-
-void ElfMemImage::SymbolIterator::Update(int increment) {
-  const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
-  CHECK(image->IsPresent() || increment == 0);
-  if (!image->IsPresent()) {
-    return;
-  }
-  index_ += increment;
-  if (index_ >= image->GetNumSymbols()) {
-    index_ = image->GetNumSymbols();
-    return;
-  }
-  const ElfW(Sym) *symbol = image->GetDynsym(index_);
-  const ElfW(Versym) *version_symbol = image->GetVersym(index_);
-  CHECK(symbol && version_symbol);
-  const char *const symbol_name = image->GetDynstr(symbol->st_name);
-  const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
-  const ElfW(Verdef) *version_definition = NULL;
-  const char *version_name = "";
-  if (symbol->st_shndx == SHN_UNDEF) {
-    // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
-    // version_index could well be greater than verdefnum_, so calling
-    // GetVerdef(version_index) may trigger assertion.
-  } else {
-    version_definition = image->GetVerdef(version_index);
-  }
-  if (version_definition) {
-    // I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
-    // optional 2nd if the version has a parent.
-    CHECK_LE(1, version_definition->vd_cnt);
-    CHECK_LE(version_definition->vd_cnt, 2);
-    const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
-    version_name = image->GetVerstr(version_aux->vda_name);
-  }
-  info_.name    = symbol_name;
-  info_.version = version_name;
-  info_.address = image->GetSymAddr(symbol);
-  info_.symbol  = symbol;
-}
-
-}  // namespace base
-
-#if __clang__
-#pragma clang diagnostic pop
-#endif
-
-#endif  // HAVE_ELF_MEM_IMAGE
diff --git a/contrib/libtcmalloc/src/base/elf_mem_image.h b/contrib/libtcmalloc/src/base/elf_mem_image.h
deleted file mode 100644
index df63cf8b4da..00000000000
--- a/contrib/libtcmalloc/src/base/elf_mem_image.h
+++ /dev/null
@@ -1,135 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Pluzhnikov
-//
-// Allow dynamic symbol lookup for in-memory Elf images.
-
-#ifndef BASE_ELF_MEM_IMAGE_H_
-#define BASE_ELF_MEM_IMAGE_H_
-
-#include "../config.h"
-#ifdef HAVE_FEATURES_H
-#include <features.h>   // for __GLIBC__
-#endif
-
-// Maybe one day we can rewrite this file not to require the elf
-// symbol extensions in glibc, but for right now we need them.
-#if defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__)
-
-#define HAVE_ELF_MEM_IMAGE 1
-
-#include <stdlib.h>
-#include <link.h>  // for ElfW
-
-namespace base {
-
-// An in-memory ELF image (may not exist on disk).
-class ElfMemImage {
- public:
-  // Sentinel: there could never be an elf image at this address.
-  static const void *const kInvalidBase;
-
-  // Information about a single vdso symbol.
-  // All pointers are into .dynsym, .dynstr, or .text of the VDSO.
-  // Do not free() them or modify through them.
-  struct SymbolInfo {
-    const char      *name;      // E.g. "__vdso_getcpu"
-    const char      *version;   // E.g. "LINUX_2.6", could be ""
-                                // for unversioned symbol.
-    const void      *address;   // Relocated symbol address.
-    const ElfW(Sym) *symbol;    // Symbol in the dynamic symbol table.
- }; - - // Supports iteration over all dynamic symbols. - class SymbolIterator { - public: - friend class ElfMemImage; - const SymbolInfo *operator->() const; - const SymbolInfo &operator*() const; - SymbolIterator& operator++(); - bool operator!=(const SymbolIterator &rhs) const; - bool operator==(const SymbolIterator &rhs) const; - private: - SymbolIterator(const void *const image, int index); - void Update(int incr); - SymbolInfo info_; - int index_; - const void *const image_; - }; - - - explicit ElfMemImage(const void *base); - void Init(const void *base); - bool IsPresent() const { return ehdr_ != NULL; } - const ElfW(Phdr)* GetPhdr(int index) const; - const ElfW(Sym)* GetDynsym(int index) const; - const ElfW(Versym)* GetVersym(int index) const; - const ElfW(Verdef)* GetVerdef(int index) const; - const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const; - const char* GetDynstr(ElfW(Word) offset) const; - const void* GetSymAddr(const ElfW(Sym) *sym) const; - const char* GetVerstr(ElfW(Word) offset) const; - int GetNumSymbols() const; - - SymbolIterator begin() const; - SymbolIterator end() const; - - // Look up versioned dynamic symbol in the image. - // Returns false if image is not present, or doesn't contain given - // symbol/version/type combination. - // If info_out != NULL, additional details are filled in. - bool LookupSymbol(const char *name, const char *version, - int symbol_type, SymbolInfo *info_out) const; - - // Find info about symbol (if any) which overlaps given address. - // Returns true if symbol was found; false if image isn't present - // or doesn't have a symbol overlapping given address. - // If info_out != NULL, additional details are filled in. - bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; - - private: - const ElfW(Ehdr) *ehdr_; - const ElfW(Sym) *dynsym_; - const ElfW(Versym) *versym_; - const ElfW(Verdef) *verdef_; - const ElfW(Word) *hash_; - const char *dynstr_; - size_t strsize_; - size_t verdefnum_; - ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD). -}; - -} // namespace base - -#endif // __ELF__ and __GLIBC__ and !__native_client__ - -#endif // BASE_ELF_MEM_IMAGE_H_ diff --git a/contrib/libtcmalloc/src/base/elfcore.h b/contrib/libtcmalloc/src/base/elfcore.h deleted file mode 100644 index 98fd23b6738..00000000000 --- a/contrib/libtcmalloc/src/base/elfcore.h +++ /dev/null @@ -1,401 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -/* Copyright (c) 2005-2008, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Markus Gutschke, Carl Crous
- */
-
-#ifndef _ELFCORE_H
-#define _ELFCORE_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* We currently only support x86-32, x86-64, ARM, MIPS, PPC on Linux.
- * Porting to other related platforms should not be difficult.
- */
-#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
-     defined(__mips__) || defined(__PPC__)) && defined(__linux)
-
-#include <stdarg.h>
-#include <stdint.h>
-#include <sys/types.h>
-#include "../config.h"
-
-
-/* Define the DUMPER symbol to make sure that there is exactly one
- * core dumper built into the library.
- */
-#define DUMPER "ELF"
-
-/* By the time that we get a chance to read CPU registers in the
- * calling thread, they are already in a not particularly useful
- * state. Besides, there will be multiple frames on the stack that are
- * just making the core file confusing. To fix this problem, we take a
- * snapshot of the frame pointer, stack pointer, and instruction
- * pointer at an earlier time, and then insert these values into the
- * core file.
- */
-
-#if defined(__i386__) || defined(__x86_64__)
-  typedef struct i386_regs {    /* Normal (non-FPU) CPU registers            */
-  #ifdef __x86_64__
-  #define BP rbp
-  #define SP rsp
-  #define IP rip
-    uint64_t  r15,r14,r13,r12,rbp,rbx,r11,r10;
-    uint64_t  r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
-    uint64_t  rip,cs,eflags;
-    uint64_t  rsp,ss;
-    uint64_t  fs_base, gs_base;
-    uint64_t  ds,es,fs,gs;
-  #else
-  #define BP ebp
-  #define SP esp
-  #define IP eip
-    uint32_t  ebx, ecx, edx, esi, edi, ebp, eax;
-    uint16_t  ds, __ds, es, __es;
-    uint16_t  fs, __fs, gs, __gs;
-    uint32_t  orig_eax, eip;
-    uint16_t  cs, __cs;
-    uint32_t  eflags, esp;
-    uint16_t  ss, __ss;
-  #endif
-  } i386_regs;
-#elif defined(__arm__)
-  typedef struct arm_regs {     /* General purpose registers                 */
-  #define BP uregs[11]          /* Frame pointer                             */
-  #define SP uregs[13]          /* Stack pointer                             */
-  #define IP uregs[15]          /* Program counter                           */
-  #define LR uregs[14]          /* Link register                             */
-    long uregs[18];
-  } arm_regs;
-#elif defined(__mips__)
-  typedef struct mips_regs {
-    unsigned long pad[6];       /* Unused padding to match kernel structures */
-    unsigned long uregs[32];    /* General purpose registers.                */
-    unsigned long hi;           /* Used for multiplication and division.     */
-    unsigned long lo;
-    unsigned long cp0_epc;      /* Program counter.                          */
-    unsigned long cp0_badvaddr;
-    unsigned long cp0_status;
-    unsigned long cp0_cause;
-    unsigned long unused;
-  } mips_regs;
-#elif defined (__PPC__)
-  typedef struct ppc_regs {
-  #define SP uregs[1]           /* Stack pointer                             */
-  #define IP rip                /* Program counter                           */
-  #define LR lr                 /* Link register                             */
-    unsigned long uregs[32];    /* General Purpose Registers - r0-r31.       */
-    double        fpr[32];      /* Floating-Point Registers - f0-f31.        */
-    unsigned long rip;          /* Program counter.
*/ - unsigned long msr; - unsigned long ccr; - unsigned long lr; - unsigned long ctr; - unsigned long xeq; - unsigned long mq; - } ppc_regs; -#endif - -#if defined(__i386__) && defined(__GNUC__) - /* On x86 we provide an optimized version of the FRAME() macro, if the - * compiler supports a GCC-style asm() directive. This results in somewhat - * more accurate values for CPU registers. - */ - typedef struct Frame { - struct i386_regs uregs; - int errno_; - pid_t tid; - } Frame; - #define FRAME(f) Frame f; \ - do { \ - f.errno_ = errno; \ - f.tid = sys_gettid(); \ - __asm__ volatile ( \ - "push %%ebp\n" \ - "push %%ebx\n" \ - "mov %%ebx,0(%%eax)\n" \ - "mov %%ecx,4(%%eax)\n" \ - "mov %%edx,8(%%eax)\n" \ - "mov %%esi,12(%%eax)\n" \ - "mov %%edi,16(%%eax)\n" \ - "mov %%ebp,20(%%eax)\n" \ - "mov %%eax,24(%%eax)\n" \ - "mov %%ds,%%ebx\n" \ - "mov %%ebx,28(%%eax)\n" \ - "mov %%es,%%ebx\n" \ - "mov %%ebx,32(%%eax)\n" \ - "mov %%fs,%%ebx\n" \ - "mov %%ebx,36(%%eax)\n" \ - "mov %%gs,%%ebx\n" \ - "mov %%ebx, 40(%%eax)\n" \ - "call 0f\n" \ - "0:pop %%ebx\n" \ - "add $1f-0b,%%ebx\n" \ - "mov %%ebx,48(%%eax)\n" \ - "mov %%cs,%%ebx\n" \ - "mov %%ebx,52(%%eax)\n" \ - "pushf\n" \ - "pop %%ebx\n" \ - "mov %%ebx,56(%%eax)\n" \ - "mov %%esp,%%ebx\n" \ - "add $8,%%ebx\n" \ - "mov %%ebx,60(%%eax)\n" \ - "mov %%ss,%%ebx\n" \ - "mov %%ebx,64(%%eax)\n" \ - "pop %%ebx\n" \ - "pop %%ebp\n" \ - "1:" \ - : : "a" (&f) : "memory"); \ - } while (0) - #define SET_FRAME(f,r) \ - do { \ - errno = (f).errno_; \ - (r) = (f).uregs; \ - } while (0) -#elif defined(__x86_64__) && defined(__GNUC__) - /* The FRAME and SET_FRAME macros for x86_64. */ - typedef struct Frame { - struct i386_regs uregs; - int errno_; - pid_t tid; - } Frame; - #define FRAME(f) Frame f; \ - do { \ - f.errno_ = errno; \ - f.tid = sys_gettid(); \ - __asm__ volatile ( \ - "push %%rbp\n" \ - "push %%rbx\n" \ - "mov %%r15,0(%%rax)\n" \ - "mov %%r14,8(%%rax)\n" \ - "mov %%r13,16(%%rax)\n" \ - "mov %%r12,24(%%rax)\n" \ - "mov %%rbp,32(%%rax)\n" \ - "mov %%rbx,40(%%rax)\n" \ - "mov %%r11,48(%%rax)\n" \ - "mov %%r10,56(%%rax)\n" \ - "mov %%r9,64(%%rax)\n" \ - "mov %%r8,72(%%rax)\n" \ - "mov %%rax,80(%%rax)\n" \ - "mov %%rcx,88(%%rax)\n" \ - "mov %%rdx,96(%%rax)\n" \ - "mov %%rsi,104(%%rax)\n" \ - "mov %%rdi,112(%%rax)\n" \ - "mov %%ds,%%rbx\n" \ - "mov %%rbx,184(%%rax)\n" \ - "mov %%es,%%rbx\n" \ - "mov %%rbx,192(%%rax)\n" \ - "mov %%fs,%%rbx\n" \ - "mov %%rbx,200(%%rax)\n" \ - "mov %%gs,%%rbx\n" \ - "mov %%rbx,208(%%rax)\n" \ - "call 0f\n" \ - "0:pop %%rbx\n" \ - "add $1f-0b,%%rbx\n" \ - "mov %%rbx,128(%%rax)\n" \ - "mov %%cs,%%rbx\n" \ - "mov %%rbx,136(%%rax)\n" \ - "pushf\n" \ - "pop %%rbx\n" \ - "mov %%rbx,144(%%rax)\n" \ - "mov %%rsp,%%rbx\n" \ - "add $16,%%ebx\n" \ - "mov %%rbx,152(%%rax)\n" \ - "mov %%ss,%%rbx\n" \ - "mov %%rbx,160(%%rax)\n" \ - "pop %%rbx\n" \ - "pop %%rbp\n" \ - "1:" \ - : : "a" (&f) : "memory"); \ - } while (0) - #define SET_FRAME(f,r) \ - do { \ - errno = (f).errno_; \ - (f).uregs.fs_base = (r).fs_base; \ - (f).uregs.gs_base = (r).gs_base; \ - (r) = (f).uregs; \ - } while (0) -#elif defined(__arm__) && defined(__GNUC__) - /* ARM calling conventions are a little more tricky. A little assembly - * helps in obtaining an accurate snapshot of all registers. 
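Editor's note: the FRAME() macros deleted here snapshot the frame, stack, and instruction pointers early, before deeper calls clutter the stack. A standalone sketch of that idea using GCC/Clang builtins, not the removed per-architecture assembly itself:

    // Concept demo only: capture the caller's frame and return address up
    // front, the way FRAME() captures registers before the core dump runs.
    #include <cstdio>

    struct FrameSnapshot {
      void *frame_pointer;
      void *return_address;
    };

    // noinline so the snapshot reflects a real, distinct stack frame.
    __attribute__((noinline)) FrameSnapshot TakeSnapshot() {
      FrameSnapshot s;
      s.frame_pointer  = __builtin_frame_address(0);
      s.return_address = __builtin_return_address(0);
      return s;
    }

    int main() {
      FrameSnapshot s = TakeSnapshot();
      std::printf("fp=%p ip(caller)=%p\n", s.frame_pointer, s.return_address);
    }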
- */ - typedef struct Frame { - struct arm_regs arm; - int errno_; - pid_t tid; - } Frame; - #define FRAME(f) Frame f; \ - do { \ - long cpsr; \ - f.errno_ = errno; \ - f.tid = sys_gettid(); \ - __asm__ volatile( \ - "stmia %0, {r0-r15}\n" /* All integer regs */\ - : : "r"(&f.arm) : "memory"); \ - f.arm.uregs[16] = 0; \ - __asm__ volatile( \ - "mrs %0, cpsr\n" /* Condition code reg */\ - : "=r"(cpsr)); \ - f.arm.uregs[17] = cpsr; \ - } while (0) - #define SET_FRAME(f,r) \ - do { \ - /* Don't override the FPU status register. */\ - /* Use the value obtained from ptrace(). This*/\ - /* works, because our code does not perform */\ - /* any FPU operations, itself. */\ - long fps = (f).arm.uregs[16]; \ - errno = (f).errno_; \ - (r) = (f).arm; \ - (r).uregs[16] = fps; \ - } while (0) -#elif defined(__mips__) && defined(__GNUC__) - typedef struct Frame { - struct mips_regs mips_regs; - int errno_; - pid_t tid; - } Frame; - #define MIPSREG(n) ({ register unsigned long r __asm__("$"#n); r; }) - #define FRAME(f) Frame f = { 0 }; \ - do { \ - unsigned long hi, lo; \ - register unsigned long pc __asm__("$31"); \ - f.mips_regs.uregs[ 0] = MIPSREG( 0); \ - f.mips_regs.uregs[ 1] = MIPSREG( 1); \ - f.mips_regs.uregs[ 2] = MIPSREG( 2); \ - f.mips_regs.uregs[ 3] = MIPSREG( 3); \ - f.mips_regs.uregs[ 4] = MIPSREG( 4); \ - f.mips_regs.uregs[ 5] = MIPSREG( 5); \ - f.mips_regs.uregs[ 6] = MIPSREG( 6); \ - f.mips_regs.uregs[ 7] = MIPSREG( 7); \ - f.mips_regs.uregs[ 8] = MIPSREG( 8); \ - f.mips_regs.uregs[ 9] = MIPSREG( 9); \ - f.mips_regs.uregs[10] = MIPSREG(10); \ - f.mips_regs.uregs[11] = MIPSREG(11); \ - f.mips_regs.uregs[12] = MIPSREG(12); \ - f.mips_regs.uregs[13] = MIPSREG(13); \ - f.mips_regs.uregs[14] = MIPSREG(14); \ - f.mips_regs.uregs[15] = MIPSREG(15); \ - f.mips_regs.uregs[16] = MIPSREG(16); \ - f.mips_regs.uregs[17] = MIPSREG(17); \ - f.mips_regs.uregs[18] = MIPSREG(18); \ - f.mips_regs.uregs[19] = MIPSREG(19); \ - f.mips_regs.uregs[20] = MIPSREG(20); \ - f.mips_regs.uregs[21] = MIPSREG(21); \ - f.mips_regs.uregs[22] = MIPSREG(22); \ - f.mips_regs.uregs[23] = MIPSREG(23); \ - f.mips_regs.uregs[24] = MIPSREG(24); \ - f.mips_regs.uregs[25] = MIPSREG(25); \ - f.mips_regs.uregs[26] = MIPSREG(26); \ - f.mips_regs.uregs[27] = MIPSREG(27); \ - f.mips_regs.uregs[28] = MIPSREG(28); \ - f.mips_regs.uregs[29] = MIPSREG(29); \ - f.mips_regs.uregs[30] = MIPSREG(30); \ - f.mips_regs.uregs[31] = MIPSREG(31); \ - __asm__ volatile ("mfhi %0" : "=r"(hi)); \ - __asm__ volatile ("mflo %0" : "=r"(lo)); \ - __asm__ volatile ("jal 1f; 1:nop" : "=r"(pc)); \ - f.mips_regs.hi = hi; \ - f.mips_regs.lo = lo; \ - f.mips_regs.cp0_epc = pc; \ - f.errno_ = errno; \ - f.tid = sys_gettid(); \ - } while (0) - #define SET_FRAME(f,r) \ - do { \ - errno = (f).errno_; \ - memcpy((r).uregs, (f).mips_regs.uregs, \ - 32*sizeof(unsigned long)); \ - (r).hi = (f).mips_regs.hi; \ - (r).lo = (f).mips_regs.lo; \ - (r).cp0_epc = (f).mips_regs.cp0_epc; \ - } while (0) -#else - /* If we do not have a hand-optimized assembly version of the FRAME() - * macro, we cannot reliably unroll the stack. So, we show a few additional - * stack frames for the coredumper. - */ - typedef struct Frame { - pid_t tid; - } Frame; - #define FRAME(f) Frame f; do { f.tid = sys_gettid(); } while (0) - #define SET_FRAME(f,r) do { } while (0) -#endif - - -/* Internal function for generating a core file. This API can change without - * notice and is only supposed to be used internally by the core dumper. 
- * - * This function works for both single- and multi-threaded core - * dumps. If called as - * - * FRAME(frame); - * InternalGetCoreDump(&frame, 0, NULL, ap); - * - * it creates a core file that only contains information about the - * calling thread. - * - * Optionally, the caller can provide information about other threads - * by passing their process ids in "thread_pids". The process id of - * the caller should not be included in this array. All of the threads - * must have been attached to with ptrace(), prior to calling this - * function. They will be detached when "InternalGetCoreDump()" returns. - * - * This function either returns a file handle that can be read for obtaining - * a core dump, or "-1" in case of an error. In the latter case, "errno" - * will be set appropriately. - * - * While "InternalGetCoreDump()" is not technically async signal safe, you - * might be tempted to invoke it from a signal handler. The code goes to - * great lengths to make a best effort that this will actually work. But in - * any case, you must make sure that you preserve the value of "errno" - * yourself. It is guaranteed to be clobbered otherwise. - * - * Also, "InternalGetCoreDump" is not strictly speaking re-entrant. Again, - * it makes a best effort to behave reasonably when called in a multi- - * threaded environment, but it is ultimately the caller's responsibility - * to provide locking. - */ -int InternalGetCoreDump(void *frame, int num_threads, pid_t *thread_pids, - va_list ap - /* const struct CoreDumpParameters *params, - const char *file_name, - const char *PATH - */); - -#endif - -#ifdef __cplusplus -} -#endif -#endif /* _ELFCORE_H */ diff --git a/contrib/libtcmalloc/src/base/googleinit.h b/contrib/libtcmalloc/src/base/googleinit.h deleted file mode 100644 index 3ea411a325a..00000000000 --- a/contrib/libtcmalloc/src/base/googleinit.h +++ /dev/null @@ -1,74 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
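Editor's note: the documented single-threaded call sequence above ("FRAME(frame); InternalGetCoreDump(&frame, 0, NULL, ap);") needs a variadic shim to materialize the va_list legally. A compile-only sketch against the declaration removed above; `GetCoreDumpOfSelf` is a hypothetical wrapper name and this links only where the deleted library still exists:

    #include <cstdarg>
    #include <sys/types.h>

    // Declaration as in the removed elfcore.h.
    extern "C" int InternalGetCoreDump(void *frame, int num_threads,
                                       pid_t *thread_pids, va_list ap);

    // Hypothetical shim: being variadic is what lets us create the va_list
    // that the internal API consumes.
    static int GetCoreDumpOfSelf(void *frame, ...) {
      va_list ap;
      va_start(ap, frame);
      int fd = InternalGetCoreDump(frame, /*num_threads=*/0,
                                   /*thread_pids=*/nullptr, ap);
      va_end(ap);
      return fd;  // fd from which the core image can be read, or -1 + errno
    }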
- -// --- -// Author: Jacob Hoffman-Andrews - -#ifndef _GOOGLEINIT_H -#define _GOOGLEINIT_H - -#include "base/logging.h" - -class GoogleInitializer { - public: - typedef void (*VoidFunction)(void); - GoogleInitializer(const char* name, VoidFunction ctor, VoidFunction dtor) - : name_(name), destructor_(dtor) { - RAW_VLOG(10, " constructing: %s\n", name_); - if (ctor) - ctor(); - } - ~GoogleInitializer() { - RAW_VLOG(10, " destroying: %s\n", name_); - if (destructor_) - destructor_(); - } - - private: - const char* const name_; - const VoidFunction destructor_; -}; - -#define REGISTER_MODULE_INITIALIZER(name, body) \ - namespace { \ - static void google_init_module_##name () { body; } \ - GoogleInitializer google_initializer_module_##name(#name, \ - google_init_module_##name, NULL); \ - } - -#define REGISTER_MODULE_DESTRUCTOR(name, body) \ - namespace { \ - static void google_destruct_module_##name () { body; } \ - GoogleInitializer google_destructor_module_##name(#name, \ - NULL, google_destruct_module_##name); \ - } - - -#endif /* _GOOGLEINIT_H */ diff --git a/contrib/libtcmalloc/src/base/linux_syscall_support.h b/contrib/libtcmalloc/src/base/linux_syscall_support.h deleted file mode 100644 index 6a94dc3fc72..00000000000 --- a/contrib/libtcmalloc/src/base/linux_syscall_support.h +++ /dev/null @@ -1,2880 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -/* Copyright (c) 2005-2008, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - * Author: Markus Gutschke - */ - -/* This file includes Linux-specific support functions common to the - * coredumper and the thread lister; primarily, this is a collection - * of direct system calls, and a couple of symbols missing from - * standard header files. - * There are a few options that the including file can set to control - * the behavior of this file: - * - * SYS_CPLUSPLUS: - * The entire header file will normally be wrapped in 'extern "C" { }", - * making it suitable for compilation as both C and C++ source. 
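Editor's note: a minimal usage sketch for the googleinit.h macros removed above (the module name `demo` is hypothetical). Each macro expands to a file-local GoogleInitializer whose constructor runs the body during static initialization, before main(), and whose destructor runs it at teardown:

    #include <cstdio>
    #include "base/googleinit.h"  // as removed by this diff

    REGISTER_MODULE_INITIALIZER(demo, {
      std::printf("demo module initialized\n");
    });

    REGISTER_MODULE_DESTRUCTOR(demo, {
      std::printf("demo module destroyed\n");
    });

    int main() { return 0; }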
If you - * do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit - * the wrapping. N.B. doing so will suppress inclusion of all prerequisite - * system header files, too. It is the caller's responsibility to provide - * the necessary definitions. - * - * SYS_ERRNO: - * All system calls will update "errno" unless overriden by setting the - * SYS_ERRNO macro prior to including this file. SYS_ERRNO should be - * an l-value. - * - * SYS_INLINE: - * New symbols will be defined "static inline", unless overridden by - * the SYS_INLINE macro. - * - * SYS_LINUX_SYSCALL_SUPPORT_H - * This macro is used to avoid multiple inclusions of this header file. - * If you need to include this file more than once, make sure to - * unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion. - * - * SYS_PREFIX: - * New system calls will have a prefix of "sys_" unless overridden by - * the SYS_PREFIX macro. Valid values for this macro are [0..9] which - * results in prefixes "sys[0..9]_". It is also possible to set this - * macro to -1, which avoids all prefixes. - * - * This file defines a few internal symbols that all start with "LSS_". - * Do not access these symbols from outside this file. They are not part - * of the supported API. - * - * NOTE: This is a stripped down version of the official opensource - * version of linux_syscall_support.h, which lives at - * http://code.google.com/p/linux-syscall-support/ - * It includes only the syscalls that are used in perftools, plus a - * few extra. Here's the breakdown: - * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u - * sys__exit( - * sys_clone( - * sys_close( - * sys_fcntl( - * sys_fstat( - * sys_futex( - * sys_getcpu( - * sys_getdents64( - * sys_getppid( - * sys_gettid( - * sys_lseek( - * sys_mmap( - * sys_mremap( - * sys_munmap( - * sys_open( - * sys_pipe( - * sys_prctl( - * sys_ptrace( - * sys_ptrace_detach( - * sys_read( - * sys_sched_yield( - * sys_sigaction( - * sys_sigaltstack( - * sys_sigdelset( - * sys_sigfillset( - * sys_sigprocmask( - * sys_socket( - * sys_stat( - * sys_waitpid( - * 2) These are used as subroutines of the above: - * sys_getpid -- gettid - * sys_kill -- ptrace_detach - * sys_restore -- sigaction - * sys_restore_rt -- sigaction - * sys_socketcall -- socket - * sys_wait4 -- waitpid - * 3) I left these in even though they're not used. They either - * complement the above (write vs read) or are variants (rt_sigaction): - * sys_fstat64 - * sys_llseek - * sys_mmap2 - * sys_openat - * sys_getdents - * sys_rt_sigaction - * sys_rt_sigprocmask - * sys_sigaddset - * sys_sigemptyset - * sys_stat64 - * sys_write - */ -#ifndef SYS_LINUX_SYSCALL_SUPPORT_H -#define SYS_LINUX_SYSCALL_SUPPORT_H - -/* We currently only support x86-32, x86-64, ARM, MIPS, PPC/PPC64, Aarch64, s390 and s390x - * on Linux. - * Porting to other related platforms should not be difficult. - */ -#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ - defined(__mips__) || defined(__PPC__) || \ - defined(__aarch64__) || defined(__s390__)) \ - && (defined(__linux)) - -#ifndef SYS_CPLUSPLUS -#ifdef __cplusplus -/* Some system header files in older versions of gcc neglect to properly - * handle being included from C++. As it appears to be harmless to have - * multiple nested 'extern "C"' blocks, just add another one here. 
- */ -extern "C" { -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef __mips__ -/* Include definitions of the ABI currently in use. */ -#include -#endif - -#endif - -/* As glibc often provides subtly incompatible data structures (and implicit - * wrapper functions that convert them), we provide our own kernel data - * structures for use by the system calls. - * These structures have been developed by using Linux 2.6.23 headers for - * reference. Note though, we do not care about exact API compatibility - * with the kernel, and in fact the kernel often does not have a single - * API that works across architectures. Instead, we try to mimic the glibc - * API where reasonable, and only guarantee ABI compatibility with the - * kernel headers. - * Most notably, here are a few changes that were made to the structures - * defined by kernel headers: - * - * - we only define structures, but not symbolic names for kernel data - * types. For the latter, we directly use the native C datatype - * (i.e. "unsigned" instead of "mode_t"). - * - in a few cases, it is possible to define identical structures for - * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by - * standardizing on the 64bit version of the data types. In particular, - * this means that we use "unsigned" where the 32bit headers say - * "unsigned long". - * - overall, we try to minimize the number of cases where we need to - * conditionally define different structures. - * - the "struct kernel_sigaction" class of structures have been - * modified to more closely mimic glibc's API by introducing an - * anonymous union for the function pointer. - * - a small number of field names had to have an underscore appended to - * them, because glibc defines a global macro by the same name. - */ - -/* include/linux/dirent.h */ -struct kernel_dirent64 { - unsigned long long d_ino; - long long d_off; - unsigned short d_reclen; - unsigned char d_type; - char d_name[256]; -}; - -/* include/linux/dirent.h */ -struct kernel_dirent { - long d_ino; - long d_off; - unsigned short d_reclen; - char d_name[256]; -}; - -/* include/linux/time.h */ -struct kernel_timespec { - long tv_sec; - long tv_nsec; -}; - -/* include/linux/time.h */ -struct kernel_timeval { - long tv_sec; - long tv_usec; -}; - -/* include/linux/resource.h */ -struct kernel_rusage { - struct kernel_timeval ru_utime; - struct kernel_timeval ru_stime; - long ru_maxrss; - long ru_ixrss; - long ru_idrss; - long ru_isrss; - long ru_minflt; - long ru_majflt; - long ru_nswap; - long ru_inblock; - long ru_oublock; - long ru_msgsnd; - long ru_msgrcv; - long ru_nsignals; - long ru_nvcsw; - long ru_nivcsw; -}; - -#if defined(__i386__) || defined(__arm__) \ - || defined(__PPC__) || (defined(__s390__) && !defined(__s390x__)) - -/* include/asm-{arm,i386,mips,ppc}/signal.h */ -struct kernel_old_sigaction { - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, siginfo_t *, void *); - }; - unsigned long sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -} __attribute__((packed,aligned(4))); -#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) - #define kernel_old_sigaction kernel_sigaction -#elif defined(__aarch64__) - // No kernel_old_sigaction defined for arm64. -#endif - -/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the - * exactly match the size of the signal set, even though the API was - * intended to be extensible. 
We define our own KERNEL_NSIG to deal with - * this. - * Please note that glibc provides signals [1.._NSIG-1], whereas the - * kernel (and this header) provides the range [1..KERNEL_NSIG]. The - * actual number of signals is obviously the same, but the constants - * differ by one. - */ -#ifdef __mips__ -#define KERNEL_NSIG 128 -#else -#define KERNEL_NSIG 64 -#endif - -/* include/asm-{arm,i386,mips,x86_64}/signal.h */ -struct kernel_sigset_t { - unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/ - (8*sizeof(unsigned long))]; -}; - -/* include/asm-{arm,generic,i386,mips,x86_64,ppc}/signal.h */ -struct kernel_sigaction { -#ifdef __mips__ - unsigned long sa_flags; - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, siginfo_t *, void *); - }; - struct kernel_sigset_t sa_mask; -#else - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, siginfo_t *, void *); - }; - unsigned long sa_flags; - void (*sa_restorer)(void); - struct kernel_sigset_t sa_mask; -#endif -}; - -/* include/asm-{arm,i386,mips,ppc,s390}/stat.h */ -#ifdef __mips__ -#if _MIPS_SIM == _MIPS_SIM_ABI64 -struct kernel_stat { -#else -struct kernel_stat64 { -#endif - unsigned st_dev; - unsigned __pad0[3]; - unsigned long long st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned st_rdev; - unsigned __pad1[3]; - long long st_size; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned st_blksize; - unsigned __pad2; - unsigned long long st_blocks; -}; -#elif defined __PPC__ -struct kernel_stat64 { - unsigned long long st_dev; - unsigned long long st_ino; - unsigned st_nlink; - unsigned st_mode; - unsigned st_uid; - unsigned st_gid; - int __pad2; - unsigned long long st_rdev; - long long st_size; - long long st_blksize; - long long st_blocks; - kernel_timespec st_atim; - kernel_timespec st_mtim; - kernel_timespec st_ctim; - unsigned long __unused4; - unsigned long __unused5; - unsigned long __unused6; -}; -#else -struct kernel_stat64 { - unsigned long long st_dev; - unsigned char __pad0[4]; - unsigned __st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned long long st_rdev; - unsigned char __pad3[4]; - long long st_size; - unsigned st_blksize; - unsigned long long st_blocks; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned long long st_ino; -}; -#endif - -/* include/asm-{arm,generic,i386,mips,x86_64,ppc,s390}/stat.h */ -#if defined(__i386__) || defined(__arm__) -struct kernel_stat { - /* The kernel headers suggest that st_dev and st_rdev should be 32bit - * quantities encoding 12bit major and 20bit minor numbers in an interleaved - * format. In reality, we do not see useful data in the top bits. So, - * we'll leave the padding in here, until we find a better solution. 
- */ - unsigned short st_dev; - short pad1; - unsigned st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - short pad2; - unsigned st_size; - unsigned st_blksize; - unsigned st_blocks; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned __unused4; - unsigned __unused5; -}; -#elif defined(__x86_64__) -struct kernel_stat { - uint64_t st_dev; - uint64_t st_ino; - uint64_t st_nlink; - unsigned st_mode; - unsigned st_uid; - unsigned st_gid; - unsigned __pad0; - uint64_t st_rdev; - int64_t st_size; - int64_t st_blksize; - int64_t st_blocks; - uint64_t st_atime_; - uint64_t st_atime_nsec_; - uint64_t st_mtime_; - uint64_t st_mtime_nsec_; - uint64_t st_ctime_; - uint64_t st_ctime_nsec_; - int64_t __unused[3]; -}; -#elif defined(__PPC__) -struct kernel_stat { - unsigned long long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - unsigned long st_mode; - unsigned st_uid; - unsigned st_gid; - int __pad2; - unsigned long long st_rdev; - long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - kernel_timespec st_atim; - kernel_timespec st_mtim; - kernel_timespec st_ctim; - unsigned long __unused4; - unsigned long __unused5; - unsigned long __unused6; -}; -#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) -struct kernel_stat { - unsigned st_dev; - int st_pad1[3]; - unsigned st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned st_rdev; - int st_pad2[2]; - long st_size; - int st_pad3; - long st_atime_; - long st_atime_nsec_; - long st_mtime_; - long st_mtime_nsec_; - long st_ctime_; - long st_ctime_nsec_; - int st_blksize; - int st_blocks; - int st_pad4[14]; -}; -#elif defined(__aarch64__) -struct kernel_stat { - unsigned long st_dev; - unsigned long st_ino; - unsigned int st_mode; - unsigned int st_nlink; - unsigned int st_uid; - unsigned int st_gid; - unsigned long st_rdev; - unsigned long __pad1; - long st_size; - int st_blksize; - int __pad2; - long st_blocks; - long st_atime_; - unsigned long st_atime_nsec_; - long st_mtime_; - unsigned long st_mtime_nsec_; - long st_ctime_; - unsigned long st_ctime_nsec_; - unsigned int __unused4; - unsigned int __unused5; -}; -#elif defined(__s390x__) -struct kernel_stat { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - unsigned int st_mode; - unsigned int st_uid; - unsigned int st_gid; - unsigned int __pad1; - unsigned long st_rdev; - unsigned long st_size; - unsigned long st_atime_; - unsigned long st_atime_nsec_; - unsigned long st_mtime_; - unsigned long st_mtime_nsec_; - unsigned long st_ctime_; - unsigned long st_ctime_nsec_; - unsigned long st_blksize; - long st_blocks; - unsigned long __unused[3]; -}; -#elif defined(__s390__) -struct kernel_stat { - unsigned short st_dev; - unsigned short __pad1; - unsigned long st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - unsigned short __pad2; - unsigned long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - unsigned long st_atime_; - unsigned long st_atime_nsec_; - unsigned long st_mtime_; - unsigned long st_mtime_nsec_; - unsigned long st_ctime_; - unsigned long st_ctime_nsec_; - unsigned long __unused4; - unsigned long __unused5; -}; -#endif - - -/* Definitions missing from the standard header files */ -#ifndef 
O_DIRECTORY -#if defined(__arm__) -#define O_DIRECTORY 0040000 -#else -#define O_DIRECTORY 0200000 -#endif -#endif -#ifndef PR_GET_DUMPABLE -#define PR_GET_DUMPABLE 3 -#endif -#ifndef PR_SET_DUMPABLE -#define PR_SET_DUMPABLE 4 -#endif -#ifndef AT_FDCWD -#define AT_FDCWD (-100) -#endif -#ifndef AT_SYMLINK_NOFOLLOW -#define AT_SYMLINK_NOFOLLOW 0x100 -#endif -#ifndef AT_REMOVEDIR -#define AT_REMOVEDIR 0x200 -#endif -#ifndef MREMAP_FIXED -#define MREMAP_FIXED 2 -#endif -#ifndef SA_RESTORER -#define SA_RESTORER 0x04000000 -#endif - -#if defined(__i386__) -#ifndef __NR_rt_sigaction -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 -#endif -#ifndef __NR_stat64 -#define __NR_stat64 195 -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 197 -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 220 -#endif -#ifndef __NR_gettid -#define __NR_gettid 224 -#endif -#ifndef __NR_futex -#define __NR_futex 240 -#endif -#ifndef __NR_openat -#define __NR_openat 295 -#endif -#ifndef __NR_getcpu -#define __NR_getcpu 318 -#endif -/* End of i386 definitions */ -#elif defined(__arm__) -#ifndef __syscall -#if defined(__thumb__) || defined(__ARM_EABI__) -#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name; -#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs -#define __syscall(name) "swi\t0" -#define __syscall_safe(name) \ - "push {r7}\n" \ - "mov r7,%[sysreg]\n" \ - __syscall(name)"\n" \ - "pop {r7}" -#else -#define __SYS_REG(name) -#define __SYS_REG_LIST(regs...) regs -#define __syscall(name) "swi\t" __sys1(__NR_##name) "" -#define __syscall_safe(name) __syscall(name) -#endif -#endif -#ifndef __NR_rt_sigaction -#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) -#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) -#endif -#ifndef __NR_stat64 -#define __NR_stat64 (__NR_SYSCALL_BASE + 195) -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) -#endif -#ifndef __NR_gettid -#define __NR_gettid (__NR_SYSCALL_BASE + 224) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_SYSCALL_BASE + 240) -#endif -/* End of ARM definitions */ -#elif defined(__x86_64__) -#ifndef __NR_gettid -#define __NR_gettid 186 -#endif -#ifndef __NR_futex -#define __NR_futex 202 -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 217 -#endif -#ifndef __NR_openat -#define __NR_openat 257 -#endif -/* End of x86-64 definitions */ -#elif defined(__mips__) -#if _MIPS_SIM == _MIPS_SIM_ABI32 -#ifndef __NR_rt_sigaction -#define __NR_rt_sigaction (__NR_Linux + 194) -#define __NR_rt_sigprocmask (__NR_Linux + 195) -#endif -#ifndef __NR_stat64 -#define __NR_stat64 (__NR_Linux + 213) -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 (__NR_Linux + 215) -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 (__NR_Linux + 219) -#endif -#ifndef __NR_gettid -#define __NR_gettid (__NR_Linux + 222) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_Linux + 238) -#endif -#ifndef __NR_openat -#define __NR_openat (__NR_Linux + 288) -#endif -#ifndef __NR_fstatat -#define __NR_fstatat (__NR_Linux + 293) -#endif -#ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 312) -#endif -/* End of MIPS (old 32bit API) definitions */ -#elif _MIPS_SIM == _MIPS_SIM_ABI64 -#ifndef __NR_gettid -#define __NR_gettid (__NR_Linux + 178) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_Linux + 194) -#endif -#ifndef __NR_openat -#define __NR_openat (__NR_Linux + 247) -#endif -#ifndef __NR_fstatat -#define 
__NR_fstatat (__NR_Linux + 252) -#endif -#ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 271) -#endif -/* End of MIPS (64bit API) definitions */ -#else -#ifndef __NR_gettid -#define __NR_gettid (__NR_Linux + 178) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_Linux + 194) -#endif -#ifndef __NR_openat -#define __NR_openat (__NR_Linux + 251) -#endif -#ifndef __NR_fstatat -#define __NR_fstatat (__NR_Linux + 256) -#endif -#ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 275) -#endif -/* End of MIPS (new 32bit API) definitions */ -#endif -/* End of MIPS definitions */ -#elif defined(__PPC__) -#ifndef __NR_rt_sigaction -#define __NR_rt_sigaction 173 -#define __NR_rt_sigprocmask 174 -#endif -#ifndef __NR_stat64 -#define __NR_stat64 195 -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 197 -#endif -#ifndef __NR_socket -#define __NR_socket 198 -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 202 -#endif -#ifndef __NR_gettid -#define __NR_gettid 207 -#endif -#ifndef __NR_futex -#define __NR_futex 221 -#endif -#ifndef __NR_openat -#define __NR_openat 286 -#endif -#ifndef __NR_getcpu -#define __NR_getcpu 302 -#endif -/* End of powerpc defininitions */ -#elif defined(__aarch64__) -#ifndef __NR_fstatat -#define __NR_fstatat 79 -#endif -/* End of aarch64 defininitions */ -#elif defined(__s390__) -#ifndef __NR_quotactl -#define __NR_quotactl 131 -#endif -#ifndef __NR_rt_sigreturn -#define __NR_rt_sigreturn 173 -#endif -#ifndef __NR_rt_sigaction -#define __NR_rt_sigaction 174 -#endif -#ifndef __NR_rt_sigprocmask -#define __NR_rt_sigprocmask 175 -#endif -#ifndef __NR_rt_sigpending -#define __NR_rt_sigpending 176 -#endif -#ifndef __NR_rt_sigsuspend -#define __NR_rt_sigsuspend 179 -#endif -#ifndef __NR_pread64 -#define __NR_pread64 180 -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 181 -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 220 -#endif -#ifndef __NR_readahead -#define __NR_readahead 222 -#endif -#ifndef __NR_setxattr -#define __NR_setxattr 224 -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr 225 -#endif -#ifndef __NR_getxattr -#define __NR_getxattr 227 -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr 228 -#endif -#ifndef __NR_listxattr -#define __NR_listxattr 230 -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr 231 -#endif -#ifndef __NR_gettid -#define __NR_gettid 236 -#endif -#ifndef __NR_tkill -#define __NR_tkill 237 -#endif -#ifndef __NR_futex -#define __NR_futex 238 -#endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 239 -#endif -#ifndef __NR_sched_getaffinity -#define __NR_sched_getaffinity 240 -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address 252 -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime 260 -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres 261 -#endif -#ifndef __NR_statfs64 -#define __NR_statfs64 265 -#endif -#ifndef __NR_fstatfs64 -#define __NR_fstatfs64 266 -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set 282 -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get 283 -#endif -#ifndef __NR_openat -#define __NR_openat 288 -#endif -#ifndef __NR_unlinkat -#define __NR_unlinkat 294 -#endif -#ifndef __NR_move_pages -#define __NR_move_pages 310 -#endif -#ifndef __NR_getcpu -#define __NR_getcpu 311 -#endif -#ifndef __NR_fallocate -#define __NR_fallocate 314 -#endif -/* Some syscalls are named/numbered differently between s390 and s390x. 
*/ -#ifdef __s390x__ -# ifndef __NR_getrlimit -# define __NR_getrlimit 191 -# endif -# ifndef __NR_setresuid -# define __NR_setresuid 208 -# endif -# ifndef __NR_getresuid -# define __NR_getresuid 209 -# endif -# ifndef __NR_setresgid -# define __NR_setresgid 210 -# endif -# ifndef __NR_getresgid -# define __NR_getresgid 211 -# endif -# ifndef __NR_setfsuid -# define __NR_setfsuid 215 -# endif -# ifndef __NR_setfsgid -# define __NR_setfsgid 216 -# endif -# ifndef __NR_fadvise64 -# define __NR_fadvise64 253 -# endif -# ifndef __NR_newfstatat -# define __NR_newfstatat 293 -# endif -#else /* __s390x__ */ -# ifndef __NR_getrlimit -# define __NR_getrlimit 76 -# endif -# ifndef __NR_setfsuid -# define __NR_setfsuid 138 -# endif -# ifndef __NR_setfsgid -# define __NR_setfsgid 139 -# endif -# ifndef __NR_setresuid -# define __NR_setresuid 164 -# endif -# ifndef __NR_getresuid -# define __NR_getresuid 165 -# endif -# ifndef __NR_setresgid -# define __NR_setresgid 170 -# endif -# ifndef __NR_getresgid -# define __NR_getresgid 171 -# endif -# ifndef __NR_ugetrlimit -# define __NR_ugetrlimit 191 -# endif -# ifndef __NR_mmap2 -# define __NR_mmap2 192 -# endif -# ifndef __NR_setresuid32 -# define __NR_setresuid32 208 -# endif -# ifndef __NR_getresuid32 -# define __NR_getresuid32 209 -# endif -# ifndef __NR_setresgid32 -# define __NR_setresgid32 210 -# endif -# ifndef __NR_getresgid32 -# define __NR_getresgid32 211 -# endif -# ifndef __NR_setfsuid32 -# define __NR_setfsuid32 215 -# endif -# ifndef __NR_setfsgid32 -# define __NR_setfsgid32 216 -# endif -# ifndef __NR_fadvise64_64 -# define __NR_fadvise64_64 264 -# endif -# ifndef __NR_fstatat64 -# define __NR_fstatat64 293 -# endif -#endif /* __s390__ */ -/* End of s390/s390x definitions */ -#endif - - -/* After forking, we must make sure to only call system calls. */ -#if __BOUNDED_POINTERS__ - #error "Need to port invocations of syscalls for bounded ptrs" -#else - /* The core dumper and the thread lister get executed after threads - * have been suspended. As a consequence, we cannot call any functions - * that acquire locks. Unfortunately, libc wraps most system calls - * (e.g. in order to implement pthread_atfork, and to make calls - * cancellable), which means we cannot call these functions. Instead, - * we have to call syscall() directly. - */ - #undef LSS_ERRNO - #ifdef SYS_ERRNO - /* Allow the including file to override the location of errno. This can - * be useful when using clone() with the CLONE_VM option. - */ - #define LSS_ERRNO SYS_ERRNO - #else - #define LSS_ERRNO errno - #endif - - #undef LSS_INLINE - #ifdef SYS_INLINE - #define LSS_INLINE SYS_INLINE - #else - #define LSS_INLINE static inline - #endif - - /* Allow the including file to override the prefix used for all new - * system calls. By default, it will be set to "sys_". 
- */ - #undef LSS_NAME - #ifndef SYS_PREFIX - #define LSS_NAME(name) sys_##name - #elif SYS_PREFIX < 0 - #define LSS_NAME(name) name - #elif SYS_PREFIX == 0 - #define LSS_NAME(name) sys0_##name - #elif SYS_PREFIX == 1 - #define LSS_NAME(name) sys1_##name - #elif SYS_PREFIX == 2 - #define LSS_NAME(name) sys2_##name - #elif SYS_PREFIX == 3 - #define LSS_NAME(name) sys3_##name - #elif SYS_PREFIX == 4 - #define LSS_NAME(name) sys4_##name - #elif SYS_PREFIX == 5 - #define LSS_NAME(name) sys5_##name - #elif SYS_PREFIX == 6 - #define LSS_NAME(name) sys6_##name - #elif SYS_PREFIX == 7 - #define LSS_NAME(name) sys7_##name - #elif SYS_PREFIX == 8 - #define LSS_NAME(name) sys8_##name - #elif SYS_PREFIX == 9 - #define LSS_NAME(name) sys9_##name - #endif - - #undef LSS_RETURN - #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ - defined(__aarch64__) || defined(__s390__)) - /* Failing system calls return a negative result in the range of - * -1..-4095. These are "errno" values with the sign inverted. - */ - #define LSS_RETURN(type, res) \ - do { \ - if ((unsigned long)(res) >= (unsigned long)(-4095)) { \ - LSS_ERRNO = -(res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - #elif defined(__mips__) - /* On MIPS, failing system calls return -1, and set errno in a - * separate CPU register. - */ - #define LSS_RETURN(type, res, err) \ - do { \ - if (err) { \ - LSS_ERRNO = (res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - #elif defined(__PPC__) - /* On PPC, failing system calls return -1, and set errno in a - * separate CPU register. See linux/unistd.h. - */ - #define LSS_RETURN(type, res, err) \ - do { \ - if (err & 0x10000000 ) { \ - LSS_ERRNO = (res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - #endif - #if defined(__i386__) - #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404) - /* This only works for GCC-4.4 and above -- the first version to use - .cfi directives for dwarf unwind info. */ - #define CFI_ADJUST_CFA_OFFSET(adjust) \ - ".cfi_adjust_cfa_offset " #adjust "\n" - #else - #define CFI_ADJUST_CFA_OFFSET(adjust) /**/ - #endif - - /* In PIC mode (e.g. when building shared libraries), gcc for i386 - * reserves ebx. Unfortunately, most distribution ship with implementations - * of _syscallX() which clobber ebx. - * Also, most definitions of _syscallX() neglect to mark "memory" as being - * clobbered. This causes problems with compilers, that do a better job - * at optimizing across __asm__ calls. - * So, we just have to redefine all of the _syscallX() macros. - */ - #undef LSS_BODY - #define LSS_BODY(type,args...) 
\ - long __res; \ - __asm__ __volatile__("push %%ebx\n" \ - CFI_ADJUST_CFA_OFFSET(4) \ - "movl %2,%%ebx\n" \ - "int $0x80\n" \ - "pop %%ebx\n" \ - CFI_ADJUST_CFA_OFFSET(-4) \ - args \ - : "esp", "memory"); \ - LSS_RETURN(type,__res) - #undef _syscall0 - #define _syscall0(type,name) \ - type LSS_NAME(name)(void) { \ - long __res; \ - __asm__ volatile("int $0x80" \ - : "=a" (__res) \ - : "0" (__NR_##name) \ - : "memory"); \ - LSS_RETURN(type,__res); \ - } - #undef _syscall1 - #define _syscall1(type,name,type1,arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" ((long)(arg1))); \ - } - #undef _syscall2 - #define _syscall2(type,name,type1,arg1,type2,arg2) \ - type LSS_NAME(name)(type1 arg1,type2 arg2) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \ - } - #undef _syscall3 - #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ - type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ - "d" ((long)(arg3))); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4))); \ - } - #undef _syscall5 - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - long __res; \ - __asm__ __volatile__("push %%ebx\n" \ - "movl %2,%%ebx\n" \ - "movl %1,%%eax\n" \ - "int $0x80\n" \ - "pop %%ebx" \ - : "=a" (__res) \ - : "i" (__NR_##name), "ri" ((long)(arg1)), \ - "c" ((long)(arg2)), "d" ((long)(arg3)), \ - "S" ((long)(arg4)), "D" ((long)(arg5)) \ - : "esp", "memory"); \ - LSS_RETURN(type,__res); \ - } - #undef _syscall6 - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - long __res; \ - struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \ - __asm__ __volatile__("push %%ebp\n" \ - "push %%ebx\n" \ - "movl 4(%2),%%ebp\n" \ - "movl 0(%2), %%ebx\n" \ - "movl %1,%%eax\n" \ - "int $0x80\n" \ - "pop %%ebx\n" \ - "pop %%ebp" \ - : "=a" (__res) \ - : "i" (__NR_##name), "0" ((long)(&__s)), \ - "c" ((long)(arg2)), "d" ((long)(arg3)), \ - "S" ((long)(arg4)), "D" ((long)(arg5)) \ - : "esp", "memory"); \ - LSS_RETURN(type,__res); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __res; - __asm__ __volatile__(/* if (fn == NULL) - * return -EINVAL; - */ - "movl %3,%%ecx\n" - "jecxz 1f\n" - - /* if (child_stack == NULL) - * return -EINVAL; - */ - "movl %4,%%ecx\n" - "jecxz 1f\n" - - /* Set up alignment of the child stack: - * child_stack = (child_stack & ~0xF) - 20; - */ - "andl $-16,%%ecx\n" - "subl $20,%%ecx\n" - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. 
- */ - "movl %6,%%eax\n" - "movl %%eax,4(%%ecx)\n" - "movl %3,%%eax\n" - "movl %%eax,(%%ecx)\n" - - /* %eax = syscall(%eax = __NR_clone, - * %ebx = flags, - * %ecx = child_stack, - * %edx = parent_tidptr, - * %esi = newtls, - * %edi = child_tidptr) - * Also, make sure that %ebx gets preserved as it is - * used in PIC mode. - */ - "movl %8,%%esi\n" - "movl %7,%%edx\n" - "movl %5,%%eax\n" - "movl %9,%%edi\n" - "pushl %%ebx\n" - "movl %%eax,%%ebx\n" - "movl %2,%%eax\n" - "int $0x80\n" - - /* In the parent: restore %ebx - * In the child: move "fn" into %ebx - */ - "popl %%ebx\n" - - /* if (%eax != 0) - * return %eax; - */ - "test %%eax,%%eax\n" - "jnz 1f\n" - - /* In the child, now. Terminate frame pointer chain. - */ - "movl $0,%%ebp\n" - - /* Call "fn". "arg" is already on the stack. - */ - "call *%%ebx\n" - - /* Call _exit(%ebx). Unfortunately older versions - * of gcc restrict the number of arguments that can - * be passed to asm(). So, we need to hard-code the - * system call number. - */ - "movl %%eax,%%ebx\n" - "movl $1,%%eax\n" - "int $0x80\n" - - /* Return to parent. - */ - "1:\n" - : "=a" (__res) - : "0"(-EINVAL), "i"(__NR_clone), - "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), - "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) - : "esp", "memory", "ecx", "edx", "esi", "edi"); - LSS_RETURN(int, __res); - } - - LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { - /* On i386, the kernel does not know how to return from a signal - * handler. Instead, it relies on user space to provide a - * restorer function that calls the {rt_,}sigreturn() system call. - * Unfortunately, we cannot just reference the glibc version of this - * function, as glibc goes out of its way to make it inaccessible. - */ - void (*res)(void); - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:movl %1,%%eax\n" - "int $0x80\n" - "2:popl %0\n" - "addl $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_rt_sigreturn)); - return res; - } - LSS_INLINE void (*LSS_NAME(restore)(void))(void) { - /* On i386, the kernel does not know how to return from a signal - * handler. Instead, it relies on user space to provide a - * restorer function that calls the {rt_,}sigreturn() system call. - * Unfortunately, we cannot just reference the glibc version of this - * function, as glibc goes out of its way to make it inaccessible. - */ - void (*res)(void); - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:pop %%eax\n" - "movl %1,%%eax\n" - "int $0x80\n" - "2:popl %0\n" - "addl $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_sigreturn)); - return res; - } - #elif defined(__x86_64__) - /* There are no known problems with any of the _syscallX() macros - * currently shipping for x86_64, but we still need to be able to define - * our own version so that we can override the location of the errno - * location (e.g. when using the clone() system call with the CLONE_VM - * option). - */ - #undef LSS_ENTRYPOINT - #define LSS_ENTRYPOINT "syscall\n" - - /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit. - * We need to explicitly cast to an unsigned 64 bit type to avoid implicit - * sign extension. We can't cast pointers directly because those are - * 32 bits, and gcc will dump ugly warnings about casting from a pointer - * to an integer of a different size. 
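Editor's note: the comment above explains the x32 subtlety behind LSS_SYSCALL_ARG: a 32-bit signed value converted straight to 64 bits would sign-extend, while a pointer cannot be cast directly to a wider integer without warnings. A standalone demonstration of the double-cast pattern (no syscalls involved):

    #include <cstdint>
    #include <cstdio>

    // Same shape as the removed macro: pointer -> uintptr_t -> uint64_t.
    #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a))

    int main() {
      int x = 42;
      int *p = &x;
      // Going through uintptr_t first keeps ILP32 targets (e.g. x32)
      // warning-free and avoids implicit sign extension of the bit pattern.
      uint64_t arg = LSS_SYSCALL_ARG(p);
      std::printf("%#llx\n", (unsigned long long)arg);
    }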
- */ - #undef LSS_SYSCALL_ARG - #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a)) - #undef _LSS_RETURN - #define _LSS_RETURN(type, res, cast) \ - do { \ - if ((uint64_t)(res) >= (uint64_t)(-4095)) { \ - LSS_ERRNO = -(res); \ - res = -1; \ - } \ - return (type)(cast)(res); \ - } while (0) - #undef LSS_RETURN - #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t) - - #undef _LSS_BODY - #define _LSS_BODY(nr, type, name, cast, ...) \ - long long __res; \ - __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \ - : "=a" (__res) \ - : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__) \ - : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \ - _LSS_RETURN(type, __res, cast) - #undef LSS_BODY - #define LSS_BODY(nr, type, name, args...) \ - _LSS_BODY(nr, type, name, uintptr_t, ## args) - - #undef LSS_BODY_ASM0 - #undef LSS_BODY_ASM1 - #undef LSS_BODY_ASM2 - #undef LSS_BODY_ASM3 - #undef LSS_BODY_ASM4 - #undef LSS_BODY_ASM5 - #undef LSS_BODY_ASM6 - #define LSS_BODY_ASM0 - #define LSS_BODY_ASM1 LSS_BODY_ASM0 - #define LSS_BODY_ASM2 LSS_BODY_ASM1 - #define LSS_BODY_ASM3 LSS_BODY_ASM2 - #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;" - #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;" - #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;" - - #undef LSS_BODY_CLOBBER0 - #undef LSS_BODY_CLOBBER1 - #undef LSS_BODY_CLOBBER2 - #undef LSS_BODY_CLOBBER3 - #undef LSS_BODY_CLOBBER4 - #undef LSS_BODY_CLOBBER5 - #undef LSS_BODY_CLOBBER6 - #define LSS_BODY_CLOBBER0 - #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0 - #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1 - #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2 - #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10", - #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8", - #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9", - - #undef LSS_BODY_ARG0 - #undef LSS_BODY_ARG1 - #undef LSS_BODY_ARG2 - #undef LSS_BODY_ARG3 - #undef LSS_BODY_ARG4 - #undef LSS_BODY_ARG5 - #undef LSS_BODY_ARG6 - #define LSS_BODY_ARG0() - #define LSS_BODY_ARG1(arg1) \ - LSS_BODY_ARG0(), "D" (arg1) - #define LSS_BODY_ARG2(arg1, arg2) \ - LSS_BODY_ARG1(arg1), "S" (arg2) - #define LSS_BODY_ARG3(arg1, arg2, arg3) \ - LSS_BODY_ARG2(arg1, arg2), "d" (arg3) - #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \ - LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4) - #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \ - LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5) - #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \ - LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6) - - #undef _syscall0 - #define _syscall0(type,name) \ - type LSS_NAME(name)() { \ - LSS_BODY(0, type, name); \ - } - #undef _syscall1 - #define _syscall1(type,name,type1,arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1)); \ - } - #undef _syscall2 - #define _syscall2(type,name,type1,arg1,type2,arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\ - } - #undef _syscall3 - #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ - LSS_SYSCALL_ARG(arg3)); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ - LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\ - } - #undef _syscall5 - 
#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ - LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ - LSS_SYSCALL_ARG(arg5)); \ - } - #undef _syscall6 - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ - LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ - LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long long __res; - { - __asm__ __volatile__(/* if (fn == NULL) - * return -EINVAL; - */ - "testq %4,%4\n" - "jz 1f\n" - - /* if (child_stack == NULL) - * return -EINVAL; - */ - "testq %5,%5\n" - "jz 1f\n" - - /* Set up alignment of the child stack: - * child_stack = (child_stack & ~0xF) - 16; - */ - "andq $-16,%5\n" - "subq $16,%5\n" - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - "movq %7,8(%5)\n" - "movq %4,0(%5)\n" - - /* %rax = syscall(%rax = __NR_clone, - * %rdi = flags, - * %rsi = child_stack, - * %rdx = parent_tidptr, - * %r8 = new_tls, - * %r10 = child_tidptr) - */ - "movq %2,%%rax\n" - "movq %9,%%r8\n" - "movq %10,%%r10\n" - "syscall\n" - - /* if (%rax != 0) - * return; - */ - "testq %%rax,%%rax\n" - "jnz 1f\n" - - /* In the child. Terminate frame pointer chain. - */ - "xorq %%rbp,%%rbp\n" - - /* Call "fn(arg)". - */ - "popq %%rax\n" - "popq %%rdi\n" - "call *%%rax\n" - - /* Call _exit(%ebx). - */ - "movq %%rax,%%rdi\n" - "movq %3,%%rax\n" - "syscall\n" - - /* Return to parent. - */ - "1:\n" - : "=a" (__res) - : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), - "r"(LSS_SYSCALL_ARG(fn)), - "S"(LSS_SYSCALL_ARG(child_stack)), - "D"(LSS_SYSCALL_ARG(flags)), - "r"(LSS_SYSCALL_ARG(arg)), - "d"(LSS_SYSCALL_ARG(parent_tidptr)), - "r"(LSS_SYSCALL_ARG(newtls)), - "r"(LSS_SYSCALL_ARG(child_tidptr)) - : "rsp", "memory", "r8", "r10", "r11", "rcx"); - } - LSS_RETURN(int, __res); - } - - LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { - /* On x86-64, the kernel does not know how to return from - * a signal handler. Instead, it relies on user space to provide a - * restorer function that calls the rt_sigreturn() system call. - * Unfortunately, we cannot just reference the glibc version of this - * function, as glibc goes out of its way to make it inaccessible. - */ - long long res; - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:movq %1,%%rax\n" - "syscall\n" - "2:popq %0\n" - "addq $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_rt_sigreturn)); - return (void (*)(void))(uintptr_t)res; - } - #elif defined(__arm__) - /* Most definitions of _syscallX() neglect to mark "memory" as being - * clobbered. This causes problems with compilers, that do a better job - * at optimizing across __asm__ calls. - * So, we just have to redefine all fo the _syscallX() macros. - */ - #undef LSS_REG - #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a - - /* r0..r3 are scratch registers and not preserved across function - * calls. We need to first evaluate the first 4 syscall arguments - * and store them on stack. 
They must be loaded into r0..r3 after - * all function calls to avoid r0..r3 being clobbered. - */ - #undef LSS_SAVE_ARG - #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a - #undef LSS_LOAD_ARG - #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r - - #undef LSS_BODY - #define LSS_BODY(type, name, args...) \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __SYS_REG(name) \ - __asm__ __volatile__ (__syscall_safe(name) \ - : "=r"(__res_r0) \ - : __SYS_REG_LIST(args) \ - : "lr", "memory"); \ - __res = __res_r0; \ - LSS_RETURN(type, __res) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)() { \ - LSS_BODY(type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - /* There is no need for using a volatile temp. */ \ - LSS_REG(0, arg1); \ - LSS_BODY(type, name, "r"(__r0)); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ - } - #undef _syscall4 - #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_SAVE_ARG(3, arg4); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_LOAD_ARG(3); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ - } - #undef _syscall5 - #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_SAVE_ARG(3, arg4); \ - LSS_REG(4, arg5); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_LOAD_ARG(3); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ - "r"(__r4)); \ - } - #undef _syscall6 - #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5, type6, arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_SAVE_ARG(3, arg4); \ - LSS_REG(4, arg5); \ - LSS_REG(5, arg6); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_LOAD_ARG(3); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ - "r"(__r4), "r"(__r5)); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - register long __res __asm__("r5"); - { - if (fn == NULL || child_stack == NULL) { - __res = -EINVAL; - goto clone_exit; - } - - /* stash first 4 arguments on stack first because we can only load - * them after all function calls. 
- */ - int tmp_flags = flags; - int * tmp_stack = (int*) child_stack; - void * tmp_ptid = parent_tidptr; - void * tmp_tls = newtls; - - register int *__ctid __asm__("r4") = child_tidptr; - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - *(--tmp_stack) = (int) arg; - *(--tmp_stack) = (int) fn; - - /* We must load r0..r3 last after all possible function calls. */ - register int __flags __asm__("r0") = tmp_flags; - register void *__stack __asm__("r1") = tmp_stack; - register void *__ptid __asm__("r2") = tmp_ptid; - register void *__tls __asm__("r3") = tmp_tls; - - /* %r0 = syscall(%r0 = flags, - * %r1 = child_stack, - * %r2 = parent_tidptr, - * %r3 = newtls, - * %r4 = child_tidptr) - */ - __SYS_REG(clone) - __asm__ __volatile__(/* %r0 = syscall(%r0 = flags, - * %r1 = child_stack, - * %r2 = parent_tidptr, - * %r3 = newtls, - * %r4 = child_tidptr) - */ - "push {r7}\n" - "mov r7,%1\n" - __syscall(clone)"\n" - - /* if (%r0 != 0) - * return %r0; - */ - "movs %0,r0\n" - "bne 1f\n" - - /* In the child, now. Call "fn(arg)". - */ - "ldr r0,[sp, #4]\n" - "mov lr,pc\n" - "ldr pc,[sp]\n" - - /* Call _exit(%r0), which never returns. We only - * need to set r7 for EABI syscall ABI but we do - * this always to simplify code sharing between - * old and new syscall ABIs. - */ - "mov r7,%2\n" - __syscall(exit)"\n" - - /* Pop r7 from the stack only in the parent. - */ - "1: pop {r7}\n" - : "=r" (__res) - : "r"(__sysreg), - "i"(__NR_exit), "r"(__stack), "r"(__flags), - "r"(__ptid), "r"(__tls), "r"(__ctid) - : "cc", "lr", "memory"); - } - clone_exit: - LSS_RETURN(int, __res); - } - #elif defined(__mips__) - #undef LSS_REG - #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ - (unsigned long)(a) - - #if _MIPS_SIM == _MIPS_SIM_ABI32 - // See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html - // or http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html - #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", "$11", "$12",\ - "$13", "$14", "$15", "$24", "$25", "memory" - #else - #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", "$13", \ - "$14", "$15", "$24", "$25", "memory" - #endif - - #undef LSS_BODY - #define LSS_BODY(type,name,r7,...) 
\ - register unsigned long __v0 __asm__("$2") = __NR_##name; \ - __asm__ __volatile__ ("syscall\n" \ - : "=&r"(__v0), r7 (__r7) \ - : "0"(__v0), ##__VA_ARGS__ \ - : MIPS_SYSCALL_CLOBBERS); \ - LSS_RETURN(type, __v0, __r7) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)() { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_BODY(type, name, "=r"); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_REG(5, arg2); \ - LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \ - } - #undef _syscall5 - #if _MIPS_SIM == _MIPS_SIM_ABI32 - /* The old 32bit MIPS system call API passes the fifth and sixth argument - * on the stack, whereas the new APIs use registers "r8" and "r9". - */ - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ - register unsigned long __v0 __asm__("$2"); \ - __asm__ __volatile__ (".set noreorder\n" \ - "lw $2, %6\n" \ - "subu $29, 32\n" \ - "sw $2, 16($29)\n" \ - "li $2, %2\n" \ - "syscall\n" \ - "addiu $29, 32\n" \ - ".set reorder\n" \ - : "=&r"(__v0), "+r" (__r7) \ - : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ - "r"(__r6), "m" ((unsigned long)arg5) \ - : MIPS_SYSCALL_CLOBBERS); \ - LSS_RETURN(type, __v0, __r7); \ - } - #else - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); LSS_REG(8, arg5); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ - "r"(__r8)); \ - } - #endif - #undef _syscall6 - #if _MIPS_SIM == _MIPS_SIM_ABI32 - /* The old 32bit MIPS system call API passes the fifth and sixth argument - * on the stack, whereas the new APIs use registers "r8" and "r9". 
- */ - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ - register unsigned long __v0 __asm__("$2"); \ - __asm__ __volatile__ (".set noreorder\n" \ - "lw $2, %6\n" \ - "lw $8, %7\n" \ - "subu $29, 32\n" \ - "sw $2, 16($29)\n" \ - "sw $8, 20($29)\n" \ - "li $2, %2\n" \ - "syscall\n" \ - "addiu $29, 32\n" \ - ".set reorder\n" \ - : "=&r"(__v0), "+r" (__r7) \ - : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ - "r"(__r6), "m" ((unsigned long)arg5), \ - "m" ((unsigned long)arg6) \ - : MIPS_SYSCALL_CLOBBERS); \ - LSS_RETURN(type, __v0, __r7); \ - } - #else - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5,type6 arg6) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ - "r"(__r8), "r"(__r9)); \ - } - #endif - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - register unsigned long __v0 __asm__("$2"); - register unsigned long __r7 __asm__("$7") = (unsigned long)newtls; - { - register int __flags __asm__("$4") = flags; - register void *__stack __asm__("$5") = child_stack; - register void *__ptid __asm__("$6") = parent_tidptr; - register int *__ctid __asm__("$8") = child_tidptr; - __asm__ __volatile__( - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "subu $29,24\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "sub $29,16\n" - #else - "dsubu $29,16\n" - #endif - - /* if (fn == NULL || child_stack == NULL) - * return -EINVAL; - */ - "li %0,%2\n" - "beqz %5,1f\n" - "beqz %6,1f\n" - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "subu %6,32\n" - "sw %5,0(%6)\n" - "sw %8,4(%6)\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "sub %6,32\n" - "sw %5,0(%6)\n" - "sw %8,8(%6)\n" - #else - "dsubu %6,32\n" - "sd %5,0(%6)\n" - "sd %8,8(%6)\n" - #endif - - /* $7 = syscall($4 = flags, - * $5 = child_stack, - * $6 = parent_tidptr, - * $7 = newtls, - * $8 = child_tidptr) - */ - "li $2,%3\n" - "syscall\n" - - /* if ($7 != 0) - * return $2; - */ - "bnez $7,1f\n" - "bnez $2,1f\n" - - /* In the child, now. Call "fn(arg)". - */ - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "lw $25,0($29)\n" - "lw $4,4($29)\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "lw $25,0($29)\n" - "lw $4,8($29)\n" - #else - "ld $25,0($29)\n" - "ld $4,8($29)\n" - #endif - "jalr $25\n" - - /* Call _exit($2) - */ - "move $4,$2\n" - "li $2,%4\n" - "syscall\n" - - "1:\n" - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "addu $29, 24\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "add $29, 16\n" - #else - "daddu $29,16\n" - #endif - : "=&r" (__v0), "=r" (__r7) - : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), - "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), - "r"(__ptid), "r"(__r7), "r"(__ctid) - : "$9", "$10", "$11", "$12", "$13", "$14", "$15", - "$24", "memory"); - } - LSS_RETURN(int, __v0, __r7); - } - #elif defined (__PPC__) - #undef LSS_LOADARGS_0 - #define LSS_LOADARGS_0(name, dummy...) 
\ - __sc_0 = __NR_##name - #undef LSS_LOADARGS_1 - #define LSS_LOADARGS_1(name, arg1) \ - LSS_LOADARGS_0(name); \ - __sc_3 = (unsigned long) (arg1) - #undef LSS_LOADARGS_2 - #define LSS_LOADARGS_2(name, arg1, arg2) \ - LSS_LOADARGS_1(name, arg1); \ - __sc_4 = (unsigned long) (arg2) - #undef LSS_LOADARGS_3 - #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \ - LSS_LOADARGS_2(name, arg1, arg2); \ - __sc_5 = (unsigned long) (arg3) - #undef LSS_LOADARGS_4 - #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \ - LSS_LOADARGS_3(name, arg1, arg2, arg3); \ - __sc_6 = (unsigned long) (arg4) - #undef LSS_LOADARGS_5 - #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \ - LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \ - __sc_7 = (unsigned long) (arg5) - #undef LSS_LOADARGS_6 - #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ - LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \ - __sc_8 = (unsigned long) (arg6) - #undef LSS_ASMINPUT_0 - #define LSS_ASMINPUT_0 "0" (__sc_0) - #undef LSS_ASMINPUT_1 - #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3) - #undef LSS_ASMINPUT_2 - #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4) - #undef LSS_ASMINPUT_3 - #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5) - #undef LSS_ASMINPUT_4 - #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6) - #undef LSS_ASMINPUT_5 - #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7) - #undef LSS_ASMINPUT_6 - #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8) - #undef LSS_BODY - #define LSS_BODY(nr, type, name, args...) \ - long __sc_ret, __sc_err; \ - { \ - register unsigned long __sc_0 __asm__ ("r0"); \ - register unsigned long __sc_3 __asm__ ("r3"); \ - register unsigned long __sc_4 __asm__ ("r4"); \ - register unsigned long __sc_5 __asm__ ("r5"); \ - register unsigned long __sc_6 __asm__ ("r6"); \ - register unsigned long __sc_7 __asm__ ("r7"); \ - register unsigned long __sc_8 __asm__ ("r8"); \ - \ - LSS_LOADARGS_##nr(name, args); \ - __asm__ __volatile__ \ - ("sc\n\t" \ - "mfcr %0" \ - : "=&r" (__sc_0), \ - "=&r" (__sc_3), "=&r" (__sc_4), \ - "=&r" (__sc_5), "=&r" (__sc_6), \ - "=&r" (__sc_7), "=&r" (__sc_8) \ - : LSS_ASMINPUT_##nr \ - : "cr0", "ctr", "memory", \ - "r9", "r10", "r11", "r12"); \ - __sc_ret = __sc_3; \ - __sc_err = __sc_0; \ - } \ - LSS_RETURN(type, __sc_ret, __sc_err) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)(void) { \ - LSS_BODY(0, type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(1, type, name, arg1); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_BODY(2, type, name, arg1, arg2); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_BODY(3, type, name, arg1, arg2, arg3); \ - } - #undef _syscall4 - #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \ - } - #undef _syscall5 - #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \ - } - #undef _syscall6 - #define _syscall6(type, name, type1, arg1, type2, arg2, 
type3, arg3, \
- type4, arg4, type5, arg5, type6, arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5, type6 arg6) { \
- LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \
- }
- /* clone function adapted from glibc 2.18 clone.S */
- LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
- int flags, void *arg, int *parent_tidptr,
- void *newtls, int *child_tidptr) {
- long __ret, __err;
- {
-#if defined(__PPC64__)
-
-/* Stack frame offsets. */
-#if _CALL_ELF != 2
-#define FRAME_MIN_SIZE 112
-#define FRAME_TOC_SAVE 40
-#else
-#define FRAME_MIN_SIZE 32
-#define FRAME_TOC_SAVE 24
-#endif
-
-
- register int (*__fn)(void *) __asm__ ("r3") = fn;
- register void *__cstack __asm__ ("r4") = child_stack;
- register int __flags __asm__ ("r5") = flags;
- register void * __arg __asm__ ("r6") = arg;
- register int * __ptidptr __asm__ ("r7") = parent_tidptr;
- register void * __newtls __asm__ ("r8") = newtls;
- register int * __ctidptr __asm__ ("r9") = child_tidptr;
- __asm__ __volatile__(
- /* check for fn == NULL
- * and child_stack == NULL
- */
- "cmpdi cr0, %6, 0\n\t"
- "cmpdi cr1, %7, 0\n\t"
- "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
- "beq- cr0, 1f\n\t"
-
- /* set up stack frame for child */
- "clrrdi %7, %7, 4\n\t"
- "li 0, 0\n\t"
- "stdu 0, -%13(%7)\n\t"
-
- /* fn, arg, child_stack are saved across the syscall */
- "mr 28, %6\n\t"
- "mr 29, %7\n\t"
- "mr 27, %9\n\t"
-
- /* syscall
- r3 == flags
- r4 == child_stack
- r5 == parent_tidptr
- r6 == newtls
- r7 == child_tidptr */
- "mr 3, %8\n\t"
- "mr 5, %10\n\t"
- "mr 6, %11\n\t"
- "mr 7, %12\n\t"
- "li 0, %4\n\t"
- "sc\n\t"
-
- /* Test if syscall was successful */
- "cmpdi cr1, 3, 0\n\t"
- "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
- "bne- cr1, 1f\n\t"
-
- /* Do the function call */
- "std 2, %14(1)\n\t"
-#if _CALL_ELF != 2
- "ld 0, 0(28)\n\t"
- "ld 2, 8(28)\n\t"
- "mtctr 0\n\t"
-#else
- "mr 12, 28\n\t"
- "mtctr 12\n\t"
-#endif
- "mr 3, 27\n\t"
- "bctrl\n\t"
- "ld 2, %14(1)\n\t"
-
- /* Call _exit(r3) */
- "li 0, %5\n\t"
- "sc\n\t"
-
- /* Return to parent */
- "1:\n\t"
- "mr %0, 3\n\t"
- : "=r" (__ret), "=r" (__err)
- : "0" (-1), "i" (EINVAL),
- "i" (__NR_clone), "i" (__NR_exit),
- "r" (__fn), "r" (__cstack), "r" (__flags),
- "r" (__arg), "r" (__ptidptr), "r" (__newtls),
- "r" (__ctidptr), "i" (FRAME_MIN_SIZE), "i" (FRAME_TOC_SAVE)
- : "cr0", "cr1", "memory", "ctr",
- "r0", "r29", "r27", "r28");
-#else
- register int (*__fn)(void *) __asm__ ("r8") = fn;
- register void *__cstack __asm__ ("r4") = child_stack;
- register int __flags __asm__ ("r3") = flags;
- register void * __arg __asm__ ("r9") = arg;
- register int * __ptidptr __asm__ ("r5") = parent_tidptr;
- register void * __newtls __asm__ ("r6") = newtls;
- register int * __ctidptr __asm__ ("r7") = child_tidptr;
- __asm__ __volatile__(
- /* check for fn == NULL
- * and child_stack == NULL
- */
- "cmpwi cr0, %6, 0\n\t"
- "cmpwi cr1, %7, 0\n\t"
- "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
- "beq- cr0, 1f\n\t"
-
- /* set up stack frame for child */
- "clrrwi %7, %7, 4\n\t"
- "li 0, 0\n\t"
- "stwu 0, -16(%7)\n\t"
-
- /* fn, arg, child_stack are saved across the syscall: r28-30 */
- "mr 28, %6\n\t"
- "mr 29, %7\n\t"
- "mr 27, %9\n\t"
-
- /* syscall */
- "li 0, %4\n\t"
- /* flags already in r3
- * child_stack already in r4
- * ptidptr already in r5
- * newtls already in r6
- * ctidptr already in r7
- */
- "sc\n\t"
-
- /* Test if syscall was successful */
- "cmpwi cr1, 3, 0\n\t"
- "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
- "bne- cr1, 1f\n\t"
- - /* Do the function call */ - "mtctr 28\n\t" - "mr 3, 27\n\t" - "bctrl\n\t" - - /* Call _exit(r3) */ - "li 0, %5\n\t" - "sc\n\t" - - /* Return to parent */ - "1:\n" - "mfcr %1\n\t" - "mr %0, 3\n\t" - : "=r" (__ret), "=r" (__err) - : "0" (-1), "1" (EINVAL), - "i" (__NR_clone), "i" (__NR_exit), - "r" (__fn), "r" (__cstack), "r" (__flags), - "r" (__arg), "r" (__ptidptr), "r" (__newtls), - "r" (__ctidptr) - : "cr0", "cr1", "memory", "ctr", - "r0", "r29", "r27", "r28"); - -#endif - } - LSS_RETURN(int, __ret, __err); - } - #elif defined(__aarch64__) - #undef LSS_REG - #define LSS_REG(r,a) register long __x##r __asm__("x"#r) = (long)a - #undef LSS_BODY - #define LSS_BODY(type,name,args...) \ - register long __res_x0 __asm__("x0"); \ - long __res; \ - __asm__ __volatile__ ("mov x8, %1\n" \ - "svc 0x0\n" \ - : "=r"(__res_x0) \ - : "i"(__NR_##name) , ## args \ - : "memory"); \ - __res = __res_x0; \ - LSS_RETURN(type, __res) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)(void) { \ - LSS_BODY(type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__x0)); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); \ - LSS_BODY(type, name, "r"(__x0), "r"(__x1)); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2)); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); \ - LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3)); \ - } - #undef _syscall5 - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); LSS_REG(4, arg5); \ - LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3), \ - "r"(__x4)); \ - } - #undef _syscall6 - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ - LSS_BODY(type, name, "r"(__x0), "r"(__x1), "x"(__x2), "r"(__x3), \ - "r"(__x4), "r"(__x5)); \ - } - /* clone function adapted from glibc 2.18 clone.S */ - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __res; - { - register int (*__fn)(void *) __asm__("x0") = fn; - register void *__stack __asm__("x1") = child_stack; - register int __flags __asm__("x2") = flags; - register void *__arg __asm__("x3") = arg; - register int *__ptid __asm__("x4") = parent_tidptr; - register void *__tls __asm__("x5") = newtls; - register int *__ctid __asm__("x6") = child_tidptr; - __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL) - * return -EINVAL; - */ - "cbz x0,1f\n" - "cbz x1,1f\n" 
- - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - "stp x0,x3, [x1, #-16]!\n" - - "mov x0,x2\n" /* flags */ - "mov x2,x4\n" /* ptid */ - "mov x3,x5\n" /* tls */ - "mov x4,x6\n" /* ctid */ - "mov x8,%9\n" /* clone */ - - "svc 0x0\n" - - /* if (%r0 != 0) - * return %r0; - */ - "cmp x0, #0\n" - "bne 2f\n" - - /* In the child, now. Call "fn(arg)". - */ - "ldp x1, x0, [sp], #16\n" - "blr x1\n" - - /* Call _exit(%r0). - */ - "mov x8, %10\n" - "svc 0x0\n" - "1:\n" - "mov x8, %1\n" - "2:\n" - : "=r" (__res) - : "i"(-EINVAL), - "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg), - "r"(__ptid), "r"(__tls), "r"(__ctid), - "i"(__NR_clone), "i"(__NR_exit) - : "x30", "memory"); - } - LSS_RETURN(int, __res); - } - #elif defined(__s390__) - #undef LSS_REG - #define LSS_REG(r, a) register unsigned long __r##r __asm__("r"#r) = (unsigned long) a - #undef LSS_BODY - #define LSS_BODY(type, name, args...) \ - register unsigned long __nr __asm__("r1") \ - = (unsigned long)(__NR_##name); \ - register long __res_r2 __asm__("r2"); \ - long __res; \ - __asm__ __volatile__ \ - ("svc 0\n\t" \ - : "=d"(__res_r2) \ - : "d"(__nr), ## args \ - : "memory"); \ - __res = __res_r2; \ - LSS_RETURN(type, __res) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)(void) { \ - LSS_BODY(type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_REG(2, arg1); \ - LSS_BODY(type, name, "0"(__r2)); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_REG(2, arg1); LSS_REG(3, arg2); \ - LSS_BODY(type, name, "0"(__r2), "d"(__r3)); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ - LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4)); \ - } - #undef _syscall4 - #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ - type4 arg4) { \ - LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ - LSS_REG(5, arg4); \ - LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ - "d"(__r5)); \ - } - #undef _syscall5 - #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ - type4 arg4, type5 arg5) { \ - LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ - LSS_REG(5, arg4); LSS_REG(6, arg5); \ - LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ - "d"(__r5), "d"(__r6)); \ - } - #undef _syscall6 - #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5, type6, arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ - type4 arg4, type5 arg5, type6 arg6) { \ - LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ - LSS_REG(5, arg4); LSS_REG(6, arg5); LSS_REG(7, arg6); \ - LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ - "d"(__r5), "d"(__r6), "d"(__r7)); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __ret; - { - register int (*__fn)(void *) __asm__ ("r1") = fn; - register void *__cstack __asm__ ("r2") = child_stack; - register int __flags __asm__ ("r3") = flags; - register void *__arg 
__asm__ ("r0") = arg; - register int *__ptidptr __asm__ ("r4") = parent_tidptr; - register void *__newtls __asm__ ("r6") = newtls; - register int *__ctidptr __asm__ ("r5") = child_tidptr; - __asm__ __volatile__ ( - #ifndef __s390x__ - /* arg already in r0 */ - "ltr %4, %4\n\t" /* check fn, which is already in r1 */ - "jz 1f\n\t" /* NULL function pointer, return -EINVAL */ - "ltr %5, %5\n\t" /* check child_stack, which is already in r2 */ - "jz 1f\n\t" /* NULL stack pointer, return -EINVAL */ - /* flags already in r3 */ - /* parent_tidptr already in r4 */ - /* child_tidptr already in r5 */ - /* newtls already in r6 */ - "svc %2\n\t" /* invoke clone syscall */ - "ltr %0,%%r2\n\t" /* load return code into __ret and test */ - "jnz 1f\n\t" /* return to parent if non-zero */ - /* start child thread */ - "lr %%r2, %7\n\t" /* set first parameter to void *arg */ - "ahi %%r15, -96\n\t" /* make room on the stack for the save area */ - "xc 0(4,%%r15), 0(%%r15)\n\t" - "basr %%r14, %4\n\t" /* jump to fn */ - "svc %3\n" /* invoke exit syscall */ - "1:\n" - #else - /* arg already in r0 */ - "ltgr %4, %4\n\t" /* check fn, which is already in r1 */ - "jz 1f\n\t" /* NULL function pointer, return -EINVAL */ - "ltgr %5, %5\n\t" /* check child_stack, which is already in r2 */ - "jz 1f\n\t" /* NULL stack pointer, return -EINVAL */ - /* flags already in r3 */ - /* parent_tidptr already in r4 */ - /* child_tidptr already in r5 */ - /* newtls already in r6 */ - "svc %2\n\t" /* invoke clone syscall */ - "ltgr %0, %%r2\n\t" /* load return code into __ret and test */ - "jnz 1f\n\t" /* return to parent if non-zero */ - /* start child thread */ - "lgr %%r2, %7\n\t" /* set first parameter to void *arg */ - "aghi %%r15, -160\n\t" /* make room on the stack for the save area */ - "xc 0(8,%%r15), 0(%%r15)\n\t" - "basr %%r14, %4\n\t" /* jump to fn */ - "svc %3\n" /* invoke exit syscall */ - "1:\n" - #endif - : "=r" (__ret) - : "0" (-EINVAL), "i" (__NR_clone), "i" (__NR_exit), - "d" (__fn), "d" (__cstack), "d" (__flags), "d" (__arg), - "d" (__ptidptr), "d" (__newtls), "d" (__ctidptr) - : "cc", "r14", "memory" - ); - } - LSS_RETURN(int, __ret); - } - #endif - #define __NR__exit __NR_exit - #define __NR__gettid __NR_gettid - #define __NR__mremap __NR_mremap - LSS_INLINE _syscall1(int, close, int, f) - LSS_INLINE _syscall1(int, _exit, int, e) - LSS_INLINE _syscall3(int, fcntl, int, f, - int, c, long, a) - LSS_INLINE _syscall2(int, fstat, int, f, - struct kernel_stat*, b) - LSS_INLINE _syscall6(int, futex, int*, a, - int, o, int, v, - struct kernel_timespec*, t, - int*, a2, - int, v3) -#ifdef __NR_getdents64 - LSS_INLINE _syscall3(int, getdents64, int, f, - struct kernel_dirent64*, d, int, c) -#define KERNEL_DIRENT kernel_dirent64 -#define GETDENTS sys_getdents64 -#else - LSS_INLINE _syscall3(int, getdents, int, f, - struct kernel_dirent*, d, int, c) -#define KERNEL_DIRENT kernel_dirent -#define GETDENTS sys_getdents -#endif - LSS_INLINE _syscall0(pid_t, getpid) - LSS_INLINE _syscall0(pid_t, getppid) - LSS_INLINE _syscall0(pid_t, _gettid) - LSS_INLINE _syscall2(int, kill, pid_t, p, - int, s) - #if defined(__x86_64__) - /* Need to make sure off_t isn't truncated to 32-bits under x32. 
*/ - LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) { - _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o), - LSS_SYSCALL_ARG(w)); - } - #else - LSS_INLINE _syscall3(off_t, lseek, int, f, - off_t, o, int, w) - #endif - LSS_INLINE _syscall2(int, munmap, void*, s, - size_t, l) - LSS_INLINE _syscall5(void*, _mremap, void*, o, - size_t, os, size_t, ns, - unsigned long, f, void *, a) - LSS_INLINE _syscall2(int, prctl, int, o, - long, a) - LSS_INLINE _syscall4(long, ptrace, int, r, - pid_t, p, void *, a, void *, d) - LSS_INLINE _syscall3(ssize_t, read, int, f, - void *, b, size_t, c) - LSS_INLINE _syscall4(int, rt_sigaction, int, s, - const struct kernel_sigaction*, a, - struct kernel_sigaction*, o, size_t, c) - LSS_INLINE _syscall4(int, rt_sigprocmask, int, h, - const struct kernel_sigset_t*, s, - struct kernel_sigset_t*, o, size_t, c); - LSS_INLINE _syscall0(int, sched_yield) - LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, - const stack_t*, o) - #if defined(__NR_fstatat) - LSS_INLINE _syscall4(int, fstatat, int, d, const char *, p, - struct kernel_stat*, b, int, flags) - LSS_INLINE int LSS_NAME(stat)(const char* p, struct kernel_stat* b) { - return LSS_NAME(fstatat)(AT_FDCWD,p,b,0); - } - #else - LSS_INLINE _syscall2(int, stat, const char*, f, - struct kernel_stat*, b) - #endif - LSS_INLINE _syscall3(ssize_t, write, int, f, - const void *, b, size_t, c) - #if defined(__NR_getcpu) - LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, - unsigned *, node, void *, unused); - #endif - #if defined(__x86_64__) || defined(__aarch64__) || \ - (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) - LSS_INLINE _syscall3(int, socket, int, d, - int, t, int, p) - #endif - #if defined(__x86_64__) || defined(__s390x__) - LSS_INLINE int LSS_NAME(sigaction)(int signum, - const struct kernel_sigaction *act, - struct kernel_sigaction *oldact) { - #if defined(__x86_64__) - /* On x86_64, the kernel requires us to always set our own - * SA_RESTORER in order to be able to return from a signal handler. - * This function must have a "magic" signature that the "gdb" - * (and maybe the kernel?) can recognize. 
- */ - if (act != NULL && !(act->sa_flags & SA_RESTORER)) { - struct kernel_sigaction a = *act; - a.sa_flags |= SA_RESTORER; - a.sa_restorer = LSS_NAME(restore_rt)(); - return LSS_NAME(rt_sigaction)(signum, &a, oldact, - (KERNEL_NSIG+7)/8); - } else - #endif - return LSS_NAME(rt_sigaction)(signum, act, oldact, - (KERNEL_NSIG+7)/8); - } - - LSS_INLINE int LSS_NAME(sigprocmask)(int how, - const struct kernel_sigset_t *set, - struct kernel_sigset_t *oldset) { - return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); - } - #endif - #if (defined(__aarch64__)) || \ - (defined(__mips__) && (_MIPS_ISA == _MIPS_ISA_MIPS64)) - LSS_INLINE int LSS_NAME(sigaction)(int signum, - const struct kernel_sigaction *act, - struct kernel_sigaction *oldact) { - return LSS_NAME(rt_sigaction)(signum, act, oldact, (KERNEL_NSIG+7)/8); - - } - LSS_INLINE int LSS_NAME(sigprocmask)(int how, - const struct kernel_sigset_t *set, - struct kernel_sigset_t *oldset) { - return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); - } - #endif - #ifdef __NR_wait4 - LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, - int*, s, int, o, - struct kernel_rusage*, r) - LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){ - return LSS_NAME(wait4)(pid, status, options, 0); - } - #else - LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p, - int*, s, int, o) - #endif - #ifdef __NR_openat - LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m) - LSS_INLINE int LSS_NAME(open)(const char* p, int f, int m) { - return LSS_NAME(openat)(AT_FDCWD,p,f,m ); - } - #else - LSS_INLINE _syscall3(int, open, const char*, p, - int, f, int, m) - #endif - LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) { - memset(&set->sig, 0, sizeof(set->sig)); - return 0; - } - - LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) { - memset(&set->sig, -1, sizeof(set->sig)); - return 0; - } - - LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set, - int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { - LSS_ERRNO = EINVAL; - return -1; - } else { - set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] - |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0]))); - return 0; - } - } - - LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, - int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { - LSS_ERRNO = EINVAL; - return -1; - } else { - set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] - &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0])))); - return 0; - } - } - - #if defined(__i386__) || \ - defined(__arm__) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ - defined(__PPC__) || \ - (defined(__s390__) && !defined(__s390x__)) - #define __NR__sigaction __NR_sigaction - #define __NR__sigprocmask __NR_sigprocmask - LSS_INLINE _syscall2(int, fstat64, int, f, - struct kernel_stat64 *, b) - LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, - loff_t *, res, uint, wh) -#if defined(__s390__) && !defined(__s390x__) - /* On s390, mmap2() arguments are passed in memory. 
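A recurring magic number in these wrappers is (KERNEL_NSIG+7)/8: the rt_* signal syscalls take an explicit sigsetsize, and the kernel rejects anything that is not the size of its own sigset (8 bytes for 64 signals on typical 64-bit ports), while glibc's sigset_t is far larger. A small demonstration, assuming an LP64 x86-64 Linux host:

```c
/* Sketch: rt_sigprocmask insists on the kernel's 8-byte sigset size. */
#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void) {
  unsigned long set = 0, old = 0;      /* kernel-style sigset on LP64  */
  long ok  = syscall(SYS_rt_sigprocmask, SIG_BLOCK, &set, &old, sizeof set);
  long bad = syscall(SYS_rt_sigprocmask, SIG_BLOCK, &set, &old,
                     sizeof(sigset_t));  /* 128 bytes on glibc: EINVAL */
  printf("8-byte size: rc=%ld, sizeof(sigset_t): rc=%ld\n", ok, bad);
  return 0;
}
```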
*/ - LSS_INLINE void* LSS_NAME(_mmap2)(void *s, size_t l, int p, int f, int d, - off_t o) { - unsigned long buf[6] = { (unsigned long) s, (unsigned long) l, - (unsigned long) p, (unsigned long) f, - (unsigned long) d, (unsigned long) o }; - LSS_REG(2, buf); - LSS_BODY(void*, mmap2, "0"(__r2)); - } -#elif !defined(__PPC64__) - #define __NR__mmap2 __NR_mmap2 - LSS_INLINE _syscall6(void*, _mmap2, void*, s, - size_t, l, int, p, - int, f, int, d, - off_t, o) -#endif - LSS_INLINE _syscall3(int, _sigaction, int, s, - const struct kernel_old_sigaction*, a, - struct kernel_old_sigaction*, o) - LSS_INLINE _syscall3(int, _sigprocmask, int, h, - const unsigned long*, s, - unsigned long*, o) - LSS_INLINE _syscall2(int, stat64, const char *, p, - struct kernel_stat64 *, b) - - LSS_INLINE int LSS_NAME(sigaction)(int signum, - const struct kernel_sigaction *act, - struct kernel_sigaction *oldact) { - int old_errno = LSS_ERRNO; - int rc; - struct kernel_sigaction a; - if (act != NULL) { - a = *act; - #ifdef __i386__ - /* On i386, the kernel requires us to always set our own - * SA_RESTORER when using realtime signals. Otherwise, it does not - * know how to return from a signal handler. This function must have - * a "magic" signature that the "gdb" (and maybe the kernel?) can - * recognize. - * Apparently, a SA_RESTORER is implicitly set by the kernel, when - * using non-realtime signals. - * - * TODO: Test whether ARM needs a restorer - */ - if (!(a.sa_flags & SA_RESTORER)) { - a.sa_flags |= SA_RESTORER; - a.sa_restorer = (a.sa_flags & SA_SIGINFO) - ? LSS_NAME(restore_rt)() : LSS_NAME(restore)(); - } - #endif - } - rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact, - (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { - struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa; - if (!act) { - ptr_a = NULL; - } else { - oa.sa_handler_ = act->sa_handler_; - memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask)); - #ifndef __mips__ - oa.sa_restorer = act->sa_restorer; - #endif - oa.sa_flags = act->sa_flags; - } - if (!oldact) { - ptr_oa = NULL; - } - LSS_ERRNO = old_errno; - rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa); - if (rc == 0 && oldact) { - if (act) { - memcpy(oldact, act, sizeof(*act)); - } else { - memset(oldact, 0, sizeof(*oldact)); - } - oldact->sa_handler_ = ptr_oa->sa_handler_; - oldact->sa_flags = ptr_oa->sa_flags; - memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask)); - #ifndef __mips__ - oldact->sa_restorer = ptr_oa->sa_restorer; - #endif - } - } - return rc; - } - - LSS_INLINE int LSS_NAME(sigprocmask)(int how, - const struct kernel_sigset_t *set, - struct kernel_sigset_t *oldset) { - int olderrno = LSS_ERRNO; - int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { - LSS_ERRNO = olderrno; - if (oldset) { - LSS_NAME(sigemptyset)(oldset); - } - rc = LSS_NAME(_sigprocmask)(how, - set ? &set->sig[0] : NULL, - oldset ? &oldset->sig[0] : NULL); - } - return rc; - } - #endif - #if defined(__i386__) || \ - defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ - (defined(__PPC__) && !defined(__PPC64__)) || \ - (defined(__s390__) && !defined(__s390x__)) - /* On these architectures, implement mmap() with mmap2(). 
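The fallback logic in the sigaction above follows a fixed shape: try the modern syscall, and only on ENOSYS restore errno and retry through the legacy interface with translated argument structures. A skeleton of that shape, with hypothetical function-pointer stand-ins of my own:

```c
/* Sketch of the ENOSYS-fallback shape used by LSS_NAME(sigaction). */
#include <errno.h>

static long call_with_fallback(long (*modern)(void), long (*legacy)(void)) {
  int saved_errno = errno;
  long rc = modern();
  if (rc < 0 && errno == ENOSYS) {
    errno = saved_errno;       /* hide the failed probe from the caller */
    rc = legacy();
  }
  return rc;
}
```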
*/ - LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, - int64_t o) { - if (o % 4096) { - LSS_ERRNO = EINVAL; - return (void *) -1; - } - return LSS_NAME(_mmap2)(s, l, p, f, d, (o / 4096)); - } - #elif defined(__s390x__) - /* On s390x, mmap() arguments are passed in memory. */ - LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, - int64_t o) { - unsigned long buf[6] = { (unsigned long) s, (unsigned long) l, - (unsigned long) p, (unsigned long) f, - (unsigned long) d, (unsigned long) o }; - LSS_REG(2, buf); - LSS_BODY(void*, mmap, "0"(__r2)); - } - #elif defined(__x86_64__) - /* Need to make sure __off64_t isn't truncated to 32-bits under x32. */ - LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, - int64_t o) { - LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l), - LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f), - LSS_SYSCALL_ARG(d), (uint64_t)(o)); - } - #else - /* Remaining 64-bit architectures. */ - LSS_INLINE _syscall6(void*, mmap, void*, addr, size_t, length, int, prot, - int, flags, int, fd, int64_t, offset) - #endif - #if defined(__i386__) || \ - defined(__PPC__) || \ - (defined(__arm__) && !defined(__ARM_EABI__)) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ - defined(__s390__) - - /* See sys_socketcall in net/socket.c in kernel source. - * It de-multiplexes on its first arg and unpacks the arglist - * array in its second arg. - */ - LSS_INLINE _syscall2(int, socketcall, int, c, unsigned long*, a) - - LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { - unsigned long args[3] = { - (unsigned long) domain, - (unsigned long) type, - (unsigned long) protocol - }; - return LSS_NAME(socketcall)(1, args); - } - #elif defined(__ARM_EABI__) - LSS_INLINE _syscall3(int, socket, int, d, - int, t, int, p) - #endif - #if defined(__mips__) - /* sys_pipe() on MIPS has non-standard calling conventions, as it returns - * both file handles through CPU registers. - */ - LSS_INLINE int LSS_NAME(pipe)(int *p) { - register unsigned long __v0 __asm__("$2") = __NR_pipe; - register unsigned long __v1 __asm__("$3"); - register unsigned long __r7 __asm__("$7"); - __asm__ __volatile__ ("syscall\n" - : "=&r"(__v0), "=&r"(__v1), "+r" (__r7) - : "0"(__v0) - : "$8", "$9", "$10", "$11", "$12", - "$13", "$14", "$15", "$24", "memory"); - if (__r7) { - LSS_ERRNO = __v0; - return -1; - } else { - p[0] = __v0; - p[1] = __v1; - return 0; - } - } - #elif defined(__NR_pipe2) - LSS_INLINE _syscall2(int, pipe2, int *, p, - int, f ) - LSS_INLINE int LSS_NAME(pipe)( int * p) { - return LSS_NAME(pipe2)(p, 0); - } - #else - LSS_INLINE _syscall1(int, pipe, int *, p) - #endif - - LSS_INLINE pid_t LSS_NAME(gettid)() { - pid_t tid = LSS_NAME(_gettid)(); - if (tid != -1) { - return tid; - } - return LSS_NAME(getpid)(); - } - - LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size, - size_t new_size, int flags, ...) { - va_list ap; - void *new_address, *rc; - va_start(ap, flags); - new_address = va_arg(ap, void *); - rc = LSS_NAME(_mremap)(old_address, old_size, new_size, - flags, new_address); - va_end(ap); - return rc; - } - - LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) { - /* PTRACE_DETACH can sometimes forget to wake up the tracee and it - * then sends job control signals to the real parent, rather than to - * the tracer. We reduce the risk of this happening by starting a - * whole new time slice, and then quickly sending a SIGCONT signal - * right after detaching from the tracee. 
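The socketcall de-multiplexing used above is easiest to see in isolation: on i386 and the other listed 32-bit ABIs there is one socket-family syscall whose first argument selects the operation and whose second points at the packed argument array, with SYS_SOCKET defined as 1 in <linux/net.h>. A sketch that is only meaningful on such an ABI:

```c
/* Sketch: socket() via socketcall(2) argument packing, as on i386. */
#if defined(__i386__)
#define _GNU_SOURCE
#include <linux/net.h>        /* SYS_SOCKET and the other sub-ops */
#include <stdio.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void) {
  unsigned long args[3] = { AF_INET, SOCK_DGRAM, 0 };
  int fd = (int)syscall(SYS_socketcall, SYS_SOCKET, args);
  printf("socket fd = %d\n", fd);
  if (fd >= 0) close(fd);
  return 0;
}
#else
int main(void) { return 0; }  /* 64-bit ABIs call sys_socket directly */
#endif
```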
- */ - int rc, err; - LSS_NAME(sched_yield)(); - rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0); - err = LSS_ERRNO; - LSS_NAME(kill)(pid, SIGCONT); - LSS_ERRNO = err; - return rc; - } -#endif - -#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS) -} -#endif - -#endif -#endif diff --git a/contrib/libtcmalloc/src/base/linuxthreads.cc b/contrib/libtcmalloc/src/base/linuxthreads.cc deleted file mode 100644 index 891e70c88c4..00000000000 --- a/contrib/libtcmalloc/src/base/linuxthreads.cc +++ /dev/null @@ -1,707 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -/* Copyright (c) 2005-2007, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - * Author: Markus Gutschke - */ - -#include "base/linuxthreads.h" - -#ifdef THREADS -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "base/linux_syscall_support.h" -#include "base/thread_lister.h" - -#ifndef CLONE_UNTRACED -#define CLONE_UNTRACED 0x00800000 -#endif - - -/* Synchronous signals that should not be blocked while in the lister thread. - */ -static const int sync_signals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS, - SIGXCPU, SIGXFSZ }; - -/* itoa() is not a standard function, and we cannot safely call printf() - * after suspending threads. So, we just implement our own copy. A - * recursive approach is the easiest here. - */ -static char *local_itoa(char *buf, int i) { - if (i < 0) { - *buf++ = '-'; - return local_itoa(buf, -i); - } else { - if (i >= 10) - buf = local_itoa(buf, i/10); - *buf++ = (i%10) + '0'; - *buf = '\000'; - return buf; - } -} - - -/* Wrapper around clone() that runs "fn" on the same stack as the - * caller! Unlike fork(), the cloned thread shares the same address space. - * The caller must be careful to use only minimal amounts of stack until - * the cloned thread has returned. - * There is a good chance that the cloned thread and the caller will share - * the same copy of errno! 
- */ -#ifdef __GNUC__ -#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3 -/* Try to force this function into a separate stack frame, and make sure - * that arguments are passed on the stack. - */ -static int local_clone (int (*fn)(void *), void *arg, ...) - __attribute__ ((noinline)); -#endif -#endif - -/* To avoid the gap cross page boundaries, increase by the large parge - * size mostly PowerPC system uses. */ -#ifdef __PPC64__ -#define CLONE_STACK_SIZE 65536 -#else -#define CLONE_STACK_SIZE 4096 -#endif - -static int local_clone (int (*fn)(void *), void *arg, ...) { - /* Leave 4kB of gap between the callers stack and the new clone. This - * should be more than sufficient for the caller to call waitpid() until - * the cloned thread terminates. - * - * It is important that we set the CLONE_UNTRACED flag, because newer - * versions of "gdb" otherwise attempt to attach to our thread, and will - * attempt to reap its status codes. This subsequently results in the - * caller hanging indefinitely in waitpid(), waiting for a change in - * status that will never happen. By setting the CLONE_UNTRACED flag, we - * prevent "gdb" from stealing events, but we still expect the thread - * lister to fail, because it cannot PTRACE_ATTACH to the process that - * is being debugged. This is OK and the error code will be reported - * correctly. - */ - return sys_clone(fn, (char *)&arg - CLONE_STACK_SIZE, - CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_UNTRACED, arg, 0, 0, 0); -} - - -/* Local substitute for the atoi() function, which is not necessarily safe - * to call once threads are suspended (depending on whether libc looks up - * locale information, when executing atoi()). - */ -static int local_atoi(const char *s) { - int n = 0; - int neg = *s == '-'; - if (neg) - s++; - while (*s >= '0' && *s <= '9') - n = 10*n + (*s++ - '0'); - return neg ? -n : n; -} - - -/* Re-runs fn until it doesn't cause EINTR - */ -#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) - - -/* Wrap a class around system calls, in order to give us access to - * a private copy of errno. This only works in C++, but it has the - * advantage of not needing nested functions, which are a non-standard - * language extension. - */ -#ifdef __cplusplus -namespace { - class SysCalls { - public: - #define SYS_CPLUSPLUS - #define SYS_ERRNO my_errno - #define SYS_INLINE inline - #define SYS_PREFIX -1 - #undef SYS_LINUX_SYSCALL_SUPPORT_H - #include "linux_syscall_support.h" - SysCalls() : my_errno(0) { } - int my_errno; - }; -} -#define ERRNO sys.my_errno -#else -#define ERRNO my_errno -#endif - - -/* Wrapper for open() which is guaranteed to never return EINTR. - */ -static int c_open(const char *fname, int flags, int mode) { - ssize_t rc; - NO_INTR(rc = sys_open(fname, flags, mode)); - return rc; -} - - -/* abort() is not safely reentrant, and changes it's behavior each time - * it is called. This means, if the main application ever called abort() - * we cannot safely call it again. This would happen if we were called - * from a SIGABRT signal handler in the main application. So, document - * that calling SIGABRT from the thread lister makes it not signal safe - * (and vice-versa). - * Also, since we share address space with the main application, we - * cannot call abort() from the callback and expect the main application - * to behave correctly afterwards. In fact, the only thing we can do, is - * to terminate the main application with extreme prejudice (aka - * PTRACE_KILL). - * We set up our own SIGABRT handler to do this. 
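NO_INTR is the classic EINTR-retry loop in macro form; it matters here because the lister deliberately operates with signals in flight and must not surface spurious EINTR failures. The same idiom written longhand, as a sketch:

```c
/* Sketch: what NO_INTR(sys_read(...)) expands to, spelled out. */
#include <errno.h>
#include <unistd.h>

static ssize_t read_no_intr(int fd, void *buf, size_t len) {
  ssize_t rc;
  do {
    rc = read(fd, buf, len);
  } while (rc < 0 && errno == EINTR);
  return rc;
}
```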
- * In order to find the main application from the signal handler, we - * need to store information about it in global variables. This is - * safe, because the main application should be suspended at this - * time. If the callback ever called TCMalloc_ResumeAllProcessThreads(), then - * we are running a higher risk, though. So, try to avoid calling - * abort() after calling TCMalloc_ResumeAllProcessThreads. - */ -static volatile int *sig_pids, sig_num_threads, sig_proc, sig_marker; - - -/* Signal handler to help us recover from dying while we are attached to - * other threads. - */ -static void SignalHandler(int signum, siginfo_t *si, void *data) { - if (sig_pids != NULL) { - if (signum == SIGABRT) { - while (sig_num_threads-- > 0) { - /* Not sure if sched_yield is really necessary here, but it does not */ - /* hurt, and it might be necessary for the same reasons that we have */ - /* to do so in sys_ptrace_detach(). */ - sys_sched_yield(); - sys_ptrace(PTRACE_KILL, sig_pids[sig_num_threads], 0, 0); - } - } else if (sig_num_threads > 0) { - TCMalloc_ResumeAllProcessThreads(sig_num_threads, (int *)sig_pids); - } - } - sig_pids = NULL; - if (sig_marker >= 0) - NO_INTR(sys_close(sig_marker)); - sig_marker = -1; - if (sig_proc >= 0) - NO_INTR(sys_close(sig_proc)); - sig_proc = -1; - - sys__exit(signum == SIGABRT ? 1 : 2); -} - - -/* Try to dirty the stack, and hope that the compiler is not smart enough - * to optimize this function away. Or worse, the compiler could inline the - * function and permanently allocate the data on the stack. - */ -static void DirtyStack(size_t amount) { - char buf[amount]; - memset(buf, 0, amount); - sys_read(-1, buf, amount); -} - - -/* Data structure for passing arguments to the lister thread. - */ -#define ALT_STACKSIZE (MINSIGSTKSZ + 4096) - -struct ListerParams { - int result, err; - char *altstack_mem; - ListAllProcessThreadsCallBack callback; - void *parameter; - va_list ap; - sem_t *lock; -}; - - -static void ListerThread(struct ListerParams *args) { - int found_parent = 0; - pid_t clone_pid = sys_gettid(), ppid = sys_getppid(); - char proc_self_task[80], marker_name[48], *marker_path; - const char *proc_paths[3]; - const char *const *proc_path = proc_paths; - int proc = -1, marker = -1, num_threads = 0; - int max_threads = 0, sig; - struct kernel_stat marker_sb, proc_sb; - stack_t altstack; - - /* Wait for parent thread to set appropriate permissions - * to allow ptrace activity - */ - if (sem_wait(args->lock) < 0) { - goto failure; - } - - /* Create "marker" that we can use to detect threads sharing the same - * address space and the same file handles. By setting the FD_CLOEXEC flag - * we minimize the risk of misidentifying child processes as threads; - * and since there is still a race condition, we will filter those out - * later, anyway. 
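The marker trick is worth a standalone illustration: an fd opened here appears under /proc/<pid>/fd/<n> with the same inode in every task that shares the file table, FD_CLOEXEC keeps exec'd children from carrying it, and the remaining false positives (forked children that merely inherited the fd) are weeded out later by the PTRACE_PEEKDATA address-space check. A hedged sketch of just the identity test:

```c
/* Sketch: do we and task `pid` see the same open file as fd `marker`? */
#include <fcntl.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <unistd.h>

static int sees_same_marker(pid_t pid, int marker,
                            const struct stat *marker_sb) {
  char path[64];
  struct stat sb;
  snprintf(path, sizeof path, "/proc/%d/fd/%d", (int)pid, marker);
  return stat(path, &sb) == 0 && sb.st_ino == marker_sb->st_ino;
}

int main(void) {
  int marker = socket(AF_UNIX, SOCK_DGRAM, 0);  /* PF_LOCAL == AF_UNIX */
  fcntl(marker, F_SETFD, FD_CLOEXEC);
  char self[64];
  struct stat marker_sb;
  snprintf(self, sizeof self, "/proc/%d/fd/%d", (int)getpid(), marker);
  if (stat(self, &marker_sb) < 0) return 1;
  printf("own fd table matches: %d\n",
         sees_same_marker(getpid(), marker, &marker_sb));
  return 0;
}
```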
- */ - if ((marker = sys_socket(PF_LOCAL, SOCK_DGRAM, 0)) < 0 || - sys_fcntl(marker, F_SETFD, FD_CLOEXEC) < 0) { - failure: - args->result = -1; - args->err = errno; - if (marker >= 0) - NO_INTR(sys_close(marker)); - sig_marker = marker = -1; - if (proc >= 0) - NO_INTR(sys_close(proc)); - sig_proc = proc = -1; - sys__exit(1); - } - - /* Compute search paths for finding thread directories in /proc */ - local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), ppid); - strcpy(marker_name, proc_self_task); - marker_path = marker_name + strlen(marker_name); - strcat(proc_self_task, "/task/"); - proc_paths[0] = proc_self_task; /* /proc/$$/task/ */ - proc_paths[1] = "/proc/"; /* /proc/ */ - proc_paths[2] = NULL; - - /* Compute path for marker socket in /proc */ - local_itoa(strcpy(marker_path, "/fd/") + 4, marker); - if (sys_stat(marker_name, &marker_sb) < 0) { - goto failure; - } - - /* Catch signals on an alternate pre-allocated stack. This way, we can - * safely execute the signal handler even if we ran out of memory. - */ - memset(&altstack, 0, sizeof(altstack)); - altstack.ss_sp = args->altstack_mem; - altstack.ss_flags = 0; - altstack.ss_size = ALT_STACKSIZE; - sys_sigaltstack(&altstack, (const stack_t *)NULL); - - /* Some kernels forget to wake up traced processes, when the - * tracer dies. So, intercept synchronous signals and make sure - * that we wake up our tracees before dying. It is the caller's - * responsibility to ensure that asynchronous signals do not - * interfere with this function. - */ - sig_marker = marker; - sig_proc = -1; - for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) { - struct kernel_sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction_ = SignalHandler; - sys_sigfillset(&sa.sa_mask); - sa.sa_flags = SA_ONSTACK|SA_SIGINFO|SA_RESETHAND; - sys_sigaction(sync_signals[sig], &sa, (struct kernel_sigaction *)NULL); - } - - /* Read process directories in /proc/... */ - for (;;) { - /* Some kernels know about threads, and hide them in "/proc" - * (although they are still there, if you know the process - * id). Threads are moved into a separate "task" directory. We - * check there first, and then fall back on the older naming - * convention if necessary. - */ - if ((sig_proc = proc = c_open(*proc_path, O_RDONLY|O_DIRECTORY, 0)) < 0) { - if (*++proc_path != NULL) - continue; - goto failure; - } - if (sys_fstat(proc, &proc_sb) < 0) - goto failure; - - /* Since we are suspending threads, we cannot call any libc - * functions that might acquire locks. Most notably, we cannot - * call malloc(). So, we have to allocate memory on the stack, - * instead. Since we do not know how much memory we need, we - * make a best guess. And if we guessed incorrectly we retry on - * a second iteration (by jumping to "detach_threads"). - * - * Unless the number of threads is increasing very rapidly, we - * should never need to do so, though, as our guestimate is very - * conservative. - */ - if (max_threads < proc_sb.st_nlink + 100) - max_threads = proc_sb.st_nlink + 100; - - /* scope */ { - pid_t pids[max_threads]; - int added_entries = 0; - sig_num_threads = num_threads; - sig_pids = pids; - for (;;) { - struct KERNEL_DIRENT *entry; - char buf[4096]; - ssize_t nbytes = GETDENTS(proc, (struct KERNEL_DIRENT *)buf, - sizeof(buf)); - if (nbytes < 0) - goto failure; - else if (nbytes == 0) { - if (added_entries) { - /* Need to keep iterating over "/proc" in multiple - * passes until we no longer find any more threads. 
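For contrast, here is what the getdents loop computes, expressed with opendir/readdir. The lister itself cannot use this form, because readdir may call malloc while other threads are frozen holding its lock; the sketch is only meant to make the hand-rolled loop's intent obvious:

```c
/* Sketch: enumerate the current process's threads via /proc. */
#include <ctype.h>
#include <dirent.h>
#include <stdio.h>

int main(void) {
  DIR *d = opendir("/proc/self/task");
  if (d == NULL) { perror("opendir"); return 1; }
  struct dirent *e;
  while ((e = readdir(d)) != NULL) {
    if (isdigit((unsigned char)e->d_name[0]))  /* skip "." and ".." */
      printf("task %s\n", e->d_name);
  }
  closedir(d);
  return 0;
}
```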
This - * algorithm eventually completes, when all threads have - * been suspended. - */ - added_entries = 0; - sys_lseek(proc, 0, SEEK_SET); - continue; - } - break; - } - for (entry = (struct KERNEL_DIRENT *)buf; - entry < (struct KERNEL_DIRENT *)&buf[nbytes]; - entry = (struct KERNEL_DIRENT *)((char *)entry+entry->d_reclen)) { - if (entry->d_ino != 0) { - const char *ptr = entry->d_name; - pid_t pid; - - /* Some kernels hide threads by preceding the pid with a '.' */ - if (*ptr == '.') - ptr++; - - /* If the directory is not numeric, it cannot be a - * process/thread - */ - if (*ptr < '0' || *ptr > '9') - continue; - pid = local_atoi(ptr); - - /* Attach (and suspend) all threads */ - if (pid && pid != clone_pid) { - struct kernel_stat tmp_sb; - char fname[entry->d_reclen + 48]; - strcat(strcat(strcpy(fname, "/proc/"), - entry->d_name), marker_path); - - /* Check if the marker is identical to the one we created */ - if (sys_stat(fname, &tmp_sb) >= 0 && - marker_sb.st_ino == tmp_sb.st_ino) { - long i, j; - - /* Found one of our threads, make sure it is no duplicate */ - for (i = 0; i < num_threads; i++) { - /* Linear search is slow, but should not matter much for - * the typically small number of threads. - */ - if (pids[i] == pid) { - /* Found a duplicate; most likely on second pass */ - goto next_entry; - } - } - - /* Check whether data structure needs growing */ - if (num_threads >= max_threads) { - /* Back to square one, this time with more memory */ - NO_INTR(sys_close(proc)); - goto detach_threads; - } - - /* Attaching to thread suspends it */ - pids[num_threads++] = pid; - sig_num_threads = num_threads; - if (sys_ptrace(PTRACE_ATTACH, pid, (void *)0, - (void *)0) < 0) { - /* If operation failed, ignore thread. Maybe it - * just died? There might also be a race - * condition with a concurrent core dumper or - * with a debugger. In that case, we will just - * make a best effort, rather than failing - * entirely. - */ - num_threads--; - sig_num_threads = num_threads; - goto next_entry; - } - while (sys_waitpid(pid, (int *)0, __WALL) < 0) { - if (errno != EINTR) { - sys_ptrace_detach(pid); - num_threads--; - sig_num_threads = num_threads; - goto next_entry; - } - } - - if (sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i++ != j || - sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i != j) { - /* Address spaces are distinct, even though both - * processes show the "marker". This is probably - * a forked child process rather than a thread. - */ - sys_ptrace_detach(pid); - num_threads--; - sig_num_threads = num_threads; - } else { - found_parent |= pid == ppid; - added_entries++; - } - } - } - } - next_entry:; - } - } - NO_INTR(sys_close(proc)); - sig_proc = proc = -1; - - /* If we failed to find any threads, try looking somewhere else in - * /proc. Maybe, threads are reported differently on this system. - */ - if (num_threads > 1 || !*++proc_path) { - NO_INTR(sys_close(marker)); - sig_marker = marker = -1; - - /* If we never found the parent process, something is very wrong. - * Most likely, we are running in debugger. Any attempt to operate - * on the threads would be very incomplete. Let's just report an - * error to the caller. - */ - if (!found_parent) { - TCMalloc_ResumeAllProcessThreads(num_threads, pids); - sys__exit(3); - } - - /* Now we are ready to call the callback, - * which takes care of resuming the threads for us. 
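The attach step buried in that loop is a three-beat sequence: PTRACE_ATTACH stops the thread, waitpid(..., __WALL) reaps the attach-stop, and PTRACE_DETACH hands the thread back. Reduced to a sketch, with the same give-it-back error handling:

```c
/* Sketch: suspend one thread the way the lister does, then release it. */
#define _GNU_SOURCE
#include <errno.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

static int suspend_then_release(pid_t tid) {
  if (ptrace(PTRACE_ATTACH, tid, 0, 0) < 0)
    return -1;                     /* died, or a debugger beat us to it */
  while (waitpid(tid, NULL, __WALL) < 0) {
    if (errno != EINTR) {          /* cannot reap: give the thread back */
      ptrace(PTRACE_DETACH, tid, 0, 0);
      return -1;
    }
  }
  /* ...thread is frozen here; this is where the lister records it... */
  return (int)ptrace(PTRACE_DETACH, tid, 0, 0);
}
```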
- */ - args->result = args->callback(args->parameter, num_threads, - pids, args->ap); - args->err = errno; - - /* Callback should have resumed threads, but better safe than sorry */ - if (TCMalloc_ResumeAllProcessThreads(num_threads, pids)) { - /* Callback forgot to resume at least one thread, report error */ - args->err = EINVAL; - args->result = -1; - } - - sys__exit(0); - } - detach_threads: - /* Resume all threads prior to retrying the operation */ - TCMalloc_ResumeAllProcessThreads(num_threads, pids); - sig_pids = NULL; - num_threads = 0; - sig_num_threads = num_threads; - max_threads += 100; - } - } -} - - -/* This function gets the list of all linux threads of the current process - * passes them to the 'callback' along with the 'parameter' pointer; at the - * call back call time all the threads are paused via - * PTRACE_ATTACH. - * The callback is executed from a separate thread which shares only the - * address space, the filesystem, and the filehandles with the caller. Most - * notably, it does not share the same pid and ppid; and if it terminates, - * the rest of the application is still there. 'callback' is supposed to do - * or arrange for TCMalloc_ResumeAllProcessThreads. This happens automatically, if - * the thread raises a synchronous signal (e.g. SIGSEGV); asynchronous - * signals are blocked. If the 'callback' decides to unblock them, it must - * ensure that they cannot terminate the application, or that - * TCMalloc_ResumeAllProcessThreads will get called. - * It is an error for the 'callback' to make any library calls that could - * acquire locks. Most notably, this means that most system calls have to - * avoid going through libc. Also, this means that it is not legal to call - * exit() or abort(). - * We return -1 on error and the return value of 'callback' on success. - */ -int TCMalloc_ListAllProcessThreads(void *parameter, - ListAllProcessThreadsCallBack callback, ...) { - char altstack_mem[ALT_STACKSIZE]; - struct ListerParams args; - pid_t clone_pid; - int dumpable = 1, sig; - struct kernel_sigset_t sig_blocked, sig_old; - sem_t lock; - - va_start(args.ap, callback); - - /* If we are short on virtual memory, initializing the alternate stack - * might trigger a SIGSEGV. Let's do this early, before it could get us - * into more trouble (i.e. before signal handlers try to use the alternate - * stack, and before we attach to other threads). - */ - memset(altstack_mem, 0, sizeof(altstack_mem)); - - /* Some of our cleanup functions could conceivable use more stack space. - * Try to touch the stack right now. This could be defeated by the compiler - * being too smart for it's own good, so try really hard. - */ - DirtyStack(32768); - - /* Make this process "dumpable". This is necessary in order to ptrace() - * after having called setuid(). - */ - dumpable = sys_prctl(PR_GET_DUMPABLE, 0); - if (!dumpable) - sys_prctl(PR_SET_DUMPABLE, 1); - - /* Fill in argument block for dumper thread */ - args.result = -1; - args.err = 0; - args.altstack_mem = altstack_mem; - args.parameter = parameter; - args.callback = callback; - args.lock = &lock; - - /* Before cloning the thread lister, block all asynchronous signals, as we */ - /* are not prepared to handle them. 
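The mask set up just below (sys_sigfillset, then sys_sigdelset for each entry of sync_signals) has a plain-libc equivalent that may read more clearly: block everything asynchronous, but keep the synchronous faults deliverable so the handler installed earlier can still wake the tracees. A sketch:

```c
/* Sketch: the pre-clone signal mask, using the libc sigset API. */
#include <signal.h>
#include <stddef.h>

static void block_async_signals(sigset_t *old_mask) {
  static const int sync_signals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV,
                                      SIGBUS, SIGXCPU, SIGXFSZ };
  sigset_t blocked;
  sigfillset(&blocked);
  for (size_t i = 0; i < sizeof sync_signals / sizeof *sync_signals; i++)
    sigdelset(&blocked, sync_signals[i]);      /* keep faults deliverable */
  sigprocmask(SIG_BLOCK, &blocked, old_mask);  /* undo with SIG_SETMASK   */
}
```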
*/ - sys_sigfillset(&sig_blocked); - for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) { - sys_sigdelset(&sig_blocked, sync_signals[sig]); - } - if (sys_sigprocmask(SIG_BLOCK, &sig_blocked, &sig_old)) { - args.err = errno; - args.result = -1; - goto failed; - } - - /* scope */ { - /* After cloning, both the parent and the child share the same instance - * of errno. We must make sure that at least one of these processes - * (in our case, the parent) uses modified syscall macros that update - * a local copy of errno, instead. - */ - #ifdef __cplusplus - #define sys0_sigprocmask sys.sigprocmask - #define sys0_waitpid sys.waitpid - SysCalls sys; - #else - int my_errno; - #define SYS_ERRNO my_errno - #define SYS_INLINE inline - #define SYS_PREFIX 0 - #undef SYS_LINUX_SYSCALL_SUPPORT_H - #include "linux_syscall_support.h" - #endif - - /* Lock before clone so that parent can set - * ptrace permissions (if necessary) prior - * to ListerThread actually executing - */ - if (sem_init(&lock, 0, 0) == 0) { - - int clone_errno; - clone_pid = local_clone((int (*)(void *))ListerThread, &args); - clone_errno = errno; - - sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old); - - if (clone_pid >= 0) { -#ifdef PR_SET_PTRACER - /* In newer versions of glibc permission must explicitly - * be given to allow for ptrace. - */ - prctl(PR_SET_PTRACER, clone_pid, 0, 0, 0); -#endif - /* Releasing the lock here allows the - * ListerThread to execute and ptrace us. - */ - sem_post(&lock); - int status, rc; - while ((rc = sys0_waitpid(clone_pid, &status, __WALL)) < 0 && - ERRNO == EINTR) { - /* Keep waiting */ - } - if (rc < 0) { - args.err = ERRNO; - args.result = -1; - } else if (WIFEXITED(status)) { - switch (WEXITSTATUS(status)) { - case 0: break; /* Normal process termination */ - case 2: args.err = EFAULT; /* Some fault (e.g. SIGSEGV) detected */ - args.result = -1; - break; - case 3: args.err = EPERM; /* Process is already being traced */ - args.result = -1; - break; - default:args.err = ECHILD; /* Child died unexpectedly */ - args.result = -1; - break; - } - } else if (!WIFEXITED(status)) { - args.err = EFAULT; /* Terminated due to an unhandled signal*/ - args.result = -1; - } - sem_destroy(&lock); - } else { - args.result = -1; - args.err = clone_errno; - } - } else { - args.result = -1; - args.err = errno; - } - } - - /* Restore the "dumpable" state of the process */ -failed: - if (!dumpable) - sys_prctl(PR_SET_DUMPABLE, dumpable); - - va_end(args.ap); - - errno = args.err; - return args.result; -} - -/* This function resumes the list of all linux threads that - * TCMalloc_ListAllProcessThreads pauses before giving to its callback. - * The function returns non-zero if at least one thread was - * suspended and has now been resumed. - */ -int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) { - int detached_at_least_one = 0; - while (num_threads-- > 0) { - detached_at_least_one |= sys_ptrace_detach(thread_pids[num_threads]) >= 0; - } - return detached_at_least_one; -} - -#ifdef __cplusplus -} -#endif -#endif diff --git a/contrib/libtcmalloc/src/base/linuxthreads.h b/contrib/libtcmalloc/src/base/linuxthreads.h deleted file mode 100644 index 09ce45fc13f..00000000000 --- a/contrib/libtcmalloc/src/base/linuxthreads.h +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2005-2007, Google Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - * Author: Markus Gutschke - */ - -#ifndef _LINUXTHREADS_H -#define _LINUXTHREADS_H - -/* Include thread_lister.h to get the interface that we implement for linux. - */ - -/* We currently only support certain platforms on Linux. Porting to other - * related platforms should not be difficult. - */ -#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ - defined(__mips__) || defined(__PPC__) || defined(__aarch64__) || \ - defined(__s390__)) && defined(__linux) - -/* Define the THREADS symbol to make sure that there is exactly one core dumper - * built into the library. - */ -#define THREADS "Linux /proc" - -#endif - -#endif /* _LINUXTHREADS_H */ diff --git a/contrib/libtcmalloc/src/base/logging.cc b/contrib/libtcmalloc/src/base/logging.cc deleted file mode 100644 index 2b0adcb8945..00000000000 --- a/contrib/libtcmalloc/src/base/logging.cc +++ /dev/null @@ -1,108 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// This file just provides storage for FLAGS_verbose.
-
-#include "../config.h"
-#include "base/logging.h"
-#include "base/commandlineflags.h"
-
-DEFINE_int32(verbose, EnvToInt("PERFTOOLS_VERBOSE", 0),
-             "Set to numbers >0 for more verbose output, or <0 for less.  "
-             "--verbose == -4 means we log fatal errors only.");
-
-
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-
-// While windows does have a POSIX-compatible API
-// (_open/_write/_close), it acquires memory.  Using this lower-level
-// windows API is the closest we can get to being "raw".
-RawFD RawOpenForWriting(const char* filename) {
-  // CreateFile allocates memory if file_name isn't absolute, so if
-  // that ever becomes a problem then we ought to compute the absolute
-  // path on its behalf (perhaps the ntdll/kernel function isn't aware
-  // of the working directory?)
-  RawFD fd = CreateFileA(filename, GENERIC_WRITE, 0, NULL,
-                         CREATE_ALWAYS, 0, NULL);
-  if (fd != kIllegalRawFD && GetLastError() == ERROR_ALREADY_EXISTS)
-    SetEndOfFile(fd);    // truncate the existing file
-  return fd;
-}
-
-void RawWrite(RawFD handle, const char* buf, size_t len) {
-  while (len > 0) {
-    DWORD wrote;
-    BOOL ok = WriteFile(handle, buf, len, &wrote, NULL);
-    // We do not use an asynchronous file handle, so ok==false means an error
-    if (!ok) break;
-    buf += wrote;
-    len -= wrote;
-  }
-}
-
-void RawClose(RawFD handle) {
-  CloseHandle(handle);
-}
-
-#else  // _WIN32 || __CYGWIN__ || __CYGWIN32__
-
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_FCNTL_H
-#include <fcntl.h>
-#endif
-
-// Re-run fn until it doesn't cause EINTR.
-#define NO_INTR(fn)  do {} while ((fn) < 0 && errno == EINTR)
-
-RawFD RawOpenForWriting(const char* filename) {
-  return open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0664);
-}
-
-void RawWrite(RawFD fd, const char* buf, size_t len) {
-  while (len > 0) {
-    ssize_t r;
-    NO_INTR(r = write(fd, buf, len));
-    if (r <= 0) break;
-    buf += r;
-    len -= r;
-  }
-}
-
-void RawClose(RawFD fd) {
-  NO_INTR(close(fd));
-}
-
-#endif  // _WIN32 || __CYGWIN__ || __CYGWIN32__
diff --git a/contrib/libtcmalloc/src/base/logging.h b/contrib/libtcmalloc/src/base/logging.h
deleted file mode 100644
index fa22489bea3..00000000000
--- a/contrib/libtcmalloc/src/base/logging.h
+++ /dev/null
@@ -1,259 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// This file contains #include information about logging-related stuff.
-// Pretty much everybody needs to #include this file so that they can
-// log various happenings.
-//
-#ifndef _LOGGING_H_
-#define _LOGGING_H_
-
-#include "../config.h"
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>    // for write()
-#endif
-#include <string.h>    // for strlen(), strcmp()
-#include <assert.h>
-#include <errno.h>     // for errno
-#include "base/commandlineflags.h"
-
-// On some systems (like freebsd), we can't call write() at all in a
-// global constructor, perhaps because errno hasn't been set up.
-// (In windows, we can't call it because it might call malloc.)
-// Calling the write syscall is safer (it doesn't set errno), so we
-// prefer that.  Note we don't care about errno for logging: we just
-// do logging on a best-effort basis.
-#if defined(_MSC_VER)
-#define WRITE_TO_STDERR(buf, len) WriteToStderr(buf, len);  // in port.cc
-#elif defined(HAVE_SYS_SYSCALL_H)
-#include <sys/syscall.h>
-#define WRITE_TO_STDERR(buf, len) syscall(SYS_write, STDERR_FILENO, buf, len)
-#else
-#define WRITE_TO_STDERR(buf, len) write(STDERR_FILENO, buf, len)
-#endif
-
-// MSVC and mingw define their own, safe version of vsnprintf (the
-// windows one is broken) in port.cc.  Everyone else can use the
-// version here.  We had to give it a unique name for windows.
-#ifndef _WIN32
-# define perftools_vsnprintf vsnprintf
-#endif
-
-
-// We log all messages at this log-level and below.
-// INFO == -1, WARNING == -2, ERROR == -3, FATAL == -4
-DECLARE_int32(verbose);
-
-// CHECK dies with a fatal error if condition is not true.  It is *not*
-// controlled by NDEBUG, so the check will be executed regardless of
-// compilation mode.  Therefore, it is safe to do things like:
-//    CHECK(fp->Write(x) == 4)
-// Note we use write instead of printf/puts to avoid the risk we'll
-// call malloc().
-#define CHECK(condition)                                                \
-  do {                                                                  \
-    if (!(condition)) {                                                 \
-      WRITE_TO_STDERR("Check failed: " #condition "\n",                 \
-                      sizeof("Check failed: " #condition "\n")-1);      \
-      abort();                                                          \
-    }                                                                   \
-  } while (0)
-
-// This takes a message to print.  The name is historical.
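(RAW_CHECK, the variant that carries its own message, follows below.) A hedged re-derivation of the pattern both macros rely on, with `MY_CHECK` as an invented name: the message length is `sizeof` of a string literal minus the trailing NUL, computed at compile time, so a failing check needs neither `strlen()` nor the heap.

```cpp
#include <cstdlib>
#include <unistd.h>

#define MY_CHECK(condition)                                            \
  do {                                                                 \
    if (!(condition)) {                                                \
      /* compile-time length: no strlen(), no malloc() */              \
      (void)!write(STDERR_FILENO, "Check failed: " #condition "\n",    \
                   sizeof("Check failed: " #condition "\n") - 1);      \
      abort();   /* fatal: do not run atexit() handlers' cleanups */   \
    }                                                                  \
  } while (0)

// void* p = std::malloc(16);
// MY_CHECK(p != nullptr);   // dies without re-entering the allocator
```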
-#define RAW_CHECK(condition, message) \ - do { \ - if (!(condition)) { \ - WRITE_TO_STDERR("Check failed: " #condition ": " message "\n", \ - sizeof("Check failed: " #condition ": " message "\n")-1);\ - abort(); \ - } \ - } while (0) - -// This is like RAW_CHECK, but only in debug-mode -#ifdef NDEBUG -enum { DEBUG_MODE = 0 }; -#define RAW_DCHECK(condition, message) -#else -enum { DEBUG_MODE = 1 }; -#define RAW_DCHECK(condition, message) RAW_CHECK(condition, message) -#endif - -// This prints errno as well. Note we use write instead of printf/puts to -// avoid the risk we'll call malloc(). -#define PCHECK(condition) \ - do { \ - if (!(condition)) { \ - const int err_no = errno; \ - WRITE_TO_STDERR("Check failed: " #condition ": ", \ - sizeof("Check failed: " #condition ": ")-1); \ - WRITE_TO_STDERR(strerror(err_no), strlen(strerror(err_no))); \ - WRITE_TO_STDERR("\n", sizeof("\n")-1); \ - abort(); \ - } \ - } while (0) - -// Helper macro for binary operators; prints the two values on error -// Don't use this macro directly in your code, use CHECK_EQ et al below - -// WARNING: These don't compile correctly if one of the arguments is a pointer -// and the other is NULL. To work around this, simply static_cast NULL to the -// type of the desired pointer. - -// TODO(jandrews): Also print the values in case of failure. Requires some -// sort of type-sensitive ToString() function. -#define CHECK_OP(op, val1, val2) \ - do { \ - if (!((val1) op (val2))) { \ - fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2); \ - abort(); \ - } \ - } while (0) - -#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2) -#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2) -#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2) -#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2) -#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2) -#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2) - -// Synonyms for CHECK_* that are used in some unittests. -#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2) -#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2) -#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2) -#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2) -#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2) -#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2) -#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2) -#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2) -#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2) -#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2) -#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2) -#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2) -// As are these variants. 
-#define EXPECT_TRUE(cond) CHECK(cond) -#define EXPECT_FALSE(cond) CHECK(!(cond)) -#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0) -#define ASSERT_TRUE(cond) EXPECT_TRUE(cond) -#define ASSERT_FALSE(cond) EXPECT_FALSE(cond) -#define ASSERT_STREQ(a, b) EXPECT_STREQ(a, b) - -// Used for (libc) functions that return -1 and set errno -#define CHECK_ERR(invocation) PCHECK((invocation) != -1) - -// A few more checks that only happen in debug mode -#ifdef NDEBUG -#define DCHECK_EQ(val1, val2) -#define DCHECK_NE(val1, val2) -#define DCHECK_LE(val1, val2) -#define DCHECK_LT(val1, val2) -#define DCHECK_GE(val1, val2) -#define DCHECK_GT(val1, val2) -#else -#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2) -#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2) -#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2) -#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2) -#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2) -#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2) -#endif - - -#ifdef ERROR -#undef ERROR // may conflict with ERROR macro on windows -#endif -enum LogSeverity {INFO = -1, WARNING = -2, ERROR = -3, FATAL = -4}; - -// NOTE: we add a newline to the end of the output if it's not there already -inline void LogPrintf(int severity, const char* pat, va_list ap) { - // We write directly to the stderr file descriptor and avoid FILE - // buffering because that may invoke malloc() - char buf[600]; - perftools_vsnprintf(buf, sizeof(buf)-1, pat, ap); - if (buf[0] != '\0' && buf[strlen(buf)-1] != '\n') { - assert(strlen(buf)+1 < sizeof(buf)); - strcat(buf, "\n"); - } - WRITE_TO_STDERR(buf, strlen(buf)); - if ((severity) == FATAL) - abort(); // LOG(FATAL) indicates a big problem, so don't run atexit() calls -} - -// Note that since the order of global constructors is unspecified, -// global code that calls RAW_LOG may execute before FLAGS_verbose is set. -// Such code will run with verbosity == 0 no matter what. -#define VLOG_IS_ON(severity) (FLAGS_verbose >= severity) - -// In a better world, we'd use __VA_ARGS__, but VC++ 7 doesn't support it. -#define LOG_PRINTF(severity, pat) do { \ - if (VLOG_IS_ON(severity)) { \ - va_list ap; \ - va_start(ap, pat); \ - LogPrintf(severity, pat, ap); \ - va_end(ap); \ - } \ -} while (0) - -// RAW_LOG is the main function; some synonyms are used in unittests. -inline void RAW_LOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } -inline void RAW_VLOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } -inline void LOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } -inline void VLOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } -inline void LOG_IF(int lvl, bool cond, const char* pat, ...) { - if (cond) LOG_PRINTF(lvl, pat); -} - -// This isn't technically logging, but it's also IO and also is an -// attempt to be "raw" -- that is, to not use any higher-level libc -// routines that might allocate memory or (ideally) try to allocate -// locks. We use an opaque file handle (not necessarily an int) -// to allow even more low-level stuff in the future. -// Like other "raw" routines, these functions are best effort, and -// thus don't return error codes (except RawOpenForWriting()). 
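For context, a hedged sketch of how this raw layer is meant to be driven; `DumpRaw` and the path are invented, while the three calls are exactly the ones declared just below:

```cpp
// Write a buffer with no stdio buffering, so nothing here hides a malloc().
void DumpRaw(const char* buf, size_t len) {
  RawFD fd = RawOpenForWriting("/tmp/heap-profile.tmp");  // hypothetical path
  if (fd == kIllegalRawFD) return;  // best effort: no errno, no exception
  RawWrite(fd, buf, len);           // internally retries short writes
  RawClose(fd);
}
```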
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-#ifndef NOMINMAX
-#define NOMINMAX     // @#!$& windows
-#endif
-#include <windows.h>
-typedef HANDLE RawFD;
-const RawFD kIllegalRawFD = INVALID_HANDLE_VALUE;
-#else
-typedef int RawFD;
-const RawFD kIllegalRawFD = -1;    // what open returns if it fails
-#endif  // defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-
-RawFD RawOpenForWriting(const char* filename);   // uses default permissions
-void RawWrite(RawFD fd, const char* buf, size_t len);
-void RawClose(RawFD fd);
-
-#endif // _LOGGING_H_
diff --git a/contrib/libtcmalloc/src/base/low_level_alloc.cc b/contrib/libtcmalloc/src/base/low_level_alloc.cc
deleted file mode 100644
index 6b467cff123..00000000000
--- a/contrib/libtcmalloc/src/base/low_level_alloc.cc
+++ /dev/null
@@ -1,582 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// A low-level allocator that can be used by other low-level
-// modules without introducing dependency cycles.
-// This allocator is slow and wasteful of memory;
-// it should not be used when performance is key.
-
-#include "base/low_level_alloc.h"
-#include "base/dynamic_annotations.h"
-#include "base/spinlock.h"
-#include "base/logging.h"
-#include "malloc_hook-inl.h"
-#include <errno.h>
-#include <string.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_MMAP
-#include <sys/mman.h>
-#endif
-#include <new>                   // for placement-new
-
-// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old
-// form of the name instead.
-#ifndef MAP_ANONYMOUS
-# define MAP_ANONYMOUS MAP_ANON
-#endif
-
-// A first-fit allocator with amortized logarithmic free() time.
-
-LowLevelAlloc::PagesAllocator::~PagesAllocator() {
-}
-
-// ---------------------------------------------------------------------------
-static const int kMaxLevel = 30;
-
-// We put this class-only struct in a namespace to avoid polluting the
-// global namespace with this struct name (thus risking an ODR violation).
-namespace low_level_alloc_internal {
-  // This struct describes one allocated block, or one free block.
-  struct AllocList {
-    struct Header {
-      intptr_t size;  // size of entire region, including this field. Must be
-                      // first.  Valid in both allocated and unallocated blocks
-      intptr_t magic; // kMagicAllocated or kMagicUnallocated xor this
-      LowLevelAlloc::Arena *arena; // pointer to parent arena
-      void *dummy_for_alignment;   // aligns regions to 0 mod 2*sizeof(void*)
-    } header;
-
-    // Next two fields: in unallocated blocks: freelist skiplist data
-    //                  in allocated blocks: overlaps with client data
-    int levels;           // levels in skiplist used
-    AllocList *next[kMaxLevel];   // actually has levels elements.
-                                  // The AllocList node may not have room for
-                                  // all kMaxLevel entries.  See max_fit in
-                                  // LLA_SkiplistLevels()
-  };
-}
-using low_level_alloc_internal::AllocList;
-
-
-// ---------------------------------------------------------------------------
-// A trivial skiplist implementation.  This is used to keep the freelist
-// in address order while taking only logarithmic time per insert and delete.
-
-// An integer approximation of log2(size/base)
-// Requires size >= base.
-static int IntLog2(size_t size, size_t base) {
-  int result = 0;
-  for (size_t i = size; i > base; i >>= 1) { // i == floor(size/2**result)
-    result++;
-  }
-  //    floor(size / 2**result) <= base < floor(size / 2**(result-1))
-  // => log2(size/(base+1)) <= result < 1+log2(size/base)
-  // => result ~= log2(size/base)
-  return result;
-}
-
-// Return a random integer n:  p(n)=1/(2**n) if 1 <= n; p(n)=0 if n < 1.
-static int Random() {
-  static uint32 r = 1;         // no locking---it's not critical
-  ANNOTATE_BENIGN_RACE(&r, "benign race, not critical.");
-  int result = 1;
-  while ((((r = r*1103515245 + 12345) >> 30) & 1) == 0) {
-    result++;
-  }
-  return result;
-}
-
-// Return a number of skiplist levels for a node of size bytes, where
-// base is the minimum node size.  Compute level=log2(size / base)+n
-// where n is 1 if random is false and otherwise a random number generated with
-// the standard distribution for a skiplist:  See Random() above.
-// Bigger nodes tend to have more skiplist levels due to the log2(size / base)
-// term, so first-fit searches touch fewer nodes.  "level" is clipped so
-// level < kMaxLevel and next[level-1] will fit in the node.
-// 0 < LLA_SkiplistLevels(x,y,false) <= LLA_SkiplistLevels(x,y,true) < kMaxLevel
-static int LLA_SkiplistLevels(size_t size, size_t base, bool random) {
-  // max_fit is the maximum number of levels that will fit in a node for the
-  // given size.  We can't return more than max_fit, no matter what the
-  // actual random number generation gives us.
-  int max_fit = (size-OFFSETOF_MEMBER(AllocList, next)) / sizeof (AllocList *);
-  int level = IntLog2(size, base) + (random ? Random() : 1);
-  if (level > max_fit)     level = max_fit;
-  if (level > kMaxLevel-1) level = kMaxLevel - 1;
-  RAW_CHECK(level >= 1, "block not big enough for even one level");
-  return level;
-}
-
-// Return "atleast", the first element of AllocList *head s.t. *atleast >= *e.
-// For 0 <= i < head->levels, set prev[i] to "no_greater", where no_greater
-// points to the last element at level i in the AllocList less than *e, or is
-// head if no such element exists.
-static AllocList *LLA_SkiplistSearch(AllocList *head,
-                                     AllocList *e, AllocList **prev) {
-  AllocList *p = head;
-  for (int level = head->levels - 1; level >= 0; level--) {
-    for (AllocList *n; (n = p->next[level]) != 0 && n < e; p = n) {
-    }
-    prev[level] = p;
-  }
-  return (head->levels == 0) ? 0 : prev[0]->next[0];
-}
-
-// Insert element *e into AllocList *head.  Set prev[] as LLA_SkiplistSearch.
-// Requires that e->levels be previously set by the caller (using -// LLA_SkiplistLevels()) -static void LLA_SkiplistInsert(AllocList *head, AllocList *e, - AllocList **prev) { - LLA_SkiplistSearch(head, e, prev); - for (; head->levels < e->levels; head->levels++) { // extend prev pointers - prev[head->levels] = head; // to all *e's levels - } - for (int i = 0; i != e->levels; i++) { // add element to list - e->next[i] = prev[i]->next[i]; - prev[i]->next[i] = e; - } -} - -// Remove element *e from AllocList *head. Set prev[] as LLA_SkiplistSearch(). -// Requires that e->levels be previous set by the caller (using -// LLA_SkiplistLevels()) -static void LLA_SkiplistDelete(AllocList *head, AllocList *e, - AllocList **prev) { - AllocList *found = LLA_SkiplistSearch(head, e, prev); - RAW_CHECK(e == found, "element not in freelist"); - for (int i = 0; i != e->levels && prev[i]->next[i] == e; i++) { - prev[i]->next[i] = e->next[i]; - } - while (head->levels > 0 && head->next[head->levels - 1] == 0) { - head->levels--; // reduce head->levels if level unused - } -} - -// --------------------------------------------------------------------------- -// Arena implementation - -struct LowLevelAlloc::Arena { - Arena() : mu(SpinLock::LINKER_INITIALIZED) {} // does nothing; for static init - explicit Arena(int) : pagesize(0) {} // set pagesize to zero explicitly - // for non-static init - - SpinLock mu; // protects freelist, allocation_count, - // pagesize, roundup, min_size - AllocList freelist; // head of free list; sorted by addr (under mu) - int32 allocation_count; // count of allocated blocks (under mu) - int32 flags; // flags passed to NewArena (ro after init) - size_t pagesize; // ==getpagesize() (init under mu, then ro) - size_t roundup; // lowest power of 2 >= max(16,sizeof (AllocList)) - // (init under mu, then ro) - size_t min_size; // smallest allocation block size - // (init under mu, then ro) - PagesAllocator *allocator; -}; - -// The default arena, which is used when 0 is passed instead of an Arena -// pointer. -static struct LowLevelAlloc::Arena default_arena; - -// Non-malloc-hooked arenas: used only to allocate metadata for arenas that -// do not want malloc hook reporting, so that for them there's no malloc hook -// reporting even during arena creation. -static struct LowLevelAlloc::Arena unhooked_arena; -static struct LowLevelAlloc::Arena unhooked_async_sig_safe_arena; - -namespace { - - class DefaultPagesAllocator : public LowLevelAlloc::PagesAllocator { - public: - virtual ~DefaultPagesAllocator() {}; - virtual void *MapPages(int32 flags, size_t size); - virtual void UnMapPages(int32 flags, void *addr, size_t size); - }; - -} - -// magic numbers to identify allocated and unallocated blocks -static const intptr_t kMagicAllocated = 0x4c833e95; -static const intptr_t kMagicUnallocated = ~kMagicAllocated; - -namespace { - class SCOPED_LOCKABLE ArenaLock { - public: - explicit ArenaLock(LowLevelAlloc::Arena *arena) - EXCLUSIVE_LOCK_FUNCTION(arena->mu) - : left_(false), mask_valid_(false), arena_(arena) { - if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { - // We've decided not to support async-signal-safe arena use until - // there a demonstrated need. Here's how one could do it though - // (would need to be made more portable). 
-#if 0
-      sigset_t all;
-      sigfillset(&all);
-      this->mask_valid_ =
-          (pthread_sigmask(SIG_BLOCK, &all, &this->mask_) == 0);
-#else
-      RAW_CHECK(false, "We do not yet support async-signal-safe arena.");
-#endif
-    }
-    this->arena_->mu.Lock();
-  }
-  ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); }
-  void Leave() /*UNLOCK_FUNCTION()*/ {
-    this->arena_->mu.Unlock();
-#if 0
-    if (this->mask_valid_) {
-      pthread_sigmask(SIG_SETMASK, &this->mask_, 0);
-    }
-#endif
-    this->left_ = true;
-  }
- private:
-  bool left_;       // whether left region
-  bool mask_valid_;
-#if 0
-  sigset_t mask_;   // old mask of blocked signals
-#endif
-  LowLevelAlloc::Arena *arena_;
-  DISALLOW_COPY_AND_ASSIGN(ArenaLock);
-};
-} // anonymous namespace
-
-// create an appropriate magic number for an object at "ptr"
-// "magic" should be kMagicAllocated or kMagicUnallocated
-inline static intptr_t Magic(intptr_t magic, AllocList::Header *ptr) {
-  return magic ^ reinterpret_cast<intptr_t>(ptr);
-}
-
-// Initialize the fields of an Arena
-static void ArenaInit(LowLevelAlloc::Arena *arena) {
-  if (arena->pagesize == 0) {
-    arena->pagesize = getpagesize();
-    // Round up block sizes to a power of two close to the header size.
-    arena->roundup = 16;
-    while (arena->roundup < sizeof (arena->freelist.header)) {
-      arena->roundup += arena->roundup;
-    }
-    // Don't allocate blocks less than twice the roundup size to avoid tiny
-    // free blocks.
-    arena->min_size = 2 * arena->roundup;
-    arena->freelist.header.size = 0;
-    arena->freelist.header.magic =
-        Magic(kMagicUnallocated, &arena->freelist.header);
-    arena->freelist.header.arena = arena;
-    arena->freelist.levels = 0;
-    memset(arena->freelist.next, 0, sizeof (arena->freelist.next));
-    arena->allocation_count = 0;
-    if (arena == &default_arena) {
-      // Default arena should be hooked, e.g. for heap-checker to trace
-      // pointer chains through objects in the default arena.
-      arena->flags = LowLevelAlloc::kCallMallocHook;
-    } else if (arena == &unhooked_async_sig_safe_arena) {
-      arena->flags = LowLevelAlloc::kAsyncSignalSafe;
-    } else {
-      arena->flags = 0;   // other arenas' flags may be overridden by client,
-                          // but unhooked_arena will have 0 in 'flags'.
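// [editor note -- hedged standalone illustration of the Magic() scheme used
// throughout this file: the tag is a constant XORed with the header's own
// address, so a block that is shifted, corrupted, or double-freed no longer
// verifies. Hdr, Tag, and kAllocated are invented names:
//
//   #include <cstdint>
//   struct Hdr { intptr_t magic; };
//   const intptr_t kAllocated = 0x4c833e95;
//   intptr_t Tag(intptr_t magic, const Hdr *h) {
//     return magic ^ reinterpret_cast<intptr_t>(h);  // address-dependent
//   }
//   bool LooksAllocated(const Hdr *h) {
//     return h->magic == Tag(kAllocated, h);         // fails if h moved
//   }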
-    }
-    arena->allocator = LowLevelAlloc::GetDefaultPagesAllocator();
-  }
-}
-
-// L < meta_data_arena->mu
-LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags,
-                                              Arena *meta_data_arena) {
-  return NewArenaWithCustomAlloc(flags, meta_data_arena, NULL);
-}
-
-// L < meta_data_arena->mu
-LowLevelAlloc::Arena *LowLevelAlloc::NewArenaWithCustomAlloc(int32 flags,
-                                                             Arena *meta_data_arena,
-                                                             PagesAllocator *allocator) {
-  RAW_CHECK(meta_data_arena != 0, "must pass a valid arena");
-  if (meta_data_arena == &default_arena) {
-    if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
-      meta_data_arena = &unhooked_async_sig_safe_arena;
-    } else if ((flags & LowLevelAlloc::kCallMallocHook) == 0) {
-      meta_data_arena = &unhooked_arena;
-    }
-  }
-  // Arena(0) uses the constructor for non-static contexts
-  Arena *result =
-    new (AllocWithArena(sizeof (*result), meta_data_arena)) Arena(0);
-  ArenaInit(result);
-  result->flags = flags;
-  if (allocator) {
-    result->allocator = allocator;
-  }
-  return result;
-}
-
-// L < arena->mu, L < arena->arena->mu
-bool LowLevelAlloc::DeleteArena(Arena *arena) {
-  RAW_CHECK(arena != 0 && arena != &default_arena && arena != &unhooked_arena,
-            "may not delete default arena");
-  ArenaLock section(arena);
-  bool empty = (arena->allocation_count == 0);
-  section.Leave();
-  if (empty) {
-    while (arena->freelist.next[0] != 0) {
-      AllocList *region = arena->freelist.next[0];
-      size_t size = region->header.size;
-      arena->freelist.next[0] = region->next[0];
-      RAW_CHECK(region->header.magic ==
-                Magic(kMagicUnallocated, &region->header),
-                "bad magic number in DeleteArena()");
-      RAW_CHECK(region->header.arena == arena,
-                "bad arena pointer in DeleteArena()");
-      RAW_CHECK(size % arena->pagesize == 0,
-                "empty arena has non-page-aligned block size");
-      RAW_CHECK(reinterpret_cast<intptr_t>(region) % arena->pagesize == 0,
-                "empty arena has non-page-aligned block");
-      int munmap_result;
-      if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) == 0) {
-        munmap_result = munmap(region, size);
-      } else {
-        munmap_result = MallocHook::UnhookedMUnmap(region, size);
-      }
-      RAW_CHECK(munmap_result == 0,
-                "LowLevelAlloc::DeleteArena: munmap failed address");
-    }
-    Free(arena);
-  }
-  return empty;
-}
-
-// ---------------------------------------------------------------------------
-
-// Return value rounded up to next multiple of align.
-// align must be a power of two.
-static intptr_t RoundUp(intptr_t addr, intptr_t align) {
-  return (addr + align - 1) & ~(align - 1);
-}
-
-// Equivalent to "return prev->next[i]" but with sanity checking
-// that the freelist is in the correct order, that it
-// consists of regions marked "unallocated", and that no two regions
-// are adjacent in memory (they should have been coalesced).
-// L < arena->mu
-static AllocList *Next(int i, AllocList *prev, LowLevelAlloc::Arena *arena) {
-  RAW_CHECK(i < prev->levels, "too few levels in Next()");
-  AllocList *next = prev->next[i];
-  if (next != 0) {
-    RAW_CHECK(next->header.magic == Magic(kMagicUnallocated, &next->header),
-              "bad magic number in Next()");
-    RAW_CHECK(next->header.arena == arena,
-              "bad arena pointer in Next()");
-    if (prev != &arena->freelist) {
-      RAW_CHECK(prev < next, "unordered freelist");
-      RAW_CHECK(reinterpret_cast<char *>(prev) + prev->header.size <
-                reinterpret_cast<char *>(next), "malformed freelist");
-    }
-  }
-  return next;
-}
-
-// Coalesce list item "a" with its successor if they are adjacent.
-static void Coalesce(AllocList *a) {
-  AllocList *n = a->next[0];
-  if (n != 0 && reinterpret_cast<char *>(a) + a->header.size ==
-                    reinterpret_cast<char *>(n)) {
-    LowLevelAlloc::Arena *arena = a->header.arena;
-    a->header.size += n->header.size;
-    n->header.magic = 0;
-    n->header.arena = 0;
-    AllocList *prev[kMaxLevel];
-    LLA_SkiplistDelete(&arena->freelist, n, prev);
-    LLA_SkiplistDelete(&arena->freelist, a, prev);
-    a->levels = LLA_SkiplistLevels(a->header.size, arena->min_size, true);
-    LLA_SkiplistInsert(&arena->freelist, a, prev);
-  }
-}
-
-// Adds block at location "v" to the free list
-// L >= arena->mu
-static void AddToFreelist(void *v, LowLevelAlloc::Arena *arena) {
-  AllocList *f = reinterpret_cast<AllocList *>(
-      reinterpret_cast<char *>(v) - sizeof (f->header));
-  RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header),
-            "bad magic number in AddToFreelist()");
-  RAW_CHECK(f->header.arena == arena,
-            "bad arena pointer in AddToFreelist()");
-  f->levels = LLA_SkiplistLevels(f->header.size, arena->min_size, true);
-  AllocList *prev[kMaxLevel];
-  LLA_SkiplistInsert(&arena->freelist, f, prev);
-  f->header.magic = Magic(kMagicUnallocated, &f->header);
-  Coalesce(f);          // maybe coalesce with successor
-  Coalesce(prev[0]);    // maybe coalesce with predecessor
-}
-
-// Frees storage allocated by LowLevelAlloc::Alloc().
-// L < arena->mu
-void LowLevelAlloc::Free(void *v) {
-  if (v != 0) {
-    AllocList *f = reinterpret_cast<AllocList *>(
-        reinterpret_cast<char *>(v) - sizeof (f->header));
-    RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header),
-              "bad magic number in Free()");
-    LowLevelAlloc::Arena *arena = f->header.arena;
-    if ((arena->flags & kCallMallocHook) != 0) {
-      MallocHook::InvokeDeleteHook(v);
-    }
-    ArenaLock section(arena);
-    AddToFreelist(v, arena);
-    RAW_CHECK(arena->allocation_count > 0, "nothing in arena to free");
-    arena->allocation_count--;
-    section.Leave();
-  }
-}
-
-// allocates and returns a block of size bytes, to be freed with Free()
-// L < arena->mu
-static void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
-  void *result = 0;
-  if (request != 0) {
-    AllocList *s;       // will point to region that satisfies request
-    ArenaLock section(arena);
-    ArenaInit(arena);
-    // round up with header
-    size_t req_rnd = RoundUp(request + sizeof (s->header), arena->roundup);
-    for (;;) {      // loop until we find a suitable region
-      // find the minimum levels that a block of this size must have
-      int i = LLA_SkiplistLevels(req_rnd, arena->min_size, false) - 1;
-      if (i < arena->freelist.levels) {   // potential blocks exist
-        AllocList *before = &arena->freelist;  // predecessor of s
-        while ((s = Next(i, before, arena)) != 0 && s->header.size < req_rnd) {
-          before = s;
-        }
-        if (s != 0) {       // we found a region
-          break;
-        }
-      }
-      // we unlock before mmap() both because mmap() may call a callback hook,
-      // and because it may be slow.
-      arena->mu.Unlock();
-      // mmap generous 64K chunks to decrease
-      // the chances/impact of fragmentation:
-      size_t new_pages_size = RoundUp(req_rnd, arena->pagesize * 16);
-      void *new_pages = arena->allocator->MapPages(arena->flags, new_pages_size);
-      arena->mu.Lock();
-      s = reinterpret_cast<AllocList *>(new_pages);
-      s->header.size = new_pages_size;
-      // Pretend the block is allocated; call AddToFreelist() to free it.
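// [editor note -- hedged aside] AddToFreelist() insists on the
// kMagicAllocated tag and then performs the skiplist insert plus both
// coalesce attempts, so freshly mmapped pages are funneled through the
// exact same path as a real Free(). RoundUp() above is the usual
// power-of-two trick, (addr + align - 1) & ~(align - 1); a worked trace
// on a 64-bit build (32-byte header and roundup) with 4 KiB pages:
//   request 100  -> req_rnd = RoundUp(100 + 32, 32) = (132 + 31) & ~31 = 160
//   no free fit  -> new_pages_size = RoundUp(160, 4096 * 16) = 65536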
-      s->header.magic = Magic(kMagicAllocated, &s->header);
-      s->header.arena = arena;
-      AddToFreelist(&s->levels, arena);  // insert new region into free list
-    }
-    AllocList *prev[kMaxLevel];
-    LLA_SkiplistDelete(&arena->freelist, s, prev);    // remove from free list
-    // s points to the first free region that's big enough
-    if (req_rnd + arena->min_size <= s->header.size) {  // big enough to split
-      AllocList *n = reinterpret_cast<AllocList *>
-                        (req_rnd + reinterpret_cast<char *>(s));
-      n->header.size = s->header.size - req_rnd;
-      n->header.magic = Magic(kMagicAllocated, &n->header);
-      n->header.arena = arena;
-      s->header.size = req_rnd;
-      AddToFreelist(&n->levels, arena);
-    }
-    s->header.magic = Magic(kMagicAllocated, &s->header);
-    RAW_CHECK(s->header.arena == arena, "");
-    arena->allocation_count++;
-    section.Leave();
-    result = &s->levels;
-  }
-  ANNOTATE_NEW_MEMORY(result, request);
-  return result;
-}
-
-void *LowLevelAlloc::Alloc(size_t request) {
-  void *result = DoAllocWithArena(request, &default_arena);
-  if ((default_arena.flags & kCallMallocHook) != 0) {
-    // this call must be directly in the user-called allocator function
-    // for MallocHook::GetCallerStackTrace to work properly
-    MallocHook::InvokeNewHook(result, request);
-  }
-  return result;
-}
-
-void *LowLevelAlloc::AllocWithArena(size_t request, Arena *arena) {
-  RAW_CHECK(arena != 0, "must pass a valid arena");
-  void *result = DoAllocWithArena(request, arena);
-  if ((arena->flags & kCallMallocHook) != 0) {
-    // this call must be directly in the user-called allocator function
-    // for MallocHook::GetCallerStackTrace to work properly
-    MallocHook::InvokeNewHook(result, request);
-  }
-  return result;
-}
-
-LowLevelAlloc::Arena *LowLevelAlloc::DefaultArena() {
-  return &default_arena;
-}
-
-static DefaultPagesAllocator *default_pages_allocator;
-static union {
-  char chars[sizeof(DefaultPagesAllocator)];
-  void *ptr;
-} debug_pages_allocator_space;
-
-LowLevelAlloc::PagesAllocator *LowLevelAlloc::GetDefaultPagesAllocator(void) {
-  if (default_pages_allocator) {
-    return default_pages_allocator;
-  }
-  default_pages_allocator = new (debug_pages_allocator_space.chars) DefaultPagesAllocator();
-  return default_pages_allocator;
-}
-
-void *DefaultPagesAllocator::MapPages(int32 flags, size_t size) {
-  void *new_pages;
-  if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
-    new_pages = MallocHook::UnhookedMMap(0, size,
-                                         PROT_WRITE|PROT_READ,
-                                         MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-  } else {
-    new_pages = mmap(0, size,
-                     PROT_WRITE|PROT_READ,
-                     MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-  }
-  RAW_CHECK(new_pages != MAP_FAILED, "mmap error");
-
-  return new_pages;
-}
-
-void DefaultPagesAllocator::UnMapPages(int32 flags, void *region, size_t size) {
-  int munmap_result;
-  if ((flags & LowLevelAlloc::kAsyncSignalSafe) == 0) {
-    munmap_result = munmap(region, size);
-  } else {
-    munmap_result = MallocHook::UnhookedMUnmap(region, size);
-  }
-  RAW_CHECK(munmap_result == 0,
-            "LowLevelAlloc::DeleteArena: munmap failed address");
-}
diff --git a/contrib/libtcmalloc/src/base/low_level_alloc.h b/contrib/libtcmalloc/src/base/low_level_alloc.h
deleted file mode 100644
index 8a20dd8b870..00000000000
--- a/contrib/libtcmalloc/src/base/low_level_alloc.h
+++ /dev/null
@@ -1,120 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#if !defined(_BASE_LOW_LEVEL_ALLOC_H_)
-#define _BASE_LOW_LEVEL_ALLOC_H_
-
-// A simple thread-safe memory allocator that does not depend on
-// mutexes or thread-specific data.  It is intended to be used
-// sparingly, and only when malloc() would introduce an unwanted
-// dependency, such as inside the heap-checker.
-
-#include "../config.h"
-#include <stddef.h>             // for size_t
-#include "base/basictypes.h"
-
-class LowLevelAlloc {
- public:
-  class PagesAllocator {
-  public:
-    virtual ~PagesAllocator();
-    virtual void *MapPages(int32 flags, size_t size) = 0;
-    virtual void UnMapPages(int32 flags, void *addr, size_t size) = 0;
-  };
-
-  static PagesAllocator *GetDefaultPagesAllocator(void);
-
-  struct Arena;       // an arena from which memory may be allocated
-
-  // Returns a pointer to a block of at least "request" bytes
-  // that have been newly allocated from the specific arena.
-  // for Alloc() call the DefaultArena() is used.
-  // Returns 0 if passed request==0.
-  // Does not return 0 under other circumstances; it crashes if memory
-  // is not available.
-  static void *Alloc(size_t request)
-    ATTRIBUTE_SECTION(malloc_hook);
-  static void *AllocWithArena(size_t request, Arena *arena)
-    ATTRIBUTE_SECTION(malloc_hook);
-
-  // Deallocates a region of memory that was previously allocated with
-  // Alloc().  Does nothing if passed 0.  "s" must be either 0,
-  // or must have been returned from a call to Alloc() and not yet passed to
-  // Free() since that call to Alloc().  The space is returned to the arena
-  // from which it was allocated.
-  static void Free(void *s) ATTRIBUTE_SECTION(malloc_hook);
-
-  // ATTRIBUTE_SECTION(malloc_hook) for Alloc* and Free
-  // are to put all callers of MallocHook::Invoke* in this module
-  // into special section,
-  // so that MallocHook::GetCallerStackTrace can function accurately.
-
-  // Create a new arena.
-  // The root metadata for the new arena is allocated in the
-  // meta_data_arena; the DefaultArena() can be passed for meta_data_arena.
- // These values may be ored into flags: - enum { - // Report calls to Alloc() and Free() via the MallocHook interface. - // Set in the DefaultArena. - kCallMallocHook = 0x0001, - - // Make calls to Alloc(), Free() be async-signal-safe. Not set in - // DefaultArena(). - kAsyncSignalSafe = 0x0002, - - // When used with DefaultArena(), the NewArena() and DeleteArena() calls - // obey the flags given explicitly in the NewArena() call, even if those - // flags differ from the settings in DefaultArena(). So the call - // NewArena(kAsyncSignalSafe, DefaultArena()) is itself async-signal-safe, - // as well as generatating an arena that provides async-signal-safe - // Alloc/Free. - }; - static Arena *NewArena(int32 flags, Arena *meta_data_arena); - - // note: pages allocator will never be destroyed and allocated pages will never be freed - // When allocator is NULL, it's same as NewArena - static Arena *NewArenaWithCustomAlloc(int32 flags, Arena *meta_data_arena, PagesAllocator *allocator); - - // Destroys an arena allocated by NewArena and returns true, - // provided no allocated blocks remain in the arena. - // If allocated blocks remain in the arena, does nothing and - // returns false. - // It is illegal to attempt to destroy the DefaultArena(). - static bool DeleteArena(Arena *arena); - - // The default arena that always exists. - static Arena *DefaultArena(); - - private: - LowLevelAlloc(); // no instances -}; - -#endif diff --git a/contrib/libtcmalloc/src/base/simple_mutex.h b/contrib/libtcmalloc/src/base/simple_mutex.h deleted file mode 100644 index e57c1079283..00000000000 --- a/contrib/libtcmalloc/src/base/simple_mutex.h +++ /dev/null @@ -1,332 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// --- -// Author: Craig Silverstein. -// -// A simple mutex wrapper, supporting locks and read-write locks. -// You should assume the locks are *not* re-entrant. 
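A hedged usage sketch of what this wrapper buys: the `MutexLock` RAII guard (defined near the bottom of this header) replaces manual Lock/Unlock pairs, and the `LINKER_INITIALIZED` constructor makes the global safe during static destruction. `counter_mu` and `BumpCounter` are invented names:

```cpp
static Mutex counter_mu(Mutex::LINKER_INITIALIZED);  // global: no-op destructor
static int counter = 0;

int BumpCounter(bool enabled) {
  MutexLock guard(&counter_mu);   // acquired here
  if (!enabled) return counter;   // early return still unlocks
  return ++counter;
}                                 // released when guard goes out of scope
```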
-// -// To use: you should define the following macros in your configure.ac: -// ACX_PTHREAD -// AC_RWLOCK -// The latter is defined in ../autoconf. -// -// This class is meant to be internal-only and should be wrapped by an -// internal namespace. Before you use this module, please give the -// name of your internal namespace for this module. Or, if you want -// to expose it, you'll want to move it to the Google namespace. We -// cannot put this class in global namespace because there can be some -// problems when we have multiple versions of Mutex in each shared object. -// -// NOTE: TryLock() is broken for NO_THREADS mode, at least in NDEBUG -// mode. -// -// CYGWIN NOTE: Cygwin support for rwlock seems to be buggy: -// http://www.cygwin.com/ml/cygwin/2008-12/msg00017.html -// Because of that, we might as well use windows locks for -// cygwin. They seem to be more reliable than the cygwin pthreads layer. -// -// TRICKY IMPLEMENTATION NOTE: -// This class is designed to be safe to use during -// dynamic-initialization -- that is, by global constructors that are -// run before main() starts. The issue in this case is that -// dynamic-initialization happens in an unpredictable order, and it -// could be that someone else's dynamic initializer could call a -// function that tries to acquire this mutex -- but that all happens -// before this mutex's constructor has run. (This can happen even if -// the mutex and the function that uses the mutex are in the same .cc -// file.) Basically, because Mutex does non-trivial work in its -// constructor, it's not, in the naive implementation, safe to use -// before dynamic initialization has run on it. -// -// The solution used here is to pair the actual mutex primitive with a -// bool that is set to true when the mutex is dynamically initialized. -// (Before that it's false.) Then we modify all mutex routines to -// look at the bool, and not try to lock/unlock until the bool makes -// it to true (which happens after the Mutex constructor has run.) -// -// This works because before main() starts -- particularly, during -// dynamic initialization -- there are no threads, so a) it's ok that -// the mutex operations are a no-op, since we don't need locking then -// anyway; and b) we can be quite confident our bool won't change -// state between a call to Lock() and a call to Unlock() (that would -// require a global constructor in one translation unit to call Lock() -// and another global constructor in another translation unit to call -// Unlock() later, which is pretty perverse). -// -// That said, it's tricky, and can conceivably fail; it's safest to -// avoid trying to acquire a mutex in a global constructor, if you -// can. One way it can fail is that a really smart compiler might -// initialize the bool to true at static-initialization time (too -// early) rather than at dynamic-initialization time. To discourage -// that, we set is_safe_ to true in code (not the constructor -// colon-initializer) and set it to true via a function that always -// evaluates to true, but that the compiler can't know always -// evaluates to true. This should be good enough. -// -// A related issue is code that could try to access the mutex -// after it's been destroyed in the global destructors (because -// the Mutex global destructor runs before some other global -// destructor, that tries to acquire the mutex). The way we -// deal with this is by taking a constructor arg that global -// mutexes should pass in, that causes the destructor to do no -// work. 
We still depend on the compiler not doing anything
-// weird to a Mutex's memory after it is destroyed, but for a
-// static global variable, that's pretty safe.
-
-#ifndef GOOGLE_MUTEX_H_
-#define GOOGLE_MUTEX_H_
-
-#include "../config.h"
-
-#if defined(NO_THREADS)
-  typedef int MutexType;      // to keep a lock-count
-#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-# ifndef WIN32_LEAN_AND_MEAN
-#   define WIN32_LEAN_AND_MEAN  // We only need minimal includes
-# endif
-  // We need Windows NT or later for TryEnterCriticalSection().  If you
-  // don't need that functionality, you can remove these _WIN32_WINNT
-  // lines, and change TryLock() to assert(0) or something.
-# ifndef _WIN32_WINNT
-#   define _WIN32_WINNT 0x0400
-# endif
-# include <windows.h>
-  typedef CRITICAL_SECTION MutexType;
-#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
-  // Needed for pthread_rwlock_*.  If it causes problems, you could take it
-  // out, but then you'd have to unset HAVE_RWLOCK (at least on linux -- it
-  // *does* cause problems for FreeBSD, or MacOSX, but isn't needed
-  // for locking there.)
-# ifdef __linux__
-#   define _XOPEN_SOURCE 500  // may be needed to get the rwlock calls
-# endif
-# include <pthread.h>
-  typedef pthread_rwlock_t MutexType;
-#elif defined(HAVE_PTHREAD)
-# include <pthread.h>
-  typedef pthread_mutex_t MutexType;
-#else
-# error Need to implement mutex.h for your architecture, or #define NO_THREADS
-#endif
-
-#include <assert.h>
-#include <stdlib.h>      // for abort()
-
-#define MUTEX_NAMESPACE perftools_mutex_namespace
-
-namespace MUTEX_NAMESPACE {
-
-class Mutex {
- public:
-  // This is used for the single-arg constructor
-  enum LinkerInitialized { LINKER_INITIALIZED };
-
-  // Create a Mutex that is not held by anybody.  This constructor is
-  // typically used for Mutexes allocated on the heap or the stack.
-  inline Mutex();
-  // This constructor should be used for global, static Mutex objects.
-  // It inhibits work being done by the destructor, which makes it
-  // safer for code that tries to acquire this mutex in their global
-  // destructor.
-  inline Mutex(LinkerInitialized);
-
-  // Destructor
-  inline ~Mutex();
-
-  inline void Lock();    // Block if needed until free then acquire exclusively
-  inline void Unlock();  // Release a lock acquired via Lock()
-  inline bool TryLock(); // If free, Lock() and return true, else return false
-  // Note that on systems that don't support read-write locks, these may
-  // be implemented as synonyms to Lock() and Unlock().  So you can use
-  // these for efficiency, but don't use them anyplace where being able
-  // to do shared reads is necessary to avoid deadlock.
-  inline void ReaderLock();   // Block until free or shared then acquire a share
-  inline void ReaderUnlock(); // Release a read share of this Mutex
-  inline void WriterLock() { Lock(); }     // Acquire an exclusive lock
-  inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
-
- private:
-  MutexType mutex_;
-  // We want to make sure that the compiler sets is_safe_ to true only
-  // when we tell it to, and never makes assumptions is_safe_ is
-  // always true.  volatile is the most reliable way to do that.
-  volatile bool is_safe_;
-  // This indicates which constructor was called.
-  bool destroy_;
-
-  inline void SetIsSafe() { is_safe_ = true; }
-
-  // Catch the error of writing Mutex when intending MutexLock.
- Mutex(Mutex* /*ignored*/) {} - // Disallow "evil" constructors - Mutex(const Mutex&); - void operator=(const Mutex&); -}; - -// Now the implementation of Mutex for various systems -#if defined(NO_THREADS) - -// When we don't have threads, we can be either reading or writing, -// but not both. We can have lots of readers at once (in no-threads -// mode, that's most likely to happen in recursive function calls), -// but only one writer. We represent this by having mutex_ be -1 when -// writing and a number > 0 when reading (and 0 when no lock is held). -// -// In debug mode, we assert these invariants, while in non-debug mode -// we do nothing, for efficiency. That's why everything is in an -// assert. - -Mutex::Mutex() : mutex_(0) { } -Mutex::Mutex(Mutex::LinkerInitialized) : mutex_(0) { } -Mutex::~Mutex() { assert(mutex_ == 0); } -void Mutex::Lock() { assert(--mutex_ == -1); } -void Mutex::Unlock() { assert(mutex_++ == -1); } -bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; } -void Mutex::ReaderLock() { assert(++mutex_ > 0); } -void Mutex::ReaderUnlock() { assert(mutex_-- > 0); } - -#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) - -Mutex::Mutex() : destroy_(true) { - InitializeCriticalSection(&mutex_); - SetIsSafe(); -} -Mutex::Mutex(LinkerInitialized) : destroy_(false) { - InitializeCriticalSection(&mutex_); - SetIsSafe(); -} -Mutex::~Mutex() { if (destroy_) DeleteCriticalSection(&mutex_); } -void Mutex::Lock() { if (is_safe_) EnterCriticalSection(&mutex_); } -void Mutex::Unlock() { if (is_safe_) LeaveCriticalSection(&mutex_); } -bool Mutex::TryLock() { return is_safe_ ? - TryEnterCriticalSection(&mutex_) != 0 : true; } -void Mutex::ReaderLock() { Lock(); } // we don't have read-write locks -void Mutex::ReaderUnlock() { Unlock(); } - -#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK) - -#define SAFE_PTHREAD(fncall) do { /* run fncall if is_safe_ is true */ \ - if (is_safe_ && fncall(&mutex_) != 0) abort(); \ -} while (0) - -Mutex::Mutex() : destroy_(true) { - SetIsSafe(); - if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort(); -} -Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) { - SetIsSafe(); - if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort(); -} -Mutex::~Mutex() { if (destroy_) SAFE_PTHREAD(pthread_rwlock_destroy); } -void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock); } -void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } -bool Mutex::TryLock() { return is_safe_ ? - pthread_rwlock_trywrlock(&mutex_) == 0 : true; } -void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock); } -void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } -#undef SAFE_PTHREAD - -#elif defined(HAVE_PTHREAD) - -#define SAFE_PTHREAD(fncall) do { /* run fncall if is_safe_ is true */ \ - if (is_safe_ && fncall(&mutex_) != 0) abort(); \ -} while (0) - -Mutex::Mutex() : destroy_(true) { - SetIsSafe(); - if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort(); -} -Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) { - SetIsSafe(); - if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort(); -} -Mutex::~Mutex() { if (destroy_) SAFE_PTHREAD(pthread_mutex_destroy); } -void Mutex::Lock() { SAFE_PTHREAD(pthread_mutex_lock); } -void Mutex::Unlock() { SAFE_PTHREAD(pthread_mutex_unlock); } -bool Mutex::TryLock() { return is_safe_ ? 
- pthread_mutex_trylock(&mutex_) == 0 : true; } -void Mutex::ReaderLock() { Lock(); } -void Mutex::ReaderUnlock() { Unlock(); } -#undef SAFE_PTHREAD - -#endif - -// -------------------------------------------------------------------------- -// Some helper classes - -// MutexLock(mu) acquires mu when constructed and releases it when destroyed. -class MutexLock { - public: - explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); } - ~MutexLock() { mu_->Unlock(); } - private: - Mutex * const mu_; - // Disallow "evil" constructors - MutexLock(const MutexLock&); - void operator=(const MutexLock&); -}; - -// ReaderMutexLock and WriterMutexLock do the same, for rwlocks -class ReaderMutexLock { - public: - explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); } - ~ReaderMutexLock() { mu_->ReaderUnlock(); } - private: - Mutex * const mu_; - // Disallow "evil" constructors - ReaderMutexLock(const ReaderMutexLock&); - void operator=(const ReaderMutexLock&); -}; - -class WriterMutexLock { - public: - explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); } - ~WriterMutexLock() { mu_->WriterUnlock(); } - private: - Mutex * const mu_; - // Disallow "evil" constructors - WriterMutexLock(const WriterMutexLock&); - void operator=(const WriterMutexLock&); -}; - -// Catch bug where variable name is omitted, e.g. MutexLock (&mu); -#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name) -#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name) -#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name) - -} // namespace MUTEX_NAMESPACE - -using namespace MUTEX_NAMESPACE; - -#undef MUTEX_NAMESPACE - -#endif /* #define GOOGLE_SIMPLE_MUTEX_H_ */ diff --git a/contrib/libtcmalloc/src/base/spinlock.cc b/contrib/libtcmalloc/src/base/spinlock.cc deleted file mode 100644 index 48bb163d1de..00000000000 --- a/contrib/libtcmalloc/src/base/spinlock.cc +++ /dev/null @@ -1,129 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -/* Copyright (c) 2006, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - * Author: Sanjay Ghemawat - */ - -#include "../config.h" -#include "base/spinlock.h" -#include "base/spinlock_internal.h" -#include "base/sysinfo.h" /* for GetSystemCPUsCount() */ - -// NOTE on the Lock-state values: -// -// kSpinLockFree represents the unlocked state -// kSpinLockHeld represents the locked state with no waiters -// kSpinLockSleeper represents the locked state with waiters - -static int adaptive_spin_count = 0; - -const base::LinkerInitialized SpinLock::LINKER_INITIALIZED = - base::LINKER_INITIALIZED; - -namespace { -struct SpinLock_InitHelper { - SpinLock_InitHelper() { - // On multi-cpu machines, spin for longer before yielding - // the processor or sleeping. Reduces idle time significantly. - if (GetSystemCPUsCount() > 1) { - adaptive_spin_count = 1000; - } - } -}; - -// Hook into global constructor execution: -// We do not do adaptive spinning before that, -// but nothing lock-intensive should be going on at that time. -static SpinLock_InitHelper init_helper; - -inline void SpinlockPause(void) { -#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) - __asm__ __volatile__("rep; nop" : : ); -#endif -} - -} // unnamed namespace - -// Monitor the lock to see if its value changes within some time -// period (adaptive_spin_count loop iterations). The last value read -// from the lock is returned from the method. -Atomic32 SpinLock::SpinLoop() { - int c = adaptive_spin_count; - while (base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree && --c > 0) { - SpinlockPause(); - } - return base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree, - kSpinLockSleeper); -} - -void SpinLock::SlowLock() { - Atomic32 lock_value = SpinLoop(); - - int lock_wait_call_count = 0; - while (lock_value != kSpinLockFree) { - // If the lock is currently held, but not marked as having a sleeper, mark - // it as having a sleeper. - if (lock_value == kSpinLockHeld) { - // Here, just "mark" that the thread is going to sleep. Don't store the - // lock wait time in the lock as that will cause the current lock - // owner to think it experienced contention. - lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_, - kSpinLockHeld, - kSpinLockSleeper); - if (lock_value == kSpinLockHeld) { - // Successfully transitioned to kSpinLockSleeper. Pass - // kSpinLockSleeper to the SpinLockDelay routine to properly indicate - // the last lock_value observed. - lock_value = kSpinLockSleeper; - } else if (lock_value == kSpinLockFree) { - // Lock is free again, so try and acquire it before sleeping. The - // new lock state will be the number of cycles this thread waited if - // this thread obtains the lock. - lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_, - kSpinLockFree, - kSpinLockSleeper); - continue; // skip the delay at the end of the loop - } - } - - // Wait for an OS specific delay. 
-    base::internal::SpinLockDelay(&lockword_, lock_value,
-                                  ++lock_wait_call_count);
-    // Spin again after returning from the wait routine to give this thread
-    // some chance of obtaining the lock.
-    lock_value = SpinLoop();
-  }
-}
-
-void SpinLock::SlowUnlock() {
-  // wake waiter if necessary
-  base::internal::SpinLockWake(&lockword_, false);
-}
diff --git a/contrib/libtcmalloc/src/base/spinlock.h b/contrib/libtcmalloc/src/base/spinlock.h
deleted file mode 100644
index 42a4eb906a0..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock.h
+++ /dev/null
@@ -1,143 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following disclaimer
- *       in the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Google Inc. nor the names of its
- *       contributors may be used to endorse or promote products derived from
- *       this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- */
-
-// SpinLock is async signal safe.
-// If used within a signal handler, all lock holders
-// should block the signal even outside the signal handler.
-
-#ifndef BASE_SPINLOCK_H_
-#define BASE_SPINLOCK_H_
-
-#include "../config.h"
-#include "base/atomicops.h"
-#include "base/basictypes.h"
-#include "base/dynamic_annotations.h"
-#include "base/thread_annotations.h"
-
-class LOCKABLE SpinLock {
- public:
-  SpinLock() : lockword_(kSpinLockFree) { }
-
-  // Special constructor for use with static SpinLock objects.  E.g.,
-  //
-  //    static SpinLock lock(base::LINKER_INITIALIZED);
-  //
-  // When initialized using this constructor, we depend on the fact
-  // that the linker has already initialized the memory appropriately.
-  // A SpinLock constructed like this can be freely used from global
-  // initializers without worrying about the order in which global
-  // initializers run.
-  explicit SpinLock(base::LinkerInitialized /*x*/) {
-    // Does nothing; lockword_ is already initialized
-  }
-
-  // Acquire this SpinLock.
-  // TODO(csilvers): uncomment the annotation when we figure out how to
-  // support this macro with 0 args (see thread_annotations.h)
-  inline void Lock() /*EXCLUSIVE_LOCK_FUNCTION()*/ {
-    if (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree,
-                                             kSpinLockHeld) != kSpinLockFree) {
-      SlowLock();
-    }
-    ANNOTATE_RWLOCK_ACQUIRED(this, 1);
-  }
-
-  // Try to acquire this SpinLock without blocking and return true if the
-  // acquisition was successful.  If the lock was not acquired, false is
-  // returned.  If this SpinLock is free at the time of the call, TryLock
-  // will return true with high probability.
-  inline bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true) {
-    bool res =
-        (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree,
-                                              kSpinLockHeld) == kSpinLockFree);
-    if (res) {
-      ANNOTATE_RWLOCK_ACQUIRED(this, 1);
-    }
-    return res;
-  }
-
-  // Release this SpinLock, which must be held by the calling thread.
-  // TODO(csilvers): uncomment the annotation when we figure out how to
-  // support this macro with 0 args (see thread_annotations.h)
-  inline void Unlock() /*UNLOCK_FUNCTION()*/ {
-    ANNOTATE_RWLOCK_RELEASED(this, 1);
-    uint64 prev_value = static_cast<uint64>(
-        base::subtle::Release_AtomicExchange(&lockword_, kSpinLockFree));
-    if (prev_value != kSpinLockHeld) {
-      // Speed the wakeup of any waiter.
-      SlowUnlock();
-    }
-  }
-
-  // Determine if the lock is held.  When the lock is held by the invoking
-  // thread, true will always be returned. Intended to be used as
-  // CHECK(lock.IsHeld()).
-  inline bool IsHeld() const {
-    return base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree;
-  }
-
-  static const base::LinkerInitialized LINKER_INITIALIZED;  // backwards compat
- private:
-  enum { kSpinLockFree = 0 };
-  enum { kSpinLockHeld = 1 };
-  enum { kSpinLockSleeper = 2 };
-
-  volatile Atomic32 lockword_;
-
-  void SlowLock();
-  void SlowUnlock();
-  Atomic32 SpinLoop();
-
-  DISALLOW_COPY_AND_ASSIGN(SpinLock);
-};
-
-// Corresponding locker object that arranges to acquire a spinlock for
-// the duration of a C++ scope.
-class SCOPED_LOCKABLE SpinLockHolder {
- private:
-  SpinLock* lock_;
- public:
-  inline explicit SpinLockHolder(SpinLock* l) EXCLUSIVE_LOCK_FUNCTION(l)
-      : lock_(l) {
-    l->Lock();
-  }
-  // TODO(csilvers): uncomment the annotation when we figure out how to
-  // support this macro with 0 args (see thread_annotations.h)
-  inline ~SpinLockHolder() /*UNLOCK_FUNCTION()*/ { lock_->Unlock(); }
-};
-// Catch bug where variable name is omitted, e.g. SpinLockHolder (&lock);
-#define SpinLockHolder(x) COMPILE_ASSERT(0, spin_lock_decl_missing_var_name)
-
-
-#endif  // BASE_SPINLOCK_H_
diff --git a/contrib/libtcmalloc/src/base/spinlock_internal.cc b/contrib/libtcmalloc/src/base/spinlock_internal.cc
deleted file mode 100644
index d9629717be1..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock_internal.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following disclaimer
- *       in the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Google Inc. nor the names of its
- *       contributors may be used to endorse or promote products derived from
- *       this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// The OS-specific header included below must provide two calls:
-// base::internal::SpinLockDelay() and base::internal::SpinLockWake().
-// See spinlock_internal.h for the spec of SpinLockWake().

-// void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop)
-// SpinLockDelay() generates an appropriate spin delay on iteration "loop" of a
-// spin loop on location *w, whose previously observed value was "value".
-// SpinLockDelay() may do nothing, may yield the CPU, may sleep a clock tick,
-// or may wait for a delay that can be truncated by a call to SpinLockWake(w).
-// In all cases, it must return in bounded time even if SpinLockWake() is not
-// called.
-
-#include "base/spinlock_internal.h"
-
-// forward declaration for use by spinlock_*-inl.h
-namespace base { namespace internal { static int SuggestedDelayNS(int loop); }}
-
-#if defined(_WIN32)
-#include "base/spinlock_win32-inl.h"
-#elif defined(__linux__)
-#include "base/spinlock_linux-inl.h"
-#else
-#include "base/spinlock_posix-inl.h"
-#endif
-
-namespace base {
-namespace internal {
-
-// Return a suggested delay in nanoseconds for iteration number "loop"
-static int SuggestedDelayNS(int loop) {
-  // Weak pseudo-random number generator to get some spread between threads
-  // when many are spinning.
-#ifdef BASE_HAS_ATOMIC64
-  static base::subtle::Atomic64 rand;
-  uint64 r = base::subtle::NoBarrier_Load(&rand);
-  r = 0x5deece66dLL * r + 0xb;   // numbers from nrand48()
-  base::subtle::NoBarrier_Store(&rand, r);
-
-  r <<= 16;   // 48-bit random number now in top 48-bits.
-  if (loop < 0 || loop > 32) {   // limit loop to 0..32
-    loop = 32;
-  }
-  // loop>>3 cannot exceed 4 because loop cannot exceed 32.
-  // Select top 20..24 bits of lower 48 bits,
-  // giving approximately 0ms to 16ms.
-  // Mean is exponential in loop for first 32 iterations, then 8ms.
-  // The futex path multiplies this by 16, since we expect explicit wakeups
-  // almost always on that path.
-  return r >> (44 - (loop >> 3));
-#else
-  static Atomic32 rand;
-  uint32 r = base::subtle::NoBarrier_Load(&rand);
-  r = 0x343fd * r + 0x269ec3;   // numbers from MSVC++
-  base::subtle::NoBarrier_Store(&rand, r);
-
-  r <<= 1;   // 31-bit random number now in top 31-bits.
-  if (loop < 0 || loop > 32) {   // limit loop to 0..32
-    loop = 32;
-  }
-  // loop>>3 cannot exceed 4 because loop cannot exceed 32.
-  // Select top 20..24 bits of lower 31 bits,
-  // giving approximately 0ms to 16ms.
-  // Mean is exponential in loop for first 32 iterations, then 8ms.
-  // The futex path multiplies this by 16, since we expect explicit wakeups
-  // almost always on that path.
-  return r >> (12 - (loop >> 3));
-#endif
-}
-
-} // namespace internal
-} // namespace base
diff --git a/contrib/libtcmalloc/src/base/spinlock_internal.h b/contrib/libtcmalloc/src/base/spinlock_internal.h
deleted file mode 100644
index 636885cd6e5..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock_internal.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following disclaimer
- *       in the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Google Inc. nor the names of its
- *       contributors may be used to endorse or promote products derived from
- *       this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * This file is an internal part of spinlock.cc and once.cc
- * It may not be used directly by code outside of //base.
- */
-
-#ifndef BASE_SPINLOCK_INTERNAL_H_
-#define BASE_SPINLOCK_INTERNAL_H_
-
-#include "../config.h"
-#include "base/basictypes.h"
-#include "base/atomicops.h"
-
-namespace base {
-namespace internal {
-
-void SpinLockWake(volatile Atomic32 *w, bool all);
-void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop);
-
-} // namespace internal
-} // namespace base
-#endif
diff --git a/contrib/libtcmalloc/src/base/spinlock_linux-inl.h b/contrib/libtcmalloc/src/base/spinlock_linux-inl.h
deleted file mode 100644
index aadf62a4b67..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock_linux-inl.h
+++ /dev/null
@@ -1,101 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2009, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following disclaimer
- *       in the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Google Inc. nor the names of its
- *       contributors may be used to endorse or promote products derived from
- *       this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * This file is a Linux-specific part of spinlock_internal.cc
- */
-
-#include <errno.h>
-#include <sched.h>
-#include <time.h>
-#include <limits.h>
-#include "base/linux_syscall_support.h"
-
-#define FUTEX_WAIT 0
-#define FUTEX_WAKE 1
-#define FUTEX_PRIVATE_FLAG 128
-
-static bool have_futex;
-static int futex_private_flag = FUTEX_PRIVATE_FLAG;
-
-namespace {
-static struct InitModule {
-  InitModule() {
-    int x = 0;
-    // futexes are ints, so we can use them only when
-    // that's the same size as the lockword_ in SpinLock.
-    have_futex = (sizeof (Atomic32) == sizeof (int) &&
-                  sys_futex(&x, FUTEX_WAKE, 1, NULL, NULL, 0) >= 0);
-    if (have_futex &&
-        sys_futex(&x, FUTEX_WAKE | futex_private_flag, 1, NULL, NULL, 0) < 0) {
-      futex_private_flag = 0;
-    }
-  }
-} init_module;
-
-}  // anonymous namespace
-
-
-namespace base {
-namespace internal {
-
-void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
-  if (loop != 0) {
-    int save_errno = errno;
-    struct timespec tm;
-    tm.tv_sec = 0;
-    if (have_futex) {
-      tm.tv_nsec = base::internal::SuggestedDelayNS(loop);
-    } else {
-      tm.tv_nsec = 2000001;   // above 2ms so linux 2.4 doesn't spin
-    }
-    if (have_futex) {
-      tm.tv_nsec *= 16;  // increase the delay; we expect explicit wakeups
-      sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
-                FUTEX_WAIT | futex_private_flag,
-                value, reinterpret_cast<struct kernel_timespec *>(&tm),
-                NULL, 0);
-    } else {
-      nanosleep(&tm, NULL);
-    }
-    errno = save_errno;
-  }
-}
-
-void SpinLockWake(volatile Atomic32 *w, bool all) {
-  if (have_futex) {
-    sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
-              FUTEX_WAKE | futex_private_flag, all? INT_MAX : 1,
-              NULL, NULL, 0);
-  }
-}
-
-} // namespace internal
-} // namespace base
diff --git a/contrib/libtcmalloc/src/base/spinlock_posix-inl.h b/contrib/libtcmalloc/src/base/spinlock_posix-inl.h
deleted file mode 100644
index 2695b7b1bb9..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock_posix-inl.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2009, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following disclaimer
- *       in the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Google Inc. nor the names of its
- *       contributors may be used to endorse or promote products derived from
- *       this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * This file is a Posix-specific part of spinlock_internal.cc
- */
-
-#include "../config.h"
-#include <errno.h>
-#ifdef HAVE_SCHED_H
-#include <sched.h>      /* For sched_yield() */
-#endif
-#include <time.h>       /* For nanosleep() */
-
-namespace base {
-namespace internal {
-
-void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
-  int save_errno = errno;
-  if (loop == 0) {
-  } else if (loop == 1) {
-    sched_yield();
-  } else {
-    struct timespec tm;
-    tm.tv_sec = 0;
-    tm.tv_nsec = base::internal::SuggestedDelayNS(loop);
-    nanosleep(&tm, NULL);
-  }
-  errno = save_errno;
-}
-
-void SpinLockWake(volatile Atomic32 *w, bool all) {
-}
-
-} // namespace internal
-} // namespace base
diff --git a/contrib/libtcmalloc/src/base/spinlock_win32-inl.h b/contrib/libtcmalloc/src/base/spinlock_win32-inl.h
deleted file mode 100644
index 956b9653e6d..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock_win32-inl.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2009, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following disclaimer
- *       in the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Google Inc. nor the names of its
- *       contributors may be used to endorse or promote products derived from
- *       this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * This file is a Win32-specific part of spinlock_internal.cc
- */
-
-
-#include <windows.h>
-
-namespace base {
-namespace internal {
-
-void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
-  if (loop == 0) {
-  } else if (loop == 1) {
-    Sleep(0);
-  } else {
-    Sleep(base::internal::SuggestedDelayNS(loop) / 1000000);
-  }
-}
-
-void SpinLockWake(volatile Atomic32 *w, bool all) {
-}
-
-} // namespace internal
-} // namespace base
diff --git a/contrib/libtcmalloc/src/base/stl_allocator.h b/contrib/libtcmalloc/src/base/stl_allocator.h
deleted file mode 100644
index 4520713622f..00000000000
--- a/contrib/libtcmalloc/src/base/stl_allocator.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following disclaimer
- *       in the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Google Inc. nor the names of its
- *       contributors may be used to endorse or promote products derived from
- *       this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Maxim Lifantsev
- */
-
-
-#ifndef BASE_STL_ALLOCATOR_H_
-#define BASE_STL_ALLOCATOR_H_
-
-#include "../config.h"
-
-#include <stddef.h>   // for ptrdiff_t
-#include <limits>
-
-#include "base/logging.h"
-
-// Generic allocator class for STL objects
-// that uses a given type-less allocator Alloc, which must provide:
-//   static void* Alloc::Allocate(size_t size);
-//   static void Alloc::Free(void* ptr, size_t size);
-//
-// STL_Allocator<T, MyAlloc> provides the same thread-safety
-// guarantees as MyAlloc.
-//
-// Usage example:
-//   set<T*, less<T*>, STL_Allocator<T*, MyAlloc> > my_set;
-// CAVEAT: Parts of the code below are probably specific
-// to the STL version(s) we are using.
-// The code is simply lifted from what std::allocator<> provides.
-template <typename T, class Alloc>
-class STL_Allocator {
- public:
-  typedef size_t size_type;
-  typedef ptrdiff_t difference_type;
-  typedef T* pointer;
-  typedef const T* const_pointer;
-  typedef T& reference;
-  typedef const T& const_reference;
-  typedef T value_type;
-
-  template <class T1> struct rebind {
-    typedef STL_Allocator<T1, Alloc> other;
-  };
-
-  STL_Allocator() { }
-  STL_Allocator(const STL_Allocator&) { }
-  template <class T1> STL_Allocator(const STL_Allocator<T1, Alloc>&) { }
-  ~STL_Allocator() { }
-
-  pointer address(reference x) const { return &x; }
-  const_pointer address(const_reference x) const { return &x; }
-
-  pointer allocate(size_type n, const void* = 0) {
-    RAW_DCHECK((n * sizeof(T)) / sizeof(T) == n, "n is too big to allocate");
-    return static_cast<T*>(Alloc::Allocate(n * sizeof(T)));
-  }
-  void deallocate(pointer p, size_type n) { Alloc::Free(p, n * sizeof(T)); }
-
-  size_type max_size() const { return size_t(-1) / sizeof(T); }
-
-  void construct(pointer p, const T& val) { ::new(p) T(val); }
-  void construct(pointer p) { ::new(p) T(); }
-  void destroy(pointer p) { p->~T(); }
-
-  // There's no state, so these allocators are always equal
-  bool operator==(const STL_Allocator&) const { return true; }
-};
-
-#endif  // BASE_STL_ALLOCATOR_H_
diff --git a/contrib/libtcmalloc/src/base/sysinfo.cc b/contrib/libtcmalloc/src/base/sysinfo.cc
deleted file mode 100644
index 75217e6795a..00000000000
--- a/contrib/libtcmalloc/src/base/sysinfo.cc
+++ /dev/null
@@ -1,860 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2006, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following disclaimer
-//       in the documentation and/or other materials provided with the
-//       distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived from
-//       this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "../config.h"
-#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32)
-# define PLATFORM_WINDOWS 1
-#endif
-
-#include <ctype.h>    // for isspace()
-#include <stdlib.h>   // for getenv()
-#include <stdio.h>    // for snprintf(), sscanf()
-#include <string.h>   // for memmove(), memchr(), etc.
-#include <fcntl.h>    // for open()
-#include <errno.h>    // for errno
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>   // for read()
-#endif
-#if defined __MACH__          // Mac OS X, almost certainly
-#include <mach-o/dyld.h>      // for iterating over dll's in ProcMapsIter
-#include <mach-o/loader.h>    // for iterating over dll's in ProcMapsIter
-#include <sys/types.h>
-#include <sys/sysctl.h>       // how we figure out numcpu's on OS X
-#elif defined __FreeBSD__
-#include <sys/sysctl.h>
-#elif defined __sun__         // Solaris
-#include <sys/procfs.h>       // for, e.g., prmap_t
-#elif defined(PLATFORM_WINDOWS)
-#include <process.h>          // for getpid() (actually, _getpid())
-#include <shlwapi.h>          // for SHGetValueA()
-#include <tlhelp32.h>         // for Module32First()
-#endif
-#include "base/sysinfo.h"
-#include "base/commandlineflags.h"
-#include "base/dynamic_annotations.h"   // for RunningOnValgrind
-#include "base/logging.h"
-
-#ifdef PLATFORM_WINDOWS
-#ifdef MODULEENTRY32
-// In a change from the usual W-A pattern, there is no A variant of
-// MODULEENTRY32.  Tlhelp32.h #defines the W variant, but not the A.
-// In unicode mode, tlhelp32.h #defines MODULEENTRY32 to be
-// MODULEENTRY32W.  These #undefs are the only way I see to get back
-// access to the original, ascii struct (and related functions).
-#undef MODULEENTRY32
-#undef Module32First
-#undef Module32Next
-#undef PMODULEENTRY32
-#undef LPMODULEENTRY32
-#endif  /* MODULEENTRY32 */
-// MinGW doesn't seem to define this, perhaps some windowsen don't either.
-#ifndef TH32CS_SNAPMODULE32
-#define TH32CS_SNAPMODULE32  0
-#endif  /* TH32CS_SNAPMODULE32 */
-#endif  /* PLATFORM_WINDOWS */
-
-// Re-run fn until it doesn't cause EINTR.
-#define NO_INTR(fn)  do {} while ((fn) < 0 && errno == EINTR)
-
-// open/read/close can set errno, which may be illegal at this
-// time, so prefer making the syscalls directly if we can.
-#ifdef HAVE_SYS_SYSCALL_H
-# include <sys/syscall.h>
-#endif
-#ifdef SYS_open   // solaris 11, at least sometimes, only defines SYS_openat
-# define safeopen(filename, mode)  syscall(SYS_open, filename, mode)
-#else
-# define safeopen(filename, mode)  open(filename, mode)
-#endif
-#ifdef SYS_read
-# define saferead(fd, buffer, size)  syscall(SYS_read, fd, buffer, size)
-#else
-# define saferead(fd, buffer, size)  read(fd, buffer, size)
-#endif
-#ifdef SYS_close
-# define safeclose(fd)  syscall(SYS_close, fd)
-#else
-# define safeclose(fd)  close(fd)
-#endif
-
-// ----------------------------------------------------------------------
-// GetenvBeforeMain()
-// GetUniquePathFromEnv()
-//    Some non-trivial getenv-related functions.
-// ----------------------------------------------------------------------
-
-// It's not safe to call getenv() in the malloc hooks, because they
-// might be called extremely early, before libc is done setting up
-// correctly.  In particular, the thread library may not be done
-// setting up errno.  So instead, we use the built-in __environ array
-// if it exists, and otherwise read /proc/self/environ directly, using
-// system calls to read the file, and thus avoid setting errno.
-// /proc/self/environ has a limit of how much data it exports (around
-// 8K), so it's not an ideal solution.
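[Editor's note: illustration, not part of the deleted file. The comment above describes the record format that GetenvBeforeMain() below parses: /proc/self/environ is a flat "NAME=value\0NAME=value\0...\0\0" buffer. A minimal standalone sketch of such a scan, with a hypothetical helper name, assuming the buffer ends in two NULs exactly as the "- 2" in the read() below guarantees:]

    #include <string.h>

    // Hypothetical helper: return the value part of "name=value" in an
    // environ-style buffer, or NULL. Allocates nothing, never touches errno.
    static const char* scan_environ_buffer(const char* buf, size_t bufsize,
                                           const char* name) {
      const size_t namelen = strlen(name);
      for (const char* p = buf; *p != '\0'; ) {   // stops at the "\0\0" end
        const char* end = (const char*)memchr(p, '\0', bufsize - (p - buf));
        if (end == NULL) return NULL;             // record not NUL-terminated
        if (memcmp(p, name, namelen) == 0 && p[namelen] == '=')
          return p + namelen + 1;                 // points just past '='
        p = end + 1;                              // next NAME=value record
      }
      return NULL;
    }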
-const char* GetenvBeforeMain(const char* name) {
-#if defined(HAVE___ENVIRON)   // if we have it, it's declared in unistd.h
-  if (__environ) {            // can exist but be NULL, if statically linked
-    const int namelen = strlen(name);
-    for (char** p = __environ; *p; p++) {
-      if (strlen(*p) < namelen) {
-        continue;
-      }
-      if (!memcmp(*p, name, namelen) && (*p)[namelen] == '=')   // it's a match
-        return *p + namelen+1;     // point after =
-    }
-    return NULL;
-  }
-#endif
-#if defined(PLATFORM_WINDOWS)
-  // TODO(mbelshe) - repeated calls to this function will overwrite the
-  // contents of the static buffer.
-  static char envvar_buf[1024];  // enough to hold any envvar we care about
-  if (!GetEnvironmentVariableA(name, envvar_buf, sizeof(envvar_buf)-1))
-    return NULL;
-  return envvar_buf;
-#endif
-  // static is ok because this function should only be called before
-  // main(), when we're single-threaded.
-  static char envbuf[16<<10];
-  if (*envbuf == '\0') {    // haven't read the environ yet
-    int fd = safeopen("/proc/self/environ", O_RDONLY);
-    // The -2 below guarantees the last two bytes of the buffer will be \0\0
-    if (fd == -1 ||           // unable to open the file, fall back onto libc
-        saferead(fd, envbuf, sizeof(envbuf) - 2) < 0) {  // error reading file
-      RAW_VLOG(1, "Unable to open /proc/self/environ, falling back "
-               "on getenv(\"%s\"), which may not work", name);
-      if (fd != -1) safeclose(fd);
-      return getenv(name);
-    }
-    safeclose(fd);
-  }
-  const int namelen = strlen(name);
-  const char* p = envbuf;
-  while (*p != '\0') {    // will happen at the \0\0 that terminates the buffer
-    // proc file has the format NAME=value\0NAME=value\0NAME=value\0...
-    const char* endp = (char*)memchr(p, '\0', sizeof(envbuf) - (p - envbuf));
-    if (endp == NULL)            // this entry isn't NUL terminated
-      return NULL;
-    else if (!memcmp(p, name, namelen) && p[namelen] == '=')   // it's a match
-      return p + namelen+1;      // point after =
-    p = endp + 1;
-  }
-  return NULL;                   // env var never found
-}
-
-extern "C" {
-  const char* TCMallocGetenvSafe(const char* name) {
-    return GetenvBeforeMain(name);
-  }
-}
-
-// This takes as an argument an environment-variable name (like
-// CPUPROFILE) whose value is supposed to be a file-path, and sets
-// path to that path, and returns true.  If the env var doesn't exist,
-// or is the empty string, it leaves path unchanged and returns false.
-// The reason this is non-trivial is that this function handles munged
-// pathnames.  Here's why:
-//
-// If we're a child process of the 'main' process, we can't just use
-// getenv("CPUPROFILE") -- the parent process will be using that path.
-// Instead we append our pid to the pathname.  How do we tell if we're a
-// child process?  Ideally we'd set an environment variable that all
-// our children would inherit.  But -- and this is seemingly a bug in
-// gcc -- if you do a setenv() in a shared library in a global
-// constructor, the environment setting is lost by the time main() is
-// called.  The only safe thing we can do in such a situation is to
-// modify the existing envvar.  So we do a hack: in the parent, we set
-// the high bit of the 1st char of CPUPROFILE.  In the child, we
-// notice the high bit is set and append the pid().  This works
-// assuming cpuprofile filenames don't normally have the high bit set
-// in their first character!  If that assumption is violated, we'll
-// still get a profile, but one with an unexpected name.
-// TODO(csilvers): set an envvar instead when we can do it reliably.
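[Editor's note: illustration, not part of the deleted file. A sketch of the intended call pattern for GetUniquePathFromEnv(), implemented just below; the CPUPROFILE convention comes from the comment above, and the caller name is hypothetical:]

    #include <limits.h>   // for PATH_MAX
    #include <stdio.h>

    void maybe_start_cpu_profiler() {
      char path[PATH_MAX];  // GetUniquePathFromEnv writes up to PATH_MAX bytes
      if (GetUniquePathFromEnv("CPUPROFILE", path)) {
        // The first process gets $CPUPROFILE verbatim; children forked later
        // see the high bit set and get "$CPUPROFILE_<pid>" instead.
        fprintf(stderr, "profile output: %s\n", path);
      }
    }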
-bool GetUniquePathFromEnv(const char* env_name, char* path) {
-  char* envval = getenv(env_name);
-  if (envval == NULL || *envval == '\0')
-    return false;
-  if (envval[0] & 128) {                  // high bit is set
-    snprintf(path, PATH_MAX, "%c%s_%u",   // add pid and clear high bit
-             envval[0] & 127, envval+1, (unsigned int)(getpid()));
-  } else {
-    snprintf(path, PATH_MAX, "%s", envval);
-    envval[0] |= 128;                     // set high bit for kids to see
-  }
-  return true;
-}
-
-void SleepForMilliseconds(int milliseconds) {
-#ifdef PLATFORM_WINDOWS
-  _sleep(milliseconds);   // Windows's _sleep takes milliseconds argument
-#else
-  // Sleep for a few milliseconds
-  struct timespec sleep_time;
-  sleep_time.tv_sec = milliseconds / 1000;
-  sleep_time.tv_nsec = (milliseconds % 1000) * 1000000;
-  while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
-    ;  // Ignore signals and wait for the full interval to elapse.
-#endif
-}
-
-int GetSystemCPUsCount()
-{
-#if defined(PLATFORM_WINDOWS)
-  // Get the number of processors.
-  SYSTEM_INFO info;
-  GetSystemInfo(&info);
-  return info.dwNumberOfProcessors;
-#else
-  long rv = sysconf(_SC_NPROCESSORS_ONLN);
-  if (rv < 0) {
-    return 1;
-  }
-  return static_cast<int>(rv);
-#endif
-}
-
-// ----------------------------------------------------------------------
-
-#if defined __linux__ || defined __FreeBSD__ || defined __sun__ || defined __CYGWIN__ || defined __CYGWIN32__
-static void ConstructFilename(const char* spec, pid_t pid,
-                              char* buf, int buf_size) {
-  CHECK_LT(snprintf(buf, buf_size,
-                    spec,
-                    static_cast<int>(pid ? pid : getpid())), buf_size);
-}
-#endif
-
-// A templatized helper function instantiated for Mach (OS X) only.
-// It can handle finding info for both 32 bits and 64 bits.
-// Returns true if it successfully handled the hdr, false otherwise.
-#ifdef __MACH__          // Mac OS X, almost certainly
-template<uint32_t kMagic, uint32_t kLCSegment,
-         typename MachHeader, typename SegmentCommand>
-static bool NextExtMachHelper(const mach_header* hdr,
-                              int current_image, int current_load_cmd,
-                              uint64 *start, uint64 *end, char **flags,
-                              uint64 *offset, int64 *inode, char **filename,
-                              uint64 *file_mapping, uint64 *file_pages,
-                              uint64 *anon_mapping, uint64 *anon_pages,
-                              dev_t *dev) {
-  static char kDefaultPerms[5] = "r-xp";
-  if (hdr->magic != kMagic)
-    return false;
-  const char* lc = (const char *)hdr + sizeof(MachHeader);
-  // TODO(csilvers): make this not-quadratic (increment and hold state)
-  for (int j = 0; j < current_load_cmd; j++)  // advance to *our* load_cmd
-    lc += ((const load_command *)lc)->cmdsize;
-  if (((const load_command *)lc)->cmd == kLCSegment) {
-    const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image);
-    const SegmentCommand* sc = (const SegmentCommand *)lc;
-    if (start) *start = sc->vmaddr + dlloff;
-    if (end) *end = sc->vmaddr + sc->vmsize + dlloff;
-    if (flags) *flags = kDefaultPerms;  // can we do better?
-    if (offset) *offset = sc->fileoff;
-    if (inode) *inode = 0;
-    if (filename)
-      *filename = const_cast<char*>(_dyld_get_image_name(current_image));
-    if (file_mapping) *file_mapping = 0;
-    if (file_pages) *file_pages = 0;   // could we use sc->filesize?
-    if (anon_mapping) *anon_mapping = 0;
-    if (anon_pages) *anon_pages = 0;
-    if (dev) *dev = 0;
-    return true;
-  }
-
-  return false;
-}
-#endif
-
-// Finds |c| in |text|, and assigns '\0' at the found position.
-// The original character at the modified position should be |c|.
-// A pointer to the modified position is stored in |endptr|.
-// |endptr| should not be NULL.
-static bool ExtractUntilChar(char *text, int c, char **endptr) {
-  CHECK_NE(text, NULL);
-  CHECK_NE(endptr, NULL);
-  char *found;
-  found = strchr(text, c);
-  if (found == NULL) {
-    *endptr = NULL;
-    return false;
-  }
-
-  *endptr = found;
-  *found = '\0';
-  return true;
-}
-
-// Increments |*text_pointer| while it points at a whitespace character.
-// It is to follow sscanf's whitespace handling.
-static void SkipWhileWhitespace(char **text_pointer, int c) {
-  if (isspace(c)) {
-    while (isspace(**text_pointer) && isspace(*((*text_pointer) + 1))) {
-      ++(*text_pointer);
-    }
-  }
-}
-
-template<typename T>
-static T StringToInteger(char *text, char **endptr, int base) {
-  assert(false);
-  return T();
-}
-
-template<>
-int StringToInteger<int>(char *text, char **endptr, int base) {
-  return strtol(text, endptr, base);
-}
-
-template<>
-int64 StringToInteger<int64>(char *text, char **endptr, int base) {
-  return strtoll(text, endptr, base);
-}
-
-template<>
-uint64 StringToInteger<uint64>(char *text, char **endptr, int base) {
-  return strtoull(text, endptr, base);
-}
-
-template<typename T>
-static T StringToIntegerUntilChar(
-    char *text, int base, int c, char **endptr_result) {
-  CHECK_NE(endptr_result, NULL);
-  *endptr_result = NULL;
-
-  char *endptr_extract;
-  if (!ExtractUntilChar(text, c, &endptr_extract))
-    return 0;
-
-  T result;
-  char *endptr_strto;
-  result = StringToInteger<T>(text, &endptr_strto, base);
-  *endptr_extract = c;
-
-  if (endptr_extract != endptr_strto)
-    return 0;
-
-  *endptr_result = endptr_extract;
-  SkipWhileWhitespace(endptr_result, c);
-
-  return result;
-}
-
-static char *CopyStringUntilChar(
-    char *text, unsigned out_len, int c, char *out) {
-  char *endptr;
-  if (!ExtractUntilChar(text, c, &endptr))
-    return NULL;
-
-  strncpy(out, text, out_len);
-  out[out_len-1] = '\0';
-  *endptr = c;
-
-  SkipWhileWhitespace(&endptr, c);
-  return endptr;
-}
-
-template<typename T>
-static bool StringToIntegerUntilCharWithCheck(
-    T *outptr, char *text, int base, int c, char **endptr) {
-  *outptr = StringToIntegerUntilChar<T>(*endptr, base, c, endptr);
-  if (*endptr == NULL || **endptr == '\0') return false;
-  ++(*endptr);
-  return true;
-}
-
-static bool ParseProcMapsLine(char *text, uint64 *start, uint64 *end,
-                              char *flags, uint64 *offset,
-                              int *major, int *minor, int64 *inode,
-                              unsigned *filename_offset) {
-#if defined(__linux__)
-  /*
-   * It's similar to:
-   * sscanf(text, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n",
-   *        start, end, flags, offset, major, minor, inode, filename_offset)
-   */
-  char *endptr = text;
-  if (endptr == NULL || *endptr == '\0')  return false;
-
-  if (!StringToIntegerUntilCharWithCheck(start, endptr, 16, '-', &endptr))
-    return false;
-
-  if (!StringToIntegerUntilCharWithCheck(end, endptr, 16, ' ', &endptr))
-    return false;
-
-  endptr = CopyStringUntilChar(endptr, 5, ' ', flags);
-  if (endptr == NULL || *endptr == '\0')  return false;
-  ++endptr;
-
-  if (!StringToIntegerUntilCharWithCheck(offset, endptr, 16, ' ', &endptr))
-    return false;
-
-  if (!StringToIntegerUntilCharWithCheck(major, endptr, 16, ':', &endptr))
-    return false;
-
-  if (!StringToIntegerUntilCharWithCheck(minor, endptr, 16, ' ', &endptr))
-    return false;
-
-  if (!StringToIntegerUntilCharWithCheck(inode, endptr, 10, ' ', &endptr))
-    return false;
-
-  *filename_offset = (endptr - text);
-  return true;
-#else
-  return false;
-#endif
-}
-
-ProcMapsIterator::ProcMapsIterator(pid_t pid) {
-  Init(pid, NULL, false);
-}
-
-ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer) {
-  Init(pid, buffer, false);
-}
-
-ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer, - bool use_maps_backing) { - Init(pid, buffer, use_maps_backing); -} - -void ProcMapsIterator::Init(pid_t pid, Buffer *buffer, - bool use_maps_backing) { - pid_ = pid; - using_maps_backing_ = use_maps_backing; - dynamic_buffer_ = NULL; - if (!buffer) { - // If the user didn't pass in any buffer storage, allocate it - // now. This is the normal case; the signal handler passes in a - // static buffer. - buffer = dynamic_buffer_ = new Buffer; - } else { - dynamic_buffer_ = NULL; - } - - ibuf_ = buffer->buf_; - - stext_ = etext_ = nextline_ = ibuf_; - ebuf_ = ibuf_ + Buffer::kBufSize - 1; - nextline_ = ibuf_; - -#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) - if (use_maps_backing) { // don't bother with clever "self" stuff in this case - ConstructFilename("/proc/%d/maps_backing", pid, ibuf_, Buffer::kBufSize); - } else if (pid == 0) { - // We have to kludge a bit to deal with the args ConstructFilename - // expects. The 1 is never used -- it's only impt. that it's not 0. - ConstructFilename("/proc/self/maps", 1, ibuf_, Buffer::kBufSize); - } else { - ConstructFilename("/proc/%d/maps", pid, ibuf_, Buffer::kBufSize); - } - // No error logging since this can be called from the crash dump - // handler at awkward moments. Users should call Valid() before - // using. - NO_INTR(fd_ = open(ibuf_, O_RDONLY)); -#elif defined(__FreeBSD__) - // We don't support maps_backing on freebsd - if (pid == 0) { - ConstructFilename("/proc/curproc/map", 1, ibuf_, Buffer::kBufSize); - } else { - ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize); - } - NO_INTR(fd_ = open(ibuf_, O_RDONLY)); -#elif defined(__sun__) - if (pid == 0) { - ConstructFilename("/proc/self/map", 1, ibuf_, Buffer::kBufSize); - } else { - ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize); - } - NO_INTR(fd_ = open(ibuf_, O_RDONLY)); -#elif defined(__MACH__) - current_image_ = _dyld_image_count(); // count down from the top - current_load_cmd_ = -1; -#elif defined(PLATFORM_WINDOWS) - snapshot_ = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | - TH32CS_SNAPMODULE32, - GetCurrentProcessId()); - memset(&module_, 0, sizeof(module_)); -#else - fd_ = -1; // so Valid() is always false -#endif - -} - -ProcMapsIterator::~ProcMapsIterator() { -#if defined(PLATFORM_WINDOWS) - if (snapshot_ != INVALID_HANDLE_VALUE) CloseHandle(snapshot_); -#elif defined(__MACH__) - // no cleanup necessary! -#else - if (fd_ >= 0) NO_INTR(close(fd_)); -#endif - delete dynamic_buffer_; -} - -bool ProcMapsIterator::Valid() const { -#if defined(PLATFORM_WINDOWS) - return snapshot_ != INVALID_HANDLE_VALUE; -#elif defined(__MACH__) - return 1; -#else - return fd_ != -1; -#endif -} - -bool ProcMapsIterator::Next(uint64 *start, uint64 *end, char **flags, - uint64 *offset, int64 *inode, char **filename) { - return NextExt(start, end, flags, offset, inode, filename, NULL, NULL, - NULL, NULL, NULL); -} - -// This has too many arguments. It should really be building -// a map object and returning it. The problem is that this is called -// when the memory allocator state is undefined, hence the arguments. 
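[Editor's note: illustration, not part of the deleted file. The Linux branch of NextExt() below feeds each line to ParseProcMapsLine(), defined earlier; that hand-rolled parser exists because, per the comment above, this runs while the allocator's state may be undefined. The same fields extracted with plain sscanf on a concrete line, using the format string quoted in ParseProcMapsLine's own comment; the function name is hypothetical:]

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    void demo_parse_maps_line() {
      const char line[] =
          "00400000-0040b000 r-xp 00000000 08:01 1307106  /bin/cat";
      uint64_t start, end, offset;
      int64_t inode;
      unsigned major, minor;
      char flags[5];
      int name_offset = 0;
      if (sscanf(line,
                 "%" SCNx64 "-%" SCNx64 " %4s %" SCNx64 " %x:%x %" SCNd64 " %n",
                 &start, &end, flags, &offset, &major, &minor, &inode,
                 &name_offset) == 7) {  // %n is not counted in the return value
        printf("%s mapped at [%#" PRIx64 ", %#" PRIx64 ")\n",
               line + name_offset, start, end);
      }
    }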
-bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
-                               uint64 *offset, int64 *inode, char **filename,
-                               uint64 *file_mapping, uint64 *file_pages,
-                               uint64 *anon_mapping, uint64 *anon_pages,
-                               dev_t *dev) {
-
-#if defined(__linux__) || defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-  do {
-    // Advance to the start of the next line
-    stext_ = nextline_;
-
-    // See if we have a complete line in the buffer already
-    nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ - stext_));
-    if (!nextline_) {
-      // Shift/fill the buffer so we do have a line
-      int count = etext_ - stext_;
-
-      // Move the current text to the start of the buffer
-      memmove(ibuf_, stext_, count);
-      stext_ = ibuf_;
-      etext_ = ibuf_ + count;
-
-      int nread = 0;            // fill up buffer with text
-      while (etext_ < ebuf_) {
-        NO_INTR(nread = read(fd_, etext_, ebuf_ - etext_));
-        if (nread > 0)
-          etext_ += nread;
-        else
-          break;
-      }
-
-      // Zero out remaining characters in buffer at EOF to avoid returning
-      // garbage from subsequent calls.
-      if (etext_ != ebuf_ && nread == 0) {
-        memset(etext_, 0, ebuf_ - etext_);
-      }
-      *etext_ = '\n';  // sentinel; safe because ibuf extends 1 char beyond ebuf
-      nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ + 1 - stext_));
-    }
-    *nextline_ = 0;                // turn newline into nul
-    nextline_ += ((nextline_ < etext_)? 1 : 0);  // skip nul if not end of text
-    // stext_ now points at a nul-terminated line
-    uint64 tmpstart, tmpend, tmpoffset;
-    int64 tmpinode;
-    int major, minor;
-    unsigned filename_offset = 0;
-#if defined(__linux__)
-    // for now, assume all linuxes have the same format
-    if (!ParseProcMapsLine(
-        stext_,
-        start ? start : &tmpstart,
-        end ? end : &tmpend,
-        flags_,
-        offset ? offset : &tmpoffset,
-        &major, &minor,
-        inode ? inode : &tmpinode, &filename_offset)) continue;
-#elif defined(__CYGWIN__) || defined(__CYGWIN32__)
-    // cygwin is like linux, except the third field is the "entry point"
-    // rather than the offset (see format_process_maps at
-    // http://cygwin.com/cgi-bin/cvsweb.cgi/src/winsup/cygwin/fhandler_process.cc?rev=1.89&content-type=text/x-cvsweb-markup&cvsroot=src
-    // Offset is always 0 on cygwin: cygwin implements an mmap
-    // by loading the whole file and then calling NtMapViewOfSection.
-    // Cygwin also seems to set its flags kinda randomly; use windows default.
-    char tmpflags[5];
-    if (offset)
-      *offset = 0;
-    strcpy(flags_, "r-xp");
-    if (sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n",
-               start ? start : &tmpstart,
-               end ? end : &tmpend,
-               tmpflags,
-               &tmpoffset,
-               &major, &minor,
-               inode ? inode : &tmpinode, &filename_offset) != 7) continue;
-#elif defined(__FreeBSD__)
-    // For the format, see http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/fs/procfs/procfs_map.c?rev=1.31&content-type=text/x-cvsweb-markup
-    tmpstart = tmpend = tmpoffset = 0;
-    tmpinode = 0;
-    major = minor = 0;   // can't get this info in freebsd
-    if (inode)
-      *inode = 0;        // nor this
-    if (offset)
-      *offset = 0;       // seems like this should be in there, but maybe not
-    // start    end      resident   privateresident   obj(?)   prot  refcnt shadowcnt
-    // flags    copy_on_write needs_copy type filename:
-    // 0x8048000 0x804a000 2 0 0xc104ce70 r-x 1 0 0x0 COW NC vnode /bin/cat
-    if (sscanf(stext_, "0x%" SCNx64 " 0x%" SCNx64 " %*d %*d %*p %3s %*d %*d 0x%*x %*s %*s %*s %n",
-               start ? start : &tmpstart,
-               end ? end : &tmpend,
-               flags_,
-               &filename_offset) != 3) continue;
-#endif
-
-    // Depending on the Linux kernel being used, there may or may not be a space
-    // after the inode if there is no filename.  sscanf will in such situations
-    // nondeterministically either fill in filename_offset or not (the results
-    // differ on multiple calls in the same run even with identical arguments).
-    // We don't want to wander off somewhere beyond the end of the string.
-    size_t stext_length = strlen(stext_);
-    if (filename_offset == 0 || filename_offset > stext_length)
-      filename_offset = stext_length;
-
-    // We found an entry
-    if (flags) *flags = flags_;
-    if (filename) *filename = stext_ + filename_offset;
-    if (dev) *dev = minor | (major << 8);
-
-    if (using_maps_backing_) {
-      // Extract and parse physical page backing info.
-      char *backing_ptr = stext_ + filename_offset +
-          strlen(stext_+filename_offset);
-
-      // find the second '('
-      int paren_count = 0;
-      while (--backing_ptr > stext_) {
-        if (*backing_ptr == '(') {
-          ++paren_count;
-          if (paren_count >= 2) {
-            uint64 tmp_file_mapping;
-            uint64 tmp_file_pages;
-            uint64 tmp_anon_mapping;
-            uint64 tmp_anon_pages;
-
-            sscanf(backing_ptr+1, "F %" SCNx64 " %" SCNd64 ") (A %" SCNx64 " %" SCNd64 ")",
-                   file_mapping ? file_mapping : &tmp_file_mapping,
-                   file_pages ? file_pages : &tmp_file_pages,
-                   anon_mapping ? anon_mapping : &tmp_anon_mapping,
-                   anon_pages ? anon_pages : &tmp_anon_pages);
-            // null terminate the file name (there is a space
-            // before the first (.
-            backing_ptr[-1] = 0;
-            break;
-          }
-        }
-      }
-    }
-
-    return true;
-  } while (etext_ > ibuf_);
-#elif defined(__sun__)
-  // This is based on MA_READ == 4, MA_WRITE == 2, MA_EXEC == 1
-  static char kPerms[8][4] = { "---", "--x", "-w-", "-wx",
-                               "r--", "r-x", "rw-", "rwx" };
-  COMPILE_ASSERT(MA_READ == 4, solaris_ma_read_must_equal_4);
-  COMPILE_ASSERT(MA_WRITE == 2, solaris_ma_write_must_equal_2);
-  COMPILE_ASSERT(MA_EXEC == 1, solaris_ma_exec_must_equal_1);
-  Buffer object_path;
-  int nread = 0;            // fill up buffer with text
-  NO_INTR(nread = read(fd_, ibuf_, sizeof(prmap_t)));
-  if (nread == sizeof(prmap_t)) {
-    long inode_from_mapname = 0;
-    prmap_t* mapinfo = reinterpret_cast<prmap_t*>(ibuf_);
-    // Best-effort attempt to get the inode from the filename.  I think the
-    // two middle ints are major and minor device numbers, but I'm not sure.
-    sscanf(mapinfo->pr_mapname, "ufs.%*d.%*d.%ld", &inode_from_mapname);
-
-    if (pid_ == 0) {
-      CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
-                        "/proc/self/path/%s", mapinfo->pr_mapname),
-               Buffer::kBufSize);
-    } else {
-      CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
-                        "/proc/%d/path/%s",
-                        static_cast<int>(pid_), mapinfo->pr_mapname),
-               Buffer::kBufSize);
-    }
-    ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX);
-    CHECK_LT(len, PATH_MAX);
-    if (len < 0)
-      len = 0;
-    current_filename_[len] = '\0';
-
-    if (start) *start = mapinfo->pr_vaddr;
-    if (end) *end = mapinfo->pr_vaddr + mapinfo->pr_size;
-    if (flags) *flags = kPerms[mapinfo->pr_mflags & 7];
-    if (offset) *offset = mapinfo->pr_offset;
-    if (inode) *inode = inode_from_mapname;
-    if (filename) *filename = current_filename_;
-    if (file_mapping) *file_mapping = 0;
-    if (file_pages) *file_pages = 0;
-    if (anon_mapping) *anon_mapping = 0;
-    if (anon_pages) *anon_pages = 0;
-    if (dev) *dev = 0;
-    return true;
-  }
-#elif defined(__MACH__)
-  // We return a separate entry for each segment in the DLL. (TODO(csilvers):
-  // can we do better?)  A DLL ("image") has load-commands, some of which
-  // A DLL ("image") has load-commands, some of which
-  // talk about segment boundaries.
-  // cf image_for_address from http://svn.digium.com/view/asterisk/team/oej/minivoicemail/dlfcn.c?revision=53912
-  for (; current_image_ >= 0; current_image_--) {
-    const mach_header* hdr = _dyld_get_image_header(current_image_);
-    if (!hdr) continue;
-    if (current_load_cmd_ < 0)   // set up for this image
-      current_load_cmd_ = hdr->ncmds;  // again, go from the top down
-
-    // We start with the next load command (we've already looked at this one).
-    for (current_load_cmd_--; current_load_cmd_ >= 0; current_load_cmd_--) {
-#ifdef MH_MAGIC_64
-      if (NextExtMachHelper<MH_MAGIC_64, LC_SEGMENT_64,
-                            mach_header_64, segment_command_64>(
-              hdr, current_image_, current_load_cmd_,
-              start, end, flags, offset, inode, filename,
-              file_mapping, file_pages, anon_mapping,
-              anon_pages, dev)) {
-        return true;
-      }
-#endif
-      if (NextExtMachHelper<MH_MAGIC, LC_SEGMENT,
-                            mach_header, segment_command>(
-              hdr, current_image_, current_load_cmd_,
-              start, end, flags, offset, inode, filename,
-              file_mapping, file_pages, anon_mapping,
-              anon_pages, dev)) {
-        return true;
-      }
-    }
-    // If we get here, no more load_cmd's in this image talk about
-    // segments.  Go on to the next image.
-  }
-#elif defined(PLATFORM_WINDOWS)
-  static char kDefaultPerms[5] = "r-xp";
-  BOOL ok;
-  if (module_.dwSize == 0) {  // only possible before first call
-    module_.dwSize = sizeof(module_);
-    ok = Module32First(snapshot_, &module_);
-  } else {
-    ok = Module32Next(snapshot_, &module_);
-  }
-  if (ok) {
-    uint64 base_addr = reinterpret_cast<DWORD_PTR>(module_.modBaseAddr);
-    if (start) *start = base_addr;
-    if (end) *end = base_addr + module_.modBaseSize;
-    if (flags) *flags = kDefaultPerms;
-    if (offset) *offset = 0;
-    if (inode) *inode = 0;
-    if (filename) *filename = module_.szExePath;
-    if (file_mapping) *file_mapping = 0;
-    if (file_pages) *file_pages = 0;
-    if (anon_mapping) *anon_mapping = 0;
-    if (anon_pages) *anon_pages = 0;
-    if (dev) *dev = 0;
-    return true;
-  }
-#endif
-
-  // We didn't find anything
-  return false;
-}
-
-int ProcMapsIterator::FormatLine(char* buffer, int bufsize,
-                                 uint64 start, uint64 end, const char *flags,
-                                 uint64 offset, int64 inode,
-                                 const char *filename, dev_t dev) {
-  // We assume 'flags' looks like 'rwxp' or 'rwx'.
-  char r = (flags && flags[0] == 'r') ? 'r' : '-';
-  char w = (flags && flags[0] && flags[1] == 'w') ? 'w' : '-';
-  char x = (flags && flags[0] && flags[1] && flags[2] == 'x') ? 'x' : '-';
-  // p always seems set on linux, so we set the default to 'p', not '-'
-  char p = (flags && flags[0] && flags[1] && flags[2] && flags[3] != 'p')
-      ? '-' : 'p';
-
-  const int rc = snprintf(buffer, bufsize,
-                          "%08" PRIx64 "-%08" PRIx64 " %c%c%c%c %08" PRIx64 " %02x:%02x %-11" PRId64 " %s\n",
-                          start, end, r,w,x,p, offset,
-                          static_cast<int>(dev/256), static_cast<int>(dev%256),
-                          inode, filename);
-  return (rc < 0 || rc >= bufsize) ? 0 : rc;
-}
-
-namespace tcmalloc {
-
-// Helper to add the list of mapped shared libraries to a profile.
-// Fill formatted "/proc/self/maps" contents into buffer 'buf' of size 'size'
-// and return the actual size occupied in 'buf'.  We fill wrote_all to true
-// if we successfully wrote all proc lines to buf, false otherwise.
-// We do not provision for 0-terminating 'buf'.
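For context while reviewing this removal, a minimal sketch of how the helper documented above is typically called (assuming the soon-to-be-deleted base/sysinfo.h is still on the include path; the function and buffer names are illustrative, not part of the change):

    #include <cstdio>
    #include "base/sysinfo.h"

    // Snapshot this process's mappings in /proc/self/maps format.
    void PrintOwnMappings() {
      static char buf[1 << 16];   // FillProcSelfMaps does not 0-terminate
      bool wrote_all = false;
      const int n = tcmalloc::FillProcSelfMaps(buf, sizeof(buf), &wrote_all);
      fwrite(buf, 1, n, stdout);  // lines like "08048000-0804c000 r-xp ... /bin/cat"
      if (!wrote_all) fputs("[output truncated]\n", stdout);
    }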
-int FillProcSelfMaps(char buf[], int size, bool* wrote_all) { - ProcMapsIterator::Buffer iterbuf; - ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" - - uint64 start, end, offset; - int64 inode; - char *flags, *filename; - int bytes_written = 0; - *wrote_all = true; - while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { - const int line_length = it.FormatLine(buf + bytes_written, - size - bytes_written, - start, end, flags, offset, - inode, filename, 0); - if (line_length == 0) - *wrote_all = false; // failed to write this line out - else - bytes_written += line_length; - - } - return bytes_written; -} - -// Dump the same data as FillProcSelfMaps reads to fd. -// It seems easier to repeat parts of FillProcSelfMaps here than to -// reuse it via a call. -void DumpProcSelfMaps(RawFD fd) { - ProcMapsIterator::Buffer iterbuf; - ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" - - uint64 start, end, offset; - int64 inode; - char *flags, *filename; - ProcMapsIterator::Buffer linebuf; - while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { - int written = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_), - start, end, flags, offset, inode, filename, - 0); - RawWrite(fd, linebuf.buf_, written); - } -} - -} // namespace tcmalloc diff --git a/contrib/libtcmalloc/src/base/sysinfo.h b/contrib/libtcmalloc/src/base/sysinfo.h deleted file mode 100644 index 75b101376c5..00000000000 --- a/contrib/libtcmalloc/src/base/sysinfo.h +++ /dev/null @@ -1,232 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2006, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// All functions here are thread-hostile due to file caching unless -// commented otherwise. 
-
-#ifndef _SYSINFO_H_
-#define _SYSINFO_H_
-
-#include "../config.h"
-
-#include <time.h>
-#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
-#include <windows.h>   // for DWORD
-#include <tlhelp32.h>  // for CreateToolhelp32Snapshot
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>    // for pid_t
-#endif
-#include <stddef.h>    // for size_t
-#include <limits.h>    // for PATH_MAX
-#include "base/basictypes.h"
-#include "base/logging.h"   // for RawFD
-
-// This getenv function is safe to call before the C runtime is initialized.
-// On Windows, it utilizes GetEnvironmentVariable() and on unix it uses
-// /proc/self/environ instead of calling getenv().  It's intended to be used in
-// routines that run before main(), when the state required for getenv() may
-// not be set up yet.  In particular, errno isn't set up until relatively late
-// (after the pthreads library has a chance to make it threadsafe), and
-// getenv() doesn't work until then.
-// On some platforms, this call will utilize the same, static buffer for
-// repeated GetenvBeforeMain() calls.  Callers should not expect pointers from
-// this routine to be long lived.
-// Note that on unix, /proc only has the environment at the time the
-// application was started, so this routine ignores setenv() calls/etc.  Also
-// note it only reads the first 16K of the environment.
-extern const char* GetenvBeforeMain(const char* name);
-
-// This takes as an argument an environment-variable name (like
-// CPUPROFILE) whose value is supposed to be a file-path, and sets
-// path to that path, and returns true.  Non-trivial for surprising
-// reasons, as documented in sysinfo.cc.  path must have space PATH_MAX.
-extern bool GetUniquePathFromEnv(const char* env_name, char* path);
-
-extern int GetSystemCPUsCount();
-
-void SleepForMilliseconds(int milliseconds);
-
-// Return true if we're running POSIX (e.g., NPTL on Linux) threads,
-// as opposed to a non-POSIX thread library.  The thing that we care
-// about is whether a thread's pid is the same as the thread that
-// spawned it.  If so, this function returns true.
-// Thread-safe.
-// Note: We consider false negatives to be OK.
-bool HasPosixThreads();
-
-#ifndef SWIG  // SWIG doesn't like struct Buffer and variable arguments.
-
-// A ProcMapsIterator abstracts access to /proc/maps for a given
-// process. Needs to be stack-allocatable and avoid using stdio/malloc
-// so it can be used in the google stack dumper, heap-profiler, etc.
-//
-// On Windows and Mac OS X, this iterator iterates *only* over DLLs
-// mapped into this process space.  For Linux, FreeBSD, and Solaris,
-// it iterates over *all* mapped memory regions, including anonymous
-// mmaps.  For other O/Ss, it is unlikely to work at all, and Valid()
-// will always return false.  Also note: this routine only works on
-// FreeBSD if procfs is mounted: make sure this is in your /etc/fstab:
-//    proc   /proc   procfs   rw 0 0
-class ProcMapsIterator {
- public:
-  struct Buffer {
-#ifdef __FreeBSD__
-    // FreeBSD requires us to read all of the maps file at once, so
-    // we have to make a buffer that's "always" big enough
-    static const size_t kBufSize = 102400;
-#else   // a one-line buffer is good enough
-    static const size_t kBufSize = PATH_MAX + 1024;
-#endif
-    char buf_[kBufSize];
-  };
-
-  // Create a new iterator for the specified pid.  pid can be 0 for "self".
-  explicit ProcMapsIterator(pid_t pid);
-
-  // Create an iterator with specified storage (for use in signal
-  // handler).
"buffer" should point to a ProcMapsIterator::Buffer - // buffer can be NULL in which case a bufer will be allocated. - ProcMapsIterator(pid_t pid, Buffer *buffer); - - // Iterate through maps_backing instead of maps if use_maps_backing - // is true. Otherwise the same as above. buffer can be NULL and - // it will allocate a buffer itself. - ProcMapsIterator(pid_t pid, Buffer *buffer, - bool use_maps_backing); - - // Returns true if the iterator successfully initialized; - bool Valid() const; - - // Returns a pointer to the most recently parsed line. Only valid - // after Next() returns true, and until the iterator is destroyed or - // Next() is called again. This may give strange results on non-Linux - // systems. Prefer FormatLine() if that may be a concern. - const char *CurrentLine() const { return stext_; } - - // Writes the "canonical" form of the /proc/xxx/maps info for a single - // line to the passed-in buffer. Returns the number of bytes written, - // or 0 if it was not able to write the complete line. (To guarantee - // success, buffer should have size at least Buffer::kBufSize.) - // Takes as arguments values set via a call to Next(). The - // "canonical" form of the line (taken from linux's /proc/xxx/maps): - // - + - // : Note: the - // eg - // 08048000-0804c000 r-xp 00000000 03:01 3793678 /bin/cat - // If you don't have the dev_t (dev), feel free to pass in 0. - // (Next() doesn't return a dev_t, though NextExt does.) - // - // Note: if filename and flags were obtained via a call to Next(), - // then the output of this function is only valid if Next() returned - // true, and only until the iterator is destroyed or Next() is - // called again. (Since filename, at least, points into CurrentLine.) - static int FormatLine(char* buffer, int bufsize, - uint64 start, uint64 end, const char *flags, - uint64 offset, int64 inode, const char *filename, - dev_t dev); - - // Find the next entry in /proc/maps; return true if found or false - // if at the end of the file. - // - // Any of the result pointers can be NULL if you're not interested - // in those values. - // - // If "flags" and "filename" are passed, they end up pointing to - // storage within the ProcMapsIterator that is valid only until the - // iterator is destroyed or Next() is called again. The caller may - // modify the contents of these strings (up as far as the first NUL, - // and only until the subsequent call to Next()) if desired. - - // The offsets are all uint64 in order to handle the case of a - // 32-bit process running on a 64-bit kernel - // - // IMPORTANT NOTE: see top-of-class notes for details about what - // mapped regions Next() iterates over, depending on O/S. - // TODO(csilvers): make flags and filename const. 
- bool Next(uint64 *start, uint64 *end, char **flags, - uint64 *offset, int64 *inode, char **filename); - - bool NextExt(uint64 *start, uint64 *end, char **flags, - uint64 *offset, int64 *inode, char **filename, - uint64 *file_mapping, uint64 *file_pages, - uint64 *anon_mapping, uint64 *anon_pages, - dev_t *dev); - - ~ProcMapsIterator(); - - private: - void Init(pid_t pid, Buffer *buffer, bool use_maps_backing); - - char *ibuf_; // input buffer - char *stext_; // start of text - char *etext_; // end of text - char *nextline_; // start of next line - char *ebuf_; // end of buffer (1 char for a nul) -#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__)) - HANDLE snapshot_; // filehandle on dll info - // In a change from the usual W-A pattern, there is no A variant of - // MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A. - // We want the original A variants, and this #undef is the only - // way I see to get them. Redefining it when we're done prevents us - // from affecting other .cc files. -# ifdef MODULEENTRY32 // Alias of W -# undef MODULEENTRY32 - MODULEENTRY32 module_; // info about current dll (and dll iterator) -# define MODULEENTRY32 MODULEENTRY32W -# else // It's the ascii, the one we want. - MODULEENTRY32 module_; // info about current dll (and dll iterator) -# endif -#elif defined(__MACH__) - int current_image_; // dll's are called "images" in macos parlance - int current_load_cmd_; // the segment of this dll we're examining -#elif defined(__sun__) // Solaris - int fd_; - char current_filename_[PATH_MAX]; -#else - int fd_; // filehandle on /proc/*/maps -#endif - pid_t pid_; - char flags_[10]; - Buffer* dynamic_buffer_; // dynamically-allocated Buffer - bool using_maps_backing_; // true if we are looking at maps_backing instead of maps. -}; - -#endif /* #ifndef SWIG */ - -// Helper routines - -namespace tcmalloc { -int FillProcSelfMaps(char buf[], int size, bool* wrote_all); -void DumpProcSelfMaps(RawFD fd); -} - -#endif /* #ifndef _SYSINFO_H_ */ diff --git a/contrib/libtcmalloc/src/base/thread_annotations.h b/contrib/libtcmalloc/src/base/thread_annotations.h deleted file mode 100644 index f57b2999ee7..00000000000 --- a/contrib/libtcmalloc/src/base/thread_annotations.h +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Le-Chun Wu -// -// This header file contains the macro definitions for thread safety -// annotations that allow the developers to document the locking policies -// of their multi-threaded code. The annotations can also help program -// analysis tools to identify potential thread safety issues. -// -// The annotations are implemented using GCC's "attributes" extension. -// Using the macros defined here instead of the raw GCC attributes allows -// for portability and future compatibility. -// -// This functionality is not yet fully implemented in perftools, -// but may be one day. - -#ifndef BASE_THREAD_ANNOTATIONS_H_ -#define BASE_THREAD_ANNOTATIONS_H_ - - -#if defined(__GNUC__) \ - && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) \ - && defined(__SUPPORT_TS_ANNOTATION__) && (!defined(SWIG)) -#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) -#else -#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op -#endif - - -// Document if a shared variable/field needs to be protected by a lock. -// GUARDED_BY allows the user to specify a particular lock that should be -// held when accessing the annotated variable, while GUARDED_VAR only -// indicates a shared variable should be guarded (by any lock). GUARDED_VAR -// is primarily used when the client cannot express the name of the lock. -#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) -#define GUARDED_VAR THREAD_ANNOTATION_ATTRIBUTE__(guarded) - -// Document if the memory location pointed to by a pointer should be guarded -// by a lock when dereferencing the pointer. Similar to GUARDED_VAR, -// PT_GUARDED_VAR is primarily used when the client cannot express the name -// of the lock. Note that a pointer variable to a shared memory location -// could itself be a shared variable. For example, if a shared global pointer -// q, which is guarded by mu1, points to a shared memory location that is -// guarded by mu2, q should be annotated as follows: -// int *q GUARDED_BY(mu1) PT_GUARDED_BY(mu2); -#define PT_GUARDED_BY(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded_by(x)) -#define PT_GUARDED_VAR \ - THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded) - -// Document the acquisition order between locks that can be held -// simultaneously by a thread. For any two locks that need to be annotated -// to establish an acquisition order, only one of them needs the annotation. -// (i.e. You don't have to annotate both locks with both ACQUIRED_AFTER -// and ACQUIRED_BEFORE.) -#define ACQUIRED_AFTER(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(x)) -#define ACQUIRED_BEFORE(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(x)) - -// The following three annotations document the lock requirements for -// functions/methods. 
- -// Document if a function expects certain locks to be held before it is called -#define EXCLUSIVE_LOCKS_REQUIRED(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(x)) - -#define SHARED_LOCKS_REQUIRED(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(x)) - -// Document the locks acquired in the body of the function. These locks -// cannot be held when calling this function (as google3's Mutex locks are -// non-reentrant). -#define LOCKS_EXCLUDED(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(x)) - -// Document the lock the annotated function returns without acquiring it. -#define LOCK_RETURNED(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) - -// Document if a class/type is a lockable type (such as the Mutex class). -#define LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(lockable) - -// Document if a class is a scoped lockable type (such as the MutexLock class). -#define SCOPED_LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) - -// The following annotations specify lock and unlock primitives. -#define EXCLUSIVE_LOCK_FUNCTION(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock(x)) - -#define SHARED_LOCK_FUNCTION(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(shared_lock(x)) - -#define EXCLUSIVE_TRYLOCK_FUNCTION(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock(x)) - -#define SHARED_TRYLOCK_FUNCTION(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock(x)) - -#define UNLOCK_FUNCTION(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(unlock(x)) - -// An escape hatch for thread safety analysis to ignore the annotated function. -#define NO_THREAD_SAFETY_ANALYSIS \ - THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis) - -#endif // BASE_THREAD_ANNOTATIONS_H_ diff --git a/contrib/libtcmalloc/src/base/thread_lister.c b/contrib/libtcmalloc/src/base/thread_lister.c deleted file mode 100644 index 9dc8d721892..00000000000 --- a/contrib/libtcmalloc/src/base/thread_lister.c +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright (c) 2005-2007, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- *
- * ---
- * Author: Markus Gutschke
- */
-
-#include "config.h"
-
-#include "base/thread_lister.h"
-
-#include <stdio.h>   /* needed for NULL on some powerpc platforms (?!) */
-#include <stdlib.h>
-#include <unistd.h>  /* for getpid */
-
-#ifdef HAVE_SYS_PRCTL
-# include <sys/prctl.h>
-#endif
-
-#include "base/linuxthreads.h"
-/* Include other thread listers here that define THREADS macro
- * only when they can provide a good implementation.
- */
-
-#ifndef THREADS
-
-/* Default trivial thread lister for single-threaded applications,
- * or if the multi-threading code has not been ported, yet.
- */
-
-int TCMalloc_ListAllProcessThreads(void *parameter,
-                                   ListAllProcessThreadsCallBack callback, ...) {
-  int rc;
-  va_list ap;
-  pid_t pid;
-
-#ifdef HAVE_SYS_PRCTL
-  int dumpable = prctl(PR_GET_DUMPABLE, 0);
-  if (!dumpable)
-    prctl(PR_SET_DUMPABLE, 1);
-#endif
-  va_start(ap, callback);
-  pid = getpid();
-  rc = callback(parameter, 1, &pid, ap);
-  va_end(ap);
-#ifdef HAVE_SYS_PRCTL
-  if (!dumpable)
-    prctl(PR_SET_DUMPABLE, 0);
-#endif
-  return rc;
-}
-
-int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
-  return 1;
-}
-
-#endif   /* ifndef THREADS */
diff --git a/contrib/libtcmalloc/src/base/thread_lister.h b/contrib/libtcmalloc/src/base/thread_lister.h
deleted file mode 100644
index 6e70b89fef5..00000000000
--- a/contrib/libtcmalloc/src/base/thread_lister.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* -*- Mode: c; c-basic-offset: 2; indent-tabs-mode: nil -*- */
-/* Copyright (c) 2005-2007, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Markus Gutschke
- */
-
-#ifndef _THREAD_LISTER_H
-#define _THREAD_LISTER_H
-
-#include <stdarg.h>
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef int (*ListAllProcessThreadsCallBack)(void *parameter,
-                                             int num_threads,
-                                             pid_t *thread_pids,
-                                             va_list ap);
-
-/* This function gets the list of all linux threads of the current process
- * and passes them to the 'callback' along with the 'parameter' pointer; at the
- * call back call time all the threads are paused via
- * PTRACE_ATTACH.
- * The callback is executed from a separate thread which shares only the - * address space, the filesystem, and the filehandles with the caller. Most - * notably, it does not share the same pid and ppid; and if it terminates, - * the rest of the application is still there. 'callback' is supposed to do - * or arrange for TCMalloc_ResumeAllProcessThreads. This happens automatically, if - * the thread raises a synchronous signal (e.g. SIGSEGV); asynchronous - * signals are blocked. If the 'callback' decides to unblock them, it must - * ensure that they cannot terminate the application, or that - * TCMalloc_ResumeAllProcessThreads will get called. - * It is an error for the 'callback' to make any library calls that could - * acquire locks. Most notably, this means that most system calls have to - * avoid going through libc. Also, this means that it is not legal to call - * exit() or abort(). - * We return -1 on error and the return value of 'callback' on success. - */ -int TCMalloc_ListAllProcessThreads(void *parameter, - ListAllProcessThreadsCallBack callback, ...); - -/* This function resumes the list of all linux threads that - * TCMalloc_ListAllProcessThreads pauses before giving to its - * callback. The function returns non-zero if at least one thread was - * suspended and has now been resumed. - */ -int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids); - -#ifdef __cplusplus -} -#endif - -#endif /* _THREAD_LISTER_H */ diff --git a/contrib/libtcmalloc/src/base/vdso_support.cc b/contrib/libtcmalloc/src/base/vdso_support.cc deleted file mode 100644 index 730df3011d6..00000000000 --- a/contrib/libtcmalloc/src/base/vdso_support.cc +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Paul Pluzhnikov -// -// Allow dynamic symbol lookup in the kernel VDSO page. -// -// VDSOSupport -- a class representing kernel VDSO (if present). 
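As background for the removal below, this is how the class is meant to be consumed (adapted from the usage example inside vdso_support.h, itself deleted later in this diff; __vdso_getcpu and its "LINUX_2.6" version tag are the kernel's vDSO symbols, shown here on the assumption of a 64-bit Linux host):

    #include <elf.h>   // for STT_FUNC
    #include "base/vdso_support.h"

    // Resolve getcpu() from the vDSO, avoiding a real syscall when available.
    long GetCpuViaVdso(unsigned *cpu) {
      base::VDSOSupport vdso;
      base::VDSOSupport::SymbolInfo info;
      typedef long (*FN)(unsigned *, void *, void *);
      FN fn = NULL;
      if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info))
        fn = reinterpret_cast<FN>(const_cast<void *>(info.address));
      return fn ? fn(cpu, NULL, NULL) : -1;
    }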
-// - -#include "base/vdso_support.h" - -#ifdef HAVE_VDSO_SUPPORT // defined in vdso_support.h - -#include -#include // for ptrdiff_t - -#include "base/atomicops.h" // for MemoryBarrier -#include "base/linux_syscall_support.h" -#include "base/logging.h" -#include "base/dynamic_annotations.h" -#include "base/basictypes.h" // for COMPILE_ASSERT - -using base::subtle::MemoryBarrier; - -#ifndef AT_SYSINFO_EHDR -#define AT_SYSINFO_EHDR 33 -#endif - -namespace base { - -const void *VDSOSupport::vdso_base_ = ElfMemImage::kInvalidBase; -VDSOSupport::VDSOSupport() - // If vdso_base_ is still set to kInvalidBase, we got here - // before VDSOSupport::Init has been called. Call it now. - : image_(vdso_base_ == ElfMemImage::kInvalidBase ? Init() : vdso_base_) { -} - -// NOTE: we can't use GoogleOnceInit() below, because we can be -// called by tcmalloc, and none of the *once* stuff may be functional yet. -// -// In addition, we hope that the VDSOSupportHelper constructor -// causes this code to run before there are any threads, and before -// InitGoogle() has executed any chroot or setuid calls. -// -// Finally, even if there is a race here, it is harmless, because -// the operation should be idempotent. -const void *VDSOSupport::Init() { - if (vdso_base_ == ElfMemImage::kInvalidBase) { - // Valgrind zaps AT_SYSINFO_EHDR and friends from the auxv[] - // on stack, and so glibc works as if VDSO was not present. - // But going directly to kernel via /proc/self/auxv below bypasses - // Valgrind zapping. So we check for Valgrind separately. - if (RunningOnValgrind()) { - vdso_base_ = NULL; - return NULL; - } - int fd = open("/proc/self/auxv", O_RDONLY); - if (fd == -1) { - // Kernel too old to have a VDSO. - vdso_base_ = NULL; - return NULL; - } - ElfW(auxv_t) aux; - while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) { - if (aux.a_type == AT_SYSINFO_EHDR) { - COMPILE_ASSERT(sizeof(vdso_base_) == sizeof(aux.a_un.a_val), - unexpected_sizeof_pointer_NE_sizeof_a_val); - vdso_base_ = reinterpret_cast(aux.a_un.a_val); - break; - } - } - close(fd); - if (vdso_base_ == ElfMemImage::kInvalidBase) { - // Didn't find AT_SYSINFO_EHDR in auxv[]. - vdso_base_ = NULL; - } - } - return vdso_base_; -} - -const void *VDSOSupport::SetBase(const void *base) { - CHECK(base != ElfMemImage::kInvalidBase); - const void *old_base = vdso_base_; - vdso_base_ = base; - image_.Init(base); - return old_base; -} - -bool VDSOSupport::LookupSymbol(const char *name, - const char *version, - int type, - SymbolInfo *info) const { - return image_.LookupSymbol(name, version, type, info); -} - -bool VDSOSupport::LookupSymbolByAddress(const void *address, - SymbolInfo *info_out) const { - return image_.LookupSymbolByAddress(address, info_out); -} - -// We need to make sure VDSOSupport::Init() is called before -// the main() runs, since it might do something like setuid or -// chroot. If VDSOSupport -// is used in any global constructor, this will happen, since -// VDSOSupport's constructor calls Init. But if not, we need to -// ensure it here, with a global constructor of our own. This -// is an allowed exception to the normal rule against non-trivial -// global constructors. 
-static class VDSOInitHelper {
- public:
-  VDSOInitHelper() { VDSOSupport::Init(); }
-} vdso_init_helper;
-}
-
-#endif  // HAVE_VDSO_SUPPORT
diff --git a/contrib/libtcmalloc/src/base/vdso_support.h b/contrib/libtcmalloc/src/base/vdso_support.h
deleted file mode 100644
index 0c2213c7c06..00000000000
--- a/contrib/libtcmalloc/src/base/vdso_support.h
+++ /dev/null
@@ -1,132 +0,0 @@
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Pluzhnikov
-//
-// Allow dynamic symbol lookup in the kernel VDSO page.
-//
-// VDSO stands for "Virtual Dynamic Shared Object" -- a page of
-// executable code, which looks like a shared library, but doesn't
-// necessarily exist anywhere on disk, and which gets mmap()ed into
-// every process by kernels which support VDSO, such as 2.6.x for 32-bit
-// executables, and 2.6.24 and above for 64-bit executables.
-//
-// More details could be found here:
-// http://www.trilithium.com/johan/2005/08/linux-gate/
-//
-// VDSOSupport -- a class representing kernel VDSO (if present).
-//
-// Example usage:
-//  VDSOSupport vdso;
-//  VDSOSupport::SymbolInfo info;
-//  typedef long (*FN)(unsigned *, void *, void *);
-//  FN fn = NULL;
-//  if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) {
-//     fn = reinterpret_cast<FN>(info.address);
-//  }
-
-#ifndef BASE_VDSO_SUPPORT_H_
-#define BASE_VDSO_SUPPORT_H_
-
-#include "../config.h"
-#include "base/basictypes.h"
-#include "base/elf_mem_image.h"
-
-#ifdef HAVE_ELF_MEM_IMAGE
-
-#define HAVE_VDSO_SUPPORT 1
-
-#include <stdlib.h>  // for NULL
-
-namespace base {
-
-// NOTE: this class may be used from within tcmalloc, and can not
-// use any memory allocation routines.
-class VDSOSupport {
- public:
-  VDSOSupport();
-
-  typedef ElfMemImage::SymbolInfo SymbolInfo;
-  typedef ElfMemImage::SymbolIterator SymbolIterator;
-
-  // Answers whether we have a vdso at all.
-  bool IsPresent() const { return image_.IsPresent(); }
-
-  // Allow to iterate over all VDSO symbols.
- SymbolIterator begin() const { return image_.begin(); } - SymbolIterator end() const { return image_.end(); } - - // Look up versioned dynamic symbol in the kernel VDSO. - // Returns false if VDSO is not present, or doesn't contain given - // symbol/version/type combination. - // If info_out != NULL, additional details are filled in. - bool LookupSymbol(const char *name, const char *version, - int symbol_type, SymbolInfo *info_out) const; - - // Find info about symbol (if any) which overlaps given address. - // Returns true if symbol was found; false if VDSO isn't present - // or doesn't have a symbol overlapping given address. - // If info_out != NULL, additional details are filled in. - bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; - - // Used only for testing. Replace real VDSO base with a mock. - // Returns previous value of vdso_base_. After you are done testing, - // you are expected to call SetBase() with previous value, in order to - // reset state to the way it was. - const void *SetBase(const void *s); - - // Computes vdso_base_ and returns it. Should be called as early as - // possible; before any thread creation, chroot or setuid. - static const void *Init(); - - private: - // image_ represents VDSO ELF image in memory. - // image_.ehdr_ == NULL implies there is no VDSO. - ElfMemImage image_; - - // Cached value of auxv AT_SYSINFO_EHDR, computed once. - // This is a tri-state: - // kInvalidBase => value hasn't been determined yet. - // 0 => there is no VDSO. - // else => vma of VDSO Elf{32,64}_Ehdr. - // - // When testing with mock VDSO, low bit is set. - // The low bit is always available because vdso_base_ is - // page-aligned. - static const void *vdso_base_; - - DISALLOW_COPY_AND_ASSIGN(VDSOSupport); -}; - -} // namespace base - -#endif // HAVE_ELF_MEM_IMAGE - -#endif // BASE_VDSO_SUPPORT_H_ diff --git a/contrib/libtcmalloc/src/central_freelist.cc b/contrib/libtcmalloc/src/central_freelist.cc deleted file mode 100644 index 11b190dcfee..00000000000 --- a/contrib/libtcmalloc/src/central_freelist.cc +++ /dev/null @@ -1,387 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-
-#include "config.h"
-#include <algorithm>
-#include "central_freelist.h"
-#include "internal_logging.h"  // for ASSERT, MESSAGE
-#include "linked_list.h"       // for SLL_Next, SLL_Push, etc
-#include "page_heap.h"         // for PageHeap
-#include "static_vars.h"       // for Static
-
-using std::min;
-using std::max;
-
-namespace tcmalloc {
-
-void CentralFreeList::Init(size_t cl) {
-  size_class_ = cl;
-  tcmalloc::DLL_Init(&empty_);
-  tcmalloc::DLL_Init(&nonempty_);
-  num_spans_ = 0;
-  counter_ = 0;
-
-  max_cache_size_ = kMaxNumTransferEntries;
-#ifdef TCMALLOC_SMALL_BUT_SLOW
-  // Disable the transfer cache for the small footprint case.
-  cache_size_ = 0;
-#else
-  cache_size_ = 16;
-#endif
-  if (cl > 0) {
-    // Limit the maximum size of the cache based on the size class.  If this
-    // is not done, large size class objects will consume a lot of memory if
-    // they just sit in the transfer cache.
-    int32_t bytes = Static::sizemap()->ByteSizeForClass(cl);
-    int32_t objs_to_move = Static::sizemap()->num_objects_to_move(cl);
-
-    ASSERT(objs_to_move > 0 && bytes > 0);
-    // Limit each size class cache to at most 1MB of objects or one entry,
-    // whichever is greater.  Total transfer cache memory used across all
-    // size classes then can't be greater than approximately
-    // 1MB * kMaxNumTransferEntries.
-    // min and max are in parens to avoid macro-expansion on windows.
-    max_cache_size_ = (min)(max_cache_size_,
-                            (max)(1, (1024 * 1024) / (bytes * objs_to_move)));
-    cache_size_ = (min)(cache_size_, max_cache_size_);
-  }
-  used_slots_ = 0;
-  ASSERT(cache_size_ <= max_cache_size_);
-}
-
-void CentralFreeList::ReleaseListToSpans(void* start) {
-  while (start) {
-    void *next = SLL_Next(start);
-    ReleaseToSpans(start);
-    start = next;
-  }
-}
-
-// MapObjectToSpan should logically be part of ReleaseToSpans.  But
-// this triggers an optimization bug in gcc 4.5.0.  Moving to a
-// separate function, and making sure that function isn't inlined,
-// seems to fix the problem.  It also should be fixed for gcc 4.5.1.
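The 1MB cap computed in Init() above is easier to see with concrete numbers; the same arithmetic is restated self-contained below (the sample size class is an assumption for illustration, not taken from the source):

    #include <algorithm>
    #include <cstdio>

    // Same formula as Init(): at most ~1MB of cached objects per size class,
    // but always at least one transfer-cache entry.
    static int MaxTransferEntries(int bytes_per_object, int objs_per_entry,
                                  int hard_cap) {
      return std::min(hard_cap,
                      std::max(1, (1024 * 1024) / (bytes_per_object * objs_per_entry)));
    }

    int main() {
      // A 256-byte class moving 32 objects per entry: 1MB / 8KB = 128 entries,
      // clamped to the kMaxNumTransferEntries default of 64.
      std::printf("%d\n", MaxTransferEntries(256, 32, 64));   // prints 64
      return 0;
    }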
-static
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 5 && __GNUC_PATCHLEVEL__ == 0
-__attribute__ ((noinline))
-#endif
-Span* MapObjectToSpan(void* object) {
-  const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
-  Span* span = Static::pageheap()->GetDescriptor(p);
-  return span;
-}
-
-void CentralFreeList::ReleaseToSpans(void* object) {
-  Span* span = MapObjectToSpan(object);
-  ASSERT(span != NULL);
-  ASSERT(span->refcount > 0);
-
-  // If span is empty, move it to non-empty list
-  if (span->objects == NULL) {
-    tcmalloc::DLL_Remove(span);
-    tcmalloc::DLL_Prepend(&nonempty_, span);
-    Event(span, 'N', 0);
-  }
-
-  // The following check is expensive, so it is disabled by default
-  if (false) {
-    // Check that object does not occur in list
-    int got = 0;
-    for (void* p = span->objects; p != NULL; p = *((void**) p)) {
-      ASSERT(p != object);
-      got++;
-    }
-    ASSERT(got + span->refcount ==
-           (span->length<<kPageShift) /
-           Static::sizemap()->ByteSizeForClass(span->sizeclass));
-  }
-
-  counter_++;
-  span->refcount--;
-  if (span->refcount == 0) {
-    Event(span, '#', 0);
-    counter_ -= ((span->length<<kPageShift) /
-                 Static::sizemap()->ByteSizeForClass(span->sizeclass));
-    tcmalloc::DLL_Remove(span);
-    --num_spans_;
-
-    // Release central list lock while operating on pageheap
-    lock_.Unlock();
-    {
-      SpinLockHolder h(Static::pageheap_lock());
-      Static::pageheap()->Delete(span);
-    }
-    lock_.Lock();
-  } else {
-    *(reinterpret_cast<void**>(object)) = span->objects;
-    span->objects = object;
-  }
-}
-
-bool CentralFreeList::EvictRandomSizeClass(
-    int locked_size_class, bool force) {
-  static int race_counter = 0;
-  int t = race_counter++;  // Updated without a lock, but who cares.
-  if (t >= kNumClasses) {
-    while (t >= kNumClasses) {
-      t -= kNumClasses;
-    }
-    race_counter = t;
-  }
-  ASSERT(t >= 0);
-  ASSERT(t < kNumClasses);
-  if (t == locked_size_class) return false;
-  return Static::central_cache()[t].ShrinkCache(locked_size_class, force);
-}
-
-bool CentralFreeList::MakeCacheSpace() {
-  // Is there room in the cache?
-  if (used_slots_ < cache_size_) return true;
-  // Check if we can expand this cache?
-  if (cache_size_ == max_cache_size_) return false;
-  // Ok, we'll try to grab an entry from some other size class.
-  if (EvictRandomSizeClass(size_class_, false) ||
-      EvictRandomSizeClass(size_class_, true)) {
-    // Succeeded in evicting, we're going to make our cache larger.
-    // However, we may have dropped and re-acquired the lock in
-    // EvictRandomSizeClass (via ShrinkCache and the LockInverter), so the
-    // cache_size may have changed.  Therefore, check and verify that it is
-    // still OK to increase the cache_size.
-    if (cache_size_ < max_cache_size_) {
-      cache_size_++;
-      return true;
-    }
-  }
-  return false;
-}
-
-
-namespace {
-class LockInverter {
- private:
-  SpinLock *held_, *temp_;
- public:
-  inline explicit LockInverter(SpinLock* held, SpinLock *temp)
-    : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
-  inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
-};
-}
-
-// This function is marked as NO_THREAD_SAFETY_ANALYSIS because it uses
-// LockInverter to release one lock and acquire another in scoped-lock
-// style, which our current annotation/analysis does not support.
-bool CentralFreeList::ShrinkCache(int locked_size_class, bool force)
-    NO_THREAD_SAFETY_ANALYSIS {
-  // Start with a quick check without taking a lock.
-  if (cache_size_ == 0) return false;
-  // We don't evict from a full cache unless we are 'forcing'.
-  if (force == false && used_slots_ == cache_size_) return false;
-
-  // Grab lock, but first release the other lock held by this thread.  We use
-  // the lock inverter to ensure that we never hold two size class locks
-  // concurrently.  That can create a deadlock because there is no well
-  // defined nesting order.
-  LockInverter li(&Static::central_cache()[locked_size_class].lock_, &lock_);
-  ASSERT(used_slots_ <= cache_size_);
-  ASSERT(0 <= cache_size_);
-  if (cache_size_ == 0) return false;
-  if (used_slots_ == cache_size_) {
-    if (force == false) return false;
-    // ReleaseListToSpans releases the lock, so we have to make all the
-    // updates to the central list before calling it.
-    cache_size_--;
-    used_slots_--;
-    ReleaseListToSpans(tc_slots_[used_slots_].head);
-    return true;
-  }
-  cache_size_--;
-  return true;
-}
-
-void CentralFreeList::InsertRange(void *start, void *end, int N) {
-  SpinLockHolder h(&lock_);
-  if (N == Static::sizemap()->num_objects_to_move(size_class_) &&
-      MakeCacheSpace()) {
-    int slot = used_slots_++;
-    ASSERT(slot >=0);
-    ASSERT(slot < max_cache_size_);
-    TCEntry *entry = &tc_slots_[slot];
-    entry->head = start;
-    entry->tail = end;
-    return;
-  }
-  ReleaseListToSpans(start);
-}
-
-int CentralFreeList::RemoveRange(void **start, void **end, int N) {
-  ASSERT(N > 0);
-  lock_.Lock();
-  if (N == Static::sizemap()->num_objects_to_move(size_class_) &&
-      used_slots_ > 0) {
-    int slot = --used_slots_;
-    ASSERT(slot >= 0);
-    TCEntry *entry = &tc_slots_[slot];
-    *start = entry->head;
-    *end = entry->tail;
-    lock_.Unlock();
-    return N;
-  }
-
-  int result = 0;
-  *start = NULL;
-  *end = NULL;
-  // TODO: Prefetch multiple TCEntries?
-  result = FetchFromOneSpansSafe(N, start, end);
-  if (result != 0) {
-    while (result < N) {
-      int n;
-      void* head = NULL;
-      void* tail = NULL;
-      n = FetchFromOneSpans(N - result, &head, &tail);
-      if (!n) break;
-      result += n;
-      SLL_PushRange(start, head, tail);
-    }
-  }
-  lock_.Unlock();
-  return result;
-}
-
-
-int CentralFreeList::FetchFromOneSpansSafe(int N, void **start, void **end) {
-  int result = FetchFromOneSpans(N, start, end);
-  if (!result) {
-    Populate();
-    result = FetchFromOneSpans(N, start, end);
-  }
-  return result;
-}
-
-int CentralFreeList::FetchFromOneSpans(int N, void **start, void **end) {
-  if (tcmalloc::DLL_IsEmpty(&nonempty_)) return 0;
-  Span* span = nonempty_.next;
-
-  ASSERT(span->objects != NULL);
-
-  int result = 0;
-  void *prev, *curr;
-  curr = span->objects;
-  do {
-    prev = curr;
-    curr = *(reinterpret_cast<void**>(curr));
-  } while (++result < N && curr != NULL);
-
-  if (curr == NULL) {
-    // Move to empty list
-    tcmalloc::DLL_Remove(span);
-    tcmalloc::DLL_Prepend(&empty_, span);
-    Event(span, 'E', 0);
-  }
-
-  *start = span->objects;
-  *end = prev;
-  span->objects = curr;
-  SLL_SetNext(*end, NULL);
-  span->refcount += result;
-  counter_ -= result;
-  return result;
-}
-
-// Fetch memory from the system and add to the central cache freelist.
-void CentralFreeList::Populate() {
-  // Release central list lock while operating on pageheap
-  lock_.Unlock();
-  const size_t npages = Static::sizemap()->class_to_pages(size_class_);
-
-  Span* span;
-  {
-    SpinLockHolder h(Static::pageheap_lock());
-    span = Static::pageheap()->New(npages);
-    if (span) Static::pageheap()->RegisterSizeClass(span, size_class_);
-  }
-  if (span == NULL) {
-    Log(kLog, __FILE__, __LINE__,
-        "tcmalloc: allocation failed", npages << kPageShift);
-    lock_.Lock();
-    return;
-  }
-  ASSERT(span->length == npages);
-  // Cache sizeclass info eagerly.  Locking is not necessary.
-  // (Instead of being eager, we could just replace any stale info
-  // about this span, but that seems to be no better in practice.)
-  for (int i = 0; i < npages; i++) {
-    Static::pageheap()->CacheSizeClass(span->start + i, size_class_);
-  }
-
-  // Split the block into pieces and add to the free-list
-  // TODO: coloring of objects to avoid cache conflicts?
-  void** tail = &span->objects;
-  char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
-  char* limit = ptr + (npages << kPageShift);
-  const size_t size = Static::sizemap()->ByteSizeForClass(size_class_);
-  int num = 0;
-  while (ptr + size <= limit) {
-    *tail = ptr;
-    tail = reinterpret_cast<void**>(ptr);
-    ptr += size;
-    num++;
-  }
-  ASSERT(ptr <= limit);
-  *tail = NULL;
-  span->refcount = 0;  // No sub-object in use yet
-
-  // Add span to list of non-empty spans
-  lock_.Lock();
-  tcmalloc::DLL_Prepend(&nonempty_, span);
-  ++num_spans_;
-  counter_ += num;
-}
-
-int CentralFreeList::tc_length() {
-  SpinLockHolder h(&lock_);
-  return used_slots_ * Static::sizemap()->num_objects_to_move(size_class_);
-}
-
-size_t CentralFreeList::OverheadBytes() {
-  SpinLockHolder h(&lock_);
-  if (size_class_ == 0) {  // 0 holds the 0-sized allocations
-    return 0;
-  }
-  const size_t pages_per_span = Static::sizemap()->class_to_pages(size_class_);
-  const size_t object_size = Static::sizemap()->class_to_size(size_class_);
-  ASSERT(object_size > 0);
-  const size_t overhead_per_span = (pages_per_span * kPageSize) % object_size;
-  return num_spans_ * overhead_per_span;
-}
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/central_freelist.h b/contrib/libtcmalloc/src/central_freelist.h
deleted file mode 100644
index 4148680d20a..00000000000
--- a/contrib/libtcmalloc/src/central_freelist.h
+++ /dev/null
@@ -1,211 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-
-#ifndef TCMALLOC_CENTRAL_FREELIST_H_
-#define TCMALLOC_CENTRAL_FREELIST_H_
-
-#include "config.h"
-#include <stddef.h>  // for size_t
-#ifdef HAVE_STDINT_H
-#include <stdint.h>  // for int32_t
-#endif
-#include "base/spinlock.h"
-#include "base/thread_annotations.h"
-#include "common.h"
-#include "span.h"
-
-namespace tcmalloc {
-
-// Data kept per size-class in central cache.
-class CentralFreeList {
- public:
-  // A CentralFreeList may be used before its constructor runs.
-  // So we prevent lock_'s constructor from doing anything to the
-  // lock_ state.
-  CentralFreeList() : lock_(base::LINKER_INITIALIZED) { }
-
-  void Init(size_t cl);
-
-  // These methods all do internal locking.
-
-  // Insert the specified range into the central freelist.  N is the number of
-  // elements in the range.  RemoveRange() is the opposite operation.
-  void InsertRange(void *start, void *end, int N);
-
-  // Returns the actual number of fetched elements and sets *start and *end.
-  int RemoveRange(void **start, void **end, int N);
-
-  // Returns the number of free objects in cache.
-  int length() {
-    SpinLockHolder h(&lock_);
-    return counter_;
-  }
-
-  // Returns the number of free objects in the transfer cache.
-  int tc_length();
-
-  // Returns the memory overhead (internal fragmentation) attributable
-  // to the freelist.  This is memory lost when the size of elements
-  // in a freelist doesn't exactly divide the page-size (an 8192-byte
-  // page full of 5-byte objects would have 2 bytes memory overhead).
-  size_t OverheadBytes();
-
-  // Lock/Unlock the internal SpinLock.  Used on the pthread_atfork call
-  // to set the lock in a consistent state before the fork.
-  void Lock() {
-    lock_.Lock();
-  }
-
-  void Unlock() {
-    lock_.Unlock();
-  }
-
- private:
-  // TransferCache is used to cache transfers of
-  // sizemap.num_objects_to_move(size_class) back and forth between
-  // thread caches and the central cache for a given size class.
-  struct TCEntry {
-    void *head;  // Head of chain of objects.
-    void *tail;  // Tail of chain of objects.
-  };
-
-  // A central cache freelist can have anywhere from 0 to kMaxNumTransferEntries
-  // slots to put link list chains into.
-#ifdef TCMALLOC_SMALL_BUT_SLOW
-  // For the small memory model, the transfer cache is not used.
-  static const int kMaxNumTransferEntries = 0;
-#else
-  // Starting point for the maximum number of entries in the transfer cache.
-  // The actual maximum for a given size class may be lower than this
-  // maximum value.
-  static const int kMaxNumTransferEntries = 64;
-#endif
-
-  // REQUIRES: lock_ is held
-  // Remove object from cache and return.
-  // Return NULL if no free entries in cache.
-  int FetchFromOneSpans(int N, void **start, void **end) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock_ is held
-  // Remove object from cache and return.  Fetches
-  // from pageheap if cache is empty.  Only returns
-  // NULL on allocation failure.
-  int FetchFromOneSpansSafe(int N, void **start, void **end) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock_ is held
-  // Release a linked list of objects to spans.
-  // May temporarily release lock_.
-  void ReleaseListToSpans(void *start) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock_ is held
-  // Release an object to spans.
-  // May temporarily release lock_.
-  void ReleaseToSpans(void* object) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock_ is held
-  // Populate cache by fetching from the page heap.
-  // May temporarily release lock_.
-  void Populate() EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock is held.
-  // Tries to make room for a TCEntry.  If the cache is full it will try to
-  // expand it at the cost of some other cache size.  Return false if there is
-  // no space.
-  bool MakeCacheSpace() EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock_ for locked_size_class is held.
-  // Picks a "random" size class to steal TCEntry slot from.  In reality it
-  // just iterates over the sizeclasses but does so without taking a lock.
-  // Returns true on success.
-  // May temporarily lock a "random" size class.
-  static bool EvictRandomSizeClass(int locked_size_class, bool force);
-
-  // REQUIRES: lock_ is *not* held.
-  // Tries to shrink the cache.  If force is true it will release objects to
-  // spans if it allows it to shrink the cache.  Return false if it failed to
-  // shrink the cache.  Decrements cache_size_ on success.
-  // May temporarily take lock_.  If it takes lock_, the locked_size_class
-  // lock is released to keep the thread from holding two size class locks
-  // concurrently which could lead to a deadlock.
-  bool ShrinkCache(int locked_size_class, bool force) LOCKS_EXCLUDED(lock_);
-
-  // This lock protects all the data members.  cached_entries and cache_size_
-  // may be looked at without holding the lock.
-  SpinLock lock_;
-
-  // We keep linked lists of empty and non-empty spans.
-  size_t size_class_;  // My size class
-  Span empty_;         // Dummy header for list of empty spans
-  Span nonempty_;      // Dummy header for list of non-empty spans
-  size_t num_spans_;   // Number of spans in empty_ plus nonempty_
-  size_t counter_;     // Number of free objects in cache entry
-
-  // Here we reserve space for TCEntry cache slots.  Space is preallocated
-  // for the largest possible number of entries than any one size class may
-  // accumulate.  Not all size classes are allowed to accumulate
-  // kMaxNumTransferEntries, so there is some wasted space for those size
-  // classes.
-  TCEntry tc_slots_[kMaxNumTransferEntries];
-
-  // Number of currently used cached entries in tc_slots_.  This variable is
-  // updated under a lock but can be read without one.
-  int32_t used_slots_;
-  // The current number of slots for this size class.  This is an
-  // adaptive value that is increased if there is lots of traffic
-  // on a given size class.
-  int32_t cache_size_;
-  // Maximum size of the cache for a given size class.
-  int32_t max_cache_size_;
-};
-
-// Pads each CentralCache object to multiple of 64 bytes.  Since some
-// compilers (such as MSVC) don't like it when the padding is 0, I use
-// template specialization to remove the padding entirely when
-// sizeof(CentralFreeList) is a multiple of 64.
-template<int kFreeListSizeMod64>
-class CentralFreeListPaddedTo : public CentralFreeList {
- private:
-  char pad_[64 - kFreeListSizeMod64];
-};
-
-template<>
-class CentralFreeListPaddedTo<0> : public CentralFreeList {
-};
-
-class CentralFreeListPadded : public CentralFreeListPaddedTo<
-  sizeof(CentralFreeList) % 64> {
-};
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_CENTRAL_FREELIST_H_
diff --git a/contrib/libtcmalloc/src/common.cc b/contrib/libtcmalloc/src/common.cc
deleted file mode 100644
index 313848c37b6..00000000000
--- a/contrib/libtcmalloc/src/common.cc
+++ /dev/null
@@ -1,275 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-#include <stdlib.h>                     // for getenv and strtol
-#include "config.h"
-#include "common.h"
-#include "system-alloc.h"
-#include "base/spinlock.h"
-#include "getenv_safe.h"                // TCMallocGetenvSafe
-
-namespace tcmalloc {
-
-// Define the maximum number of objects per size class to transfer between
-// thread and central caches.
-static int32 FLAGS_tcmalloc_transfer_num_objects;
-
-static const int32 kDefaultTransferNumObjecs = 512;
-
-// The init function is provided to explicitly initialize the variable value
-// from the env. var, to avoid C++ global construction that might defer its
-// initialization until after a malloc/new call.
-static inline void InitTCMallocTransferNumObjects()
-{
-  if (UNLIKELY(FLAGS_tcmalloc_transfer_num_objects == 0)) {
-    const char *envval = TCMallocGetenvSafe("TCMALLOC_TRANSFER_NUM_OBJ");
-    FLAGS_tcmalloc_transfer_num_objects = !envval ? kDefaultTransferNumObjecs :
-      strtol(envval, NULL, 10);
-  }
-}
-
-// Note: the following only works for "n"s that fit in 32-bits, but
-// that is fine since we only use it for small sizes.
-static inline int LgFloor(size_t n) {
-  int log = 0;
-  for (int i = 4; i >= 0; --i) {
-    int shift = (1 << i);
-    size_t x = n >> shift;
-    if (x != 0) {
-      n = x;
-      log += shift;
-    }
-  }
-  ASSERT(n == 1);
-  return log;
-}
-
-int AlignmentForSize(size_t size) {
-  int alignment = kAlignment;
-  if (size > kMaxSize) {
-    // Cap alignment at kPageSize for large sizes.
-    alignment = kPageSize;
-  } else if (size >= 128) {
-    // Space wasted due to alignment is at most 1/8, i.e., 12.5%.
-    alignment = (1 << LgFloor(size)) / 8;
-  } else if (size >= kMinAlign) {
-    // We need an alignment of at least 16 bytes to satisfy
-    // requirements for some SSE types.
-    alignment = kMinAlign;
-  }
-  // Maximum alignment allowed is page size alignment.
-  if (alignment > kPageSize) {
-    alignment = kPageSize;
-  }
-  CHECK_CONDITION(size < kMinAlign || alignment >= kMinAlign);
-  CHECK_CONDITION((alignment & (alignment - 1)) == 0);
-  return alignment;
-}
-
-int SizeMap::NumMoveSize(size_t size) {
-  if (size == 0) return 0;
-  // Use approx 64k transfers between thread and central caches.
-  int num = static_cast<int>(64.0 * 1024.0 / size);
-  if (num < 2) num = 2;
-
-  // Avoid bringing too many objects into small object free lists.
-  // If this value is too large:
-  // - We waste memory with extra objects sitting in the thread caches.
-  // - The central freelist holds its lock for too long while
-  //   building a linked list of objects, slowing down the allocations
-  //   of other threads.
-  // If this value is too small:
-  // - We go to the central freelist too often and we have to acquire
-  //   its lock each time.
-  // This value strikes a balance between the constraints above.
-  if (num > FLAGS_tcmalloc_transfer_num_objects)
-    num = FLAGS_tcmalloc_transfer_num_objects;
-
-  return num;
-}
-
-// Initialize the mapping arrays
-void SizeMap::Init() {
-  InitTCMallocTransferNumObjects();
-
-  // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
-  if (ClassIndex(0) != 0) {
-    Log(kCrash, __FILE__, __LINE__,
-        "Invalid class index for size 0", ClassIndex(0));
-  }
-  if (ClassIndex(kMaxSize) >= sizeof(class_array_)) {
-    Log(kCrash, __FILE__, __LINE__,
-        "Invalid class index for kMaxSize", ClassIndex(kMaxSize));
-  }
-
-  // Compute the size classes we want to use
-  int sc = 1;   // Next size class to assign
-  int alignment = kAlignment;
-  CHECK_CONDITION(kAlignment <= kMinAlign);
-  for (size_t size = kAlignment; size <= kMaxSize; size += alignment) {
-    alignment = AlignmentForSize(size);
-    CHECK_CONDITION((size % alignment) == 0);
-
-    int blocks_to_move = NumMoveSize(size) / 4;
-    size_t psize = 0;
-    do {
-      psize += kPageSize;
-      // Allocate enough pages so leftover is less than 1/8 of total.
-      // This bounds wasted space to at most 12.5%.
-      while ((psize % size) > (psize >> 3)) {
-        psize += kPageSize;
-      }
-      // Continue to add pages until there are at least as many objects in
-      // the span as are needed when moving objects from the central
-      // freelists and spans to the thread caches.
-    } while ((psize / size) < (blocks_to_move));
-    const size_t my_pages = psize >> kPageShift;
-
-    if (sc > 1 && my_pages == class_to_pages_[sc-1]) {
-      // See if we can merge this into the previous class without
-      // increasing the fragmentation of the previous class.
-      const size_t my_objects = (my_pages << kPageShift) / size;
-      const size_t prev_objects = (class_to_pages_[sc-1] << kPageShift)
-                                  / class_to_size_[sc-1];
-      if (my_objects == prev_objects) {
-        // Adjust last class to include this size
-        class_to_size_[sc-1] = size;
-        continue;
-      }
-    }
-
-    // Add new class
-    class_to_pages_[sc] = my_pages;
-    class_to_size_[sc] = size;
-    sc++;
-  }
-  if (sc != kNumClasses) {
-    Log(kCrash, __FILE__, __LINE__,
-        "wrong number of size classes: (found vs. expected )", sc, kNumClasses);
-  }
-
-  // Initialize the mapping arrays
-  int next_size = 0;
-  for (int c = 1; c < kNumClasses; c++) {
-    const int max_size_in_class = class_to_size_[c];
-    for (int s = next_size; s <= max_size_in_class; s += kAlignment) {
-      class_array_[ClassIndex(s)] = c;
-    }
-    next_size = max_size_in_class + kAlignment;
-  }
-
-  // Double-check sizes just to be safe
-  for (size_t size = 0; size <= kMaxSize;) {
-    const int sc = SizeClass(size);
-    if (sc <= 0 || sc >= kNumClasses) {
-      Log(kCrash, __FILE__, __LINE__,
-          "Bad size class (class, size)", sc, size);
-    }
-    if (sc > 1 && size <= class_to_size_[sc-1]) {
-      Log(kCrash, __FILE__, __LINE__,
-          "Allocating unnecessarily large class (class, size)", sc, size);
-    }
-    const size_t s = class_to_size_[sc];
-    if (size > s || s == 0) {
-      Log(kCrash, __FILE__, __LINE__,
-          "Bad (class, size, requested)", sc, s, size);
-    }
-    if (size <= kMaxSmallSize) {
-      size += 8;
-    } else {
-      size += 128;
-    }
-  }
-
-  // Initialize the num_objects_to_move array.
-  for (size_t cl = 1; cl < kNumClasses; ++cl) {
-    num_objects_to_move_[cl] = NumMoveSize(ByteSizeForClass(cl));
-  }
-}
-
-// Metadata allocator -- keeps stats about how many bytes allocated.
-static uint64_t metadata_system_bytes_ = 0;
-static const size_t kMetadataAllocChunkSize = 8*1024*1024;
-// As ThreadCache objects are allocated with MetaDataAlloc, and also
-// CACHELINE_ALIGNED, we must use the same alignment as TCMalloc_SystemAlloc.
-static const size_t kMetadataAllignment = sizeof(MemoryAligner);
-
-static char *metadata_chunk_alloc_;
-static size_t metadata_chunk_avail_;
-
-static SpinLock metadata_alloc_lock(SpinLock::LINKER_INITIALIZED);
-
-void* MetaDataAlloc(size_t bytes) {
-  if (bytes >= kMetadataAllocChunkSize) {
-    void *rv = TCMalloc_SystemAlloc(bytes,
-                                    NULL, kMetadataAllignment);
-    if (rv != NULL) {
-      metadata_system_bytes_ += bytes;
-    }
-    return rv;
-  }
-
-  SpinLockHolder h(&metadata_alloc_lock);
-
-  // The following works by essentially turning the address into an integer
-  // of log_2 kMetadataAllignment size and negating it.  I.e. the negated
-  // value plus the original value gives 0, and that's what we want modulo
-  // kMetadataAllignment.  Note, we negate before masking higher bits
-  // off, otherwise we'd have to mask them off after negation anyways.
-  intptr_t alignment = -reinterpret_cast<intptr_t>(metadata_chunk_alloc_) & (kMetadataAllignment-1);
-
-  if (metadata_chunk_avail_ < bytes + alignment) {
-    size_t real_size;
-    void *ptr = TCMalloc_SystemAlloc(kMetadataAllocChunkSize,
-                                     &real_size, kMetadataAllignment);
-    if (ptr == NULL) {
-      return NULL;
-    }
-
-    metadata_chunk_alloc_ = static_cast<char *>(ptr);
-    metadata_chunk_avail_ = real_size;
-
-    alignment = 0;
-  }
-
-  void *rv = static_cast<void *>(metadata_chunk_alloc_ + alignment);
-  bytes += alignment;
-  metadata_chunk_alloc_ += bytes;
-  metadata_chunk_avail_ -= bytes;
-  metadata_system_bytes_ += bytes;
-  return rv;
-}
-
-uint64_t metadata_system_bytes() { return metadata_system_bytes_; }
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/common.h b/contrib/libtcmalloc/src/common.h
deleted file mode 100644
index e8a1ba6972c..00000000000
--- a/contrib/libtcmalloc/src/common.h
+++ /dev/null
@@ -1,295 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-//
-// Common definitions for tcmalloc code.
-
-#ifndef TCMALLOC_COMMON_H_
-#define TCMALLOC_COMMON_H_
-
-#include "config.h"
-#include <stddef.h>                     // for size_t
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for uintptr_t, uint64_t
-#endif
-#include "internal_logging.h"  // for ASSERT, etc
-#include "base/basictypes.h"   // for LIKELY, etc
-
-#ifdef HAVE_BUILTIN_EXPECT
-#define LIKELY(x) __builtin_expect(!!(x), 1)
-#define UNLIKELY(x) __builtin_expect(!!(x), 0)
-#else
-#define LIKELY(x) (x)
-#define UNLIKELY(x) (x)
-#endif
-
-// Type that can hold a page number
-typedef uintptr_t PageID;
-
-// Type that can hold the length of a run of pages
-typedef uintptr_t Length;
-
-//-------------------------------------------------------------------
-// Configuration
-//-------------------------------------------------------------------
-
-#if defined(TCMALLOC_ALIGN_8BYTES)
-// Unless we force 8-byte alignment, we use an alignment of at least 16
-// bytes to satisfy requirements for some SSE types.  Keep in mind that
-// with 16-byte alignment the space wasted due to alignment can be up to
-// 25% (e.g. a malloc of 24 bytes will get 32 bytes).
-static const size_t kMinAlign = 8;
-// Number of classes created until the 128-byte alignment threshold is reached.
-static const size_t kBaseClasses = 16;
-#else
-static const size_t kMinAlign = 16;
-static const size_t kBaseClasses = 9;
-#endif
-
-// Using large pages speeds up the execution at a cost of larger memory use.
-// Deallocation may speed up by a factor as the page map gets 8x smaller, so
-// lookups in the page map result in fewer L2 cache misses, which translates to
-// speedup for application/platform combinations with high L2 cache pressure.
-// As the number of size classes increases with large pages, we increase
-// the thread cache allowance to avoid passing more free ranges to and from
-// central lists.  Also, larger pages are less likely to get freed.
-// These two factors cause a bounded increase in memory use.
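Editor's note: the #if ladder that follows picks kPageShift, from which kPageSize and kMaxPages are derived. A minimal sketch of that arithmetic for the default 8 KiB configuration (the values simply mirror the definitions below; nothing here is new):

#include <cstddef>
#include <cstdio>

int main() {
  const std::size_t kPageShift = 13;                 // default build
  const std::size_t kPageSize  = 1 << kPageShift;    // 8192 bytes
  // Spans shorter than kMaxPages (1 MiB worth) get exact-size lists.
  const std::size_t kMaxPages  = 1 << (20 - kPageShift);  // 128 pages
  std::printf("page=%zu maxpages=%zu\n", kPageSize, kMaxPages);
}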
-#if defined(TCMALLOC_32K_PAGES) -static const size_t kPageShift = 15; -static const size_t kNumClasses = kBaseClasses + 69; -#elif defined(TCMALLOC_64K_PAGES) -static const size_t kPageShift = 16; -static const size_t kNumClasses = kBaseClasses + 73; -#else -static const size_t kPageShift = 13; -static const size_t kNumClasses = kBaseClasses + 79; -#endif - -static const size_t kMaxThreadCacheSize = 4 << 20; - -static const size_t kPageSize = 1 << kPageShift; -static const size_t kMaxSize = 256 * 1024; -static const size_t kAlignment = 8; -static const size_t kLargeSizeClass = 0; -// For all span-lengths < kMaxPages we keep an exact-size list. -static const size_t kMaxPages = 1 << (20 - kPageShift); - -// Default bound on the total amount of thread caches. -#ifdef TCMALLOC_SMALL_BUT_SLOW -// Make the overall thread cache no bigger than that of a single thread -// for the small memory footprint case. -static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize; -#else -static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; -#endif - -// Lower bound on the per-thread cache sizes -static const size_t kMinThreadCacheSize = kMaxSize * 2; - -// The number of bytes one ThreadCache will steal from another when -// the first ThreadCache is forced to Scavenge(), delaying the -// next call to Scavenge for this thread. -static const size_t kStealAmount = 1 << 16; - -// The number of times that a deallocation can cause a freelist to -// go over its max_length() before shrinking max_length(). -static const int kMaxOverages = 3; - -// Maximum length we allow a per-thread free-list to have before we -// move objects from it into the corresponding central free-list. We -// want this big to avoid locking the central free-list too often. It -// should not hurt to make this list somewhat big because the -// scavenging code will shrink it down when its contents are not in use. -static const int kMaxDynamicFreeListLength = 8192; - -static const Length kMaxValidPages = (~static_cast(0)) >> kPageShift; - -#if defined __x86_64__ -// All current and planned x86_64 processors only look at the lower 48 bits -// in virtual to physical address translation. The top 16 are thus unused. -// TODO(rus): Under what operating systems can we increase it safely to 17? -// This lets us use smaller page maps. On first allocation, a 36-bit page map -// uses only 96 KB instead of the 4.5 MB used by a 52-bit page map. -static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48); -#else -static const int kAddressBits = 8 * sizeof(void*); -#endif - -namespace tcmalloc { - -// Convert byte size into pages. This won't overflow, but may return -// an unreasonably large value if bytes is huge enough. -inline Length pages(size_t bytes) { - return (bytes >> kPageShift) + - ((bytes & (kPageSize - 1)) > 0 ? 1 : 0); -} - -// For larger allocation sizes, we use larger memory alignments to -// reduce the number of size classes. -int AlignmentForSize(size_t size); - -// Size-class information + mapping -class SizeMap { - private: - // Number of objects to move between a per-thread list and a central - // list in one shot. We want this to be not too small so we can - // amortize the lock overhead for accessing the central list. Making - // it too big may temporarily cause unnecessary memory wastage in the - // per-thread free list until the scavenger cleans up the list. 
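Editor's note: the num_objects_to_move_ array declared just below is filled by NumMoveSize() in common.cc (shown earlier): roughly 64 KiB worth of objects per transfer, clamped to the range [2, FLAGS_tcmalloc_transfer_num_objects]. A hedged standalone sketch of that rule (batch_size is a hypothetical stand-in):

#include <cstddef>
#include <cstdio>

static int batch_size(std::size_t object_size, int cap = 512) {
  if (object_size == 0) return 0;
  // Aim for about 64 KiB per thread-cache/central-cache transfer.
  int num = static_cast<int>(64.0 * 1024.0 / object_size);
  if (num < 2) num = 2;      // always move at least a couple of objects
  if (num > cap) num = cap;  // flag-controlled upper bound (512 by default)
  return num;
}

int main() {
  // 512 (capped), 64, 2 (floor) for 8 B, 1 KiB and 256 KiB objects.
  std::printf("%d %d %d\n",
              batch_size(8), batch_size(1024), batch_size(256 * 1024));
}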
-  int num_objects_to_move_[kNumClasses];
-
-  //-------------------------------------------------------------------
-  // Mapping from size to size_class and vice versa
-  //-------------------------------------------------------------------
-
-  // Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
-  // array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
-  // So for these larger sizes we have an array indexed by ceil(size/128).
-  //
-  // We flatten both logical arrays into one physical array and use
-  // arithmetic to compute an appropriate index.  The constants used by
-  // ClassIndex() were selected to make the flattening work.
-  //
-  // Examples:
-  //   Size       Expression                      Index
-  //   -------------------------------------------------------
-  //   0          (0 + 7) / 8                     0
-  //   1          (1 + 7) / 8                     1
-  //   ...
-  //   1024       (1024 + 7) / 8                  128
-  //   1025       (1025 + 127 + (120<<7)) / 128   129
-  //   ...
-  //   32768      (32768 + 127 + (120<<7)) / 128  376
-  static const int kMaxSmallSize = 1024;
-  static const size_t kClassArraySize =
-      ((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
-  unsigned char class_array_[kClassArraySize];
-
-  static inline size_t SmallSizeClass(size_t s) {
-    return (static_cast<uint32_t>(s) + 7) >> 3;
-  }
-
-  static inline size_t LargeSizeClass(size_t s) {
-    return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
-  }
-
-  // Compute index of the class_array[] entry for a given size
-  static inline size_t ClassIndex(size_t s) {
-    // Use unsigned arithmetic to avoid unnecessary sign extensions.
-    ASSERT(0 <= s);
-    ASSERT(s <= kMaxSize);
-    if (LIKELY(s <= kMaxSmallSize)) {
-      return SmallSizeClass(s);
-    } else {
-      return LargeSizeClass(s);
-    }
-  }
-
-  int NumMoveSize(size_t size);
-
-  // Mapping from size class to max size storable in that class
-  size_t class_to_size_[kNumClasses];
-
-  // Mapping from size class to number of pages to allocate at a time
-  size_t class_to_pages_[kNumClasses];
-
- public:
-  // Constructor should do nothing since we rely on explicit Init()
-  // call, which may or may not be called before the constructor runs.
-  SizeMap() { }
-
-  // Initialize the mapping arrays
-  void Init();
-
-  inline int SizeClass(size_t size) {
-    return class_array_[ClassIndex(size)];
-  }
-
-  inline bool MaybeSizeClass(size_t size, size_t *size_class) {
-    size_t class_idx;
-    if (LIKELY(size <= kMaxSmallSize)) {
-      class_idx = SmallSizeClass(size);
-    } else if (size <= kMaxSize) {
-      class_idx = LargeSizeClass(size);
-    } else {
-      return false;
-    }
-    *size_class = class_array_[class_idx];
-    return true;
-  }
-
-  // Get the byte-size for a specified class
-  inline size_t ByteSizeForClass(size_t cl) {
-    return class_to_size_[cl];
-  }
-
-  // Mapping from size class to max size storable in that class
-  inline size_t class_to_size(size_t cl) {
-    return class_to_size_[cl];
-  }
-
-  // Mapping from size class to number of pages to allocate at a time
-  inline size_t class_to_pages(size_t cl) {
-    return class_to_pages_[cl];
-  }
-
-  // Number of objects to move between a per-thread list and a central
-  // list in one shot.  We want this to be not too small so we can
-  // amortize the lock overhead for accessing the central list.  Making
-  // it too big may temporarily cause unnecessary memory wastage in the
-  // per-thread free list until the scavenger cleans up the list.
-  inline int num_objects_to_move(size_t cl) {
-    return num_objects_to_move_[cl];
-  }
-};
-
-// Allocates "bytes" worth of memory and returns it.  Increments
-// metadata_system_bytes appropriately.  May return NULL if allocation
-// fails.
Requires pageheap_lock is held. -void* MetaDataAlloc(size_t bytes); - -// Returns the total number of bytes allocated from the system. -// Requires pageheap_lock is held. -uint64_t metadata_system_bytes(); - -// size/depth are made the same size as a pointer so that some generic -// code below can conveniently cast them back and forth to void*. -static const int kMaxStackDepth = 31; -struct StackTrace { - uintptr_t size; // Size of object - uintptr_t depth; // Number of PC values stored in array below - void* stack[kMaxStackDepth]; -}; - -} // namespace tcmalloc - -#endif // TCMALLOC_COMMON_H_ diff --git a/contrib/libtcmalloc/src/config.h b/contrib/libtcmalloc/src/config.h deleted file mode 100644 index 9f9a7a259e5..00000000000 --- a/contrib/libtcmalloc/src/config.h +++ /dev/null @@ -1,323 +0,0 @@ -/* src/config.h. Generated from config.h.in by configure. */ -/* src/config.h.in. Generated from configure.ac by autoheader. */ - - -#ifndef GPERFTOOLS_CONFIG_H_ -#define GPERFTOOLS_CONFIG_H_ - - -/* Build runtime detection for sized delete */ -/* #undef ENABLE_DYNAMIC_SIZED_DELETE */ - -/* Build sized deletion operators */ -/* #undef ENABLE_SIZED_DELETE */ - -/* Define to 1 if compiler supports __builtin_expect */ -#if _MSC_VER -#define HAVE_BUILTIN_EXPECT 0 -#else -#define HAVE_BUILTIN_EXPECT 1 -#endif - -/* Define to 1 if compiler supports __builtin_stack_pointer */ -/* #undef HAVE_BUILTIN_STACK_POINTER */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_CONFLICT_SIGNAL_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_CYGWIN_SIGNAL_H */ - -/* Define to 1 if you have the declaration of `backtrace', and to 0 if you - don't. */ -/* #undef HAVE_DECL_BACKTRACE */ - -/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. - */ -#define HAVE_DECL_CFREE 1 - -/* Define to 1 if you have the declaration of `memalign', and to 0 if you - don't. */ -#define HAVE_DECL_MEMALIGN 1 - -/* Define to 1 if you have the declaration of `nanosleep', and to 0 if you - don't. */ -/* #undef HAVE_DECL_NANOSLEEP */ - -/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if - you don't. */ -#define HAVE_DECL_POSIX_MEMALIGN 1 - -/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you - don't. */ -#define HAVE_DECL_PVALLOC 1 - -/* Define to 1 if you have the declaration of `sleep', and to 0 if you don't. - */ -/* #undef HAVE_DECL_SLEEP */ - -/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. - */ -#define HAVE_DECL_UNAME 1 - -/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. - */ -#define HAVE_DECL_VALLOC 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if the system has the type `Elf32_Versym'. */ -#define HAVE_ELF32_VERSYM 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_EXECINFO_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_FCNTL_H 1 - -/* Define to 1 if you have the header file. */ -#if !defined(__APPLE__) && !defined(__FreeBSD__) -#define HAVE_FEATURES_H 1 -#endif - -/* Define to 1 if you have the `fork' function. */ -#define HAVE_FORK 1 - -/* Define to 1 if you have the `geteuid' function. */ -#define HAVE_GETEUID 1 - -/* Define to 1 if you have the `getpagesize' function. */ -#define HAVE_GETPAGESIZE 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_GLOB_H 1 - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_GRP_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -//#define HAVE_LIBUNWIND_H 1 - -/* Define to 1 if you have the header file. */ -#if !defined(__APPLE__) && !defined(__FreeBSD__) -#define HAVE_LINUX_PTRACE_H 1 -#endif - -/* Define if this is Linux that has SIGEV_THREAD_ID */ -#define HAVE_LINUX_SIGEV_THREAD_ID 1 - -/* Define to 1 if you have the header file. */ -#if !defined(__FreeBSD__) -#define HAVE_MALLOC_H 1 -#endif - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have a working `mmap' system call. */ -#define HAVE_MMAP 1 - -/* define if the compiler implements namespaces */ -#define HAVE_NAMESPACES 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_POLL_H 1 - -/* define if libc has program_invocation_name */ -#if !defined(__APPLE__) && !defined(__FreeBSD__) -#define HAVE_PROGRAM_INVOCATION_NAME 1 -#endif - -/* Define if you have POSIX threads libraries and header files. */ -#define HAVE_PTHREAD 1 - -/* defined to 1 if pthread symbols are exposed even without include pthread.h - */ -/* #undef HAVE_PTHREAD_DESPITE_ASKING_FOR */ - -/* Define to 1 if you have the header file. */ -#define HAVE_PWD_H 1 - -/* Define to 1 if you have the `sbrk' function. */ -#define HAVE_SBRK 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SCHED_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if the system has the type `struct mallinfo'. */ -//#if !defined(__APPLE__) && !defined(__FreeBSD__) -#if !defined(__APPLE__) -#define HAVE_STRUCT_MALLINFO 1 -#endif - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_CDEFS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_PARAM_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_PRCTL_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_RESOURCE_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_SOCKET_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_SYSCALL_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_SYS_UCONTEXT_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_WAIT_H 1 - -/* Define to 1 if compiler supports __thread */ -#define HAVE_TLS 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_UCONTEXT_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Whether contains _Unwind_Backtrace */ -#define HAVE_UNWIND_BACKTRACE 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNWIND_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_VALGRIND_H */ - -/* define if your compiler has __attribute__ */ -#define HAVE___ATTRIBUTE__ 1 - -/* Define to 1 if compiler supports __environ */ -#if !defined(__APPLE__) && !defined(__FreeBSD__) -#define HAVE___ENVIRON 1 -#endif - -/* Define to 1 if the system has the type `__int64'. 
*/ -/* #undef HAVE___INT64 */ - -/* prefix where we look for installed files */ -#define INSTALL_PREFIX "/usr/local" - -/* Define to 1 if int32_t is equivalent to intptr_t */ -/* #undef INT32_EQUALS_INTPTR */ - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* Name of package */ -#define PACKAGE "gperftools" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "gperftools@googlegroups.com" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "gperftools" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "gperftools 2.5" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "gperftools" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "2.5" - -/* How to access the PC from a struct ucontext */ -/* #undef PC_FROM_UCONTEXT */ - -/* Always the empty-string on non-windows systems. On windows, should be - "__declspec(dllexport)". This way, when we compile the dll, we export our - functions/classes. It's safe to define this here because config.h is only - used internally, to compile the DLL, and every DLL source file #includes - "config.h" before anything else. */ -#define PERFTOOLS_DLL_DECL /**/ - -/* printf format code for printing a size_t and ssize_t */ -#define PRIdS "ld" - -/* printf format code for printing a size_t and ssize_t */ -#define PRIuS "lu" - -/* printf format code for printing a size_t and ssize_t */ -#define PRIxS "lx" - -/* Mark the systems where we know it's bad if pthreads runs too - early before main (before threads are initialized, presumably). */ -#ifdef __FreeBSD__ -#define PTHREADS_CRASHES_IF_RUN_TOO_EARLY 1 -#endif - -/* Define to necessary symbol if this constant uses a non-standard name on - your system. */ -/* #undef PTHREAD_CREATE_JOINABLE */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* the namespace where STL code like vector<> is defined */ -#define STL_NAMESPACE std - -/* Define 32K of internal pages size for tcmalloc */ -/* #undef TCMALLOC_32K_PAGES */ - -/* Define 64K of internal pages size for tcmalloc */ -/* #undef TCMALLOC_64K_PAGES */ - -/* Define 8 bytes of allocation alignment for tcmalloc */ -/* #undef TCMALLOC_ALIGN_8BYTES */ - -/* Version number of package */ -#define VERSION "2.5" - -/* C99 says: define this to get the PRI... macros from stdint.h */ -#ifndef __STDC_FORMAT_MACROS -# define __STDC_FORMAT_MACROS 1 -#endif - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -/* #undef inline */ -#endif - - -#ifdef __MINGW32__ -#include "windows/mingw.h" -#endif - -#endif /* #ifndef GPERFTOOLS_CONFIG_H_ */ - diff --git a/contrib/libtcmalloc/src/debugallocation.cc b/contrib/libtcmalloc/src/debugallocation.cc deleted file mode 100644 index 178809bc8a3..00000000000 --- a/contrib/libtcmalloc/src/debugallocation.cc +++ /dev/null @@ -1,1500 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2000, Google Inc. -// All rights reserved. 
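Editor's note: the PRIdS/PRIuS/PRIxS macros in the config.h above fill a gap in C99's <inttypes.h>, which provides PRI* codes for the fixed-width types but not for size_t/ssize_t. A standalone illustration; the "lu" value matches the LP64 definition above, and the cast keeps the sketch portable to other models:

#include <cstddef>
#include <cstdio>

#define PRIuS "lu"  // as defined in the config.h above for 64-bit Linux

int main() {
  std::size_t n = 42;
  std::printf("n = %" PRIuS "\n", static_cast<unsigned long>(n));
}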
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Urs Holzle <opensource@google.com>
-
-#include "config.h"
-#include <errno.h>
-#ifdef HAVE_FCNTL_H
-#include <fcntl.h>
-#endif
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
-// We only need malloc.h for struct mallinfo.
-#ifdef HAVE_STRUCT_MALLINFO
-// Malloc can be in several places on older versions of OS X.
-# if defined(HAVE_MALLOC_H)
-# include <malloc.h>
-# elif defined(HAVE_MALLOC_MALLOC_H)
-# include <malloc/malloc.h>
-# elif defined(HAVE_SYS_MALLOC_H)
-# include <sys/malloc.h>
-# endif
-#endif
-#ifdef HAVE_PTHREAD
-#include <pthread.h>
-#endif
-#include <stdarg.h>
-#include <stdio.h>
-#include <string.h>
-#ifdef HAVE_MMAP
-#include <sys/mman.h>
-#endif
-#include <sys/types.h>
-#include <sys/stat.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#include <gperftools/malloc_extension.h>
-#include <gperftools/malloc_hook.h>
-#include <gperftools/stacktrace.h>
-#include "addressmap-inl.h"
-#include "base/commandlineflags.h"
-#include "base/googleinit.h"
-#include "base/logging.h"
-#include "base/spinlock.h"
-#include "malloc_hook-inl.h"
-#include "symbolize.h"
-
-// NOTE: due to #define below, tcmalloc.cc will omit tc_XXX
-// definitions.  So that debug implementations can be defined
-// instead.  We're going to use do_malloc, do_free and other do_XXX
-// functions that are defined in tcmalloc.cc for actual memory
-// management
-#define TCMALLOC_USING_DEBUGALLOCATION
-#include "tcmalloc.cc"
-
-// __THROW is defined in glibc systems.  It means, counter-intuitively,
-// "This function will never throw an exception."  It's an optional
-// optimization tool, but we may need to use it to match glibc prototypes.
-#ifndef __THROW    // I guess we're not on a glibc system
-# define __THROW   // __THROW is just an optimization, so ok to make it ""
-#endif
-
-// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old
-// form of the name instead.
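Editor's note: the #ifndef that follows is the usual portability shim; older BSDs spell the flag MAP_ANON. A minimal standalone sketch of an anonymous private mapping that compiles with either spelling:

#include <sys/mman.h>
#include <cstdio>

#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON  // older BSD spelling of the same flag
#endif

int main() {
  void* p = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) { std::perror("mmap"); return 1; }
  munmap(p, 4096);
  return 0;
}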
-#ifndef MAP_ANONYMOUS -# define MAP_ANONYMOUS MAP_ANON -#endif - -#pragma GCC diagnostic push -#ifdef __clang__ -#pragma GCC diagnostic ignored "-Wunused-private-field" -#pragma GCC diagnostic ignored "-Wgnu-alignof-expression" -#endif - -// ========================================================================= // - -DEFINE_bool(malloctrace, - EnvToBool("TCMALLOC_TRACE", false), - "Enables memory (de)allocation tracing to /tmp/google.alloc."); -#ifdef HAVE_MMAP -DEFINE_bool(malloc_page_fence, - EnvToBool("TCMALLOC_PAGE_FENCE", false), - "Enables putting of memory allocations at page boundaries " - "with a guard page following the allocation (to catch buffer " - "overruns right when they happen)."); -DEFINE_bool(malloc_page_fence_never_reclaim, - EnvToBool("TCMALLOC_PAGE_FRANCE_NEVER_RECLAIM", false), - "Enables making the virtual address space inaccessible " - "upon a deallocation instead of returning it and reusing later."); -#else -DEFINE_bool(malloc_page_fence, false, "Not usable (requires mmap)"); -DEFINE_bool(malloc_page_fence_never_reclaim, false, "Not usable (required mmap)"); -#endif -DEFINE_bool(malloc_reclaim_memory, - EnvToBool("TCMALLOC_RECLAIM_MEMORY", true), - "If set to false, we never return memory to malloc " - "when an object is deallocated. This ensures that all " - "heap object addresses are unique."); -DEFINE_int32(max_free_queue_size, - EnvToInt("TCMALLOC_MAX_FREE_QUEUE_SIZE", 10*1024*1024), - "If greater than 0, keep freed blocks in a queue instead of " - "releasing them to the allocator immediately. Release them when " - "the total size of all blocks in the queue would otherwise exceed " - "this limit."); - -DEFINE_bool(symbolize_stacktrace, - EnvToBool("TCMALLOC_SYMBOLIZE_STACKTRACE", true), - "Symbolize the stack trace when provided (on some error exits)"); - -// If we are LD_PRELOAD-ed against a non-pthreads app, then -// pthread_once won't be defined. We declare it here, for that -// case (with weak linkage) which will cause the non-definition to -// resolve to NULL. We can then check for NULL or not in Instance. -extern "C" int pthread_once(pthread_once_t *, void (*)(void)) - ATTRIBUTE_WEAK; - -// ========================================================================= // - -// A safe version of printf() that does not do any allocation and -// uses very little stack space. -static void TracePrintf(int fd, const char *fmt, ...) - __attribute__ ((__format__ (__printf__, 2, 3))); - -// Round "value" up to next "alignment" boundary. -// Requires that "alignment" be a power of two. -static intptr_t RoundUp(intptr_t value, intptr_t alignment) { - return (value + alignment - 1) & ~(alignment - 1); -} - -// ========================================================================= // - -class MallocBlock; - -// A circular buffer to hold freed blocks of memory. MallocBlock::Deallocate -// (below) pushes blocks into this queue instead of returning them to the -// underlying allocator immediately. See MallocBlock::Deallocate for more -// information. -// -// We can't use an STL class for this because we need to be careful not to -// perform any heap de-allocations in any of the code in this class, since the -// code in MallocBlock::Deallocate is not re-entrant. 
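Editor's note: the FreeQueue template below is a fixed-capacity ring buffer. With front/back indices taken modulo kFreeQueueSize, "full" is defined as (front + 1) % N == back, so one slot is sacrificed to distinguish full from empty and at most kFreeQueueSize - 1 blocks are ever queued. A tiny sketch of that invariant:

#include <cassert>

int main() {
  const int N = 4;          // stand-in for kFreeQueueSize
  int front = 0, back = 0;  // q_front_ / q_back_
  auto full = [&] { return (front + 1) % N == back; };
  auto size = [&] { return (front - back + N) % N; };

  int pushes = 0;
  while (!full()) { front = (front + 1) % N; ++pushes; }
  assert(pushes == N - 1 && size() == N - 1);  // one slot sacrificed
}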
-template -class FreeQueue { - public: - FreeQueue() : q_front_(0), q_back_(0) {} - - bool Full() { - return (q_front_ + 1) % kFreeQueueSize == q_back_; - } - - void Push(const QueueEntry& block) { - q_[q_front_] = block; - q_front_ = (q_front_ + 1) % kFreeQueueSize; - } - - QueueEntry Pop() { - RAW_CHECK(q_back_ != q_front_, "Queue is empty"); - const QueueEntry& ret = q_[q_back_]; - q_back_ = (q_back_ + 1) % kFreeQueueSize; - return ret; - } - - size_t size() const { - return (q_front_ - q_back_ + kFreeQueueSize) % kFreeQueueSize; - } - - private: - // Maximum number of blocks kept in the free queue before being freed. - static const int kFreeQueueSize = 1024; - - QueueEntry q_[kFreeQueueSize]; - int q_front_; - int q_back_; -}; - -struct MallocBlockQueueEntry { - MallocBlockQueueEntry() : block(NULL), size(0), - num_deleter_pcs(0), deleter_threadid(0) {} - MallocBlockQueueEntry(MallocBlock* b, size_t s) : block(b), size(s) { - if (FLAGS_max_free_queue_size != 0 && b != NULL) { - // Adjust the number of frames to skip (4) if you change the - // location of this call. - num_deleter_pcs = - GetStackTrace(deleter_pcs, - sizeof(deleter_pcs) / sizeof(deleter_pcs[0]), - 4); - deleter_threadid = pthread_self(); - } else { - num_deleter_pcs = 0; - // Zero is an illegal pthread id by my reading of the pthread - // implementation: - deleter_threadid = 0; - } - } - - MallocBlock* block; - size_t size; - - // When deleted and put in the free queue, we (flag-controlled) - // record the stack so that if corruption is later found, we can - // print the deleter's stack. (These three vars add 144 bytes of - // overhead under the LP64 data model.) - void* deleter_pcs[16]; - int num_deleter_pcs; - pthread_t deleter_threadid; -}; - -class MallocBlock { - public: // allocation type constants - - // Different allocation types we distinguish. - // Note: The lower 4 bits are not random: we index kAllocName array - // by these values masked with kAllocTypeMask; - // the rest are "random" magic bits to help catch memory corruption. - static const int kMallocType = 0xEFCDAB90; - static const int kNewType = 0xFEBADC81; - static const int kArrayNewType = 0xBCEADF72; - - private: // constants - - // A mask used on alloc types above to get to 0, 1, 2 - static const int kAllocTypeMask = 0x3; - // An additional bit to set in AllocType constants - // to mark now deallocated regions. - static const int kDeallocatedTypeBit = 0x4; - - // For better memory debugging, we initialize all storage to known - // values, and overwrite the storage when it's deallocated: - // Byte that fills uninitialized storage. - static const int kMagicUninitializedByte = 0xAB; - // Byte that fills deallocated storage. - // NOTE: tcmalloc.cc depends on the value of kMagicDeletedByte - // to work around a bug in the pthread library. - static const int kMagicDeletedByte = 0xCD; - // A size_t (type of alloc_type_ below) in a deallocated storage - // filled with kMagicDeletedByte. - static const size_t kMagicDeletedSizeT = - 0xCDCDCDCD | (((size_t)0xCDCDCDCD << 16) << 16); - // Initializer works for 32 and 64 bit size_ts; - // "<< 16 << 16" is to fool gcc from issuing a warning - // when size_ts are 32 bits. - - // NOTE: on Linux, you can enable malloc debugging support in libc by - // setting the environment variable MALLOC_CHECK_ to 1 before you - // start the program (see man malloc). - - // We use either do_malloc or mmap to make the actual allocation. 
In - // order to remember which one of the two was used for any block, we store an - // appropriate magic word next to the block. - static const size_t kMagicMalloc = 0xDEADBEEF; - static const size_t kMagicMMap = 0xABCDEFAB; - - // This array will be filled with 0xCD, for use with memcmp. - static unsigned char kMagicDeletedBuffer[1024]; - static pthread_once_t deleted_buffer_initialized_; - static bool deleted_buffer_initialized_no_pthreads_; - - private: // data layout - - // The four fields size1_,offset_,magic1_,alloc_type_ - // should together occupy a multiple of 16 bytes. (At the - // moment, sizeof(size_t) == 4 or 8 depending on piii vs - // k8, and 4 of those sum to 16 or 32 bytes). - // This, combined with do_malloc's alignment guarantees, - // ensures that SSE types can be stored into the returned - // block, at &size2_. - size_t size1_; - size_t offset_; // normally 0 unless memaligned memory - // see comments in memalign() and FromRawPointer(). - size_t magic1_; - size_t alloc_type_; - // here comes the actual data (variable length) - // ... - // then come the size2_ and magic2_, or a full page of mprotect-ed memory - // if the malloc_page_fence feature is enabled. - size_t size2_; - size_t magic2_; - - private: // static data and helpers - - // Allocation map: stores the allocation type for each allocated object, - // or the type or'ed with kDeallocatedTypeBit - // for each formerly allocated object. - typedef AddressMap AllocMap; - static AllocMap* alloc_map_; - // This protects alloc_map_ and consistent state of metadata - // for each still-allocated object in it. - // We use spin locks instead of pthread_mutex_t locks - // to prevent crashes via calls to pthread_mutex_(un)lock - // for the (de)allocations coming from pthreads initialization itself. - static SpinLock alloc_map_lock_; - - // A queue of freed blocks. Instead of releasing blocks to the allocator - // immediately, we put them in a queue, freeing them only when necessary - // to keep the total size of all the freed blocks below the limit set by - // FLAGS_max_free_queue_size. - static FreeQueue* free_queue_; - - static size_t free_queue_size_; // total size of blocks in free_queue_ - // protects free_queue_ and free_queue_size_ - static SpinLock free_queue_lock_; - - // Names of allocation types (kMallocType, kNewType, kArrayNewType) - static const char* const kAllocName[]; - // Names of corresponding deallocation types - static const char* const kDeallocName[]; - - static const char* AllocName(int type) { - return kAllocName[type & kAllocTypeMask]; - } - - static const char* DeallocName(int type) { - return kDeallocName[type & kAllocTypeMask]; - } - - private: // helper accessors - - bool IsMMapped() const { return kMagicMMap == magic1_; } - - bool IsValidMagicValue(size_t value) const { - return kMagicMMap == value || kMagicMalloc == value; - } - - static size_t real_malloced_size(size_t size) { - return size + sizeof(MallocBlock); - } - - /* - * Here we assume size of page is kMinAlign aligned, - * so if size is MALLOC_ALIGNMENT aligned too, then we could - * guarantee return address is also kMinAlign aligned, because - * mmap return address at nearby page boundary on Linux. - */ - static size_t real_mmapped_size(size_t size) { - size_t tmp = size + MallocBlock::data_offset(); - tmp = RoundUp(tmp, kMinAlign); - return tmp; - } - - size_t real_size() { - return IsMMapped() ? 
real_mmapped_size(size1_) : real_malloced_size(size1_); - } - - // NOTE: if the block is mmapped (that is, we're using the - // malloc_page_fence option) then there's no size2 or magic2 - // (instead, the guard page begins where size2 would be). - - size_t* size2_addr() { return (size_t*)((char*)&size2_ + size1_); } - const size_t* size2_addr() const { - return (const size_t*)((char*)&size2_ + size1_); - } - - size_t* magic2_addr() { return (size_t*)(size2_addr() + 1); } - const size_t* magic2_addr() const { return (const size_t*)(size2_addr() + 1); } - - private: // other helpers - - void Initialize(size_t size, int type) { - RAW_CHECK(IsValidMagicValue(magic1_), ""); - // record us as allocated in the map - alloc_map_lock_.Lock(); - if (!alloc_map_) { - void* p = do_malloc(sizeof(AllocMap)); - alloc_map_ = new(p) AllocMap(do_malloc, do_free); - } - alloc_map_->Insert(data_addr(), type); - // initialize us - size1_ = size; - offset_ = 0; - alloc_type_ = type; - if (!IsMMapped()) { - bit_store(magic2_addr(), &magic1_); - bit_store(size2_addr(), &size); - } - alloc_map_lock_.Unlock(); - memset(data_addr(), kMagicUninitializedByte, size); - if (!IsMMapped()) { - RAW_CHECK(memcmp(&size1_, size2_addr(), sizeof(size1_)) == 0, "should hold"); - RAW_CHECK(memcmp(&magic1_, magic2_addr(), sizeof(magic1_)) == 0, "should hold"); - } - } - - size_t CheckAndClear(int type, size_t given_size) { - alloc_map_lock_.Lock(); - CheckLocked(type); - if (!IsMMapped()) { - RAW_CHECK(memcmp(&size1_, size2_addr(), sizeof(size1_)) == 0, "should hold"); - } - // record us as deallocated in the map - alloc_map_->Insert(data_addr(), type | kDeallocatedTypeBit); - alloc_map_lock_.Unlock(); - // clear us - const size_t size = real_size(); - RAW_CHECK(!given_size || given_size == size1_, - "right size must be passed to sized delete"); - memset(this, kMagicDeletedByte, size); - return size; - } - - void CheckLocked(int type) const { - int map_type = 0; - const int* found_type = - alloc_map_ != NULL ? 
alloc_map_->Find(data_addr()) : NULL; - if (found_type == NULL) { - RAW_LOG(FATAL, "memory allocation bug: object at %p " - "has never been allocated", data_addr()); - } else { - map_type = *found_type; - } - if ((map_type & kDeallocatedTypeBit) != 0) { - RAW_LOG(FATAL, "memory allocation bug: object at %p " - "has been already deallocated (it was allocated with %s)", - data_addr(), AllocName(map_type & ~kDeallocatedTypeBit)); - } - if (alloc_type_ == kMagicDeletedSizeT) { - RAW_LOG(FATAL, "memory stomping bug: a word before object at %p " - "has been corrupted; or else the object has been already " - "deallocated and our memory map has been corrupted", - data_addr()); - } - if (!IsValidMagicValue(magic1_)) { - RAW_LOG(FATAL, "memory stomping bug: a word before object at %p " - "has been corrupted; " - "or else our memory map has been corrupted and this is a " - "deallocation for not (currently) heap-allocated object", - data_addr()); - } - if (!IsMMapped()) { - if (memcmp(&size1_, size2_addr(), sizeof(size1_))) { - RAW_LOG(FATAL, "memory stomping bug: a word after object at %p " - "has been corrupted", data_addr()); - } - size_t addr; - bit_store(&addr, magic2_addr()); - if (!IsValidMagicValue(addr)) { - RAW_LOG(FATAL, "memory stomping bug: a word after object at %p " - "has been corrupted", data_addr()); - } - } - if (alloc_type_ != type) { - if ((alloc_type_ != MallocBlock::kMallocType) && - (alloc_type_ != MallocBlock::kNewType) && - (alloc_type_ != MallocBlock::kArrayNewType)) { - RAW_LOG(FATAL, "memory stomping bug: a word before object at %p " - "has been corrupted", data_addr()); - } - RAW_LOG(FATAL, "memory allocation/deallocation mismatch at %p: " - "allocated with %s being deallocated with %s", - data_addr(), AllocName(alloc_type_), DeallocName(type)); - } - if (alloc_type_ != map_type) { - RAW_LOG(FATAL, "memory stomping bug: our memory map has been corrupted : " - "allocation at %p made with %s " - "is recorded in the map to be made with %s", - data_addr(), AllocName(alloc_type_), AllocName(map_type)); - } - } - - public: // public accessors - - void* data_addr() { return (void*)&size2_; } - const void* data_addr() const { return (const void*)&size2_; } - - static size_t data_offset() { return OFFSETOF_MEMBER(MallocBlock, size2_); } - - size_t data_size() const { return size1_; } - - void set_offset(int offset) { this->offset_ = offset; } - - public: // our main interface - - static MallocBlock* Allocate(size_t size, int type) { - // Prevent an integer overflow / crash with large allocation sizes. - // TODO - Note that for a e.g. 64-bit size_t, max_size_t may not actually - // be the maximum value, depending on how the compiler treats ~0. The worst - // practical effect is that allocations are limited to 4Gb or so, even if - // the address space could take more. - static size_t max_size_t = ~0; - if (size > max_size_t - sizeof(MallocBlock)) { - RAW_LOG(ERROR, "Massive size passed to malloc: %" PRIuS "", size); - return NULL; - } - MallocBlock* b = NULL; - const bool use_malloc_page_fence = FLAGS_malloc_page_fence; -#ifdef HAVE_MMAP - if (use_malloc_page_fence) { - // Put the block towards the end of the page and make the next page - // inaccessible. This will catch buffer overrun right when it happens. 
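Editor's note: the mmap branch below implements the page fence. It maps enough whole pages for the block plus one extra page, mprotects that last page, and places the block flush against it so a buffer overrun faults on the very first byte past the end. A hedged sketch of the placement arithmetic (the constants are illustrative, not from the file):

#include <cassert>

int main() {
  const long pagesize  = 4096;   // stand-in for getpagesize()
  const long sz        = 1000;   // stand-in for real_mmapped_size(size)
  const long num_pages = (sz + pagesize - 1) / pagesize + 1;  // data + guard
  const long base      = 0x10000;                             // pretend mmap() result
  const long guard     = base + (num_pages - 1) * pagesize;   // mprotect'ed page
  const long block     = guard - sz;                          // where the block lands
  assert(num_pages == 2);
  assert(block + sz == guard);   // an overrun hits the guard page immediately
}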
- size_t sz = real_mmapped_size(size); - int pagesize = getpagesize(); - int num_pages = (sz + pagesize - 1) / pagesize + 1; - char* p = (char*) mmap(NULL, num_pages * pagesize, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (p == MAP_FAILED) { - // If the allocation fails, abort rather than returning NULL to - // malloc. This is because in most cases, the program will run out - // of memory in this mode due to tremendous amount of wastage. There - // is no point in propagating the error elsewhere. - RAW_LOG(FATAL, "Out of memory: possibly due to page fence overhead: %s", - strerror(errno)); - } - // Mark the page after the block inaccessible - if (mprotect(p + (num_pages - 1) * pagesize, pagesize, PROT_NONE)) { - RAW_LOG(FATAL, "Guard page setup failed: %s", strerror(errno)); - } - b = (MallocBlock*) (p + (num_pages - 1) * pagesize - sz); - } else { - b = (MallocBlock*) do_malloc(real_malloced_size(size)); - } -#else - b = (MallocBlock*) do_malloc(real_malloced_size(size)); -#endif - - // It would be nice to output a diagnostic on allocation failure - // here, but logging (other than FATAL) requires allocating - // memory, which could trigger a nasty recursion. Instead, preserve - // malloc semantics and return NULL on failure. - if (b != NULL) { - b->magic1_ = use_malloc_page_fence ? kMagicMMap : kMagicMalloc; - b->Initialize(size, type); - } - return b; - } - - void Deallocate(int type, size_t given_size) { - if (IsMMapped()) { // have to do this before CheckAndClear -#ifdef HAVE_MMAP - int size = CheckAndClear(type, given_size); - int pagesize = getpagesize(); - int num_pages = (size + pagesize - 1) / pagesize + 1; - char* p = (char*) this; - if (FLAGS_malloc_page_fence_never_reclaim || - !FLAGS_malloc_reclaim_memory) { - mprotect(p - (num_pages - 1) * pagesize + size, - num_pages * pagesize, PROT_NONE); - } else { - munmap(p - (num_pages - 1) * pagesize + size, num_pages * pagesize); - } -#endif - } else { - const size_t size = CheckAndClear(type, given_size); - if (FLAGS_malloc_reclaim_memory) { - // Instead of freeing the block immediately, push it onto a queue of - // recently freed blocks. Free only enough blocks to keep from - // exceeding the capacity of the queue or causing the total amount of - // un-released memory in the queue from exceeding - // FLAGS_max_free_queue_size. - ProcessFreeQueue(this, size, FLAGS_max_free_queue_size); - } - } - } - - static size_t FreeQueueSize() { - SpinLockHolder l(&free_queue_lock_); - return free_queue_size_; - } - - static void ProcessFreeQueue(MallocBlock* b, size_t size, - int max_free_queue_size) { - // MallocBlockQueueEntry are about 144 in size, so we can only - // use a small array of them on the stack. - MallocBlockQueueEntry entries[4]; - int num_entries = 0; - MallocBlockQueueEntry new_entry(b, size); - free_queue_lock_.Lock(); - if (free_queue_ == NULL) - free_queue_ = new FreeQueue; - RAW_CHECK(!free_queue_->Full(), "Free queue mustn't be full!"); - - if (b != NULL) { - free_queue_size_ += size + sizeof(MallocBlockQueueEntry); - free_queue_->Push(new_entry); - } - - // Free blocks until the total size of unfreed blocks no longer exceeds - // max_free_queue_size, and the free queue has at least one free - // space in it. 
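Editor's note: the loop that follows drains the queue in small batches. Entries are popped into an on-stack array while free_queue_lock_ is held, and the actual do_free calls happen only after the lock is released, because freeing is slow and may re-enter the allocator. A distilled sketch of the pattern, with std::mutex standing in for the SpinLock and ints standing in for queue entries:

#include <mutex>
#include <vector>

static std::mutex q_lock;
static std::vector<int> queue_;  // stand-in for free_queue_

static void drain(std::size_t keep) {
  int batch[4];                  // like the entries[4] array on the stack below
  std::size_t n = 0;
  std::unique_lock<std::mutex> l(q_lock);
  while (queue_.size() > keep) {
    batch[n++] = queue_.back();
    queue_.pop_back();
    if (n == 4) {                // batch full: do the slow work unlocked
      l.unlock();
      n = 0;                     // (the real code calls do_free on each entry here)
      l.lock();
    }
  }
  l.unlock();                    // any final partial batch is freed here too
}

int main() {
  for (int i = 0; i < 100; ++i) queue_.push_back(i);
  drain(10);
  return queue_.size() == 10 ? 0 : 1;
}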
- while (free_queue_size_ > max_free_queue_size || free_queue_->Full()) { - RAW_CHECK(num_entries < arraysize(entries), "entries array overflow"); - entries[num_entries] = free_queue_->Pop(); - free_queue_size_ -= - entries[num_entries].size + sizeof(MallocBlockQueueEntry); - num_entries++; - if (num_entries == arraysize(entries)) { - // The queue will not be full at this point, so it is ok to - // release the lock. The queue may still contain more than - // max_free_queue_size, but this is not a strict invariant. - free_queue_lock_.Unlock(); - for (int i = 0; i < num_entries; i++) { - CheckForDanglingWrites(entries[i]); - do_free(entries[i].block); - } - num_entries = 0; - free_queue_lock_.Lock(); - } - } - RAW_CHECK(free_queue_size_ >= 0, "Free queue size went negative!"); - free_queue_lock_.Unlock(); - for (int i = 0; i < num_entries; i++) { - CheckForDanglingWrites(entries[i]); - do_free(entries[i].block); - } - } - - static void InitDeletedBuffer() { - memset(kMagicDeletedBuffer, kMagicDeletedByte, sizeof(kMagicDeletedBuffer)); - deleted_buffer_initialized_no_pthreads_ = true; - } - - static void CheckForDanglingWrites(const MallocBlockQueueEntry& queue_entry) { - // Initialize the buffer if necessary. - if (pthread_once) - pthread_once(&deleted_buffer_initialized_, &InitDeletedBuffer); - if (!deleted_buffer_initialized_no_pthreads_) { - // This will be the case on systems that don't link in pthreads, - // including on FreeBSD where pthread_once has a non-zero address - // (but doesn't do anything) even when pthreads isn't linked in. - InitDeletedBuffer(); - } - - const unsigned char* p = - reinterpret_cast(queue_entry.block); - - static const size_t size_of_buffer = sizeof(kMagicDeletedBuffer); - const size_t size = queue_entry.size; - const size_t buffers = size / size_of_buffer; - const size_t remainder = size % size_of_buffer; - size_t buffer_idx; - for (buffer_idx = 0; buffer_idx < buffers; ++buffer_idx) { - CheckForCorruptedBuffer(queue_entry, buffer_idx, p, size_of_buffer); - p += size_of_buffer; - } - CheckForCorruptedBuffer(queue_entry, buffer_idx, p, remainder); - } - - static void CheckForCorruptedBuffer(const MallocBlockQueueEntry& queue_entry, - size_t buffer_idx, - const unsigned char* buffer, - size_t size_of_buffer) { - if (memcmp(buffer, kMagicDeletedBuffer, size_of_buffer) == 0) { - return; - } - - RAW_LOG(ERROR, - "Found a corrupted memory buffer in MallocBlock (may be offset " - "from user ptr): buffer index: %zd, buffer ptr: %p, size of " - "buffer: %zd", buffer_idx, buffer, size_of_buffer); - - // The magic deleted buffer should only be 1024 bytes, but in case - // this changes, let's put an upper limit on the number of debug - // lines we'll output: - if (size_of_buffer <= 1024) { - for (int i = 0; i < size_of_buffer; ++i) { - if (buffer[i] != kMagicDeletedByte) { - RAW_LOG(ERROR, "Buffer byte %d is 0x%02x (should be 0x%02x).", - i, buffer[i], kMagicDeletedByte); - } - } - } else { - RAW_LOG(ERROR, "Buffer too large to print corruption."); - } - - const MallocBlock* b = queue_entry.block; - const size_t size = queue_entry.size; - if (queue_entry.num_deleter_pcs > 0) { - TracePrintf(STDERR_FILENO, "Deleted by thread %p\n", - reinterpret_cast( - PRINTABLE_PTHREAD(queue_entry.deleter_threadid))); - - // We don't want to allocate or deallocate memory here, so we use - // placement-new. It's ok that we don't destroy this, since we're - // just going to error-exit below anyway. Union is for alignment. 
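Editor's note: the union-plus-placement-new idiom used just below is the pre-C++11 way to get suitably aligned automatic storage without touching the heap (today alignas would do the same job). A standalone sketch with a hypothetical Table type:

#include <new>

struct Table { int entries[8]; };

int main() {
  // The void* member forces pointer alignment on buf; placement-new then
  // constructs the object in that storage with no allocation at all.
  union { void* alignment; char buf[sizeof(Table)]; } storage;
  Table* t = new (storage.buf) Table();
  t->entries[0] = 1;
  // Deliberately never destroyed, mirroring the error-exit path below.
  return t->entries[0] - 1;
}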
- union { void* alignment; char buf[sizeof(SymbolTable)]; } tablebuf; - SymbolTable* symbolization_table = new (tablebuf.buf) SymbolTable; - for (int i = 0; i < queue_entry.num_deleter_pcs; i++) { - // Symbolizes the previous address of pc because pc may be in the - // next function. This may happen when the function ends with - // a call to a function annotated noreturn (e.g. CHECK). - char *pc = reinterpret_cast(queue_entry.deleter_pcs[i]); - symbolization_table->Add(pc - 1); - } - if (FLAGS_symbolize_stacktrace) - symbolization_table->Symbolize(); - for (int i = 0; i < queue_entry.num_deleter_pcs; i++) { - char *pc = reinterpret_cast(queue_entry.deleter_pcs[i]); - TracePrintf(STDERR_FILENO, " @ %p %s\n", - pc, symbolization_table->GetSymbol(pc - 1)); - } - } else { - RAW_LOG(ERROR, - "Skipping the printing of the deleter's stack! Its stack was " - "not found; either the corruption occurred too early in " - "execution to obtain a stack trace or --max_free_queue_size was " - "set to 0."); - } - - RAW_LOG(FATAL, - "Memory was written to after being freed. MallocBlock: %p, user " - "ptr: %p, size: %zd. If you can't find the source of the error, " - "try using ASan (http://code.google.com/p/address-sanitizer/), " - "Valgrind, or Purify, or study the " - "output of the deleter's stack printed above.", - b, b->data_addr(), size); - } - - static MallocBlock* FromRawPointer(void* p) { - const size_t data_offset = MallocBlock::data_offset(); - // Find the header just before client's memory. - MallocBlock *mb = reinterpret_cast( - reinterpret_cast(p) - data_offset); - // If mb->alloc_type_ is kMagicDeletedSizeT, we're not an ok pointer. - if (mb->alloc_type_ == kMagicDeletedSizeT) { - RAW_LOG(FATAL, "memory allocation bug: object at %p has been already" - " deallocated; or else a word before the object has been" - " corrupted (memory stomping bug)", p); - } - // If mb->offset_ is zero (common case), mb is the real header. - // If mb->offset_ is non-zero, this block was allocated by debug - // memallign implementation, and mb->offset_ is the distance - // backwards to the real header from mb, which is a fake header. - if (mb->offset_ == 0) { - return mb; - } - - MallocBlock *main_block = reinterpret_cast( - reinterpret_cast(mb) - mb->offset_); - - if (main_block->offset_ != 0) { - RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted." - " Need 0 but got %x", - (unsigned)(main_block->offset_)); - } - if (main_block >= p) { - RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted." - " Detected main_block address overflow: %x", - (unsigned)(mb->offset_)); - } - if (main_block->size2_addr() < p) { - RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted." 
- " It points below it's own main_block: %x", - (unsigned)(mb->offset_)); - } - - return main_block; - } - - static const MallocBlock* FromRawPointer(const void* p) { - // const-safe version: we just cast about - return FromRawPointer(const_cast(p)); - } - - void Check(int type) const { - alloc_map_lock_.Lock(); - CheckLocked(type); - alloc_map_lock_.Unlock(); - } - - static bool CheckEverything() { - alloc_map_lock_.Lock(); - if (alloc_map_ != NULL) alloc_map_->Iterate(CheckCallback, 0); - alloc_map_lock_.Unlock(); - return true; // if we get here, we're okay - } - - static bool MemoryStats(int* blocks, size_t* total, - int histogram[kMallocHistogramSize]) { - memset(histogram, 0, kMallocHistogramSize * sizeof(int)); - alloc_map_lock_.Lock(); - stats_blocks_ = 0; - stats_total_ = 0; - stats_histogram_ = histogram; - if (alloc_map_ != NULL) alloc_map_->Iterate(StatsCallback, 0); - *blocks = stats_blocks_; - *total = stats_total_; - alloc_map_lock_.Unlock(); - return true; - } - - private: // helpers for CheckEverything and MemoryStats - - static void CheckCallback(const void* ptr, int* type, int dummy) { - if ((*type & kDeallocatedTypeBit) == 0) { - FromRawPointer(ptr)->CheckLocked(*type); - } - } - - // Accumulation variables for StatsCallback protected by alloc_map_lock_ - static int stats_blocks_; - static size_t stats_total_; - static int* stats_histogram_; - - static void StatsCallback(const void* ptr, int* type, int dummy) { - if ((*type & kDeallocatedTypeBit) == 0) { - const MallocBlock* b = FromRawPointer(ptr); - b->CheckLocked(*type); - ++stats_blocks_; - size_t mysize = b->size1_; - int entry = 0; - stats_total_ += mysize; - while (mysize) { - ++entry; - mysize >>= 1; - } - RAW_CHECK(entry < kMallocHistogramSize, - "kMallocHistogramSize should be at least as large as log2 " - "of the maximum process memory size"); - stats_histogram_[entry] += 1; - } - } -}; - -void DanglingWriteChecker() { - // Clear out the remaining free queue to check for dangling writes. - MallocBlock::ProcessFreeQueue(NULL, 0, 0); -} - -// ========================================================================= // - -const size_t MallocBlock::kMagicMalloc; -const size_t MallocBlock::kMagicMMap; - -MallocBlock::AllocMap* MallocBlock::alloc_map_ = NULL; -SpinLock MallocBlock::alloc_map_lock_(SpinLock::LINKER_INITIALIZED); - -FreeQueue* MallocBlock::free_queue_ = NULL; -size_t MallocBlock::free_queue_size_ = 0; -SpinLock MallocBlock::free_queue_lock_(SpinLock::LINKER_INITIALIZED); - -unsigned char MallocBlock::kMagicDeletedBuffer[1024]; -pthread_once_t MallocBlock::deleted_buffer_initialized_ = PTHREAD_ONCE_INIT; -bool MallocBlock::deleted_buffer_initialized_no_pthreads_ = false; - -const char* const MallocBlock::kAllocName[] = { - "malloc", - "new", - "new []", - NULL, -}; - -const char* const MallocBlock::kDeallocName[] = { - "free", - "delete", - "delete []", - NULL, -}; - -int MallocBlock::stats_blocks_; -size_t MallocBlock::stats_total_; -int* MallocBlock::stats_histogram_; - -// ========================================================================= // - -// The following cut-down version of printf() avoids -// using stdio or ostreams. -// This is to guarantee no recursive calls into -// the allocator and to bound the stack space consumed. (The pthread -// manager thread in linuxthreads has a very small stack, -// so fprintf can't be called.) -static void TracePrintf(int fd, const char *fmt, ...) 
-{
-  char buf[64];
-  int i = 0;
-  va_list ap;
-  va_start(ap, fmt);
-  const char *p = fmt;
-  char numbuf[25];
-  if (fd < 0) {
-    return;
-  }
-  numbuf[sizeof(numbuf)-1] = 0;
-  while (*p != '\0') {                 // until end of format string
-    char *s = &numbuf[sizeof(numbuf)-1];
-    if (p[0] == '%' && p[1] != 0) {    // handle % formats
-      int64 l = 0;
-      unsigned long base = 0;
-      if (*++p == 's') {                            // %s
-        s = va_arg(ap, char *);
-      } else if (*p == 'l' && p[1] == 'd') {        // %ld
-        l = va_arg(ap, long);
-        base = 10;
-        p++;
-      } else if (*p == 'l' && p[1] == 'u') {        // %lu
-        l = va_arg(ap, unsigned long);
-        base = 10;
-        p++;
-      } else if (*p == 'z' && p[1] == 'u') {        // %zu
-        l = va_arg(ap, size_t);
-        base = 10;
-        p++;
-      } else if (*p == 'u') {                       // %u
-        l = va_arg(ap, unsigned int);
-        base = 10;
-      } else if (*p == 'd') {                       // %d
-        l = va_arg(ap, int);
-        base = 10;
-      } else if (*p == 'p') {                       // %p
-        l = va_arg(ap, intptr_t);
-        base = 16;
-      } else {
-        write(STDERR_FILENO, "Unimplemented TracePrintf format\n", 33);
-        write(STDERR_FILENO, p, 2);
-        write(STDERR_FILENO, "\n", 1);
-        abort();
-      }
-      p++;
-      if (base != 0) {
-        bool minus = (l < 0 && base == 10);
-        uint64 ul = minus? -l : l;
-        do {
-          *--s = "0123456789abcdef"[ul % base];
-          ul /= base;
-        } while (ul != 0);
-        if (base == 16) {
-          *--s = 'x';
-          *--s = '0';
-        } else if (minus) {
-          *--s = '-';
-        }
-      }
-    } else {                           // handle normal characters
-      *--s = *p++;
-    }
-    while (*s != 0) {
-      if (i == sizeof(buf)) {
-        write(fd, buf, i);
-        i = 0;
-      }
-      buf[i++] = *s++;
-    }
-  }
-  if (i != 0) {
-    write(fd, buf, i);
-  }
-  va_end(ap);
-}
-
-// Return the file descriptor we're writing a log to
-static int TraceFd() {
-  static int trace_fd = -1;
-  if (trace_fd == -1) {            // Open the trace file on the first call
-    const char *val = getenv("TCMALLOC_TRACE_FILE");
-    bool fallback_to_stderr = false;
-    if (!val) {
-      val = "/tmp/google.alloc";
-      fallback_to_stderr = true;
-    }
-    trace_fd = open(val, O_CREAT|O_TRUNC|O_WRONLY, 0666);
-    if (trace_fd == -1) {
-      if (fallback_to_stderr) {
-        trace_fd = 2;
-        TracePrintf(trace_fd, "Can't open %s.  Logging to stderr.\n", val);
-      } else {
-        TracePrintf(2, "Can't open %s.  Logging disabled.\n", val);
-      }
-    }
-    // Add a header to the log.
-    TracePrintf(trace_fd, "Trace started: %lu\n",
-                static_cast<unsigned long>(time(NULL)));
-    TracePrintf(trace_fd,
-                "func\tsize\tptr\tthread_id\tstack pcs for tools/symbolize\n");
-  }
-  return trace_fd;
-}
-
-// Print the hex stack dump on a single line.  PCs are separated by tabs.
-static void TraceStack(void) {
-  void *pcs[16];
-  int n = GetStackTrace(pcs, sizeof(pcs)/sizeof(pcs[0]), 0);
-  for (int i = 0; i != n; i++) {
-    TracePrintf(TraceFd(), "\t%p", pcs[i]);
-  }
-}
-
-// This protects MALLOC_TRACE, to make sure its info is atomically written.
-static SpinLock malloc_trace_lock(SpinLock::LINKER_INITIALIZED);
-
-#define MALLOC_TRACE(name, size, addr)                                  \
-  do {                                                                  \
-    if (FLAGS_malloctrace) {                                            \
-      SpinLockHolder l(&malloc_trace_lock);                             \
-      TracePrintf(TraceFd(), "%s\t%" PRIuS "\t%p\t%" GPRIuPTHREAD,      \
-                  name, size, addr, PRINTABLE_PTHREAD(pthread_self())); \
-      TraceStack();                                                     \
-      TracePrintf(TraceFd(), "\n");                                     \
-    }                                                                   \
-  } while (0)
-
-// ========================================================================= //
-
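For context, the `__malloctrace_write` helper defined just below is intentionally not declared in any header, so a caller must write the declaration by hand, exactly as its comment says. A hypothetical sketch of such a caller (only the declaration line is dictated by the definition; `WriteTraceMarker` and the marker text are made up, and writes only happen when the malloctrace flag is on):

    #include <stddef.h>

    // Hand-written declaration; must match the definition in debugallocation.cc.
    void __malloctrace_write(const char* buf, size_t size);

    // Hypothetical helper: drops a marker line into the malloc trace log.
    static void WriteTraceMarker() {
      static const char kMarker[] = "=== checkpoint ===\n";
      __malloctrace_write(kMarker, sizeof(kMarker) - 1);
    }
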
-// Write the characters buf[0, ..., size-1] to
-// the malloc trace buffer.
-// This function is intended for debugging,
-// and is not declared in any header file.
-// You must insert a declaration of it by hand when you need
-// to use it.
-void __malloctrace_write(const char *buf, size_t size) {
-  if (FLAGS_malloctrace) {
-    write(TraceFd(), buf, size);
-  }
-}
-
-// ========================================================================= //
-
-// General debug allocation/deallocation
-
-static inline void* DebugAllocate(size_t size, int type) {
-  MallocBlock* ptr = MallocBlock::Allocate(size, type);
-  if (ptr == NULL)  return NULL;
-  MALLOC_TRACE("malloc", size, ptr->data_addr());
-  return ptr->data_addr();
-}
-
-static inline void DebugDeallocate(void* ptr, int type, size_t given_size) {
-  MALLOC_TRACE("free",
-               (ptr != 0 ? MallocBlock::FromRawPointer(ptr)->data_size() : 0),
-               ptr);
-  if (ptr)  MallocBlock::FromRawPointer(ptr)->Deallocate(type, given_size);
-}
-
-// ========================================================================= //
-
-// The following functions may be called via MallocExtension::instance()
-// for memory verification and statistics.
-class DebugMallocImplementation : public TCMallocImplementation {
- public:
-  virtual bool GetNumericProperty(const char* name, size_t* value) {
-    bool result = TCMallocImplementation::GetNumericProperty(name, value);
-    if (result && (strcmp(name, "generic.current_allocated_bytes") == 0)) {
-      // Subtract bytes kept in the free queue
-      size_t qsize = MallocBlock::FreeQueueSize();
-      if (*value >= qsize) {
-        *value -= qsize;
-      }
-    }
-    return result;
-  }
-
-  virtual bool VerifyNewMemory(const void* p) {
-    if (p)  MallocBlock::FromRawPointer(p)->Check(MallocBlock::kNewType);
-    return true;
-  }
-
-  virtual bool VerifyArrayNewMemory(const void* p) {
-    if (p)  MallocBlock::FromRawPointer(p)->Check(MallocBlock::kArrayNewType);
-    return true;
-  }
-
-  virtual bool VerifyMallocMemory(const void* p) {
-    if (p)  MallocBlock::FromRawPointer(p)->Check(MallocBlock::kMallocType);
-    return true;
-  }
-
-  virtual bool VerifyAllMemory() {
-    return MallocBlock::CheckEverything();
-  }
-
-  virtual bool MallocMemoryStats(int* blocks, size_t* total,
-                                 int histogram[kMallocHistogramSize]) {
-    return MallocBlock::MemoryStats(blocks, total, histogram);
-  }
-
-  virtual size_t GetEstimatedAllocatedSize(size_t size) {
-    return size;
-  }
-
-  virtual size_t GetAllocatedSize(const void* p) {
-    if (p) {
-      RAW_CHECK(GetOwnership(p) != MallocExtension::kNotOwned,
-                "ptr not allocated by tcmalloc");
-      return MallocBlock::FromRawPointer(p)->data_size();
-    }
-    return 0;
-  }
-
-  virtual MallocExtension::Ownership GetOwnership(const void* p) {
-    if (!p) {
-      // nobody owns NULL
-      return MallocExtension::kNotOwned;
-    }
-
-    // FIXME: note that correct GetOwnership should not touch memory
-    // that is not owned by tcmalloc. Main implementation is using
-    // pagemap to discover if page in question is owned by us or
-    // not. But pagemap only has marks for first and last page of
-    // spans.  Note that if p was returned out of our memalign with
-    // big alignment, then it will point outside of marked pages. Also
-    // note that FromRawPointer call below requires touching memory
-    // before pointer in order to handle memalign-ed chunks
-    // (offset_). This leaves us with two options:
-    //
-    // * do FromRawPointer first and have possibility of crashing if
-    // we're given not owned pointer
-    //
-    // * return incorrect ownership for those large memalign chunks
-    //
-    // I've decided to choose the latter, which appears to happen more
-    // rarely and is therefore arguably the lesser evil
-
-    MallocExtension::Ownership rv = TCMallocImplementation::GetOwnership(p);
-    if (rv != MallocExtension::kOwned) {
-      return rv;
-    }
-
-    const MallocBlock* mb = MallocBlock::FromRawPointer(p);
-    return TCMallocImplementation::GetOwnership(mb);
-  }
-
-  virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) {
-    static const char* kDebugFreeQueue = "debug.free_queue";
-
-    TCMallocImplementation::GetFreeListSizes(v);
-
-    MallocExtension::FreeListInfo i;
-    i.type = kDebugFreeQueue;
-    i.min_object_size = 0;
-    i.max_object_size = numeric_limits<size_t>::max();
-    i.total_bytes_free = MallocBlock::FreeQueueSize();
-    v->push_back(i);
-  }
-
-};
-
-static union {
-  char chars[sizeof(DebugMallocImplementation)];
-  void *ptr;
-} debug_malloc_implementation_space;
-
-REGISTER_MODULE_INITIALIZER(debugallocation, {
-#if (__cplusplus >= 201103L)
-  COMPILE_ASSERT(alignof(debug_malloc_implementation_space) >= alignof(DebugMallocImplementation),
-                 debug_malloc_implementation_space_is_not_properly_aligned);
-#endif
-  // Either we or valgrind will control memory management.  We
-  // register our extension if we're the winner. Otherwise let
-  // Valgrind use its own malloc (so don't register our extension).
-  if (!RunningOnValgrind()) {
-    DebugMallocImplementation *impl = new (debug_malloc_implementation_space.chars) DebugMallocImplementation();
-    MallocExtension::Register(impl);
-  }
-});
-
-REGISTER_MODULE_DESTRUCTOR(debugallocation, {
-  if (!RunningOnValgrind()) {
-    // When the program exits, check all blocks still in the free
-    // queue for corruption.
-    DanglingWriteChecker();
-  }
-});
-
-// ========================================================================= //
-
-struct debug_alloc_retry_data {
-  size_t size;
-  int new_type;
-};
-
-static void *retry_debug_allocate(void *arg) {
-  debug_alloc_retry_data *data = static_cast<debug_alloc_retry_data *>(arg);
-  return DebugAllocate(data->size, data->new_type);
-}
-
-// This is mostly the same as cpp_alloc in tcmalloc.cc.
-// TODO(csilvers): change Allocate() above to call cpp_alloc, so we
-// don't have to reproduce the logic here.  To make tc_new_mode work
-// properly, I think we'll need to separate out the logic of throwing
-// from the logic of calling the new-handler.
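The `debug_cpp_alloc` path below funnels allocation failures through `handle_oom` together with a retry callback. `handle_oom` itself lives elsewhere in tcmalloc; as a rough sketch of the general technique only (assumed behavior, not tcmalloc's actual implementation), such an OOM path keeps invoking the installed C++ new-handler between retries:

    #include <new>

    // Simplified new-handler retry loop: after a failed attempt, run the
    // installed handler, which must free memory, install another handler,
    // or throw; give up only when no handler is installed.
    static void* RetryThroughNewHandler(void* (*retry)(void*), void* arg,
                                        bool nothrow) {
      for (;;) {
        void* p = retry(arg);
        if (p != NULL) return p;
        std::new_handler h = std::get_new_handler();  // C++11
        if (h == NULL) {
          if (nothrow) return NULL;
          throw std::bad_alloc();
        }
        h();
      }
    }
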
-inline void* debug_cpp_alloc(size_t size, int new_type, bool nothrow) {
-  void* p = DebugAllocate(size, new_type);
-  if (p != NULL) {
-    return p;
-  }
-  struct debug_alloc_retry_data data;
-  data.size = size;
-  data.new_type = new_type;
-  return handle_oom(retry_debug_allocate, &data,
-                    true, nothrow);
-}
-
-inline void* do_debug_malloc_or_debug_cpp_alloc(size_t size) {
-  void* p = DebugAllocate(size, MallocBlock::kMallocType);
-  if (p != NULL) {
-    return p;
-  }
-  struct debug_alloc_retry_data data;
-  data.size = size;
-  data.new_type = MallocBlock::kMallocType;
-  return handle_oom(retry_debug_allocate, &data,
-                    false, true);
-}
-
-// Exported routines
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW {
-  if (ThreadCache::IsUseEmergencyMalloc()) {
-    return tcmalloc::EmergencyMalloc(size);
-  }
-  void* ptr = do_debug_malloc_or_debug_cpp_alloc(size);
-  MallocHook::InvokeNewHook(ptr, size);
-  return ptr;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW {
-  if (tcmalloc::IsEmergencyPtr(ptr)) {
-    return tcmalloc::EmergencyFree(ptr);
-  }
-  MallocHook::InvokeDeleteHook(ptr);
-  DebugDeallocate(ptr, MallocBlock::kMallocType, 0);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(ptr);
-  DebugDeallocate(ptr, MallocBlock::kMallocType, size);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t count, size_t size) PERFTOOLS_THROW {
-  if (ThreadCache::IsUseEmergencyMalloc()) {
-    return tcmalloc::EmergencyCalloc(count, size);
-  }
-  // Overflow check
-  const size_t total_size = count * size;
-  if (size != 0 && total_size / size != count) return NULL;
-
-  void* block = do_debug_malloc_or_debug_cpp_alloc(total_size);
-  MallocHook::InvokeNewHook(block, total_size);
-  if (block)  memset(block, 0, total_size);
-  return block;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW {
-  if (tcmalloc::IsEmergencyPtr(ptr)) {
-    return tcmalloc::EmergencyFree(ptr);
-  }
-  MallocHook::InvokeDeleteHook(ptr);
-  DebugDeallocate(ptr, MallocBlock::kMallocType, 0);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW {
-  if (tcmalloc::IsEmergencyPtr(ptr)) {
-    return tcmalloc::EmergencyRealloc(ptr, size);
-  }
-  if (ptr == NULL) {
-    ptr = do_debug_malloc_or_debug_cpp_alloc(size);
-    MallocHook::InvokeNewHook(ptr, size);
-    return ptr;
-  }
-  if (size == 0) {
-    MallocHook::InvokeDeleteHook(ptr);
-    DebugDeallocate(ptr, MallocBlock::kMallocType, 0);
-    return NULL;
-  }
-  MallocBlock* old = MallocBlock::FromRawPointer(ptr);
-  old->Check(MallocBlock::kMallocType);
-  MallocBlock* p = MallocBlock::Allocate(size, MallocBlock::kMallocType);
-
-  // If realloc fails we are to leave the old block untouched and
-  // return null
-  if (p == NULL)  return NULL;
-
-  // if ptr was allocated via memalign, then old->data_size() is not
-  // start of user data. So we must be careful to copy only user-data
-  char *old_begin = (char *)old->data_addr();
-  char *old_end = old_begin + old->data_size();
-
-  ssize_t old_ssize = old_end - (char *)ptr;
-  CHECK_CONDITION(old_ssize >= 0);
-
-  size_t old_size = (size_t)old_ssize;
-  CHECK_CONDITION(old_size <= old->data_size());
-
-  memcpy(p->data_addr(), ptr, (old_size < size) ? old_size : size);
-  MallocHook::InvokeDeleteHook(ptr);
-  MallocHook::InvokeNewHook(p->data_addr(), size);
-  DebugDeallocate(ptr, MallocBlock::kMallocType, 0);
-  MALLOC_TRACE("realloc", p->data_size(), p->data_addr());
-  return p->data_addr();
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) {
-  void* ptr = debug_cpp_alloc(size, MallocBlock::kNewType, false);
-  MallocHook::InvokeNewHook(ptr, size);
-  if (ptr == NULL) {
-    RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes: new failed.", size);
-  }
-  return ptr;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW {
-  void* ptr = debug_cpp_alloc(size, MallocBlock::kNewType, true);
-  MallocHook::InvokeNewHook(ptr, size);
-  return ptr;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kNewType, 0);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) throw() {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kNewType, size);
-}
-
-// Some STL implementations explicitly invoke this.
-// It is completely equivalent to a normal delete (delete never throws).
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kNewType, 0);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) {
-  void* ptr = debug_cpp_alloc(size, MallocBlock::kArrayNewType, false);
-  MallocHook::InvokeNewHook(ptr, size);
-  if (ptr == NULL) {
-    RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes: new[] failed.", size);
-  }
-  return ptr;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&)
-    PERFTOOLS_THROW {
-  void* ptr = debug_cpp_alloc(size, MallocBlock::kArrayNewType, true);
-  MallocHook::InvokeNewHook(ptr, size);
-  return ptr;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kArrayNewType, 0);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) throw() {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kArrayNewType, size);
-}
-
-// Some STL implementations explicitly invoke this.
-// It is completely equivalent to a normal delete (delete never throws).
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kArrayNewType, 0);
-}
-
-// This is mostly the same as do_memalign in tcmalloc.cc.
-static void *do_debug_memalign(size_t alignment, size_t size) {
-  // Allocate >= size bytes aligned on "alignment" boundary
-  // "alignment" is a power of two.
-  void *p = 0;
-  RAW_CHECK((alignment & (alignment-1)) == 0, "must be power of two");
-  const size_t data_offset = MallocBlock::data_offset();
-  // Allocate "alignment-1" extra bytes to ensure alignment is possible, and
-  // a further data_offset bytes for an additional fake header.
-  size_t extra_bytes = data_offset + alignment - 1;
-  if (size + extra_bytes < size) return NULL;         // Overflow
-  p = DebugAllocate(size + extra_bytes, MallocBlock::kMallocType);
-  if (p != 0) {
-    intptr_t orig_p = reinterpret_cast<intptr_t>(p);
-    // Leave data_offset bytes for fake header, and round up to meet
-    // alignment.
-    p = reinterpret_cast<void *>(RoundUp(orig_p + data_offset, alignment));
-    // Create a fake header block with an offset_ that points back to the
-    // real header.  FromRawPointer uses this value.
-    MallocBlock *fake_hdr = reinterpret_cast<MallocBlock *>(
-        reinterpret_cast<char *>(p) - data_offset);
-    // offset_ is distance between real and fake headers.
-    // p is now end of fake header (beginning of client area),
-    // and orig_p is the end of the real header, so offset_
-    // is their difference.
-    //
-    // Note that other fields of fake_hdr are initialized with
-    // kMagicUninitializedByte
-    fake_hdr->set_offset(reinterpret_cast<intptr_t>(p) - orig_p);
-  }
-  return p;
-}
-
-struct memalign_retry_data {
-  size_t align;
-  size_t size;
-};
-
-static void *retry_debug_memalign(void *arg) {
-  memalign_retry_data *data = static_cast<memalign_retry_data *>(arg);
-  return do_debug_memalign(data->align, data->size);
-}
-
-inline void* do_debug_memalign_or_debug_cpp_memalign(size_t align,
-                                                     size_t size) {
-  void* p = do_debug_memalign(align, size);
-  if (p != NULL) {
-    return p;
-  }
-
-  struct memalign_retry_data data;
-  data.align = align;
-  data.size = size;
-  return handle_oom(retry_debug_memalign, &data,
-                    false, true);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align, size_t size) PERFTOOLS_THROW {
-  void *p = do_debug_memalign_or_debug_cpp_memalign(align, size);
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-
-// Implementation taken from tcmalloc/tcmalloc.cc
-extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign(void** result_ptr, size_t align, size_t size)
-    PERFTOOLS_THROW {
-  if (((align % sizeof(void*)) != 0) ||
-      ((align & (align - 1)) != 0) ||
-      (align == 0)) {
-    return EINVAL;
-  }
-
-  void* result = do_debug_memalign_or_debug_cpp_memalign(align, size);
-  MallocHook::InvokeNewHook(result, size);
-  if (result == NULL) {
-    return ENOMEM;
-  } else {
-    *result_ptr = result;
-    return 0;
-  }
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) PERFTOOLS_THROW {
-  // Allocate >= size bytes starting on a page boundary
-  void *p = do_debug_memalign_or_debug_cpp_memalign(getpagesize(), size);
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) PERFTOOLS_THROW {
-  // Round size up to a multiple of pages
-  // then allocate memory on a page boundary
-  int pagesize = getpagesize();
-  size = RoundUp(size, pagesize);
-  if (size == 0) {    // pvalloc(0) should allocate one page, according to
-    size = pagesize;  // http://man.free4web.biz/man3/libmpatrol.3.html
-  }
-  void *p = do_debug_memalign_or_debug_cpp_memalign(pagesize, size);
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-
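The fake-header arithmetic in `do_debug_memalign` above is easiest to see with concrete pointer math. A toy illustration using plain `malloc` and a hypothetical one-field header (the real header is a full `MallocBlock`; `ToyHeader` and `ToyMemalign` are made-up names):

    #include <stdint.h>
    #include <stdlib.h>

    struct ToyHeader { size_t offset; };  // stand-in for the fake MallocBlock

    // Over-allocate by header size plus (alignment - 1), round the user
    // pointer up to the alignment boundary, and record just before the
    // user data how far back the real allocation starts.
    static void* ToyMemalign(size_t alignment, size_t size) {
      const size_t hdr = sizeof(struct ToyHeader);      // assumes alignment is a power of two
      void* raw = malloc(size + hdr + alignment - 1);
      if (raw == NULL) return NULL;
      uintptr_t orig = (uintptr_t)raw;
      uintptr_t user = (orig + hdr + alignment - 1) & ~(uintptr_t)(alignment - 1);
      ((struct ToyHeader*)(user - hdr))->offset = user - orig;  // distance to real start
      return (void*)user;
    }

Freeing mirrors `FromRawPointer` above: read `offset` from just before the user pointer and step back to the original allocation.
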
-extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW { - do_malloc_stats(); -} - -extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW { - return do_mallopt(cmd, value); -} - -#ifdef HAVE_STRUCT_MALLINFO -extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW { - return do_mallinfo(); -} -#endif - -extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW { - return MallocExtension::instance()->GetAllocatedSize(ptr); -} - -extern "C" PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW { - void* result = DebugAllocate(size, MallocBlock::kMallocType); - MallocHook::InvokeNewHook(result, size); - return result; -} - -#pragma GCC diagnostic pop diff --git a/contrib/libtcmalloc/src/getenv_safe.h b/contrib/libtcmalloc/src/getenv_safe.h deleted file mode 100644 index 3b9f4dbbcb2..00000000000 --- a/contrib/libtcmalloc/src/getenv_safe.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- - * Copyright (c) 2014, gperftools Contributors - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef GETENV_SAFE_H -#define GETENV_SAFE_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * This getenv function is safe to call before the C runtime is initialized. - * On Windows, it utilizes GetEnvironmentVariable() and on unix it uses - * /proc/self/environ instead calling getenv(). It's intended to be used in - * routines that run before main(), when the state required for getenv() may - * not be set up yet. In particular, errno isn't set up until relatively late - * (after the pthreads library has a chance to make it threadsafe), and - * getenv() doesn't work until then. - * On some platforms, this call will utilize the same, static buffer for - * repeated GetenvBeforeMain() calls. Callers should not expect pointers from - * this routine to be long lived. 
- * Note that on unix, /proc only has the environment at the time the
- * application was started, so this routine ignores setenv() calls/etc. Also
- * note it only reads the first 16K of the environment.
- *
- * NOTE: this is a version of GetenvBeforeMain that's usable from
- * C. Implementation is in sysinfo.cc
- */
-const char* TCMallocGetenvSafe(const char* name);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/contrib/libtcmalloc/src/getpc.h b/contrib/libtcmalloc/src/getpc.h
deleted file mode 100644
index 163873eabc6..00000000000
--- a/contrib/libtcmalloc/src/getpc.h
+++ /dev/null
@@ -1,192 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Craig Silverstein
-//
-// This is an internal header file used by profiler.cc.  It defines
-// the single (inline) function GetPC.  GetPC is used in a signal
-// handler to figure out the instruction that was being executed when
-// the signal-handler was triggered.
-//
-// To get this, we use the ucontext_t argument to the signal-handler
-// callback, which holds the full context of what was going on when
-// the signal triggered.  How to get from a ucontext_t to a Program
-// Counter is OS-dependent.
-
-#ifndef BASE_GETPC_H_
-#define BASE_GETPC_H_
-
-#include "config.h"
-
-// On many linux systems, we may need _GNU_SOURCE to get access to
-// the defined constants that define the register we want to see (eg
-// REG_EIP).  Note this #define must come first!
-#define _GNU_SOURCE 1
-// If #define _GNU_SOURCE causes problems, this might work instead.
-// It will cause problems for FreeBSD, though, because it turns off
-// the needed __BSD_VISIBLE.
-//#define _XOPEN_SOURCE 500
-
-#include <string.h>         // for memcmp
-#if defined(HAVE_SYS_UCONTEXT_H)
-#include <sys/ucontext.h>
-#elif defined(HAVE_UCONTEXT_H)
-#include <ucontext.h>       // for ucontext_t (and also mcontext_t)
-#elif defined(HAVE_CYGWIN_SIGNAL_H)
-#include <cygwin/signal.h>
-typedef ucontext ucontext_t;
-#endif
-
-
-// Take the example where function Foo() calls function Bar().  For
-// many architectures, Bar() is responsible for setting up and tearing
-// down its own stack frame.  In that case, it's possible for the
-// interrupt to happen when execution is in Bar(), but the stack frame
-// is not properly set up (either before it's done being set up, or
-// after it's been torn down but before Bar() returns).  In those
-// cases, the stack trace cannot see the caller function anymore.
-//
-// GetPC can try to identify this situation, on architectures where it
-// might occur, and unwind the current function call in that case to
-// avoid false edges in the profile graph (that is, edges that appear
-// to show a call skipping over a function).  To do this, we hard-code
-// in the asm instructions we might see when setting up or tearing
-// down a stack frame.
-//
-// This is difficult to get right: the instructions depend on the
-// processor, the compiler ABI, and even the optimization level.  This
-// is a best effort patch -- if we fail to detect such a situation, or
-// mess up the PC, nothing happens; the returned PC is not used for
-// any further processing.
-struct CallUnrollInfo {
-  // Offset from (e)ip register where this instruction sequence
-  // should be matched.  Interpreted as bytes.  Offset 0 is the next
-  // instruction to execute.  Be extra careful with negative offsets in
-  // architectures of variable instruction length (like x86) - it is
-  // not as easy as taking an offset to step one instruction back!
-  int pc_offset;
-  // The actual instruction bytes.  Feel free to make it larger if you
-  // need a longer sequence.
-  unsigned char ins[16];
-  // How many bytes to match from ins array?
-  int ins_size;
-  // The offset from the stack pointer (e)sp where to look for the
-  // call return address.  Interpreted as bytes.
-  int return_sp_offset;
-};
-
-
-// The dereferences needed to get the PC from a struct ucontext were
-// determined at configure time, and stored in the macro
-// PC_FROM_UCONTEXT in config.h.  The only thing we need to do here,
-// then, is to do the magic call-unrolling for systems that support it.
-
-// -- Special case 1: linux x86, for which we have CallUnrollInfo
-#if defined(__linux) && defined(__i386) && defined(__GNUC__)
-static const CallUnrollInfo callunrollinfo[] = {
-  // Entry to a function:  push %ebp;  mov %esp,%ebp
-  // Top-of-stack contains the caller IP.
-  { 0,
-    {0x55, 0x89, 0xe5}, 3,
-    0
-  },
-  // Entry to a function, second instruction:  push %ebp;  mov %esp,%ebp
-  // Top-of-stack contains the old frame, caller IP is +4.
-  { -1,
-    {0x55, 0x89, 0xe5}, 3,
-    4
-  },
-  // Return from a function: RET.
-  // Top-of-stack contains the caller IP.
-  { 0,
-    {0xc3}, 1,
-    0
-  }
-};
-
-inline void* GetPC(const ucontext_t& signal_ucontext) {
-  // See comment above struct CallUnrollInfo.  Only try instruction
-  // flow matching if both eip and esp look reasonable.
-  const int eip = signal_ucontext.uc_mcontext.gregs[REG_EIP];
-  const int esp = signal_ucontext.uc_mcontext.gregs[REG_ESP];
-  if ((eip & 0xffff0000) != 0 && (~eip & 0xffff0000) != 0 &&
-      (esp & 0xffff0000) != 0) {
-    char* eip_char = reinterpret_cast<char*>(eip);
-    for (int i = 0; i < sizeof(callunrollinfo)/sizeof(*callunrollinfo); ++i) {
-      if (!memcmp(eip_char + callunrollinfo[i].pc_offset,
-                  callunrollinfo[i].ins, callunrollinfo[i].ins_size)) {
-        // We have a match.
-        void **retaddr = (void**)(esp + callunrollinfo[i].return_sp_offset);
-        return *retaddr;
-      }
-    }
-  }
-  return (void*)eip;
-}
-
-// Special case #2: Windows, which has to do something totally different.
-#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__)
-// If this is ever implemented, probably the way to do it is to have
-// profiler.cc use a high-precision timer via timeSetEvent:
-//    http://msdn2.microsoft.com/en-us/library/ms712713.aspx
-// We'd use it in mode TIME_CALLBACK_FUNCTION/TIME_PERIODIC.
-// The callback function would be something like prof_handler, but
-// alas the arguments are different: no ucontext_t!  I don't know
-// how we'd get the PC (using StackWalk64?)
-//    http://msdn2.microsoft.com/en-us/library/ms680650.aspx
-
-#include "base/logging.h"   // for RAW_LOG
-#ifndef HAVE_CYGWIN_SIGNAL_H
-typedef int ucontext_t;
-#endif
-
-inline void* GetPC(const struct ucontext_t& signal_ucontext) {
-  RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n");
-  return NULL;
-}
-
-// Normal cases.  If this doesn't compile, it's probably because
-// PC_FROM_UCONTEXT is the empty string.  You need to figure out
-// the right value for your system, and add it to the list in
-// configure.ac (or set it manually in your config.h).
-#else
-inline void* GetPC(const ucontext_t& signal_ucontext) {
-#if defined(__s390__) && !defined(__s390x__)
-  // Mask out the AMODE31 bit from the PC recorded in the context.
-  return (void*)((unsigned long)signal_ucontext.PC_FROM_UCONTEXT & 0x7fffffffUL);
-#else
-  return (void*)signal_ucontext.PC_FROM_UCONTEXT;   // defined in config.h
-#endif
-}
-
-#endif
-
-#endif  // BASE_GETPC_H_
diff --git a/contrib/libtcmalloc/src/heap-checker-bcad.cc b/contrib/libtcmalloc/src/heap-checker-bcad.cc
deleted file mode 100644
index 00efdb7cfd4..00000000000
--- a/contrib/libtcmalloc/src/heap-checker-bcad.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// All Rights Reserved.
-//
-// Author: Maxim Lifantsev
-//
-// A file to ensure that components of heap leak checker run before
-// all global object constructors and after all global object
-// destructors.
-//
-// This file must be the last library any binary links against.
-// Otherwise, the heap checker may not be able to run early enough to
-// catalog all the global objects in your program.  If this happens,
-// and later in the program you allocate memory and have one of these
-// "uncataloged" global objects point to it, the heap checker will
-// consider that allocation to be a leak, even though it's not (since
-// the allocated object is reachable from global data and hence "live").
-
-#include <stdlib.h>      // for abort()
-#include <gperftools/malloc_extension.h>
-
-// A dummy variable to refer from heap-checker.cc.  This is to make
-// sure this file is not optimized out by the linker.
-bool heap_leak_checker_bcad_variable;
-
-extern void HeapLeakChecker_AfterDestructors();  // in heap-checker.cc
-
-// A helper class to ensure that some components of heap leak checking
-// can happen before construction and after destruction
-// of all global/static objects.
-class HeapLeakCheckerGlobalPrePost {
- public:
-  HeapLeakCheckerGlobalPrePost() {
-    if (count_ == 0) {
-      // The 'new int' will ensure that we have run an initial malloc
-      // hook, which will set up the heap checker via
-      // MallocHook_InitAtFirstAllocation_HeapLeakChecker.  See malloc_hook.cc.
-      // This is done in this roundabout fashion in order to avoid self-deadlock
-      // if we directly called HeapLeakChecker_BeforeConstructors here.
-      delete new int;
-      // This needs to be called before the first allocation of an STL
-      // object, but after libc is done setting up threads (because it
-      // calls setenv, which requires a thread-aware errno).  By
-      // putting it here, we hope it's the first bit of code executed
-      // after the libc global-constructor code.
-      MallocExtension::Initialize();
-    }
-    ++count_;
-  }
-  ~HeapLeakCheckerGlobalPrePost() {
-    if (count_ <= 0)  abort();
-    --count_;
-    if (count_ == 0)  HeapLeakChecker_AfterDestructors();
-  }
- private:
-  // Counter of constructions/destructions of objects of this class
-  // (just in case there are more than one of them).
-  static int count_;
-};
-
-int HeapLeakCheckerGlobalPrePost::count_ = 0;
-
-// The early-construction/late-destruction global object.
-static const HeapLeakCheckerGlobalPrePost heap_leak_checker_global_pre_post;
diff --git a/contrib/libtcmalloc/src/heap-checker.cc b/contrib/libtcmalloc/src/heap-checker.cc
deleted file mode 100644
index 9c82dea08e4..00000000000
--- a/contrib/libtcmalloc/src/heap-checker.cc
+++ /dev/null
@@ -1,2388 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// All Rights Reserved.
-//
-// Author: Maxim Lifantsev
-//
-
-#include "config.h"
-
-#include <fcntl.h>    // for O_RDONLY (we use syscall to do actual reads)
-#include <string.h>
-#include <errno.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_MMAP
-#include <sys/mman.h>
-#endif
-#ifdef HAVE_PTHREAD
-#include <pthread.h>
-#endif
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <time.h>
-#include <assert.h>
-
-#if defined(HAVE_LINUX_PTRACE_H)
-#include <linux/ptrace.h>
-#endif
-#ifdef HAVE_SYS_SYSCALL_H
-#include <sys/syscall.h>
-#endif
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__)
-#include <windows.h>
-#include <wtypes.h>
-#undef ERROR     // windows defines these as macros, which can cause trouble
-#undef max
-#undef min
-#endif
-
-#include <string>
-#include <vector>
-#include <map>
-#include <set>
-#include <algorithm>
-#include <functional>
-
-#include <gperftools/heap-checker.h>
-
-#include "base/basictypes.h"
-#include "base/googleinit.h"
-#include "base/logging.h"
-#include <gperftools/stacktrace.h>
-#include "base/commandlineflags.h"
-#include "base/elfcore.h"              // for i386_regs
-#include "base/thread_lister.h"
-#include "heap-profile-table.h"
-#include "base/low_level_alloc.h"
-#include "malloc_hook-inl.h"
-#include <gperftools/malloc_hook.h>
-#include <gperftools/malloc_extension.h>
-#include "maybe_threads.h"
-#include "memory_region_map.h"
-#include "base/spinlock.h"
-#include "base/sysinfo.h"
-#include "base/stl_allocator.h"
-
-using std::string;
-using std::basic_string;
-using std::pair;
-using std::map;
-using std::set;
-using std::vector;
-using std::swap;
-using std::make_pair;
-using std::min;
-using std::max;
-using std::less;
-using std::char_traits;
-
-// If current process is being ptrace()d, 'TracerPid' in /proc/self/status
-// will be non-zero.
-static bool IsDebuggerAttached(void) {    // only works under linux, probably
-  char buf[256];   // TracerPid comes relatively early in the status output
-  int fd = open("/proc/self/status", O_RDONLY);
-  if (fd == -1) {
-    return false;  // Can't tell for sure.
-  }
-  const int len = read(fd, buf, sizeof(buf));
-  bool rc = false;
-  if (len > 0) {
-    const char *const kTracerPid = "TracerPid:\t";
-    buf[len - 1] = '\0';
-    const char *p = strstr(buf, kTracerPid);
-    if (p != NULL) {
-      rc = (strncmp(p + strlen(kTracerPid), "0\n", 2) != 0);
-    }
-  }
-  close(fd);
-  return rc;
-}
-
-// This is the default if you don't link in -lprofiler
-extern "C" {
-ATTRIBUTE_WEAK PERFTOOLS_DLL_DECL bool ProfilingIsEnabledForAllThreads();
-bool ProfilingIsEnabledForAllThreads() { return false; }
-}
-
-//----------------------------------------------------------------------
-// Flags that control heap-checking
-//----------------------------------------------------------------------
-
-DEFINE_string(heap_check,
-              EnvToString("HEAPCHECK", ""),
-              "The heap leak checking to be done over the whole executable: "
-              "\"minimal\", \"normal\", \"strict\", "
-              "\"draconian\", \"as-is\", and \"local\" "
-              " or the empty string are the supported choices. "
-              "(See HeapLeakChecker_InternalInitStart for details.)");
-
-DEFINE_bool(heap_check_report, true, "Obsolete");
-
-DEFINE_bool(heap_check_before_constructors,
-            true,
-            "deprecated; pretty much always true now");
-
-DEFINE_bool(heap_check_after_destructors,
-            EnvToBool("HEAP_CHECK_AFTER_DESTRUCTORS", false),
-            "If overall heap check is to end after global destructors "
-            "or right after all REGISTER_HEAPCHECK_CLEANUP's");
-
-DEFINE_bool(heap_check_strict_check, true, "Obsolete");
-
-DEFINE_bool(heap_check_ignore_global_live,
-            EnvToBool("HEAP_CHECK_IGNORE_GLOBAL_LIVE", true),
-            "If overall heap check is to ignore heap objects reachable "
-            "from the global data");
-
-DEFINE_bool(heap_check_identify_leaks,
-            EnvToBool("HEAP_CHECK_IDENTIFY_LEAKS", false),
-            "If heap check should generate the addresses of the leaked "
-            "objects in the memory leak profiles.  This may be useful "
-            "in tracking down leaks where only a small fraction of "
-            "objects allocated at the same stack trace are leaked.");
-
-DEFINE_bool(heap_check_ignore_thread_live,
-            EnvToBool("HEAP_CHECK_IGNORE_THREAD_LIVE", true),
-            "If set to true, objects reachable from thread stacks "
-            "and registers are not reported as leaks");
-
-DEFINE_bool(heap_check_test_pointer_alignment,
-            EnvToBool("HEAP_CHECK_TEST_POINTER_ALIGNMENT", false),
-            "Set to true to check if the found leak can be due to "
-            "use of unaligned pointers");
-
-// Alignment at which all pointers in memory are supposed to be located;
-// use 1 if any alignment is ok.
-// heap_check_test_pointer_alignment flag guides if we try the value of 1.
-// The larger it can be, the smaller the chance of missing real leaks.
-static const size_t kPointerSourceAlignment = sizeof(void*);
-DEFINE_int32(heap_check_pointer_source_alignment,
-             EnvToInt("HEAP_CHECK_POINTER_SOURCE_ALIGNMENT",
-                      kPointerSourceAlignment),
-             "Alignment at which all pointers in memory are supposed to be "
-             "located.  Use 1 if any alignment is ok.");
-
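These flags take their defaults from environment variables through the `EnvTo*` helpers. A rough sketch of the assumed semantics (the real helpers go through an early-startup-safe getenv such as `TCMallocGetenvSafe`; `EnvToBoolSketch` is a made-up name and this simplified version is not startup-safe):

    #include <stdlib.h>
    #include <string.h>

    // Simplified EnvToBool-style default helper: if the variable is unset,
    // fall back to the compiled-in default; otherwise parse it loosely.
    static bool EnvToBoolSketch(const char* envname, bool dflt) {
      const char* v = getenv(envname);
      if (v == NULL) return dflt;
      return strcmp(v, "0") != 0 && strcmp(v, "false") != 0;
    }

e.g. `EnvToBoolSketch("HEAP_CHECK_AFTER_DESTRUCTORS", false)` mirrors the default of `heap_check_after_destructors` above.
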
-// A reasonable default to handle pointers inside of typical class objects:
-// Too low and we won't be able to traverse pointers to normally-used
-// nested objects and base parts of multiple-inherited objects.
-// Too high and it will both slow down leak checking (FindInsideAlloc
-// in HaveOnHeapLocked will get slower when there are large on-heap objects)
-// and make it probabilistically more likely to miss leaks
-// of large-sized objects.
-static const int64 kHeapCheckMaxPointerOffset = 1024;
-DEFINE_int64(heap_check_max_pointer_offset,
-             EnvToInt("HEAP_CHECK_MAX_POINTER_OFFSET",
-                      kHeapCheckMaxPointerOffset),
-             "Largest pointer offset for which we traverse "
-             "pointers going inside of heap allocated objects. "
-             "Set to -1 to use the actual largest heap object size.");
-
-DEFINE_bool(heap_check_run_under_gdb,
-            EnvToBool("HEAP_CHECK_RUN_UNDER_GDB", false),
-            "If false, turns off heap-checking library when running under gdb "
-            "(normally, set to 'true' only when debugging the heap-checker)");
-
-DEFINE_int32(heap_check_delay_seconds, 0,
-             "Number of seconds to delay on-exit heap checking."
-             " If you set this flag,"
-             " you may also want to set exit_timeout_seconds in order to"
-             " avoid exit timeouts.\n"
-             "NOTE: This flag is to be used only to help diagnose issues"
-             " where it is suspected that the heap checker is reporting"
-             " false leaks that will disappear if the heap checker delays"
-             " its checks. Report any such issues to the heap-checker"
-             " maintainer(s).");
-
-//----------------------------------------------------------------------
-
-DEFINE_string(heap_profile_pprof,
-              EnvToString("PPROF_PATH", "pprof"),
-              "OBSOLETE; not used");
-
-DEFINE_string(heap_check_dump_directory,
-              EnvToString("HEAP_CHECK_DUMP_DIRECTORY", "/tmp"),
-              "Directory to put heap-checker leak dump information");
-
-
-//----------------------------------------------------------------------
-// HeapLeakChecker global data
-//----------------------------------------------------------------------
-
-// Global lock for all the global data of this module.
-static SpinLock heap_checker_lock(SpinLock::LINKER_INITIALIZED);
-
-//----------------------------------------------------------------------
-
-// Heap profile prefix for leak checking profiles.
-// Gets assigned once when leak checking is turned on, then never modified.
-static const string* profile_name_prefix = NULL;
-
-// Whole-program heap leak checker.
-// Gets assigned once when leak checking is turned on,
-// then main_heap_checker is never deleted.
-static HeapLeakChecker* main_heap_checker = NULL;
-
-// Whether we will use main_heap_checker to do a check at program exit
-// automatically. In any case user can ask for more checks on main_heap_checker
-// via GlobalChecker().
-static bool do_main_heap_check = false;
-
-// The heap profile we use to collect info about the heap.
-// This is created in HeapLeakChecker::BeforeConstructorsLocked
-// together with setting heap_checker_on (below) to true
-// and registering our new/delete malloc hooks;
-// similarly all are unset in HeapLeakChecker::TurnItselfOffLocked.
-static HeapProfileTable* heap_profile = NULL;
-
-// If we are doing (or going to do) any kind of heap-checking.
-static bool heap_checker_on = false;
-
-// pid of the process that does whole-program heap leak checking
-static pid_t heap_checker_pid = 0;
-
-// If we did heap profiling during global constructors execution
-static bool constructor_heap_profiling = false;
-
-// RAW_VLOG level we dump key INFO messages at.  If you want to turn
-// off these messages, set the environment variable PERFTOOLS_VERBOSE=-1.
-static const int heap_checker_info_level = 0;
-
-//----------------------------------------------------------------------
-// HeapLeakChecker's own memory allocator that is
-// independent of the normal program allocator.
-//----------------------------------------------------------------------
-
-// Wrapper of LowLevelAlloc for STL_Allocator and direct use.
-// We always access this class under held heap_checker_lock,
-// this allows us to in particular protect the period when threads are stopped
-// at random spots with TCMalloc_ListAllProcessThreads by heap_checker_lock,
-// w/o worrying about the lock in LowLevelAlloc::Arena.
-// We rely on the fact that we use an own arena with an own lock here.
-class HeapLeakChecker::Allocator {
- public:
-  static void Init() {
-    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-    RAW_DCHECK(arena_ == NULL, "");
-    arena_ = LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena());
-  }
-  static void Shutdown() {
-    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-    if (!LowLevelAlloc::DeleteArena(arena_)  ||  alloc_count_ != 0) {
-      RAW_LOG(FATAL, "Internal heap checker leak of %d objects", alloc_count_);
-    }
-  }
-  static int alloc_count() {
-    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-    return alloc_count_;
-  }
-  static void* Allocate(size_t n) {
-    RAW_DCHECK(arena_  &&  heap_checker_lock.IsHeld(), "");
-    void* p = LowLevelAlloc::AllocWithArena(n, arena_);
-    if (p) alloc_count_ += 1;
-    return p;
-  }
-  static void Free(void* p) {
-    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-    if (p) alloc_count_ -= 1;
-    LowLevelAlloc::Free(p);
-  }
-  static void Free(void* p, size_t /* n */) {
-    Free(p);
-  }
-  // destruct, free, and make *p to be NULL
-  template<typename T> static void DeleteAndNull(T** p) {
-    (*p)->~T();
-    Free(*p);
-    *p = NULL;
-  }
-  template<typename T> static void DeleteAndNullIfNot(T** p) {
-    if (*p != NULL) DeleteAndNull(p);
-  }
- private:
-  static LowLevelAlloc::Arena* arena_;
-  static int alloc_count_;
-};
-
-LowLevelAlloc::Arena* HeapLeakChecker::Allocator::arena_ = NULL;
-int HeapLeakChecker::Allocator::alloc_count_ = 0;
-
-//----------------------------------------------------------------------
-// HeapLeakChecker live object tracking components
-//----------------------------------------------------------------------
-
-// Cases of live object placement we distinguish
-enum ObjectPlacement {
-  MUST_BE_ON_HEAP,   // Must point to a live object of the matching size in the
-                     // heap_profile map of the heap when we get to it
-  IGNORED_ON_HEAP,   // Is a live (ignored) object on heap
-  MAYBE_LIVE,        // Is a piece of writable memory from /proc/self/maps
-  IN_GLOBAL_DATA,    // Is part of global data region of the executable
-  THREAD_DATA,       // Part of a thread stack and a thread descriptor with TLS
-  THREAD_REGISTERS,  // Values in registers of some thread
-};
-
-// Information about an allocated object
-struct AllocObject {
-  const void* ptr;        // the object
-  uintptr_t size;         // its size
-  ObjectPlacement place;  // where ptr points to
-
-  AllocObject(const void* p, size_t s, ObjectPlacement l)
-    : ptr(p), size(s), place(l) { }
-};
-
-// All objects (memory ranges) ignored via HeapLeakChecker::IgnoreObject
-// Key is the object's address; value is its size.
-typedef map<uintptr_t, size_t, less<uintptr_t>,
-            STL_Allocator<pair<const uintptr_t, size_t>,
-                          HeapLeakChecker::Allocator>
-           > IgnoredObjectsMap;
-static IgnoredObjectsMap* ignored_objects = NULL;
-
-// All objects (memory ranges) that we consider to be the sources of pointers
-// to live (not leaked) objects.
-// At different times this holds (what can be reached from) global data regions
-// and the objects we've been told to ignore.
-// For any AllocObject::ptr "live_objects" is supposed to contain at most one
-// record at any time. We maintain this by checking with the heap_profile map
-// of the heap and removing the live heap objects we've handled from it.
-// This vector is maintained as a stack and the frontier of reachable
-// live heap objects in our flood traversal of them.
-typedef vector<AllocObject,
-               STL_Allocator<AllocObject, HeapLeakChecker::Allocator>
-              > LiveObjectsStack;
-static LiveObjectsStack* live_objects = NULL;
-
-// A special string type that uses my allocator
-typedef basic_string<char, char_traits<char>,
-                     STL_Allocator<char, HeapLeakChecker::Allocator>
-                    > HCL_string;
-
-// A placeholder to fill-in the starting values for live_objects
-// for each library so we can keep the library-name association for logging.
-typedef map<HCL_string, LiveObjectsStack, less<HCL_string>,
-            STL_Allocator<pair<const HCL_string, LiveObjectsStack>,
-                          HeapLeakChecker::Allocator>
-           > LibraryLiveObjectsStacks;
-static LibraryLiveObjectsStacks* library_live_objects = NULL;
-
-// Value stored in the map of disabled address ranges;
-// its key is the end of the address range.
-// We'll ignore allocations with a return address in a disabled range
-// if the address occurs at 'max_depth' or less in the stack trace.
-struct HeapLeakChecker::RangeValue {
-  uintptr_t start_address;  // the start of the range
-  int       max_depth;      // the maximal stack depth to disable at
-};
-typedef map<uintptr_t, HeapLeakChecker::RangeValue, less<uintptr_t>,
-            STL_Allocator<pair<const uintptr_t, HeapLeakChecker::RangeValue>,
-                          HeapLeakChecker::Allocator>
-           > DisabledRangeMap;
-// The disabled program counter address ranges for profile dumping
-// that are registered with HeapLeakChecker::DisableChecksFromToLocked.
-static DisabledRangeMap* disabled_ranges = NULL;
-
-// Set of stack tops.
-// These are used to consider live only appropriate chunks of the memory areas
-// that are used for stacks (and maybe thread-specific data as well)
-// so that we do not treat pointers from outdated stack frames as live.
-typedef set<uintptr_t, less<uintptr_t>,
-            STL_Allocator<uintptr_t, HeapLeakChecker::Allocator>
-           > StackTopSet;
-static StackTopSet* stack_tops = NULL;
-
-// A map of ranges of code addresses for the system libraries
-// that can mmap/mremap/sbrk-allocate memory regions for stacks
-// and thread-local storage that we want to consider as live global data.
-// Maps from the end address to the start address.
-typedef map<uintptr_t, uintptr_t, less<uintptr_t>,
-            STL_Allocator<pair<const uintptr_t, uintptr_t>,
-                          HeapLeakChecker::Allocator>
-           > GlobalRegionCallerRangeMap;
-static GlobalRegionCallerRangeMap* global_region_caller_ranges = NULL;
-
-// TODO(maxim): make our big data structs into own modules
-
-// Disabler is implemented by keeping track of a per-thread count
-// of active Disabler objects.  Any objects allocated while the
-// count > 0 are not reported.
-
-#ifdef HAVE_TLS
-
-static __thread int thread_disable_counter
-// The "initial-exec" model is faster than the default TLS model, at
-// the cost you can't dlopen this library.  But dlopen on heap-checker
-// doesn't work anyway -- it must run before main -- so this is a good
-// trade-off.
-# ifdef HAVE___ATTRIBUTE__
-  __attribute__ ((tls_model ("initial-exec")))
-# endif
-    ;
-inline int get_thread_disable_counter() {
-  return thread_disable_counter;
-}
-inline void set_thread_disable_counter(int value) {
-  thread_disable_counter = value;
-}
-
-#else  // #ifdef HAVE_TLS
-
-static pthread_key_t thread_disable_counter_key;
-static int main_thread_counter;   // storage for use before main()
-static bool use_main_thread_counter = true;
-
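The per-thread counter maintained below is what backs `HeapLeakChecker::Disabler`, gperftools' documented way to exempt intentional allocations from leak reports. A hypothetical caller-side sketch (`GetStaticMessage` is made up; the Disabler type is the real public API from heap-checker.h):

    #include <cstring>
    #include <gperftools/heap-checker.h>

    static const char* GetStaticMessage() {
      HeapLeakChecker::Disabler disabler;  // bumps this thread's disable counter
      // Recorded by NewHook, but marked ignored while the counter is > 0.
      static char* msg = std::strcpy(new char[16], "hi");
      return msg;
    }  // ~Disabler restores the counter
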
-// TODO(csilvers): this is called from NewHook, in the middle of malloc().
-// If perftools_pthread_getspecific calls malloc, that will lead to an
-// infinite loop.  I don't know how to fix that, so I hope it never happens!
-inline int get_thread_disable_counter() {
-  if (use_main_thread_counter)   // means we're running really early
-    return main_thread_counter;
-  void* p = perftools_pthread_getspecific(thread_disable_counter_key);
-  return (intptr_t)p;   // kinda evil: store the counter directly in the void*
-}
-
-inline void set_thread_disable_counter(int value) {
-  if (use_main_thread_counter) {   // means we're running really early
-    main_thread_counter = value;
-    return;
-  }
-  intptr_t pointer_sized_value = value;
-  // kinda evil: store the counter directly in the void*
-  void* p = (void*)pointer_sized_value;
-  // NOTE: this may call malloc, which will call NewHook which will call
-  // get_thread_disable_counter() which will call pthread_getspecific().  I
-  // don't know if anything bad can happen if we call getspecific() in the
-  // middle of a setspecific() call.  It seems to work ok in practice...
-  perftools_pthread_setspecific(thread_disable_counter_key, p);
-}
-
-// The idea here is that this initializer will run pretty late: after
-// pthreads have been totally set up.  At this point we can call
-// pthreads routines, so we set those up.
-class InitThreadDisableCounter {
- public:
-  InitThreadDisableCounter() {
-    perftools_pthread_key_create(&thread_disable_counter_key, NULL);
-    // Set up the main thread's value, which we have a special variable for.
-    void* p = (void*)main_thread_counter;   // store the counter directly
-    perftools_pthread_setspecific(thread_disable_counter_key, p);
-    use_main_thread_counter = false;
-  }
-};
-InitThreadDisableCounter init_thread_disable_counter;
-
-#endif  // #ifdef HAVE_TLS
-
-HeapLeakChecker::Disabler::Disabler() {
-  // It is faster to unconditionally increment the thread-local
-  // counter than to check whether or not heap-checking is on
-  // in a thread-safe manner.
-  int counter = get_thread_disable_counter();
-  set_thread_disable_counter(counter + 1);
-  RAW_VLOG(10, "Increasing thread disable counter to %d", counter + 1);
-}
-
-HeapLeakChecker::Disabler::~Disabler() {
-  int counter = get_thread_disable_counter();
-  RAW_DCHECK(counter > 0, "");
-  if (counter > 0) {
-    set_thread_disable_counter(counter - 1);
-    RAW_VLOG(10, "Decreasing thread disable counter to %d", counter);
-  } else {
-    RAW_VLOG(0, "Thread disable counter underflow : %d", counter);
-  }
-}
-
-//----------------------------------------------------------------------
-
-// The size of the largest heap object allocated so far.
-static size_t max_heap_object_size = 0;
-// The possible range of addresses that can point
-// into one of the elements of heap_objects.
-static uintptr_t min_heap_address = uintptr_t(-1LL);
-static uintptr_t max_heap_address = 0;
-
-//----------------------------------------------------------------------
-
-// Simple casting helpers for uintptr_t and void*:
-template<typename T>
-inline static const void* AsPtr(T addr) {
-  return reinterpret_cast<void*>(addr);
-}
-inline static uintptr_t AsInt(const void* ptr) {
-  return reinterpret_cast<uintptr_t>(ptr);
-}
-
-//----------------------------------------------------------------------
-
-// We've seen reports that strstr causes heap-checker crashes in some
-// libc's (?):
-//    http://code.google.com/p/gperftools/issues/detail?id=263
-// It's simple enough to use our own.  This is not in time-critical code.
-static const char* hc_strstr(const char* s1, const char* s2) { - const size_t len = strlen(s2); - RAW_CHECK(len > 0, "Unexpected empty string passed to strstr()"); - for (const char* p = strchr(s1, *s2); p != NULL; p = strchr(p+1, *s2)) { - if (strncmp(p, s2, len) == 0) { - return p; - } - } - return NULL; -} - -//---------------------------------------------------------------------- - -// Our hooks for MallocHook -static void NewHook(const void* ptr, size_t size) { - if (ptr != NULL) { - const int counter = get_thread_disable_counter(); - const bool ignore = (counter > 0); - RAW_VLOG(16, "Recording Alloc: %p of %" PRIuS "; %d", ptr, size, - int(counter)); - - // Fetch the caller's stack trace before acquiring heap_checker_lock. - void* stack[HeapProfileTable::kMaxStackDepth]; - int depth = HeapProfileTable::GetCallerStackTrace(0, stack); - - { SpinLockHolder l(&heap_checker_lock); - if (size > max_heap_object_size) max_heap_object_size = size; - uintptr_t addr = AsInt(ptr); - if (addr < min_heap_address) min_heap_address = addr; - addr += size; - if (addr > max_heap_address) max_heap_address = addr; - if (heap_checker_on) { - heap_profile->RecordAlloc(ptr, size, depth, stack); - if (ignore) { - heap_profile->MarkAsIgnored(ptr); - } - } - } - RAW_VLOG(17, "Alloc Recorded: %p of %" PRIuS "", ptr, size); - } -} - -static void DeleteHook(const void* ptr) { - if (ptr != NULL) { - RAW_VLOG(16, "Recording Free %p", ptr); - { SpinLockHolder l(&heap_checker_lock); - if (heap_checker_on) heap_profile->RecordFree(ptr); - } - RAW_VLOG(17, "Free Recorded: %p", ptr); - } -} - -//---------------------------------------------------------------------- - -enum StackDirection { - GROWS_TOWARDS_HIGH_ADDRESSES, - GROWS_TOWARDS_LOW_ADDRESSES, - UNKNOWN_DIRECTION -}; - -// Determine which way the stack grows: - -static StackDirection ATTRIBUTE_NOINLINE GetStackDirection( - const uintptr_t *const ptr) { - uintptr_t x; - if (&x < ptr) - return GROWS_TOWARDS_LOW_ADDRESSES; - if (ptr < &x) - return GROWS_TOWARDS_HIGH_ADDRESSES; - - RAW_CHECK(0, ""); // Couldn't determine the stack direction. - - return UNKNOWN_DIRECTION; -} - -// Direction of stack growth (will initialize via GetStackDirection()) -static StackDirection stack_direction = UNKNOWN_DIRECTION; - -// This routine is called for every thread stack we know about to register it. 
-static void RegisterStackLocked(const void* top_ptr) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
-  RAW_VLOG(10, "Thread stack at %p", top_ptr);
-  uintptr_t top = AsInt(top_ptr);
-  stack_tops->insert(top);  // add for later use
-
-  // make sure stack_direction is initialized
-  if (stack_direction == UNKNOWN_DIRECTION) {
-    stack_direction = GetStackDirection(&top);
-  }
-
-  // Find memory region with this stack
-  MemoryRegionMap::Region region;
-  if (MemoryRegionMap::FindAndMarkStackRegion(top, &region)) {
-    // Make the proper portion of the stack live:
-    if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) {
-      RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
-               top_ptr, region.end_addr - top);
-      live_objects->push_back(AllocObject(top_ptr, region.end_addr - top,
-                                          THREAD_DATA));
-    } else {  // GROWS_TOWARDS_HIGH_ADDRESSES
-      RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
-               AsPtr(region.start_addr),
-               top - region.start_addr);
-      live_objects->push_back(AllocObject(AsPtr(region.start_addr),
-                                          top - region.start_addr,
-                                          THREAD_DATA));
-    }
-  // not in MemoryRegionMap, look in library_live_objects:
-  } else if (FLAGS_heap_check_ignore_global_live) {
-    for (LibraryLiveObjectsStacks::iterator lib = library_live_objects->begin();
-         lib != library_live_objects->end(); ++lib) {
-      for (LiveObjectsStack::iterator span = lib->second.begin();
-           span != lib->second.end(); ++span) {
-        uintptr_t start = AsInt(span->ptr);
-        uintptr_t end = start + span->size;
-        if (start <= top && top < end) {
-          RAW_VLOG(11, "Stack at %p is inside /proc/self/maps chunk %p..%p",
-                   top_ptr, AsPtr(start), AsPtr(end));
-          // Shrink start..end region by chopping away the memory regions in
-          // MemoryRegionMap that land in it to undo merging of regions
-          // in /proc/self/maps, so that we correctly identify what portion
-          // of start..end is actually the stack region.
-          uintptr_t stack_start = start;
-          uintptr_t stack_end = end;
-          // can optimize-away this loop, but it does not run often
-          RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
-          for (MemoryRegionMap::RegionIterator r =
-                 MemoryRegionMap::BeginRegionLocked();
-               r != MemoryRegionMap::EndRegionLocked(); ++r) {
-            if (top < r->start_addr && r->start_addr < stack_end) {
-              stack_end = r->start_addr;
-            }
-            if (stack_start < r->end_addr && r->end_addr <= top) {
-              stack_start = r->end_addr;
-            }
-          }
-          if (stack_start != start || stack_end != end) {
-            RAW_VLOG(11, "Stack at %p is actually inside memory chunk %p..%p",
-                     top_ptr, AsPtr(stack_start), AsPtr(stack_end));
-          }
-          // Make the proper portion of the stack live:
-          if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) {
-            RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
-                     top_ptr, stack_end - top);
-            live_objects->push_back(
-              AllocObject(top_ptr, stack_end - top, THREAD_DATA));
-          } else {  // GROWS_TOWARDS_HIGH_ADDRESSES
-            RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
-                     AsPtr(stack_start), top - stack_start);
-            live_objects->push_back(
-              AllocObject(AsPtr(stack_start), top - stack_start, THREAD_DATA));
-          }
-          lib->second.erase(span);  // kill the rest of the region
-          // Put the non-stack part(s) of the region back:
-          if (stack_start != start) {
-            lib->second.push_back(AllocObject(AsPtr(start), stack_start - start,
-                                              MAYBE_LIVE));
-          }
-          if (stack_end != end) {
-            lib->second.push_back(AllocObject(AsPtr(stack_end), end - stack_end,
-                                              MAYBE_LIVE));
-          }
-          return;
-        }
-      }
-    }
-    RAW_LOG(ERROR, "Memory region for stack at %p not found. 
" - "Will likely report false leak positives.", top_ptr); - } -} - -// Iterator for heap allocation map data to make ignored objects "live" -// (i.e., treated as roots for the mark-and-sweep phase) -static void MakeIgnoredObjectsLiveCallbackLocked( - const void* ptr, const HeapProfileTable::AllocInfo& info) { - RAW_DCHECK(heap_checker_lock.IsHeld(), ""); - if (info.ignored) { - live_objects->push_back(AllocObject(ptr, info.object_size, - MUST_BE_ON_HEAP)); - } -} - -// Iterator for heap allocation map data to make objects allocated from -// disabled regions of code to be live. -static void MakeDisabledLiveCallbackLocked( - const void* ptr, const HeapProfileTable::AllocInfo& info) { - RAW_DCHECK(heap_checker_lock.IsHeld(), ""); - bool stack_disable = false; - bool range_disable = false; - for (int depth = 0; depth < info.stack_depth; depth++) { - uintptr_t addr = AsInt(info.call_stack[depth]); - if (disabled_ranges) { - DisabledRangeMap::const_iterator iter - = disabled_ranges->upper_bound(addr); - if (iter != disabled_ranges->end()) { - RAW_DCHECK(iter->first > addr, ""); - if (iter->second.start_address < addr && - iter->second.max_depth > depth) { - range_disable = true; // in range; dropping - break; - } - } - } - } - if (stack_disable || range_disable) { - uintptr_t start_address = AsInt(ptr); - uintptr_t end_address = start_address + info.object_size; - StackTopSet::const_iterator iter - = stack_tops->lower_bound(start_address); - if (iter != stack_tops->end()) { - RAW_DCHECK(*iter >= start_address, ""); - if (*iter < end_address) { - // We do not disable (treat as live) whole allocated regions - // if they are used to hold thread call stacks - // (i.e. when we find a stack inside). - // The reason is that we'll treat as live the currently used - // stack portions anyway (see RegisterStackLocked), - // and the rest of the region where the stack lives can well - // contain outdated stack variables which are not live anymore, - // hence should not be treated as such. - RAW_VLOG(11, "Not %s-disabling %" PRIuS " bytes at %p" - ": have stack inside: %p", - (stack_disable ? "stack" : "range"), - info.object_size, ptr, AsPtr(*iter)); - return; - } - } - RAW_VLOG(11, "%s-disabling %" PRIuS " bytes at %p", - (stack_disable ? "Stack" : "Range"), info.object_size, ptr); - live_objects->push_back(AllocObject(ptr, info.object_size, - MUST_BE_ON_HEAP)); - } -} - -static const char kUnnamedProcSelfMapEntry[] = "UNNAMED"; - -// This function takes some fields from a /proc/self/maps line: -// -// start_address start address of a memory region. -// end_address end address of a memory region -// permissions rwx + private/shared bit -// filename filename of the mapped file -// -// If the region is not writeable, then it cannot have any heap -// pointers in it, otherwise we record it as a candidate live region -// to get filtered later. -static void RecordGlobalDataLocked(uintptr_t start_address, - uintptr_t end_address, - const char* permissions, - const char* filename) { - RAW_DCHECK(heap_checker_lock.IsHeld(), ""); - // Ignore non-writeable regions. - if (strchr(permissions, 'w') == NULL) return; - if (filename == NULL || *filename == '\0') { - filename = kUnnamedProcSelfMapEntry; - } - RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, - filename, start_address, end_address); - (*library_live_objects)[filename]. - push_back(AllocObject(AsPtr(start_address), - end_address - start_address, - MAYBE_LIVE)); -} - -// See if 'library' from /proc/self/maps has base name 'library_base' -// i.e. 
contains it and has '.' or '-' after it. -static bool IsLibraryNamed(const char* library, const char* library_base) { - const char* p = hc_strstr(library, library_base); - size_t sz = strlen(library_base); - return p != NULL && (p[sz] == '.' || p[sz] == '-'); -} - -// static -void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library, - uintptr_t start_address, - uintptr_t end_address) { - RAW_DCHECK(heap_checker_lock.IsHeld(), ""); - int depth = 0; - // TODO(maxim): maybe this should be extended to also use objdump - // and pick the text portion of the library more precisely. - if (IsLibraryNamed(library, "/libpthread") || - // libpthread has a lot of small "system" leaks we don't care about. - // In particular it allocates memory to store data supplied via - // pthread_setspecific (which can be the only pointer to a heap object). - IsLibraryNamed(library, "/libdl") || - // library loaders leak some "system" heap that we don't care about - IsLibraryNamed(library, "/libcrypto") || - // Sometimes libcrypto of OpenSSH is compiled with -fomit-frame-pointer - // (any library can be, of course, but this one often is because speed - // is so important for making crypto usable). We ignore all its - // allocations because we can't see the call stacks. We'd prefer - // to ignore allocations done in files/symbols that match - // "default_malloc_ex|default_realloc_ex" - // but that doesn't work when the end-result binary is stripped. - IsLibraryNamed(library, "/libjvm") || - // JVM has a lot of leaks we don't care about. - IsLibraryNamed(library, "/libzip") - // The JVM leaks java.util.zip.Inflater after loading classes. - ) { - depth = 1; // only disable allocation calls directly from the library code - } else if (IsLibraryNamed(library, "/ld") - // library loader leaks some "system" heap - // (e.g. thread-local storage) that we don't care about - ) { - depth = 2; // disable allocation calls directly from the library code - // and at depth 2 from it. - // We need depth 2 here solely because of a libc bug that - // forces us to jump through __memalign_hook and MemalignOverride hoops - // in tcmalloc.cc. - // Those buggy __libc_memalign() calls are in ld-linux.so and happen for - // thread-local storage allocations that we want to ignore here. - // We go with the depth-2 hack as a workaround for this libc bug: - // otherwise we'd need to extend MallocHook interface - // so that correct stack depth adjustment can be propagated from - // the exceptional case of MemalignOverride. - // Using depth 2 here should not mask real leaks because ld-linux.so - // does not call user code. 
- } - if (depth) { - RAW_VLOG(10, "Disabling allocations from %s at depth %d:", library, depth); - DisableChecksFromToLocked(AsPtr(start_address), AsPtr(end_address), depth); - if (IsLibraryNamed(library, "/libpthread") || - IsLibraryNamed(library, "/libdl") || - IsLibraryNamed(library, "/ld")) { - RAW_VLOG(10, "Global memory regions made by %s will be live data", - library); - if (global_region_caller_ranges == NULL) { - global_region_caller_ranges = - new(Allocator::Allocate(sizeof(GlobalRegionCallerRangeMap))) - GlobalRegionCallerRangeMap; - } - global_region_caller_ranges - ->insert(make_pair(end_address, start_address)); - } - } -} - -// static -HeapLeakChecker::ProcMapsResult HeapLeakChecker::UseProcMapsLocked( - ProcMapsTask proc_maps_task) { - RAW_DCHECK(heap_checker_lock.IsHeld(), ""); - // Need to provide own scratch memory to ProcMapsIterator: - ProcMapsIterator::Buffer buffer; - ProcMapsIterator it(0, &buffer); - if (!it.Valid()) { - int errsv = errno; - RAW_LOG(ERROR, "Could not open /proc/self/maps: errno=%d. " - "Libraries will not be handled correctly.", errsv); - return CANT_OPEN_PROC_MAPS; - } - uint64 start_address, end_address, file_offset; - int64 inode; - char *permissions, *filename; - bool saw_shared_lib = false; - bool saw_nonzero_inode = false; - bool saw_shared_lib_with_nonzero_inode = false; - while (it.Next(&start_address, &end_address, &permissions, - &file_offset, &inode, &filename)) { - if (start_address >= end_address) { - // Warn if a line we can be interested in is ill-formed: - if (inode != 0) { - RAW_LOG(ERROR, "Errors reading /proc/self/maps. " - "Some global memory regions will not " - "be handled correctly."); - } - // Silently skip other ill-formed lines: some are possible - // probably due to the interplay of how /proc/self/maps is updated - // while we read it in chunks in ProcMapsIterator and - // do things in this loop. - continue; - } - // Determine if any shared libraries are present (this is the same - // list of extensions as is found in pprof). We want to ignore - // 'fake' libraries with inode 0 when determining. However, some - // systems don't share inodes via /proc, so we turn off this check - // if we don't see any evidence that we're getting inode info. - if (inode != 0) { - saw_nonzero_inode = true; - } - if ((hc_strstr(filename, "lib") && hc_strstr(filename, ".so")) || - hc_strstr(filename, ".dll") || - // not all .dylib filenames start with lib. .dylib is big enough - // that we are unlikely to get false matches just checking that. - hc_strstr(filename, ".dylib") || hc_strstr(filename, ".bundle")) { - saw_shared_lib = true; - if (inode != 0) { - saw_shared_lib_with_nonzero_inode = true; - } - } - - switch (proc_maps_task) { - case DISABLE_LIBRARY_ALLOCS: - // All lines starting like - // "401dc000-4030f000 r??p 00132000 03:01 13991972 lib/bin" - // identify a data and code sections of a shared library or our binary - if (inode != 0 && strncmp(permissions, "r-xp", 4) == 0) { - DisableLibraryAllocsLocked(filename, start_address, end_address); - } - break; - case RECORD_GLOBAL_DATA: - RecordGlobalDataLocked(start_address, end_address, - permissions, filename); - break; - default: - RAW_CHECK(0, ""); - } - } - // If /proc/self/maps is reporting inodes properly (we saw a - // non-zero inode), then we only say we saw a shared lib if we saw a - // 'real' one, with a non-zero inode. - if (saw_nonzero_inode) { - saw_shared_lib = saw_shared_lib_with_nonzero_inode; - } - if (!saw_shared_lib) { - RAW_LOG(ERROR, "No shared libs detected. 
Will likely report false leak "
-            "positives for statically linked executables.");
-    return NO_SHARED_LIBS_IN_PROC_MAPS;
-  }
-  return PROC_MAPS_USED;
-}
-
-// Total number and size of live objects dropped from the profile;
-// (re)initialized in IgnoreAllLiveObjectsLocked.
-static int64 live_objects_total;
-static int64 live_bytes_total;
-
-// pid of the thread that is doing the current leak check
-// (protected by our lock; IgnoreAllLiveObjectsLocked sets it)
-static pid_t self_thread_pid = 0;
-
-// Status of our thread listing callback execution
-// (protected by our lock; used from within IgnoreAllLiveObjectsLocked)
-static enum {
-  CALLBACK_NOT_STARTED,
-  CALLBACK_STARTED,
-  CALLBACK_COMPLETED,
-} thread_listing_status = CALLBACK_NOT_STARTED;
-
-// Ideally to avoid deadlocks this function should not result in any libc
-// or other function calls that might need to lock a mutex:
-// It is called when all threads of a process are stopped
-// at arbitrary points thus potentially holding those locks.
-//
-// In practice we are calling some simple i/o and sprintf-type library functions
-// for logging messages, but use only our own LowLevelAlloc::Arena allocator.
-//
-// This is known to be buggy: the library i/o function calls are able to cause
-// deadlocks when they request a lock that a stopped thread happens to hold.
-// This issue, as far as we know, has so far not resulted in any deadlocks
-// in practice, so for now we are taking our chance that the deadlocks
-// have insignificant frequency.
-//
-// If such deadlocks become a problem we should make the i/o calls
-// into appropriately direct system calls (or eliminate them),
-// in particular write() is not safe and vsnprintf() is potentially dangerous
-// due to reliance on locale functions (these are called through RAW_LOG
-// and in other ways).
-//
-
-#if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER)
-# if (defined(__i386__) || defined(__x86_64))
-#   define THREAD_REGS i386_regs
-# elif defined(__PPC__)
-#   define THREAD_REGS ppc_regs
-# endif
-#endif
-
-/*static*/ int HeapLeakChecker::IgnoreLiveThreadsLocked(void* parameter,
-                                                        int num_threads,
-                                                        pid_t* thread_pids,
-                                                        va_list /*ap*/) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  thread_listing_status = CALLBACK_STARTED;
-  RAW_VLOG(11, "Found %d threads (from pid %d)", num_threads, getpid());
-
-  if (FLAGS_heap_check_ignore_global_live) {
-    UseProcMapsLocked(RECORD_GLOBAL_DATA);
-  }
-
-  // We put the registers from other threads here
-  // to make pointers stored in them live.
-  vector<void*, STL_Allocator<void*, Allocator> > thread_registers;
-
-  int failures = 0;
-  for (int i = 0; i < num_threads; ++i) {
-    // the leak checking thread itself is handled
-    // specially via self_thread_stack, not here:
-    if (thread_pids[i] == self_thread_pid) continue;
-    RAW_VLOG(11, "Handling thread with pid %d", thread_pids[i]);
-#ifdef THREAD_REGS
-    THREAD_REGS thread_regs;
-#define sys_ptrace(r, p, a, d)  syscall(SYS_ptrace, (r), (p), (a), (d))
-    // We use sys_ptrace to avoid thread locking
-    // because this is called from TCMalloc_ListAllProcessThreads
-    // when all but this thread are suspended. 
-    if (sys_ptrace(PTRACE_GETREGS, thread_pids[i], NULL, &thread_regs) == 0) {
-      // Need to use SP to get all the data from the very last stack frame:
-      COMPILE_ASSERT(sizeof(thread_regs.SP) == sizeof(void*),
-                     SP_register_does_not_look_like_a_pointer);
-      RegisterStackLocked(reinterpret_cast<void*>(thread_regs.SP));
-      // Make registers live (just in case PTRACE_ATTACH resulted in some
-      // register pointers still being in the registers and not on the stack):
-      for (void** p = reinterpret_cast<void**>(&thread_regs);
-           p < reinterpret_cast<void**>(&thread_regs + 1); ++p) {
-        RAW_VLOG(12, "Thread register %p", *p);
-        thread_registers.push_back(*p);
-      }
-    } else {
-      failures += 1;
-    }
-#else
-    failures += 1;
-#endif
-  }
-  // Use all the collected thread (stack) liveness sources:
-  IgnoreLiveObjectsLocked("threads stack data", "");
-  if (thread_registers.size()) {
-    // Make thread registers be live heap data sources.
-    // we rely here on the fact that vector is in one memory chunk:
-    RAW_VLOG(11, "Live registers at %p of %" PRIuS " bytes",
-             &thread_registers[0], thread_registers.size() * sizeof(void*));
-    live_objects->push_back(AllocObject(&thread_registers[0],
-                                        thread_registers.size() * sizeof(void*),
-                                        THREAD_REGISTERS));
-    IgnoreLiveObjectsLocked("threads register data", "");
-  }
-  // Do all other liveness walking while all threads are stopped:
-  IgnoreNonThreadLiveObjectsLocked();
-  // Can now resume the threads:
-  TCMalloc_ResumeAllProcessThreads(num_threads, thread_pids);
-  thread_listing_status = CALLBACK_COMPLETED;
-  return failures;
-}
-
-// Stack top of the thread that is doing the current leak check
-// (protected by our lock; IgnoreAllLiveObjectsLocked sets it)
-static const void* self_thread_stack_top;
-
-// static
-void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
-  RAW_VLOG(11, "Handling self thread with pid %d", self_thread_pid);
-  // Register our own stack:
-
-  // Important that all stack ranges (including the one here)
-  // are known before we start looking at them
-  // in MakeDisabledLiveCallbackLocked:
-  RegisterStackLocked(self_thread_stack_top);
-  IgnoreLiveObjectsLocked("stack data", "");
-
-  // Make objects we were told to ignore live:
-  if (ignored_objects) {
-    for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin();
-         object != ignored_objects->end(); ++object) {
-      const void* ptr = AsPtr(object->first);
-      RAW_VLOG(11, "Ignored live object at %p of %" PRIuS " bytes",
-               ptr, object->second);
-      live_objects->
-        push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP));
-      // we do this liveness check for ignored_objects before doing any
-      // live heap walking to make sure it does not fail needlessly:
-      size_t object_size;
-      if (!(heap_profile->FindAlloc(ptr, &object_size) &&
-            object->second == object_size)) {
-        RAW_LOG(FATAL, "Object at %p of %" PRIuS " bytes from an"
-                " IgnoreObject() has disappeared", ptr, object->second);
-      }
-    }
-    IgnoreLiveObjectsLocked("ignored objects", "");
-  }
-
-  // Treat objects that were allocated when a Disabler was live as
-  // roots. I.e., if X was allocated while a Disabler was active,
-  // and Y is reachable from X, arrange that neither X nor Y are
-  // treated as leaks. 
- heap_profile->IterateAllocs(MakeIgnoredObjectsLiveCallbackLocked); - IgnoreLiveObjectsLocked("disabled objects", ""); - - // Make code-address-disabled objects live and ignored: - // This in particular makes all thread-specific data live - // because the basic data structure to hold pointers to thread-specific data - // is allocated from libpthreads and we have range-disabled that - // library code with UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS); - // so now we declare all thread-specific data reachable from there as live. - heap_profile->IterateAllocs(MakeDisabledLiveCallbackLocked); - IgnoreLiveObjectsLocked("disabled code", ""); - - // Actually make global data live: - if (FLAGS_heap_check_ignore_global_live) { - bool have_null_region_callers = false; - for (LibraryLiveObjectsStacks::iterator l = library_live_objects->begin(); - l != library_live_objects->end(); ++l) { - RAW_CHECK(live_objects->empty(), ""); - // Process library_live_objects in l->second - // filtering them by MemoryRegionMap: - // It's safe to iterate over MemoryRegionMap - // w/o locks here as we are inside MemoryRegionMap::Lock(): - RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); - // The only change to MemoryRegionMap possible in this loop - // is region addition as a result of allocating more memory - // for live_objects. This won't invalidate the RegionIterator - // or the intent of the loop. - // --see the comment by MemoryRegionMap::BeginRegionLocked(). - for (MemoryRegionMap::RegionIterator region = - MemoryRegionMap::BeginRegionLocked(); - region != MemoryRegionMap::EndRegionLocked(); ++region) { - // "region" from MemoryRegionMap is to be subtracted from - // (tentatively live) regions in l->second - // if it has a stack inside or it was allocated by - // a non-special caller (not one covered by a range - // in global_region_caller_ranges). - // This will in particular exclude all memory chunks used - // by the heap itself as well as what's been allocated with - // any allocator on top of mmap. 
-        bool subtract = true;
-        if (!region->is_stack && global_region_caller_ranges) {
-          if (region->caller() == static_cast<uintptr_t>(NULL)) {
-            have_null_region_callers = true;
-          } else {
-            GlobalRegionCallerRangeMap::const_iterator iter
-              = global_region_caller_ranges->upper_bound(region->caller());
-            if (iter != global_region_caller_ranges->end()) {
-              RAW_DCHECK(iter->first > region->caller(), "");
-              if (iter->second < region->caller()) {  // in special region
-                subtract = false;
-              }
-            }
-          }
-        }
-        if (subtract) {
-          // The loop puts the result of filtering l->second into live_objects:
-          for (LiveObjectsStack::const_iterator i = l->second.begin();
-               i != l->second.end(); ++i) {
-            // subtract *region from *i
-            uintptr_t start = AsInt(i->ptr);
-            uintptr_t end = start + i->size;
-            if (region->start_addr <= start && end <= region->end_addr) {
-              // full deletion due to subsumption
-            } else if (start < region->start_addr &&
-                       region->end_addr < end) {  // cutting-out split
-              live_objects->push_back(AllocObject(i->ptr,
-                                                  region->start_addr - start,
-                                                  IN_GLOBAL_DATA));
-              live_objects->push_back(AllocObject(AsPtr(region->end_addr),
-                                                  end - region->end_addr,
-                                                  IN_GLOBAL_DATA));
-            } else if (region->end_addr > start &&
-                       region->start_addr <= start) {  // cut from start
-              live_objects->push_back(AllocObject(AsPtr(region->end_addr),
-                                                  end - region->end_addr,
-                                                  IN_GLOBAL_DATA));
-            } else if (region->start_addr > start &&
-                       region->start_addr < end) {  // cut from end
-              live_objects->push_back(AllocObject(i->ptr,
-                                                  region->start_addr - start,
-                                                  IN_GLOBAL_DATA));
-            } else {  // pass: no intersection
-              live_objects->push_back(AllocObject(i->ptr, i->size,
-                                                  IN_GLOBAL_DATA));
-            }
-          }
-          // Move live_objects back into l->second
-          // for filtering by the next region.
-          live_objects->swap(l->second);
-          live_objects->clear();
-        }
-      }
-      // Now get and use live_objects from the final version of l->second:
-      if (VLOG_IS_ON(11)) {
-        for (LiveObjectsStack::const_iterator i = l->second.begin();
-             i != l->second.end(); ++i) {
-          RAW_VLOG(11, "Library live region at %p of %" PRIuPTR " bytes",
-                   i->ptr, i->size);
-        }
-      }
-      live_objects->swap(l->second);
-      IgnoreLiveObjectsLocked("in globals of\n  ", l->first.c_str());
-    }
-    if (have_null_region_callers) {
-      RAW_LOG(ERROR, "Have memory regions w/o callers: "
-                     "might report false leaks");
-    }
-    Allocator::DeleteAndNull(&library_live_objects);
-  }
-}
-
-// Callback for TCMalloc_ListAllProcessThreads in IgnoreAllLiveObjectsLocked below
-// to test/verify that we have just the one main thread, in which case
-// we can do everything in that main thread,
-// so that CPU profiler can collect all its samples.
-// Returns the number of threads in the process.
-static int IsOneThread(void* parameter, int num_threads,
-                       pid_t* thread_pids, va_list ap) {
-  if (num_threads != 1) {
-    RAW_LOG(WARNING, "Have threads: Won't CPU-profile the bulk of leak "
-                     "checking work happening in IgnoreLiveThreadsLocked!");
-  }
-  TCMalloc_ResumeAllProcessThreads(num_threads, thread_pids);
-  return num_threads;
-}
-
-// Dummy for IgnoreAllLiveObjectsLocked below.
-// Making it global helps with compiler warnings. 
-static va_list dummy_ap; - -// static -void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { - RAW_DCHECK(heap_checker_lock.IsHeld(), ""); - RAW_CHECK(live_objects == NULL, ""); - live_objects = new(Allocator::Allocate(sizeof(LiveObjectsStack))) - LiveObjectsStack; - stack_tops = new(Allocator::Allocate(sizeof(StackTopSet))) StackTopSet; - // reset the counts - live_objects_total = 0; - live_bytes_total = 0; - // Reduce max_heap_object_size to FLAGS_heap_check_max_pointer_offset - // for the time of leak check. - // FLAGS_heap_check_max_pointer_offset caps max_heap_object_size - // to manage reasonably low chances of random bytes - // appearing to be pointing into large actually leaked heap objects. - const size_t old_max_heap_object_size = max_heap_object_size; - max_heap_object_size = ( - FLAGS_heap_check_max_pointer_offset != -1 - ? min(size_t(FLAGS_heap_check_max_pointer_offset), max_heap_object_size) - : max_heap_object_size); - // Record global data as live: - if (FLAGS_heap_check_ignore_global_live) { - library_live_objects = - new(Allocator::Allocate(sizeof(LibraryLiveObjectsStacks))) - LibraryLiveObjectsStacks; - } - // Ignore all thread stacks: - thread_listing_status = CALLBACK_NOT_STARTED; - bool need_to_ignore_non_thread_objects = true; - self_thread_pid = getpid(); - self_thread_stack_top = self_stack_top; - if (FLAGS_heap_check_ignore_thread_live) { - // In case we are doing CPU profiling we'd like to do all the work - // in the main thread, not in the special thread created by - // TCMalloc_ListAllProcessThreads, so that CPU profiler can - // collect all its samples. The machinery of - // TCMalloc_ListAllProcessThreads conflicts with the CPU profiler - // by also relying on signals and ::sigaction. We can do this - // (run everything in the main thread) safely only if there's just - // the main thread itself in our process. This variable reflects - // these two conditions: - bool want_and_can_run_in_main_thread = - ProfilingIsEnabledForAllThreads() && - TCMalloc_ListAllProcessThreads(NULL, IsOneThread) == 1; - // When the normal path of TCMalloc_ListAllProcessThreads below is taken, - // we fully suspend the threads right here before any liveness checking - // and keep them suspended for the whole time of liveness checking - // inside of the IgnoreLiveThreadsLocked callback. - // (The threads can't (de)allocate due to lock on the delete hook but - // if not suspended they could still mess with the pointer - // graph while we walk it). - int r = want_and_can_run_in_main_thread - ? IgnoreLiveThreadsLocked(NULL, 1, &self_thread_pid, dummy_ap) - : TCMalloc_ListAllProcessThreads(NULL, IgnoreLiveThreadsLocked); - need_to_ignore_non_thread_objects = r < 0; - if (r < 0) { - RAW_LOG(WARNING, "Thread finding failed with %d errno=%d", r, errno); - if (thread_listing_status == CALLBACK_COMPLETED) { - RAW_LOG(INFO, "Thread finding callback " - "finished ok; hopefully everything is fine"); - need_to_ignore_non_thread_objects = false; - } else if (thread_listing_status == CALLBACK_STARTED) { - RAW_LOG(FATAL, "Thread finding callback was " - "interrupted or crashed; can't fix this"); - } else { // CALLBACK_NOT_STARTED - RAW_LOG(ERROR, "Could not find thread stacks. " - "Will likely report false leak positives."); - } - } else if (r != 0) { - RAW_LOG(ERROR, "Thread stacks not found for %d threads. 
" - "Will likely report false leak positives.", r); - } else { - RAW_VLOG(11, "Thread stacks appear to be found for all threads"); - } - } else { - RAW_LOG(WARNING, "Not looking for thread stacks; " - "objects reachable only from there " - "will be reported as leaks"); - } - // Do all other live data ignoring here if we did not do it - // within thread listing callback with all threads stopped. - if (need_to_ignore_non_thread_objects) { - if (FLAGS_heap_check_ignore_global_live) { - UseProcMapsLocked(RECORD_GLOBAL_DATA); - } - IgnoreNonThreadLiveObjectsLocked(); - } - if (live_objects_total) { - RAW_VLOG(10, "Ignoring %" PRId64 " reachable objects of %" PRId64 " bytes", - live_objects_total, live_bytes_total); - } - // Free these: we made them here and heap_profile never saw them - Allocator::DeleteAndNull(&live_objects); - Allocator::DeleteAndNull(&stack_tops); - max_heap_object_size = old_max_heap_object_size; // reset this var -} - -// Alignment at which we should consider pointer positions -// in IgnoreLiveObjectsLocked. Will normally use the value of -// FLAGS_heap_check_pointer_source_alignment. -static size_t pointer_source_alignment = kPointerSourceAlignment; -// Global lock for HeapLeakChecker::DoNoLeaks -// to protect pointer_source_alignment. -static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); - -// This function changes the live bits in the heap_profile-table's state: -// we only record the live objects to be skipped. -// -// When checking if a byte sequence points to a heap object we use -// HeapProfileTable::FindInsideAlloc to handle both pointers to -// the start and inside of heap-allocated objects. -// The "inside" case needs to be checked to support -// at least the following relatively common cases: -// - C++ arrays allocated with new FooClass[size] for classes -// with destructors have their size recorded in a sizeof(int) field -// before the place normal pointers point to. -// - basic_string<>-s for e.g. the C++ library of gcc 3.4 -// have the meta-info in basic_string<...>::_Rep recorded -// before the place normal pointers point to. -// - Multiple-inherited objects have their pointers when cast to -// different base classes pointing inside of the actually -// allocated object. -// - Sometimes reachability pointers point to member objects of heap objects, -// and then those member objects point to the full heap object. -// - Third party UnicodeString: it stores a 32-bit refcount -// (in both 32-bit and 64-bit binaries) as the first uint32 -// in the allocated memory and a normal pointer points at -// the second uint32 behind the refcount. -// By finding these additional objects here -// we slightly increase the chance to mistake random memory bytes -// for a pointer and miss a leak in a particular run of a binary. 
-//
-/*static*/ void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name,
-                                                         const char* name2) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  int64 live_object_count = 0;
-  int64 live_byte_count = 0;
-  while (!live_objects->empty()) {
-    const char* object =
-      reinterpret_cast<const char*>(live_objects->back().ptr);
-    size_t size = live_objects->back().size;
-    const ObjectPlacement place = live_objects->back().place;
-    live_objects->pop_back();
-    if (place == MUST_BE_ON_HEAP && heap_profile->MarkAsLive(object)) {
-      live_object_count += 1;
-      live_byte_count += size;
-    }
-    RAW_VLOG(13, "Looking for heap pointers in %p of %" PRIuS " bytes",
-             object, size);
-    const char* const whole_object = object;
-    size_t const whole_size = size;
-    // Try interpreting any byte sequence in object,size as a heap pointer:
-    const size_t remainder = AsInt(object) % pointer_source_alignment;
-    if (remainder) {
-      object += pointer_source_alignment - remainder;
-      if (size >= pointer_source_alignment - remainder) {
-        size -= pointer_source_alignment - remainder;
-      } else {
-        size = 0;
-      }
-    }
-    if (size < sizeof(void*)) continue;
-
-#ifdef NO_FRAME_POINTER
-    // Frame pointer omission requires us to use libunwind, which uses direct
-    // mmap and munmap system calls, and that needs special handling.
-    if (name2 == kUnnamedProcSelfMapEntry) {
-      static const uintptr_t page_mask = ~(getpagesize() - 1);
-      const uintptr_t addr = reinterpret_cast<uintptr_t>(object);
-      if ((addr & page_mask) == 0 && (size & page_mask) == 0) {
-        // This is an object we slurped from /proc/self/maps.
-        // It may or may not be readable at this point.
-        //
-        // In case all the above conditions made a mistake, and the object is
-        // not related to libunwind, we also verify that it's not readable
-        // before ignoring it.
-        if (msync(const_cast<char*>(object), size, MS_ASYNC) != 0) {
-          // Skip unreadable object, so we don't crash trying to sweep it.
-          RAW_VLOG(0, "Ignoring inaccessible object [%p, %p) "
-                   "(msync error %d (%s))",
-                   object, object + size, errno, strerror(errno));
-          continue;
-        }
-      }
-    }
-#endif
-
-    const char* const max_object = object + size - sizeof(void*);
-    while (object <= max_object) {
-      // potentially unaligned load:
-      const uintptr_t addr = *reinterpret_cast<const uintptr_t*>(object);
-      // Do fast check before the more expensive HaveOnHeapLocked lookup:
-      // this code runs for all memory words that are potentially pointers:
-      const bool can_be_on_heap =
-        // Order tests by the likelihood of the test failing in 64/32 bit modes.
-        // Yes, this matters: we either lose 5..6% speed in 32 bit mode
-        // (which is already slower) or by a factor of 1.5..1.91 in 64 bit mode.
-        // After the alignment test got dropped the above performance figures
-        // must have changed; might need to revisit this.
-#if defined(__x86_64__)
-        addr <= max_heap_address &&  // <= is for 0-sized object with max addr
-        min_heap_address <= addr;
-#else
-        min_heap_address <= addr &&
-        addr <= max_heap_address;  // <= is for 0-sized object with max addr
-#endif
-      if (can_be_on_heap) {
-        const void* ptr = reinterpret_cast<const void*>(addr);
-        // Too expensive (inner loop): manually uncomment when debugging:
-        // RAW_VLOG(17, "Trying pointer to %p at %p", ptr, object);
-        size_t object_size;
-        if (HaveOnHeapLocked(&ptr, &object_size) &&
-            heap_profile->MarkAsLive(ptr)) {
-          // We take the (hopefully low) risk here of encountering by accident
-          // a byte sequence in memory that matches an address of
-          // a heap object which is in fact leaked.
-          // I.e. 
in very rare and probably not repeatable/lasting cases
-          // we might miss some real heap memory leaks.
-          RAW_VLOG(14, "Found pointer to %p of %" PRIuS " bytes at %p "
-                   "inside %p of size %" PRIuS "",
-                   ptr, object_size, object, whole_object, whole_size);
-          if (VLOG_IS_ON(15)) {
-            // log call stacks to help debug how come something is not a leak
-            HeapProfileTable::AllocInfo alloc;
-            if (!heap_profile->FindAllocDetails(ptr, &alloc)) {
-              RAW_LOG(FATAL, "FindAllocDetails failed on ptr %p", ptr);
-            }
-            RAW_LOG(INFO, "New live %p object's alloc stack:", ptr);
-            for (int i = 0; i < alloc.stack_depth; ++i) {
-              RAW_LOG(INFO, "  @ %p", alloc.call_stack[i]);
-            }
-          }
-          live_object_count += 1;
-          live_byte_count += object_size;
-          live_objects->push_back(AllocObject(ptr, object_size,
-                                              IGNORED_ON_HEAP));
-        }
-      }
-      object += pointer_source_alignment;
-    }
-  }
-  live_objects_total += live_object_count;
-  live_bytes_total += live_byte_count;
-  if (live_object_count) {
-    RAW_VLOG(10, "Removed %" PRId64 " live heap objects of %" PRId64 " bytes: %s%s",
-             live_object_count, live_byte_count, name, name2);
-  }
-}
-
-//----------------------------------------------------------------------
-// HeapLeakChecker leak check disabling components
-//----------------------------------------------------------------------
-
-// static
-void HeapLeakChecker::DisableChecksIn(const char* pattern) {
-  RAW_LOG(WARNING, "DisableChecksIn(%s) is ignored", pattern);
-}
-
-// static
-void HeapLeakChecker::DoIgnoreObject(const void* ptr) {
-  SpinLockHolder l(&heap_checker_lock);
-  if (!heap_checker_on) return;
-  size_t object_size;
-  if (!HaveOnHeapLocked(&ptr, &object_size)) {
-    RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr);
-  } else {
-    RAW_VLOG(10, "Going to ignore live object at %p of %" PRIuS " bytes",
-             ptr, object_size);
-    if (ignored_objects == NULL) {
-      ignored_objects = new(Allocator::Allocate(sizeof(IgnoredObjectsMap)))
-                          IgnoredObjectsMap;
-    }
-    if (!ignored_objects->insert(make_pair(AsInt(ptr), object_size)).second) {
-      RAW_LOG(WARNING, "Object at %p is already being ignored", ptr);
-    }
-  }
-}
-
-// static
-void HeapLeakChecker::UnIgnoreObject(const void* ptr) {
-  SpinLockHolder l(&heap_checker_lock);
-  if (!heap_checker_on) return;
-  size_t object_size;
-  if (!HaveOnHeapLocked(&ptr, &object_size)) {
-    RAW_LOG(FATAL, "No live heap object at %p to un-ignore", ptr);
-  } else {
-    bool found = false;
-    if (ignored_objects) {
-      IgnoredObjectsMap::iterator object = ignored_objects->find(AsInt(ptr));
-      if (object != ignored_objects->end() && object_size == object->second) {
-        ignored_objects->erase(object);
-        found = true;
-        RAW_VLOG(10, "Now not going to ignore live object "
-                 "at %p of %" PRIuS " bytes", ptr, object_size);
-      }
-    }
-    if (!found) RAW_LOG(FATAL, "Object at %p has not been ignored", ptr);
-  }
-}
-
-//----------------------------------------------------------------------
-// HeapLeakChecker non-static functions
-//----------------------------------------------------------------------
-
-char* HeapLeakChecker::MakeProfileNameLocked() {
-  RAW_DCHECK(lock_->IsHeld(), "");
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  const int len = profile_name_prefix->size() + strlen(name_) + 5 +
-                  strlen(HeapProfileTable::kFileExt) + 1;
-  char* file_name = reinterpret_cast<char*>(Allocator::Allocate(len));
-  snprintf(file_name, len, "%s.%s-end%s",
-           profile_name_prefix->c_str(), name_,
-           HeapProfileTable::kFileExt);
-  return file_name;
-}
-
-void HeapLeakChecker::Create(const char *name, bool make_start_snapshot) {
- 
SpinLockHolder l(lock_); - name_ = NULL; // checker is inactive - start_snapshot_ = NULL; - has_checked_ = false; - inuse_bytes_increase_ = 0; - inuse_allocs_increase_ = 0; - keep_profiles_ = false; - char* n = new char[strlen(name) + 1]; // do this before we lock - IgnoreObject(n); // otherwise it might be treated as live due to our stack - { // Heap activity in other threads is paused for this whole scope. - SpinLockHolder al(&alignment_checker_lock); - SpinLockHolder hl(&heap_checker_lock); - MemoryRegionMap::LockHolder ml; - if (heap_checker_on && profile_name_prefix != NULL) { - RAW_DCHECK(strchr(name, '/') == NULL, "must be a simple name"); - memcpy(n, name, strlen(name) + 1); - name_ = n; // checker is active - if (make_start_snapshot) { - start_snapshot_ = heap_profile->TakeSnapshot(); - } - - const HeapProfileTable::Stats& t = heap_profile->total(); - const size_t start_inuse_bytes = t.alloc_size - t.free_size; - const size_t start_inuse_allocs = t.allocs - t.frees; - RAW_VLOG(10, "Start check \"%s\" profile: %" PRIuS " bytes " - "in %" PRIuS " objects", - name_, start_inuse_bytes, start_inuse_allocs); - } else { - RAW_LOG(WARNING, "Heap checker is not active, " - "hence checker \"%s\" will do nothing!", name); - RAW_LOG(WARNING, "To activate set the HEAPCHECK environment variable.\n"); - } - } - if (name_ == NULL) { - UnIgnoreObject(n); - delete[] n; // must be done after we unlock - } -} - -HeapLeakChecker::HeapLeakChecker(const char *name) : lock_(new SpinLock) { - RAW_DCHECK(strcmp(name, "_main_") != 0, "_main_ is reserved"); - Create(name, true/*create start_snapshot_*/); -} - -HeapLeakChecker::HeapLeakChecker() : lock_(new SpinLock) { - if (FLAGS_heap_check_before_constructors) { - // We want to check for leaks of objects allocated during global - // constructors (i.e., objects allocated already). So we do not - // create a baseline snapshot and hence check for leaks of objects - // that may have already been created. - Create("_main_", false); - } else { - // We want to ignore leaks of objects allocated during global - // constructors (i.e., objects allocated already). So we snapshot - // the current heap contents and use them as a baseline that is - // not reported by the leak checker. - Create("_main_", true); - } -} - -ssize_t HeapLeakChecker::BytesLeaked() const { - SpinLockHolder l(lock_); - if (!has_checked_) { - RAW_LOG(FATAL, "*NoLeaks|SameHeap must execute before this call"); - } - return inuse_bytes_increase_; -} - -ssize_t HeapLeakChecker::ObjectsLeaked() const { - SpinLockHolder l(lock_); - if (!has_checked_) { - RAW_LOG(FATAL, "*NoLeaks|SameHeap must execute before this call"); - } - return inuse_allocs_increase_; -} - -// Save pid of main thread for using in naming dump files -static int32 main_thread_pid = getpid(); -#ifdef HAVE_PROGRAM_INVOCATION_NAME -#ifdef __UCLIBC__ -extern const char* program_invocation_name; -extern const char* program_invocation_short_name; -#else -extern char* program_invocation_name; -extern char* program_invocation_short_name; -#endif -static const char* invocation_name() { return program_invocation_short_name; } -static string invocation_path() { return program_invocation_name; } -#else -static const char* invocation_name() { return ""; } -static string invocation_path() { return ""; } -#endif - -// Prints commands that users can run to get more information -// about the reported leaks. 
-static void SuggestPprofCommand(const char* pprof_file_arg) { - // Extra help information to print for the user when the test is - // being run in a way where the straightforward pprof command will - // not suffice. - string extra_help; - - // Common header info to print for remote runs - const string remote_header = - "This program is being executed remotely and therefore the pprof\n" - "command printed above will not work. Either run this program\n" - "locally, or adjust the pprof command as follows to allow it to\n" - "work on your local machine:\n"; - - // Extra command for fetching remote data - string fetch_cmd; - - RAW_LOG(WARNING, - "\n\n" - "If the preceding stack traces are not enough to find " - "the leaks, try running THIS shell command:\n\n" - "%s%s %s \"%s\" --inuse_objects --lines --heapcheck " - " --edgefraction=1e-10 --nodefraction=1e-10 --gv\n" - "\n" - "%s" - "If you are still puzzled about why the leaks are " - "there, try rerunning this program with " - "HEAP_CHECK_TEST_POINTER_ALIGNMENT=1 and/or with " - "HEAP_CHECK_MAX_POINTER_OFFSET=-1\n" - "If the leak report occurs in a small fraction of runs, " - "try running with TCMALLOC_MAX_FREE_QUEUE_SIZE of few hundred MB " - "or with TCMALLOC_RECLAIM_MEMORY=false, " // only works for debugalloc - "it might help find leaks more repeatably\n", - fetch_cmd.c_str(), - "pprof", // works as long as pprof is on your path - invocation_path().c_str(), - pprof_file_arg, - extra_help.c_str() - ); -} - -bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { - SpinLockHolder l(lock_); - // The locking also helps us keep the messages - // for the two checks close together. - SpinLockHolder al(&alignment_checker_lock); - - // thread-safe: protected by alignment_checker_lock - static bool have_disabled_hooks_for_symbolize = false; - // Once we've checked for leaks and symbolized the results once, it's - // not safe to do it again. This is because in order to symbolize - // safely, we had to disable all the malloc hooks here, so we no - // longer can be confident we've collected all the data we need. - if (have_disabled_hooks_for_symbolize) { - RAW_LOG(FATAL, "Must not call heap leak checker manually after " - " program-exit's automatic check."); - } - - HeapProfileTable::Snapshot* leaks = NULL; - char* pprof_file = NULL; - - { - // Heap activity in other threads is paused during this function - // (i.e. until we got all profile difference info). - SpinLockHolder hl(&heap_checker_lock); - if (heap_checker_on == false) { - if (name_ != NULL) { // leak checking enabled when created the checker - RAW_LOG(WARNING, "Heap leak checker got turned off after checker " - "\"%s\" has been created, no leak check is being done for it!", - name_); - } - return true; - } - - // Update global_region_caller_ranges. They may need to change since - // e.g. initialization because shared libraries might have been loaded or - // unloaded. 
-    Allocator::DeleteAndNullIfNot(&global_region_caller_ranges);
-    ProcMapsResult pm_result = UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS);
-    RAW_CHECK(pm_result == PROC_MAPS_USED, "");
-
-    // Keep track of number of internally allocated objects so we
-    // can detect leaks in the heap-leak-checker itself
-    const int initial_allocs = Allocator::alloc_count();
-
-    if (name_ == NULL) {
-      RAW_LOG(FATAL, "Heap leak checker must not be turned on "
-                     "after construction of a HeapLeakChecker");
-    }
-
-    MemoryRegionMap::LockHolder ml;
-    int a_local_var;  // Use our stack ptr to make stack data live:
-
-    // Make the heap profile, other threads are locked out.
-    HeapProfileTable::Snapshot* base =
-      reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_);
-    RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, "");
-    pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment;
-    IgnoreAllLiveObjectsLocked(&a_local_var);
-    leaks = heap_profile->NonLiveSnapshot(base);
-
-    inuse_bytes_increase_ = static_cast<ssize_t>(leaks->total().alloc_size);
-    inuse_allocs_increase_ = static_cast<ssize_t>(leaks->total().allocs);
-    if (leaks->Empty()) {
-      heap_profile->ReleaseSnapshot(leaks);
-      leaks = NULL;
-
-      // We can only check for internal leaks along the no-user-leak
-      // path since in the leak path we temporarily release
-      // heap_checker_lock and another thread can come in and disturb
-      // allocation counts.
-      if (Allocator::alloc_count() != initial_allocs) {
-        RAW_LOG(FATAL, "Internal HeapChecker leak of %d objects ; %d -> %d",
-                Allocator::alloc_count() - initial_allocs,
-                initial_allocs, Allocator::alloc_count());
-      }
-    } else if (FLAGS_heap_check_test_pointer_alignment) {
-      if (pointer_source_alignment == 1) {
-        RAW_LOG(WARNING, "--heap_check_test_pointer_alignment has no effect: "
-                "--heap_check_pointer_source_alignment was already set to 1");
-      } else {
-        // Try with reduced pointer alignment
-        pointer_source_alignment = 1;
-        IgnoreAllLiveObjectsLocked(&a_local_var);
-        HeapProfileTable::Snapshot* leaks_wo_align =
-          heap_profile->NonLiveSnapshot(base);
-        pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment;
-        if (leaks_wo_align->Empty()) {
-          RAW_LOG(WARNING, "Found no leaks without pointer alignment: "
-                  "something might be placing pointers at "
-                  "unaligned addresses! This needs to be fixed.");
-        } else {
-          RAW_LOG(INFO, "Found leaks without pointer alignment as well: "
-                  "unaligned pointers must not be the cause of leaks.");
-          RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help "
-                  "to diagnose the leaks.");
-        }
-        heap_profile->ReleaseSnapshot(leaks_wo_align);
-      }
-    }
-
-    if (leaks != NULL) {
-      pprof_file = MakeProfileNameLocked();
-    }
-  }
-
-  has_checked_ = true;
-  if (leaks == NULL) {
-    if (FLAGS_heap_check_max_pointer_offset == -1) {
-      RAW_LOG(WARNING,
-              "Found no leaks without max_pointer_offset restriction: "
-              "it's possible that the default value of "
-              "heap_check_max_pointer_offset flag is too low. 
" - "Do you use pointers with larger than that offsets " - "pointing in the middle of heap-allocated objects?"); - } - const HeapProfileTable::Stats& stats = heap_profile->total(); - RAW_VLOG(heap_checker_info_level, - "No leaks found for check \"%s\" " - "(but no 100%% guarantee that there aren't any): " - "found %" PRId64 " reachable heap objects of %" PRId64 " bytes", - name_, - int64(stats.allocs - stats.frees), - int64(stats.alloc_size - stats.free_size)); - } else { - if (should_symbolize == SYMBOLIZE) { - // To turn addresses into symbols, we need to fork, which is a - // problem if both parent and child end up trying to call the - // same malloc-hooks we've set up, at the same time. To avoid - // trouble, we turn off the hooks before symbolizing. Note that - // this makes it unsafe to ever leak-report again! Luckily, we - // typically only want to report once in a program's run, at the - // very end. - if (MallocHook::GetNewHook() == NewHook) - MallocHook::SetNewHook(NULL); - if (MallocHook::GetDeleteHook() == DeleteHook) - MallocHook::SetDeleteHook(NULL); - MemoryRegionMap::Shutdown(); - // Make sure all the hooks really got unset: - RAW_CHECK(MallocHook::GetNewHook() == NULL, ""); - RAW_CHECK(MallocHook::GetDeleteHook() == NULL, ""); - RAW_CHECK(MallocHook::GetMmapHook() == NULL, ""); - RAW_CHECK(MallocHook::GetSbrkHook() == NULL, ""); - have_disabled_hooks_for_symbolize = true; - leaks->ReportLeaks(name_, pprof_file, true); // true = should_symbolize - } else { - leaks->ReportLeaks(name_, pprof_file, false); - } - if (FLAGS_heap_check_identify_leaks) { - leaks->ReportIndividualObjects(); - } - - SuggestPprofCommand(pprof_file); - - { - SpinLockHolder hl(&heap_checker_lock); - heap_profile->ReleaseSnapshot(leaks); - Allocator::Free(pprof_file); - } - } - - return (leaks == NULL); -} - -HeapLeakChecker::~HeapLeakChecker() { - if (name_ != NULL) { // had leak checking enabled when created the checker - if (!has_checked_) { - RAW_LOG(FATAL, "Some *NoLeaks|SameHeap method" - " must be called on any created HeapLeakChecker"); - } - - // Deallocate any snapshot taken at start - if (start_snapshot_ != NULL) { - SpinLockHolder l(&heap_checker_lock); - heap_profile->ReleaseSnapshot( - reinterpret_cast(start_snapshot_)); - } - - UnIgnoreObject(name_); - delete[] name_; - name_ = NULL; - } - delete lock_; -} - -//---------------------------------------------------------------------- -// HeapLeakChecker overall heap check components -//---------------------------------------------------------------------- - -// static -bool HeapLeakChecker::IsActive() { - SpinLockHolder l(&heap_checker_lock); - return heap_checker_on; -} - -vector* HeapCleaner::heap_cleanups_ = NULL; - -// When a HeapCleaner object is intialized, add its function to the static list -// of cleaners to be run before leaks checking. -HeapCleaner::HeapCleaner(void_function f) { - if (heap_cleanups_ == NULL) - heap_cleanups_ = new vector; - heap_cleanups_->push_back(f); -} - -// Run all of the cleanup functions and delete the vector. -void HeapCleaner::RunHeapCleanups() { - if (!heap_cleanups_) - return; - for (int i = 0; i < heap_cleanups_->size(); i++) { - void (*f)(void) = (*heap_cleanups_)[i]; - f(); - } - delete heap_cleanups_; - heap_cleanups_ = NULL; -} - -// Program exit heap cleanup registered as a module object destructor. -// Will not get executed when we crash on a signal. 
-//
-void HeapLeakChecker_RunHeapCleanups() {
-  if (FLAGS_heap_check == "local")   // don't check heap in this mode
-    return;
-  { SpinLockHolder l(&heap_checker_lock);
-    // can get here (via forks?) with other pids
-    if (heap_checker_pid != getpid()) return;
-  }
-  HeapCleaner::RunHeapCleanups();
-  if (!FLAGS_heap_check_after_destructors) HeapLeakChecker::DoMainHeapCheck();
-}
-
-static bool internal_init_start_has_run = false;
-
-// Called exactly once, before main() (but hopefully just before).
-// This picks a good unique name for the dumped leak checking heap profiles.
-//
-// Because we crash when InternalInitStart is called more than once,
-// it's fine that we hold heap_checker_lock only around pieces of
-// this function: this is still enough for thread-safety w.r.t. other functions
-// of this module.
-// We can't hold heap_checker_lock throughout because it would deadlock
-// on a memory allocation since our new/delete hooks can be on.
-//
-void HeapLeakChecker_InternalInitStart() {
-  { SpinLockHolder l(&heap_checker_lock);
-    RAW_CHECK(!internal_init_start_has_run,
-              "Heap-check constructor called twice. Perhaps you both linked"
-              " in the heap checker, and also used LD_PRELOAD to load it?");
-    internal_init_start_has_run = true;
-
-#ifdef ADDRESS_SANITIZER
-    // AddressSanitizer's custom malloc conflicts with HeapChecker.
-    FLAGS_heap_check = "";
-#endif
-
-    if (FLAGS_heap_check.empty()) {
-      // turns out we do not need checking in the end; can stop profiling
-      HeapLeakChecker::TurnItselfOffLocked();
-      return;
-    } else if (RunningOnValgrind()) {
-      // There is no point in trying -- we'll just fail.
-      RAW_LOG(WARNING, "Can't run under Valgrind; will turn itself off");
-      HeapLeakChecker::TurnItselfOffLocked();
-      return;
-    }
-  }
-
-  // Changing this to false can be useful when debugging heap-checker itself:
-  if (!FLAGS_heap_check_run_under_gdb && IsDebuggerAttached()) {
-    RAW_LOG(WARNING, "Someone is ptrace()ing us; will turn itself off");
-    SpinLockHolder l(&heap_checker_lock);
-    HeapLeakChecker::TurnItselfOffLocked();
-    return;
-  }
-
-  { SpinLockHolder l(&heap_checker_lock);
-    if (!constructor_heap_profiling) {
-      RAW_LOG(FATAL, "Can not start so late. You have to enable heap checking "
-                     "with HEAPCHECK=<mode>.");
-    }
-  }
-
-  // Set all flags
-  RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, "");
-  if (FLAGS_heap_check == "minimal") {
-    // The least we can check.
-    FLAGS_heap_check_before_constructors = false;  // from after main
-                                                   // (ignore more)
-    FLAGS_heap_check_after_destructors = false;  // to after cleanup
-                                                 // (most data is live)
-    FLAGS_heap_check_ignore_thread_live = true;  // ignore all live
-    FLAGS_heap_check_ignore_global_live = true;  // ignore all live
-  } else if (FLAGS_heap_check == "normal") {
-    // Faster than 'minimal' and not much stricter.
-    FLAGS_heap_check_before_constructors = true;  // from no profile (fast)
-    FLAGS_heap_check_after_destructors = false;  // to after cleanup
-                                                 // (most data is live)
-    FLAGS_heap_check_ignore_thread_live = true;  // ignore all live
-    FLAGS_heap_check_ignore_global_live = true;  // ignore all live
-  } else if (FLAGS_heap_check == "strict") {
-    // A bit stricter than 'normal': global destructors must fully clean up
-    // after themselves if they are present. 
-    FLAGS_heap_check_before_constructors = true;  // from no profile (fast)
-    FLAGS_heap_check_after_destructors = true;  // to after destructors
-                                                // (less data live)
-    FLAGS_heap_check_ignore_thread_live = true;  // ignore all live
-    FLAGS_heap_check_ignore_global_live = true;  // ignore all live
-  } else if (FLAGS_heap_check == "draconian") {
-    // Drop not very portable and not very exact live heap flooding.
-    FLAGS_heap_check_before_constructors = true;  // from no profile (fast)
-    FLAGS_heap_check_after_destructors = true;  // to after destructors
-                                                // (need them)
-    FLAGS_heap_check_ignore_thread_live = false;  // no live flood (stricter)
-    FLAGS_heap_check_ignore_global_live = false;  // no live flood (stricter)
-  } else if (FLAGS_heap_check == "as-is") {
-    // do nothing: use other flags as is
-  } else if (FLAGS_heap_check == "local") {
-    // do nothing
-  } else {
-    RAW_LOG(FATAL, "Unsupported heap_check flag: %s",
-            FLAGS_heap_check.c_str());
-  }
-  // FreeBSD doesn't seem to honor atexit execution order:
-  // http://code.google.com/p/gperftools/issues/detail?id=375
-  // Since heap-checking before destructors depends on atexit running
-  // at the right time, on FreeBSD we always check after, even in the
-  // less strict modes. This just means FreeBSD is always a bit
-  // stricter in its checking than other OSes.
-  // This now appears to be the case in other OSes as well;
-  // so always check afterwards.
-  FLAGS_heap_check_after_destructors = true;
-
-  { SpinLockHolder l(&heap_checker_lock);
-    RAW_DCHECK(heap_checker_pid == getpid(), "");
-    heap_checker_on = true;
-    RAW_DCHECK(heap_profile, "");
-    HeapLeakChecker::ProcMapsResult pm_result = HeapLeakChecker::UseProcMapsLocked(HeapLeakChecker::DISABLE_LIBRARY_ALLOCS);
-    // might need to do this more than once
-    // if one later dynamically loads libraries that we want disabled
-    if (pm_result != HeapLeakChecker::PROC_MAPS_USED) {  // can't function
-      HeapLeakChecker::TurnItselfOffLocked();
-      return;
-    }
-  }
-
-  // make a good place and name for heap profile leak dumps
-  string* profile_prefix =
-    new string(FLAGS_heap_check_dump_directory + "/" + invocation_name());
-
-  // Finalize prefix for dumping leak checking profiles.
-  const int32 our_pid = getpid();   // safest to call getpid() outside lock
-  { SpinLockHolder l(&heap_checker_lock);
-    // main_thread_pid might still be 0 if this function is being called before
-    // global constructors. In that case, our pid *is* the main pid.
-    if (main_thread_pid == 0)
-      main_thread_pid = our_pid;
-  }
-  char pid_buf[15];
-  snprintf(pid_buf, sizeof(pid_buf), ".%d", main_thread_pid);
-  *profile_prefix += pid_buf;
-  { SpinLockHolder l(&heap_checker_lock);
-    RAW_DCHECK(profile_name_prefix == NULL, "");
-    profile_name_prefix = profile_prefix;
-  }
-
-  // Make sure new/delete hooks are installed properly
-  // and heap profiler is indeed able to keep track
-  // of the objects being allocated.
-  // We test this to make sure we are indeed checking for leaks.
-  char* test_str = new char[5];
-  size_t size;
-  { SpinLockHolder l(&heap_checker_lock);
-    RAW_CHECK(heap_profile->FindAlloc(test_str, &size),
-              "our own new/delete not linked?");
-  }
-  delete[] test_str;
-  { SpinLockHolder l(&heap_checker_lock);
-    // This check can fail when it should not if another thread allocates
-    // into this same spot right this moment,
-    // which is unlikely since this code runs in InitGoogle. 
- RAW_CHECK(!heap_profile->FindAlloc(test_str, &size), - "our own new/delete not linked?"); - } - // If we crash in the above code, it probably means that - // "nm | grep new" will show that tcmalloc's new/delete - // implementation did not get linked-in into this binary - // (i.e. nm will list __builtin_new and __builtin_vec_new as undefined). - // If this happens, it is a BUILD bug to be fixed. - - RAW_VLOG(heap_checker_info_level, - "WARNING: Perftools heap leak checker is active " - "-- Performance may suffer"); - - if (FLAGS_heap_check != "local") { - HeapLeakChecker* main_hc = new HeapLeakChecker(); - SpinLockHolder l(&heap_checker_lock); - RAW_DCHECK(main_heap_checker == NULL, - "Repeated creation of main_heap_checker"); - main_heap_checker = main_hc; - do_main_heap_check = true; - } - - { SpinLockHolder l(&heap_checker_lock); - RAW_CHECK(heap_checker_on && constructor_heap_profiling, - "Leak checking is expected to be fully turned on now"); - } - - // For binaries built in debug mode, this will set release queue of - // debugallocation.cc to 100M to make it less likely for real leaks to - // be hidden due to reuse of heap memory object addresses. - // Running a test with --malloc_reclaim_memory=0 would help find leaks even - // better, but the test might run out of memory as a result. - // The scenario is that a heap object at address X is allocated and freed, - // but some other data-structure still retains a pointer to X. - // Then the same heap memory is used for another object, which is leaked, - // but the leak is not noticed due to the pointer to the original object at X. - // TODO(csilvers): support this in some manner. -#if 0 - SetCommandLineOptionWithMode("max_free_queue_size", "104857600", // 100M - SET_FLAG_IF_DEFAULT); -#endif -} - -// We want this to run early as well, but not so early as -// ::BeforeConstructors (we want flag assignments to have already -// happened, for instance). Initializer-registration does the trick. -REGISTER_MODULE_INITIALIZER(init_start, HeapLeakChecker_InternalInitStart()); -REGISTER_MODULE_DESTRUCTOR(init_start, HeapLeakChecker_RunHeapCleanups()); - -// static -bool HeapLeakChecker::NoGlobalLeaksMaybeSymbolize( - ShouldSymbolize should_symbolize) { - // we never delete or change main_heap_checker once it's set: - HeapLeakChecker* main_hc = GlobalChecker(); - if (main_hc) { - RAW_VLOG(10, "Checking for whole-program memory leaks"); - return main_hc->DoNoLeaks(should_symbolize); - } - return true; -} - -// static -bool HeapLeakChecker::DoMainHeapCheck() { - if (FLAGS_heap_check_delay_seconds > 0) { - sleep(FLAGS_heap_check_delay_seconds); - } - { SpinLockHolder l(&heap_checker_lock); - if (!do_main_heap_check) return false; - RAW_DCHECK(heap_checker_pid == getpid(), ""); - do_main_heap_check = false; // will do it now; no need to do it more - } - - // The program is over, so it's safe to symbolize addresses (which - // requires a fork) because no serious work is expected to be done - // after this. Symbolizing is really useful -- knowing what - // function has a leak is better than knowing just an address -- - // and while we can only safely symbolize once in a program run, - // now is the time (after all, there's no "later" that would be better). 
- if (!NoGlobalLeaksMaybeSymbolize(SYMBOLIZE)) { - if (FLAGS_heap_check_identify_leaks) { - RAW_LOG(FATAL, "Whole-program memory leaks found."); - } - RAW_LOG(ERROR, "Exiting with error code (instead of crashing) " - "because of whole-program memory leaks"); - _exit(1); // we don't want to call atexit() routines! - } - return true; -} - -// static -HeapLeakChecker* HeapLeakChecker::GlobalChecker() { - SpinLockHolder l(&heap_checker_lock); - return main_heap_checker; -} - -// static -bool HeapLeakChecker::NoGlobalLeaks() { - // symbolizing requires a fork, which isn't safe to do in general. - return NoGlobalLeaksMaybeSymbolize(DO_NOT_SYMBOLIZE); -} - -// static -void HeapLeakChecker::CancelGlobalCheck() { - SpinLockHolder l(&heap_checker_lock); - if (do_main_heap_check) { - RAW_VLOG(heap_checker_info_level, - "Canceling the automatic at-exit whole-program memory leak check"); - do_main_heap_check = false; - } -} - -// static -void HeapLeakChecker::BeforeConstructorsLocked() { - RAW_DCHECK(heap_checker_lock.IsHeld(), ""); - RAW_CHECK(!constructor_heap_profiling, - "BeforeConstructorsLocked called multiple times"); -#ifdef ADDRESS_SANITIZER - // AddressSanitizer's custom malloc conflicts with HeapChecker. - return; -#endif - // Set hooks early to crash if 'new' gets called before we make heap_profile, - // and make sure no other hooks existed: - RAW_CHECK(MallocHook::AddNewHook(&NewHook), ""); - RAW_CHECK(MallocHook::AddDeleteHook(&DeleteHook), ""); - constructor_heap_profiling = true; - MemoryRegionMap::Init(1, /* use_buckets */ false); - // Set up MemoryRegionMap with (at least) one caller stack frame to record - // (important that it's done before HeapProfileTable creation below). - Allocator::Init(); - RAW_CHECK(heap_profile == NULL, ""); - heap_profile = new(Allocator::Allocate(sizeof(HeapProfileTable))) - HeapProfileTable(&Allocator::Allocate, &Allocator::Free, - /* profile_mmap */ false); - RAW_VLOG(10, "Starting tracking the heap"); - heap_checker_on = true; -} - -// static -void HeapLeakChecker::TurnItselfOffLocked() { - RAW_DCHECK(heap_checker_lock.IsHeld(), ""); - // Set FLAGS_heap_check to "", for users who test for it - if (!FLAGS_heap_check.empty()) // be a noop in the common case - FLAGS_heap_check.clear(); // because clear() could allocate memory - if (constructor_heap_profiling) { - RAW_CHECK(heap_checker_on, ""); - RAW_VLOG(heap_checker_info_level, "Turning perftools heap leak checking off"); - heap_checker_on = false; - // Unset our hooks checking they were set: - RAW_CHECK(MallocHook::RemoveNewHook(&NewHook), ""); - RAW_CHECK(MallocHook::RemoveDeleteHook(&DeleteHook), ""); - Allocator::DeleteAndNull(&heap_profile); - // free our optional global data: - Allocator::DeleteAndNullIfNot(&ignored_objects); - Allocator::DeleteAndNullIfNot(&disabled_ranges); - Allocator::DeleteAndNullIfNot(&global_region_caller_ranges); - Allocator::Shutdown(); - MemoryRegionMap::Shutdown(); - } - RAW_CHECK(!heap_checker_on, ""); -} - -extern bool heap_leak_checker_bcad_variable; // in heap-checker-bcad.cc - -static bool has_called_before_constructors = false; - -// TODO(maxim): inline this function with -// MallocHook_InitAtFirstAllocation_HeapLeakChecker, and also rename -// HeapLeakChecker::BeforeConstructorsLocked. 
-void HeapLeakChecker_BeforeConstructors() {
-  SpinLockHolder l(&heap_checker_lock);
-  // We can be called from several places: the first mmap/sbrk/alloc call
-  // or the first global c-tor from heap-checker-bcad.cc:
-  // Do not re-execute initialization:
-  if (has_called_before_constructors) return;
-  has_called_before_constructors = true;
-
-  heap_checker_pid = getpid();  // set it always
-  heap_leak_checker_bcad_variable = true;
-  // just to reference it, so that heap-checker-bcad.o is linked in
-
-  // This function can be called *very* early, before the normal
-  // global-constructor that sets FLAGS_verbose.  Set it manually now,
-  // so the RAW_LOG messages here are controllable.
-  const char* verbose_str = GetenvBeforeMain("PERFTOOLS_VERBOSE");
-  if (verbose_str && atoi(verbose_str)) {  // different than the default of 0?
-    FLAGS_verbose = atoi(verbose_str);
-  }
-
-  bool need_heap_check = true;
-  // The user indicates a desire for heap-checking via the HEAPCHECK
-  // environment variable.  If it's not set, there's no way to do
-  // heap-checking.
-  if (!GetenvBeforeMain("HEAPCHECK")) {
-    need_heap_check = false;
-  }
-#ifdef HAVE_GETEUID
-  if (need_heap_check && getuid() != geteuid()) {
-    // heap-checker writes out files.  Thus, for security reasons, we don't
-    // recognize the env. var. to turn on heap-checking if we're setuid.
-    RAW_LOG(WARNING, ("HeapChecker: ignoring HEAPCHECK because "
-                      "program seems to be setuid\n"));
-    need_heap_check = false;
-  }
-#endif
-  if (need_heap_check) {
-    HeapLeakChecker::BeforeConstructorsLocked();
-  }
-}
-
-// This function overrides the weak function defined in malloc_hook.cc and
-// called by one of the initial malloc hooks (malloc_hook.cc) when the very
-// first memory allocation or an mmap/sbrk happens.  This ensures that
-// HeapLeakChecker is initialized and installs all its hooks early enough to
-// track absolutely all memory allocations and all memory region acquisitions
-// via mmap and sbrk.
-extern "C" void MallocHook_InitAtFirstAllocation_HeapLeakChecker() {
-  HeapLeakChecker_BeforeConstructors();
-}
-
-// This function is executed after all global object destructors run.
-void HeapLeakChecker_AfterDestructors() {
-  { SpinLockHolder l(&heap_checker_lock);
-    // can get here (via forks?) with other pids
-    if (heap_checker_pid != getpid()) return;
-  }
-  if (FLAGS_heap_check_after_destructors) {
-    if (HeapLeakChecker::DoMainHeapCheck()) {
-      const struct timespec sleep_time = { 0, 500000000 };  // 500 ms
-      nanosleep(&sleep_time, NULL);
-      // Need this hack to wait for other pthreads to exit.
-      // Otherwise tcmalloc finds errors
-      // on a free() call from pthreads.
- } - } - SpinLockHolder l(&heap_checker_lock); - RAW_CHECK(!do_main_heap_check, "should have done it"); -} - -//---------------------------------------------------------------------- -// HeapLeakChecker disabling helpers -//---------------------------------------------------------------------- - -// These functions are at the end of the file to prevent their inlining: - -// static -void HeapLeakChecker::DisableChecksFromToLocked(const void* start_address, - const void* end_address, - int max_depth) { - RAW_DCHECK(heap_checker_lock.IsHeld(), ""); - RAW_DCHECK(start_address < end_address, ""); - if (disabled_ranges == NULL) { - disabled_ranges = new(Allocator::Allocate(sizeof(DisabledRangeMap))) - DisabledRangeMap; - } - RangeValue value; - value.start_address = AsInt(start_address); - value.max_depth = max_depth; - if (disabled_ranges->insert(make_pair(AsInt(end_address), value)).second) { - RAW_VLOG(10, "Disabling leak checking in stack traces " - "under frame addresses between %p..%p", - start_address, end_address); - } else { // check that this is just a verbatim repetition - RangeValue const& val = disabled_ranges->find(AsInt(end_address))->second; - if (val.max_depth != value.max_depth || - val.start_address != value.start_address) { - RAW_LOG(FATAL, "Two DisableChecksToHereFrom calls conflict: " - "(%p, %p, %d) vs. (%p, %p, %d)", - AsPtr(val.start_address), end_address, val.max_depth, - start_address, end_address, max_depth); - } - } -} - -// static -inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr, - size_t* object_size) { - // Commented-out because HaveOnHeapLocked is very performance-critical: - // RAW_DCHECK(heap_checker_lock.IsHeld(), ""); - const uintptr_t addr = AsInt(*ptr); - if (heap_profile->FindInsideAlloc( - *ptr, max_heap_object_size, ptr, object_size)) { - RAW_VLOG(16, "Got pointer into %p at +%" PRIuPTR " offset", - *ptr, addr - AsInt(*ptr)); - return true; - } - return false; -} - -// static -const void* HeapLeakChecker::GetAllocCaller(void* ptr) { - // this is used only in the unittest, so the heavy checks are fine - HeapProfileTable::AllocInfo info; - { SpinLockHolder l(&heap_checker_lock); - RAW_CHECK(heap_profile->FindAllocDetails(ptr, &info), ""); - } - RAW_CHECK(info.stack_depth >= 1, ""); - return info.call_stack[0]; -} diff --git a/contrib/libtcmalloc/src/heap-profile-stats.h b/contrib/libtcmalloc/src/heap-profile-stats.h deleted file mode 100644 index ae45d5883fa..00000000000 --- a/contrib/libtcmalloc/src/heap-profile-stats.h +++ /dev/null @@ -1,78 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2013, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// This file defines structs to accumulate memory allocation and deallocation
-// counts.  These structs are commonly used for malloc (in HeapProfileTable)
-// and mmap (in MemoryRegionMap).
-
-// A bucket is a data structure for heap profiling that stores a pair of a
-// stack trace and counts of (de)allocation.  Buckets are stored in a hash
-// table which is declared as "HeapProfileBucket**".
-//
-// A hash value is computed from a stack trace.  Collision in the hash table
-// is resolved by separate chaining with linked lists.  The links in the list
-// are implemented with the member "HeapProfileBucket* next".
-//
-// The layout of a hash table HeapProfileBucket** bucket_table looks like:
-//   bucket_table[0] => NULL
-//   bucket_table[1] => HeapProfileBucket() => HeapProfileBucket() => NULL
-//   ...
-//   bucket_table[i] => HeapProfileBucket() => NULL
-//   ...
-//   bucket_table[n] => HeapProfileBucket() => NULL
-
-#ifndef HEAP_PROFILE_STATS_H_
-#define HEAP_PROFILE_STATS_H_
-
-struct HeapProfileStats {
-  // Returns true if the two HeapProfileStats are semantically equal.
-  bool Equivalent(const HeapProfileStats& other) const {
-    return allocs - frees == other.allocs - other.frees &&
-           alloc_size - free_size == other.alloc_size - other.free_size;
-  }
-
-  int32 allocs;      // Number of allocation calls.
-  int32 frees;       // Number of free calls.
-  int64 alloc_size;  // Total size of all allocated objects so far.
-  int64 free_size;   // Total size of all freed objects so far.
-};
-
-// Allocation and deallocation statistics per each stack trace.
-struct HeapProfileBucket : public HeapProfileStats {
-  // Longest stack trace we record.
-  static const int kMaxStackDepth = 32;
-
-  uintptr_t hash;           // Hash value of the stack trace.
-  int depth;                // Depth of stack trace.
-  const void** stack;       // Stack trace.
-  HeapProfileBucket* next;  // Next entry in hash-table.
-};
-
-#endif  // HEAP_PROFILE_STATS_H_
diff --git a/contrib/libtcmalloc/src/heap-profile-table.cc b/contrib/libtcmalloc/src/heap-profile-table.cc
deleted file mode 100644
index 7486468c056..00000000000
--- a/contrib/libtcmalloc/src/heap-profile-table.cc
+++ /dev/null
@@ -1,631 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2006, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//         Maxim Lifantsev (refactoring)
-//
-
-#include <config.h>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>   // for write()
-#endif
-#include <fcntl.h>    // for open()
-#ifdef HAVE_GLOB_H
-#include <glob.h>
-#ifndef GLOB_NOMATCH  // true on some old cygwins
-# define GLOB_NOMATCH 0
-#endif
-#endif
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h> // for PRIxPTR
-#endif
-#ifdef HAVE_POLL_H
-#include <poll.h>
-#endif
-#include <errno.h>
-#include <stdarg.h>
-#include <string>
-#include <map>
-#include <algorithm>  // for sort(), equal(), and copy()
-
-#include "heap-profile-table.h"
-
-#include "base/logging.h"
-#include "raw_printer.h"
-#include "symbolize.h"
-#include <gperftools/stacktrace.h>
-#include <gperftools/malloc_hook.h>
-#include "memory_region_map.h"
-#include "base/commandlineflags.h"
-#include "base/logging.h"    // for the RawFD I/O commands
-#include "base/sysinfo.h"
-
-using std::sort;
-using std::equal;
-using std::copy;
-using std::string;
-using std::map;
-
-using tcmalloc::FillProcSelfMaps;  // from sysinfo.h
-using tcmalloc::DumpProcSelfMaps;  // from sysinfo.h
-
-//----------------------------------------------------------------------
-
-DEFINE_bool(cleanup_old_heap_profiles,
-            EnvToBool("HEAP_PROFILE_CLEANUP", true),
-            "At initialization time, delete old heap profiles.");
-
-DEFINE_int32(heap_check_max_leaks,
-             EnvToInt("HEAP_CHECK_MAX_LEAKS", 20),
-             "The maximum number of leak reports to print.");
-
-//----------------------------------------------------------------------
-
-// header of the dumped heap profile
-static const char kProfileHeader[] = "heap profile: ";
-static const char kProcSelfMapsHeader[] = "\nMAPPED_LIBRARIES:\n";
-
-//----------------------------------------------------------------------
-
-const char HeapProfileTable::kFileExt[] = ".heap";
-
-//----------------------------------------------------------------------
-
-static const int kHashTableSize = 179999;   // Size for bucket_table_.
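The two DEFINE_* flags above draw their defaults from the HEAP_PROFILE_CLEANUP and HEAP_CHECK_MAX_LEAKS environment variables, so the profiler can be tuned without recompiling. A rough sketch of that env-to-default pattern follows; EnvToBoolSketch and EnvToIntSketch are illustrative stand-ins, not the real helpers from base/commandlineflags.h (which accept more spellings):

    // Illustrative stand-ins for gperftools' EnvToBool()/EnvToInt().
    #include <cstdlib>
    #include <cstring>

    static bool EnvToBoolSketch(const char* name, bool deflt) {
      const char* v = getenv(name);
      if (v == NULL) return deflt;  // unset: keep the compiled-in default
      // Treat a leading 't'/'T'/'y'/'Y'/'1' as true, anything else as false.
      return v[0] != '\0' && strchr("tTyY1", v[0]) != NULL;
    }

    static int EnvToIntSketch(const char* name, int deflt) {
      const char* v = getenv(name);
      return (v == NULL) ? deflt : atoi(v);  // e.g. HEAP_CHECK_MAX_LEAKS=50
    }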
-/*static*/ const int HeapProfileTable::kMaxStackDepth;
-
-//----------------------------------------------------------------------
-
-// We strip out a different number of stack frames in debug mode
-// because less inlining happens in that case
-#ifdef NDEBUG
-static const int kStripFrames = 2;
-#else
-static const int kStripFrames = 3;
-#endif
-
-// For sorting Stats or Buckets by in-use space
-static bool ByAllocatedSpace(HeapProfileTable::Stats* a,
-                             HeapProfileTable::Stats* b) {
-  // Return true iff "a" has more allocated space than "b"
-  return (a->alloc_size - a->free_size) > (b->alloc_size - b->free_size);
-}
-
-//----------------------------------------------------------------------
-
-HeapProfileTable::HeapProfileTable(Allocator alloc,
-                                   DeAllocator dealloc,
-                                   bool profile_mmap)
-    : alloc_(alloc),
-      dealloc_(dealloc),
-      profile_mmap_(profile_mmap),
-      bucket_table_(NULL),
-      num_buckets_(0),
-      address_map_(NULL) {
-  // Make a hash table for buckets.
-  const int table_bytes = kHashTableSize * sizeof(*bucket_table_);
-  bucket_table_ = static_cast<Bucket**>(alloc_(table_bytes));
-  memset(bucket_table_, 0, table_bytes);
-
-  // Make an allocation map.
-  address_map_ =
-      new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_);
-
-  // Initialize.
-  memset(&total_, 0, sizeof(total_));
-  num_buckets_ = 0;
-}
-
-HeapProfileTable::~HeapProfileTable() {
-  // Free the allocation map.
-  address_map_->~AllocationMap();
-  dealloc_(address_map_);
-  address_map_ = NULL;
-
-  // Free the hash table.
-  for (int i = 0; i < kHashTableSize; i++) {
-    for (Bucket* curr = bucket_table_[i]; curr != 0; /**/) {
-      Bucket* bucket = curr;
-      curr = curr->next;
-      dealloc_(bucket->stack);
-      dealloc_(bucket);
-    }
-  }
-  dealloc_(bucket_table_);
-  bucket_table_ = NULL;
-}
-
-HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth,
-                                                      const void* const key[]) {
-  // Make hash-value
-  uintptr_t h = 0;
-  for (int i = 0; i < depth; i++) {
-    h += reinterpret_cast<uintptr_t>(key[i]);
-    h += h << 10;
-    h ^= h >> 6;
-  }
-  h += h << 3;
-  h ^= h >> 11;
-
-  // Lookup stack trace in table
-  unsigned int buck = ((unsigned int) h) % kHashTableSize;
-  for (Bucket* b = bucket_table_[buck]; b != 0; b = b->next) {
-    if ((b->hash == h) &&
-        (b->depth == depth) &&
-        equal(key, key + depth, b->stack)) {
-      return b;
-    }
-  }
-
-  // Create new bucket
-  const size_t key_size = sizeof(key[0]) * depth;
-  const void** kcopy = reinterpret_cast<const void**>(alloc_(key_size));
-  copy(key, key + depth, kcopy);
-  Bucket* b = reinterpret_cast<Bucket*>(alloc_(sizeof(Bucket)));
-  memset(b, 0, sizeof(*b));
-  b->hash = h;
-  b->depth = depth;
-  b->stack = kcopy;
-  b->next = bucket_table_[buck];
-  bucket_table_[buck] = b;
-  num_buckets_++;
-  return b;
-}
-
-int HeapProfileTable::GetCallerStackTrace(
-    int skip_count, void* stack[kMaxStackDepth]) {
-  return MallocHook::GetCallerStackTrace(
-      stack, kMaxStackDepth, kStripFrames + skip_count + 1);
-}
-
-void HeapProfileTable::RecordAlloc(
-    const void* ptr, size_t bytes, int stack_depth,
-    const void* const call_stack[]) {
-  Bucket* b = GetBucket(stack_depth, call_stack);
-  b->allocs++;
-  b->alloc_size += bytes;
-  total_.allocs++;
-  total_.alloc_size += bytes;
-
-  AllocValue v;
-  v.set_bucket(b);  // also did set_live(false); set_ignore(false)
-  v.bytes = bytes;
-  address_map_->Insert(ptr, v);
-}
-
-void HeapProfileTable::RecordFree(const void* ptr) {
-  AllocValue v;
-  if (address_map_->FindAndRemove(ptr, &v)) {
-    Bucket* b = v.bucket();
-    b->frees++;
-    b->free_size += v.bytes;
-    total_.frees++;
-    total_.free_size += v.bytes;
-  }
-}
-
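GetBucket() above keys buckets by a Jenkins-style integer mix over the raw return addresses, so identical call stacks always land on the same chain of bucket_table_. The hash-and-index step is self-contained enough to run in isolation; the sketch below restates it verbatim, with a made-up stack and a main() harness that are purely for demonstration (the constant mirrors kHashTableSize above):

    #include <cstdint>
    #include <cstdio>

    // Verbatim restatement of the stack-trace hash in GetBucket() above.
    static uintptr_t HashStack(const void* const* stack, int depth) {
      uintptr_t h = 0;
      for (int i = 0; i < depth; i++) {
        h += reinterpret_cast<uintptr_t>(stack[i]);
        h += h << 10;
        h ^= h >> 6;
      }
      h += h << 3;
      h ^= h >> 11;
      return h;
    }

    int main() {
      const int kHashTableSizeSketch = 179999;  // mirrors bucket_table_'s size
      const void* fake_stack[2] = {
          reinterpret_cast<const void*>(uintptr_t(0x400123)),  // made-up PCs
          reinterpret_cast<const void*>(uintptr_t(0x400456)),
      };
      uintptr_t h = HashStack(fake_stack, 2);
      // Same chain-index computation as GetBucket(): hash modulo table size.
      printf("chain index = %u\n", unsigned(h) % kHashTableSizeSketch);
      return 0;
    }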
-bool HeapProfileTable::FindAlloc(const void* ptr, size_t* object_size) const {
-  const AllocValue* alloc_value = address_map_->Find(ptr);
-  if (alloc_value != NULL) *object_size = alloc_value->bytes;
-  return alloc_value != NULL;
-}
-
-bool HeapProfileTable::FindAllocDetails(const void* ptr,
-                                        AllocInfo* info) const {
-  const AllocValue* alloc_value = address_map_->Find(ptr);
-  if (alloc_value != NULL) {
-    info->object_size = alloc_value->bytes;
-    info->call_stack = alloc_value->bucket()->stack;
-    info->stack_depth = alloc_value->bucket()->depth;
-  }
-  return alloc_value != NULL;
-}
-
-bool HeapProfileTable::FindInsideAlloc(const void* ptr,
-                                       size_t max_size,
-                                       const void** object_ptr,
-                                       size_t* object_size) const {
-  const AllocValue* alloc_value =
-      address_map_->FindInside(&AllocValueSize, max_size, ptr, object_ptr);
-  if (alloc_value != NULL) *object_size = alloc_value->bytes;
-  return alloc_value != NULL;
-}
-
-bool HeapProfileTable::MarkAsLive(const void* ptr) {
-  AllocValue* alloc = address_map_->FindMutable(ptr);
-  if (alloc && !alloc->live()) {
-    alloc->set_live(true);
-    return true;
-  }
-  return false;
-}
-
-void HeapProfileTable::MarkAsIgnored(const void* ptr) {
-  AllocValue* alloc = address_map_->FindMutable(ptr);
-  if (alloc) {
-    alloc->set_ignore(true);
-  }
-}
-
-// We'd be happier using a snprintf-style helper, but we avoid one here
-// to reduce dependencies.
-int HeapProfileTable::UnparseBucket(const Bucket& b,
-                                    char* buf, int buflen, int bufsize,
-                                    const char* extra,
-                                    Stats* profile_stats) {
-  if (profile_stats != NULL) {
-    profile_stats->allocs += b.allocs;
-    profile_stats->alloc_size += b.alloc_size;
-    profile_stats->frees += b.frees;
-    profile_stats->free_size += b.free_size;
-  }
-  int printed =
-      snprintf(buf + buflen, bufsize - buflen, "%6d: %8" PRId64 " [%6d: %8" PRId64 "] @%s",
-               b.allocs - b.frees,
-               b.alloc_size - b.free_size,
-               b.allocs,
-               b.alloc_size,
-               extra);
-  // If it looks like the snprintf failed, ignore the fact we printed anything
-  if (printed < 0 || printed >= bufsize - buflen) return buflen;
-  buflen += printed;
-  for (int d = 0; d < b.depth; d++) {
-    printed = snprintf(buf + buflen, bufsize - buflen, " 0x%08" PRIxPTR,
-                       reinterpret_cast<uintptr_t>(b.stack[d]));
-    if (printed < 0 || printed >= bufsize - buflen) return buflen;
-    buflen += printed;
-  }
-  printed = snprintf(buf + buflen, bufsize - buflen, "\n");
-  if (printed < 0 || printed >= bufsize - buflen) return buflen;
-  buflen += printed;
-  return buflen;
-}
-
-HeapProfileTable::Bucket**
-HeapProfileTable::MakeSortedBucketList() const {
-  Bucket** list = static_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_));
-
-  int bucket_count = 0;
-  for (int i = 0; i < kHashTableSize; i++) {
-    for (Bucket* curr = bucket_table_[i]; curr != 0; curr = curr->next) {
-      list[bucket_count++] = curr;
-    }
-  }
-  RAW_DCHECK(bucket_count == num_buckets_, "");
-
-  sort(list, list + num_buckets_, ByAllocatedSpace);
-
-  return list;
-}
-
-void HeapProfileTable::IterateOrderedAllocContexts(
-    AllocContextIterator callback) const {
-  Bucket** list = MakeSortedBucketList();
-  AllocContextInfo info;
-  for (int i = 0; i < num_buckets_; ++i) {
-    *static_cast<Stats*>(&info) = *static_cast<Stats*>(list[i]);
-    info.stack_depth = list[i]->depth;
-    info.call_stack = list[i]->stack;
-    callback(info);
-  }
-  dealloc_(list);
-}
-
-int HeapProfileTable::FillOrderedProfile(char buf[], int size) const {
-  Bucket** list = MakeSortedBucketList();
-
-  // Our file format is "bucket, bucket, ..., bucket, proc_self_maps_info".
-  // In case buf is too small, we'd rather leave out the last
-  // buckets than leave out the /proc/self/maps info.  To ensure that,
-  // we actually print the /proc/self/maps info first, then move it to
-  // the end of the buffer, then write the bucket info into whatever
-  // is remaining, and then move the maps info one last time to close
-  // any gaps.  Whew!
-  int map_length = snprintf(buf, size, "%s", kProcSelfMapsHeader);
-  if (map_length < 0 || map_length >= size) {
-    dealloc_(list);
-    return 0;
-  }
-  bool dummy;   // "wrote_all" -- did /proc/self/maps fit in its entirety?
-  map_length += FillProcSelfMaps(buf + map_length, size - map_length, &dummy);
-  RAW_DCHECK(map_length <= size, "");
-  char* const map_start = buf + size - map_length;      // move to end
-  memmove(map_start, buf, map_length);
-  size -= map_length;
-
-  Stats stats;
-  memset(&stats, 0, sizeof(stats));
-  int bucket_length = snprintf(buf, size, "%s", kProfileHeader);
-  if (bucket_length < 0 || bucket_length >= size) {
-    dealloc_(list);
-    return 0;
-  }
-  bucket_length = UnparseBucket(total_, buf, bucket_length, size,
-                                " heapprofile", &stats);
-
-  // Dump the mmap list first.
-  if (profile_mmap_) {
-    BufferArgs buffer(buf, bucket_length, size);
-    MemoryRegionMap::IterateBuckets(DumpBucketIterator, &buffer);
-    bucket_length = buffer.buflen;
-  }
-
-  for (int i = 0; i < num_buckets_; i++) {
-    bucket_length = UnparseBucket(*list[i], buf, bucket_length, size, "",
-                                  &stats);
-  }
-  RAW_DCHECK(bucket_length < size, "");
-
-  dealloc_(list);
-
-  RAW_DCHECK(buf + bucket_length <= map_start, "");
-  memmove(buf + bucket_length, map_start, map_length);  // close the gap
-
-  return bucket_length + map_length;
-}
-
-// static
-void HeapProfileTable::DumpBucketIterator(const Bucket* bucket,
-                                          BufferArgs* args) {
-  args->buflen = UnparseBucket(*bucket, args->buf, args->buflen, args->bufsize,
-                               "", NULL);
-}
-
-inline
-void HeapProfileTable::DumpNonLiveIterator(const void* ptr, AllocValue* v,
-                                           const DumpArgs& args) {
-  if (v->live()) {
-    v->set_live(false);
-    return;
-  }
-  if (v->ignore()) {
-    return;
-  }
-  Bucket b;
-  memset(&b, 0, sizeof(b));
-  b.allocs = 1;
-  b.alloc_size = v->bytes;
-  b.depth = v->bucket()->depth;
-  b.stack = v->bucket()->stack;
-  char buf[1024];
-  int len = UnparseBucket(b, buf, 0, sizeof(buf), "", args.profile_stats);
-  RawWrite(args.fd, buf, len);
-}
-
-// Callback from NonLiveSnapshot; adds the entry to arg->dest
-// if the entry is not live and is not present in arg->base.
-void HeapProfileTable::AddIfNonLive(const void* ptr, AllocValue* v,
-                                    AddNonLiveArgs* arg) {
-  if (v->live()) {
-    v->set_live(false);
-  } else {
-    if (arg->base != NULL && arg->base->map_.Find(ptr) != NULL) {
-      // Present in arg->base, so do not save
-    } else {
-      arg->dest->Add(ptr, *v);
-    }
-  }
-}
-
-bool HeapProfileTable::WriteProfile(const char* file_name,
-                                    const Bucket& total,
-                                    AllocationMap* allocations) {
-  RAW_VLOG(1, "Dumping non-live heap profile to %s", file_name);
-  RawFD fd = RawOpenForWriting(file_name);
-  if (fd != kIllegalRawFD) {
-    RawWrite(fd, kProfileHeader, strlen(kProfileHeader));
-    char buf[512];
-    int len = UnparseBucket(total, buf, 0, sizeof(buf), " heapprofile",
-                            NULL);
-    RawWrite(fd, buf, len);
-    const DumpArgs args(fd, NULL);
-    allocations->Iterate<const DumpArgs&>(DumpNonLiveIterator, args);
-    RawWrite(fd, kProcSelfMapsHeader, strlen(kProcSelfMapsHeader));
-    DumpProcSelfMaps(fd);
-    RawClose(fd);
-    return true;
-  } else {
-    RAW_LOG(ERROR, "Failed dumping filtered heap profile to %s", file_name);
-    return false;
-  }
-}
-
-void HeapProfileTable::CleanupOldProfiles(const char* prefix) {
-  if (!FLAGS_cleanup_old_heap_profiles)
-    return;
-  string pattern = string(prefix) + ".*" + kFileExt;
-#if defined(HAVE_GLOB_H)
-  glob_t g;
-  const int r = glob(pattern.c_str(), GLOB_ERR, NULL, &g);
-  if (r == 0 || r == GLOB_NOMATCH) {
-    const int prefix_length = strlen(prefix);
-    for (int i = 0; i < g.gl_pathc; i++) {
-      const char* fname = g.gl_pathv[i];
-      if ((strlen(fname) >= prefix_length) &&
-          (memcmp(fname, prefix, prefix_length) == 0)) {
-        RAW_VLOG(1, "Removing old heap profile %s", fname);
-        unlink(fname);
-      }
-    }
-  }
-  globfree(&g);
-#else   /* HAVE_GLOB_H */
-  RAW_LOG(WARNING, "Unable to remove old heap profiles (can't run glob())");
-#endif
-}
-
-HeapProfileTable::Snapshot* HeapProfileTable::TakeSnapshot() {
-  Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_);
-  address_map_->Iterate(AddToSnapshot, s);
-  return s;
-}
-
-void HeapProfileTable::ReleaseSnapshot(Snapshot* s) {
-  s->~Snapshot();
-  dealloc_(s);
-}
-
-// Callback from TakeSnapshot; adds a single entry to snapshot
-void HeapProfileTable::AddToSnapshot(const void* ptr, AllocValue* v,
-                                     Snapshot* snapshot) {
-  snapshot->Add(ptr, *v);
-}
-
-HeapProfileTable::Snapshot* HeapProfileTable::NonLiveSnapshot(
-    Snapshot* base) {
-  RAW_VLOG(2, "NonLiveSnapshot input: %d %d\n",
-           int(total_.allocs - total_.frees),
-           int(total_.alloc_size - total_.free_size));
-
-  Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_);
-  AddNonLiveArgs args;
-  args.dest = s;
-  args.base = base;
-  address_map_->Iterate(AddIfNonLive, &args);
-  RAW_VLOG(2, "NonLiveSnapshot output: %d %d\n",
-           int(s->total_.allocs - s->total_.frees),
-           int(s->total_.alloc_size - s->total_.free_size));
-  return s;
-}
-
-// Information kept per unique bucket seen
-struct HeapProfileTable::Snapshot::Entry {
-  int count;
-  int bytes;
-  Bucket* bucket;
-  Entry() : count(0), bytes(0) { }
-
-  // Order by decreasing bytes
-  bool operator<(const Entry& x) const {
-    return this->bytes > x.bytes;
-  }
-};
-
-// State used to generate leak report.  We keep a mapping from Bucket pointer
-// to the collected stats for that bucket.
-struct HeapProfileTable::Snapshot::ReportState {
-  map<Bucket*, Entry> buckets_;
-};
-
-// Callback from ReportLeaks; updates ReportState.
-void HeapProfileTable::Snapshot::ReportCallback(const void* ptr,
-                                                AllocValue* v,
-                                                ReportState* state) {
-  Entry* e = &state->buckets_[v->bucket()];  // Creates empty Entry first time
-  e->bucket = v->bucket();
-  e->count++;
-  e->bytes += v->bytes;
-}
-
-void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name,
-                                             const char* filename,
-                                             bool should_symbolize) {
-  // This is only used by the heap leak checker, but is intimately
-  // tied to the allocation map that belongs in this module and is
-  // therefore placed here.
-  RAW_LOG(ERROR, "Leak check %s detected leaks of %" PRIuS " bytes "
-                 "in %" PRIuS " objects",
-          checker_name,
-          size_t(total_.alloc_size),
-          size_t(total_.allocs));
-
-  // Group objects by Bucket
-  ReportState state;
-  map_.Iterate(&ReportCallback, &state);
-
-  // Sort buckets by decreasing leaked size
-  const int n = state.buckets_.size();
-  Entry* entries = new Entry[n];
-  int dst = 0;
-  for (map<Bucket*, Entry>::const_iterator iter = state.buckets_.begin();
-       iter != state.buckets_.end();
-       ++iter) {
-    entries[dst++] = iter->second;
-  }
-  sort(entries, entries + n);
-
-  // Report a bounded number of leaks to keep the leak report from
-  // growing too long.
-  const int to_report =
-      (FLAGS_heap_check_max_leaks > 0 &&
-       n > FLAGS_heap_check_max_leaks) ? FLAGS_heap_check_max_leaks : n;
-  RAW_LOG(ERROR, "The %d largest leaks:", to_report);
-
-  // Print
-  SymbolTable symbolization_table;
-  for (int i = 0; i < to_report; i++) {
-    const Entry& e = entries[i];
-    for (int j = 0; j < e.bucket->depth; j++) {
-      symbolization_table.Add(e.bucket->stack[j]);
-    }
-  }
-  static const int kBufSize = 2<<10;
-  char buffer[kBufSize];
-  if (should_symbolize)
-    symbolization_table.Symbolize();
-  for (int i = 0; i < to_report; i++) {
-    const Entry& e = entries[i];
-    base::RawPrinter printer(buffer, kBufSize);
-    printer.Printf("Leak of %d bytes in %d objects allocated from:\n",
-                   e.bytes, e.count);
-    for (int j = 0; j < e.bucket->depth; j++) {
-      const void* pc = e.bucket->stack[j];
-      printer.Printf("\t@ %" PRIxPTR " %s\n",
-                     reinterpret_cast<uintptr_t>(pc), symbolization_table.GetSymbol(pc));
-    }
-    RAW_LOG(ERROR, "%s", buffer);
-  }
-
-  if (to_report < n) {
-    RAW_LOG(ERROR, "Skipping leaks numbered %d..%d",
-            to_report, n-1);
-  }
-  delete[] entries;
-
-  // TODO: Dump the sorted Entry list instead of dumping raw data?
-  //       (should be much shorter)
-  if (!HeapProfileTable::WriteProfile(filename, total_, &map_)) {
-    RAW_LOG(ERROR, "Could not write pprof profile to %s", filename);
-  }
-}
-
-void HeapProfileTable::Snapshot::ReportObject(const void* ptr,
-                                              AllocValue* v,
-                                              char* unused) {
-  // Perhaps also log the allocation stack trace (unsymbolized)
-  // on this line in case somebody finds it useful.
-  RAW_LOG(ERROR, "leaked %" PRIuS " byte object %p", v->bytes, ptr);
-}
-
-void HeapProfileTable::Snapshot::ReportIndividualObjects() {
-  char unused;
-  map_.Iterate(ReportObject, &unused);
-}
diff --git a/contrib/libtcmalloc/src/heap-profile-table.h b/contrib/libtcmalloc/src/heap-profile-table.h
deleted file mode 100644
index 3c6284741af..00000000000
--- a/contrib/libtcmalloc/src/heap-profile-table.h
+++ /dev/null
@@ -1,399 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2006, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//         Maxim Lifantsev (refactoring)
-//
-
-#ifndef BASE_HEAP_PROFILE_TABLE_H_
-#define BASE_HEAP_PROFILE_TABLE_H_
-
-#include "addressmap-inl.h"
-#include "base/basictypes.h"
-#include "base/logging.h"   // for RawFD
-#include "heap-profile-stats.h"
-
-// Table that maintains heap profile data, i.e. the set of currently
-// active heap memory allocations.  Thread-unsafe and non-reentrant:
-// each instance object must be used by one thread
-// at a time w/o self-recursion.
-//
-// TODO(maxim): add a unittest for this class.
-class HeapProfileTable {
- public:
-
-  // Extension to be used for heap profile files.
-  static const char kFileExt[];
-
-  // Longest stack trace we record.
-  static const int kMaxStackDepth = 32;
-
-  // data types ----------------------------
-
-  // Profile stats.
-  typedef HeapProfileStats Stats;
-
-  // Info we can return about an allocation.
-  struct AllocInfo {
-    size_t object_size;  // size of the allocation
-    const void* const* call_stack;  // call stack that made the allocation call
-    int stack_depth;  // depth of call_stack
-    bool live;
-    bool ignored;
-  };
-
-  // Info we return about an allocation context.
-  // An allocation context is a unique caller stack trace
-  // of an allocation operation.
-  struct AllocContextInfo : public Stats {
-    int stack_depth;                // Depth of stack trace
-    const void* const* call_stack;  // Stack trace
-  };
-
-  // Memory (de)allocator interface we'll use.
-  typedef void* (*Allocator)(size_t size);
-  typedef void  (*DeAllocator)(void* ptr);
-
-  // interface ---------------------------
-
-  HeapProfileTable(Allocator alloc, DeAllocator dealloc, bool profile_mmap);
-  ~HeapProfileTable();
-
-  // Collect the stack trace for the function that asked to do the
-  // allocation for passing to RecordAlloc() below.
-  //
-  // The stack trace is stored in 'stack'. The stack depth is returned.
-  //
-  // 'skip_count' gives the number of stack frames between this call
-  // and the memory allocation function.
- static int GetCallerStackTrace(int skip_count, void* stack[kMaxStackDepth]); - - // Record an allocation at 'ptr' of 'bytes' bytes. 'stack_depth' - // and 'call_stack' identifying the function that requested the - // allocation. They can be generated using GetCallerStackTrace() above. - void RecordAlloc(const void* ptr, size_t bytes, - int stack_depth, const void* const call_stack[]); - - // Record the deallocation of memory at 'ptr'. - void RecordFree(const void* ptr); - - // Return true iff we have recorded an allocation at 'ptr'. - // If yes, fill *object_size with the allocation byte size. - bool FindAlloc(const void* ptr, size_t* object_size) const; - // Same as FindAlloc, but fills all of *info. - bool FindAllocDetails(const void* ptr, AllocInfo* info) const; - - // Return true iff "ptr" points into a recorded allocation - // If yes, fill *object_ptr with the actual allocation address - // and *object_size with the allocation byte size. - // max_size specifies largest currently possible allocation size. - bool FindInsideAlloc(const void* ptr, size_t max_size, - const void** object_ptr, size_t* object_size) const; - - // If "ptr" points to a recorded allocation and it's not marked as live - // mark it as live and return true. Else return false. - // All allocations start as non-live. - bool MarkAsLive(const void* ptr); - - // If "ptr" points to a recorded allocation, mark it as "ignored". - // Ignored objects are treated like other objects, except that they - // are skipped in heap checking reports. - void MarkAsIgnored(const void* ptr); - - // Return current total (de)allocation statistics. It doesn't contain - // mmap'ed regions. - const Stats& total() const { return total_; } - - // Allocation data iteration callback: gets passed object pointer and - // fully-filled AllocInfo. - typedef void (*AllocIterator)(const void* ptr, const AllocInfo& info); - - // Iterate over the allocation profile data calling "callback" - // for every allocation. - void IterateAllocs(AllocIterator callback) const { - address_map_->Iterate(MapArgsAllocIterator, callback); - } - - // Allocation context profile data iteration callback - typedef void (*AllocContextIterator)(const AllocContextInfo& info); - - // Iterate over the allocation context profile data calling "callback" - // for every allocation context. Allocation contexts are ordered by the - // size of allocated space. - void IterateOrderedAllocContexts(AllocContextIterator callback) const; - - // Fill profile data into buffer 'buf' of size 'size' - // and return the actual size occupied by the dump in 'buf'. - // The profile buckets are dumped in the decreasing order - // of currently allocated bytes. - // We do not provision for 0-terminating 'buf'. - int FillOrderedProfile(char buf[], int size) const; - - // Cleanup any old profile files matching prefix + ".*" + kFileExt. - static void CleanupOldProfiles(const char* prefix); - - // Return a snapshot of the current contents of *this. - // Caller must call ReleaseSnapshot() on result when no longer needed. - // The result is only valid while this exists and until - // the snapshot is discarded by calling ReleaseSnapshot(). - class Snapshot; - Snapshot* TakeSnapshot(); - - // Release a previously taken snapshot. snapshot must not - // be used after this call. - void ReleaseSnapshot(Snapshot* snapshot); - - // Return a snapshot of every non-live, non-ignored object in *this. - // If "base" is non-NULL, skip any objects present in "base". 
-  // As a side-effect, clears the "live" bit on every live object in *this.
-  // Caller must call ReleaseSnapshot() on result when no longer needed.
-  Snapshot* NonLiveSnapshot(Snapshot* base);
-
- private:
-
-  // data types ----------------------------
-
-  // Hash table bucket to hold (de)allocation stats
-  // for a given allocation call stack trace.
-  typedef HeapProfileBucket Bucket;
-
-  // Info stored in the address map
-  struct AllocValue {
-    // Access to the stack-trace bucket
-    Bucket* bucket() const {
-      return reinterpret_cast<Bucket*>(bucket_rep & ~uintptr_t(kMask));
-    }
-    // This also does set_live(false).
-    void set_bucket(Bucket* b) { bucket_rep = reinterpret_cast<uintptr_t>(b); }
-    size_t bytes;   // Number of bytes in this allocation
-
-    // Access to the allocation liveness flag (for leak checking)
-    bool live() const { return bucket_rep & kLive; }
-    void set_live(bool l) {
-      bucket_rep = (bucket_rep & ~uintptr_t(kLive)) | (l ? kLive : 0);
-    }
-
-    // Should this allocation be ignored if it looks like a leak?
-    bool ignore() const { return bucket_rep & kIgnore; }
-    void set_ignore(bool r) {
-      bucket_rep = (bucket_rep & ~uintptr_t(kIgnore)) | (r ? kIgnore : 0);
-    }
-
-   private:
-    // We store a few bits in the bottom bits of bucket_rep.
-    // (Alignment is at least four, so we have at least two bits.)
-    static const int kLive = 1;
-    static const int kIgnore = 2;
-    static const int kMask = kLive | kIgnore;
-
-    uintptr_t bucket_rep;
-  };
-
-  // helper for FindInsideAlloc
-  static size_t AllocValueSize(const AllocValue& v) { return v.bytes; }
-
-  typedef AddressMap<AllocValue> AllocationMap;
-
-  // Arguments that need to be passed DumpBucketIterator callback below.
-  struct BufferArgs {
-    BufferArgs(char* buf_arg, int buflen_arg, int bufsize_arg)
-        : buf(buf_arg),
-          buflen(buflen_arg),
-          bufsize(bufsize_arg) {
-    }
-
-    char* buf;
-    int buflen;
-    int bufsize;
-
-    DISALLOW_COPY_AND_ASSIGN(BufferArgs);
-  };
-
-  // Arguments that need to be passed DumpNonLiveIterator callback below.
-  struct DumpArgs {
-    DumpArgs(RawFD fd_arg, Stats* profile_stats_arg)
-        : fd(fd_arg),
-          profile_stats(profile_stats_arg) {
-    }
-
-    RawFD fd;  // file to write to
-    Stats* profile_stats;  // stats to update (may be NULL)
-  };
-
-  // helpers ----------------------------
-
-  // Unparse bucket b and print its portion of profile dump into buf.
-  // We return the amount of space in buf that we use.  We start printing
-  // at buf + buflen, and promise not to go beyond buf + bufsize.
-  // We do not provision for 0-terminating 'buf'.
-  //
-  // If profile_stats is non-NULL, we update *profile_stats by
-  // counting bucket b.
-  //
-  // "extra" is appended to the unparsed bucket.  Typically it is empty,
-  // but may be set to something like " heapprofile" for the total
-  // bucket to indicate the type of the profile.
-  static int UnparseBucket(const Bucket& b,
-                           char* buf, int buflen, int bufsize,
-                           const char* extra,
-                           Stats* profile_stats);
-
-  // Get the bucket for the caller stack trace 'key' of depth 'depth'
-  // creating the bucket if needed.
-  Bucket* GetBucket(int depth, const void* const key[]);
-
-  // Helper for IterateAllocs to do callback signature conversion
-  // from AllocationMap::Iterate to AllocIterator.
-  static void MapArgsAllocIterator(const void* ptr, AllocValue* v,
-                                   AllocIterator callback) {
-    AllocInfo info;
-    info.object_size = v->bytes;
-    info.call_stack = v->bucket()->stack;
-    info.stack_depth = v->bucket()->depth;
-    info.live = v->live();
-    info.ignored = v->ignore();
-    callback(ptr, info);
-  }
-
-  // Helper to dump a bucket.
- inline static void DumpBucketIterator(const Bucket* bucket, - BufferArgs* args); - - // Helper for DumpNonLiveProfile to do object-granularity - // heap profile dumping. It gets passed to AllocationMap::Iterate. - inline static void DumpNonLiveIterator(const void* ptr, AllocValue* v, - const DumpArgs& args); - - // Helper for IterateOrderedAllocContexts and FillOrderedProfile. - // Creates a sorted list of Buckets whose length is num_buckets_. - // The caller is responsible for deallocating the returned list. - Bucket** MakeSortedBucketList() const; - - // Helper for TakeSnapshot. Saves object to snapshot. - static void AddToSnapshot(const void* ptr, AllocValue* v, Snapshot* s); - - // Arguments passed to AddIfNonLive - struct AddNonLiveArgs { - Snapshot* dest; - Snapshot* base; - }; - - // Helper for NonLiveSnapshot. Adds the object to the destination - // snapshot if it is non-live. - static void AddIfNonLive(const void* ptr, AllocValue* v, - AddNonLiveArgs* arg); - - // Write contents of "*allocations" as a heap profile to - // "file_name". "total" must contain the total of all entries in - // "*allocations". - static bool WriteProfile(const char* file_name, - const Bucket& total, - AllocationMap* allocations); - - // data ---------------------------- - - // Memory (de)allocator that we use. - Allocator alloc_; - DeAllocator dealloc_; - - // Overall profile stats; we use only the Stats part, - // but make it a Bucket to pass to UnparseBucket. - Bucket total_; - - bool profile_mmap_; - - // Bucket hash table for malloc. - // We hand-craft one instead of using one of the pre-written - // ones because we do not want to use malloc when operating on the table. - // It is only few lines of code, so no big deal. - Bucket** bucket_table_; - int num_buckets_; - - // Map of all currently allocated objects and mapped regions we know about. - AllocationMap* address_map_; - - DISALLOW_COPY_AND_ASSIGN(HeapProfileTable); -}; - -class HeapProfileTable::Snapshot { - public: - const Stats& total() const { return total_; } - - // Report anything in this snapshot as a leak. - // May use new/delete for temporary storage. - // If should_symbolize is true, will fork (which is not threadsafe) - // to turn addresses into symbol names. Set to false for maximum safety. - // Also writes a heap profile to "filename" that contains - // all of the objects in this snapshot. - void ReportLeaks(const char* checker_name, const char* filename, - bool should_symbolize); - - // Report the addresses of all leaked objects. - // May use new/delete for temporary storage. - void ReportIndividualObjects(); - - bool Empty() const { - return (total_.allocs == 0) && (total_.alloc_size == 0); - } - - private: - friend class HeapProfileTable; - - // Total count/size are stored in a Bucket so we can reuse UnparseBucket - Bucket total_; - - // We share the Buckets managed by the parent table, but have our - // own object->bucket map. - AllocationMap map_; - - Snapshot(Allocator alloc, DeAllocator dealloc) : map_(alloc, dealloc) { - memset(&total_, 0, sizeof(total_)); - } - - // Callback used to populate a Snapshot object with entries found - // in another allocation map. 
-  inline void Add(const void* ptr, const AllocValue& v) {
-    map_.Insert(ptr, v);
-    total_.allocs++;
-    total_.alloc_size += v.bytes;
-  }
-
-  // Helpers for sorting and generating leak reports
-  struct Entry;
-  struct ReportState;
-  static void ReportCallback(const void* ptr, AllocValue* v, ReportState*);
-  static void ReportObject(const void* ptr, AllocValue* v, char*);
-
-  DISALLOW_COPY_AND_ASSIGN(Snapshot);
-};
-
-#endif  // BASE_HEAP_PROFILE_TABLE_H_
diff --git a/contrib/libtcmalloc/src/internal_logging.cc b/contrib/libtcmalloc/src/internal_logging.cc
deleted file mode 100644
index 3b2d0cb80f9..00000000000
--- a/contrib/libtcmalloc/src/internal_logging.cc
+++ /dev/null
@@ -1,192 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Sanjay Ghemawat
-
-#include "config.h"
-#include "internal_logging.h"
-#include <stdarg.h>                     // for va_end, va_start
-#include <stdio.h>                      // for vsnprintf, va_list, etc
-#include <stdlib.h>                     // for abort
-#include <string.h>                     // for strlen, memcpy
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>                     // for write()
-#endif
-
-#include <gperftools/malloc_extension.h>
-#include "base/logging.h"               // for perftools_vsnprintf
-#include "base/spinlock.h"              // for SpinLockHolder, SpinLock
-
-// Variables for storing crash output.  Allocated statically since we
-// may not be able to heap-allocate while crashing.
-static SpinLock crash_lock(base::LINKER_INITIALIZED);
-static bool crashed = false;
-static const int kStatsBufferSize = 16 << 10;
-static char stats_buffer[kStatsBufferSize] = { 0 };
-
-namespace tcmalloc {
-
-static void WriteMessage(const char* msg, int length) {
-  write(STDERR_FILENO, msg, length);
-}
-
-void (*log_message_writer)(const char* msg, int length) = WriteMessage;
-
-
-class Logger {
- public:
-  bool Add(const LogItem& item);
-  bool AddStr(const char* str, int n);
-  bool AddNum(uint64_t num, int base);  // base must be 10 or 16.
-
-  static const int kBufSize = 200;
-  char* p_;
-  char* end_;
-  char buf_[kBufSize];
-};
-
-void Log(LogMode mode, const char* filename, int line,
-         LogItem a, LogItem b, LogItem c, LogItem d) {
-  Logger state;
-  state.p_ = state.buf_;
-  state.end_ = state.buf_ + sizeof(state.buf_);
-  state.AddStr(filename, strlen(filename))
-      && state.AddStr(":", 1)
-      && state.AddNum(line, 10)
-      && state.AddStr("]", 1)
-      && state.Add(a)
-      && state.Add(b)
-      && state.Add(c)
-      && state.Add(d);
-
-  // Terminate with newline
-  if (state.p_ >= state.end_) {
-    state.p_ = state.end_ - 1;
-  }
-  *state.p_ = '\n';
-  state.p_++;
-
-  int msglen = state.p_ - state.buf_;
-  if (mode == kLog) {
-    (*log_message_writer)(state.buf_, msglen);
-    return;
-  }
-
-  bool first_crash = false;
-  {
-    SpinLockHolder l(&crash_lock);
-    if (!crashed) {
-      crashed = true;
-      first_crash = true;
-    }
-  }
-
-  (*log_message_writer)(state.buf_, msglen);
-  if (first_crash && mode == kCrashWithStats) {
-    MallocExtension::instance()->GetStats(stats_buffer, kStatsBufferSize);
-    (*log_message_writer)(stats_buffer, strlen(stats_buffer));
-  }
-
-  abort();
-}
-
-bool Logger::Add(const LogItem& item) {
-  // Separate items with spaces
-  if (p_ < end_) {
-    *p_ = ' ';
-    p_++;
-  }
-
-  switch (item.tag_) {
-    case LogItem::kStr:
-      return AddStr(item.u_.str, strlen(item.u_.str));
-    case LogItem::kUnsigned:
-      return AddNum(item.u_.unum, 10);
-    case LogItem::kSigned:
-      if (item.u_.snum < 0) {
-        // The cast to uint64_t is intentionally before the negation
-        // so that we do not attempt to negate -2^63.
-        return AddStr("-", 1)
-            && AddNum(- static_cast<uint64_t>(item.u_.snum), 10);
-      } else {
-        return AddNum(static_cast<uint64_t>(item.u_.snum), 10);
-      }
-    case LogItem::kPtr:
-      return AddStr("0x", 2)
-          && AddNum(reinterpret_cast<uintptr_t>(item.u_.ptr), 16);
-    default:
-      return false;
-  }
-}
-
-bool Logger::AddStr(const char* str, int n) {
-  if (end_ - p_ < n) {
-    return false;
-  } else {
-    memcpy(p_, str, n);
-    p_ += n;
-    return true;
-  }
-}
-
-bool Logger::AddNum(uint64_t num, int base) {
-  static const char kDigits[] = "0123456789abcdef";
-  char space[22];  // more than enough for 2^64 in smallest supported base (10)
-  char* end = space + sizeof(space);
-  char* pos = end;
-  do {
-    pos--;
-    *pos = kDigits[num % base];
-    num /= base;
-  } while (num > 0 && pos > space);
-  return AddStr(pos, end - pos);
-}
-
-}  // end tcmalloc namespace
-
-void TCMalloc_Printer::printf(const char* format, ...) {
-  if (left_ > 0) {
-    va_list ap;
-    va_start(ap, format);
-    const int r = perftools_vsnprintf(buf_, left_, format, ap);
-    va_end(ap);
-    if (r < 0) {
-      // Perhaps an old glibc that returns -1 on truncation?
-      left_ = 0;
-    } else if (r > left_) {
-      // Truncation
-      left_ = 0;
-    } else {
-      left_ -= r;
-      buf_ += r;
-    }
-  }
-}
diff --git a/contrib/libtcmalloc/src/internal_logging.h b/contrib/libtcmalloc/src/internal_logging.h
deleted file mode 100644
index c6363894911..00000000000
--- a/contrib/libtcmalloc/src/internal_logging.h
+++ /dev/null
@@ -1,144 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// Internal logging and related utility routines.
-
-#ifndef TCMALLOC_INTERNAL_LOGGING_H_
-#define TCMALLOC_INTERNAL_LOGGING_H_
-
-#include "config.h"
-#include <stddef.h>                     // for size_t
-#if defined HAVE_STDINT_H
-#include <stdint.h>
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>
-#else
-#include <sys/types.h>
-#endif
-
-//-------------------------------------------------------------------
-// Utility routines
-//-------------------------------------------------------------------
-
-// Safe logging helper: we write directly to the stderr file
-// descriptor and avoid FILE buffering because that may invoke
-// malloc().
-//
-// Example:
-//   Log(kLog, __FILE__, __LINE__, "error", bytes);
-
-namespace tcmalloc {
-enum LogMode {
-  kLog,            // Just print the message
-  kCrash,          // Print the message and crash
-  kCrashWithStats  // Print the message, some stats, and crash
-};
-
-class Logger;
-
-// A LogItem holds any of the argument types that can be passed to Log()
-class LogItem {
- public:
-  LogItem()                     : tag_(kEnd)      { }
-  LogItem(const char* v)        : tag_(kStr)      { u_.str = v; }
-  LogItem(int v)                : tag_(kSigned)   { u_.snum = v; }
-  LogItem(long v)               : tag_(kSigned)   { u_.snum = v; }
-  LogItem(long long v)          : tag_(kSigned)   { u_.snum = v; }
-  LogItem(unsigned int v)       : tag_(kUnsigned) { u_.unum = v; }
-  LogItem(unsigned long v)      : tag_(kUnsigned) { u_.unum = v; }
-  LogItem(unsigned long long v) : tag_(kUnsigned) { u_.unum = v; }
-  LogItem(const void* v)        : tag_(kPtr)      { u_.ptr = v; }
- private:
-  friend class Logger;
-  enum Tag {
-    kStr,
-    kSigned,
-    kUnsigned,
-    kPtr,
-    kEnd
-  };
-  Tag tag_;
-  union {
-    const char* str;
-    const void* ptr;
-    int64_t snum;
-    uint64_t unum;
-  } u_;
-};
-
-extern PERFTOOLS_DLL_DECL void Log(LogMode mode, const char* filename, int line,
-                                   LogItem a, LogItem b = LogItem(),
-                                   LogItem c = LogItem(), LogItem d = LogItem());
-
-// Tests can override this function to collect logging messages.
-extern PERFTOOLS_DLL_DECL void (*log_message_writer)(const char* msg, int length); - -} // end tcmalloc namespace - -// Like assert(), but executed even in NDEBUG mode -#undef CHECK_CONDITION -#define CHECK_CONDITION(cond) \ -do { \ - if (!(cond)) { \ - ::tcmalloc::Log(::tcmalloc::kCrash, __FILE__, __LINE__, #cond); \ - } \ -} while (0) - -// Our own version of assert() so we can avoid hanging by trying to do -// all kinds of goofy printing while holding the malloc lock. -#ifndef NDEBUG -#define ASSERT(cond) CHECK_CONDITION(cond) -#else -#define ASSERT(cond) ((void) 0) -#endif - -// Print into buffer -class TCMalloc_Printer { - private: - char* buf_; // Where should we write next - int left_; // Space left in buffer (including space for \0) - - public: - // REQUIRES: "length > 0" - TCMalloc_Printer(char* buf, int length) : buf_(buf), left_(length) { - buf[0] = '\0'; - } - - void printf(const char* format, ...) -#ifdef HAVE___ATTRIBUTE__ - __attribute__ ((__format__ (__printf__, 2, 3))) -#endif -; -}; - -#endif // TCMALLOC_INTERNAL_LOGGING_H_ diff --git a/contrib/libtcmalloc/src/libc_override.h b/contrib/libtcmalloc/src/libc_override.h deleted file mode 100644 index 0dbabb2d169..00000000000 --- a/contrib/libtcmalloc/src/libc_override.h +++ /dev/null @@ -1,91 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// This .h file imports the code that causes tcmalloc to override libc -// versions of malloc/free/new/delete/etc. That is, it provides the -// logic that makes it so calls to malloc(10) go through tcmalloc, -// rather than the default (libc) malloc. -// -// This file also provides a method: ReplaceSystemAlloc(), that every -// libc_override_*.h file it #includes is required to provide. This -// is called when first setting up tcmalloc -- that is, when a global -// constructor in tcmalloc.cc is executed -- to do any initialization -// work that may be required for this OS. 
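A note for reviewers skimming the removed logging layer above: its one design rule is that nothing in the path may allocate. Messages are built in fixed stack buffers and flushed with raw write(2), never stdio, so logging is safe from inside allocator code. A self-contained sketch of that discipline, with hypothetical names (raw_write and log_num are mine, not gperftools APIs):

#include <string.h>   // strlen
#include <unistd.h>   // write
#include <stdint.h>

// Emit without ever touching the heap or stdio.
static void raw_write(const char* p, size_t n) {
  ssize_t r = write(STDERR_FILENO, p, n);
  (void)r;  // best-effort logging; nothing sane to do on failure
}

// Render 'num' in 'base' into a stack buffer, lowest digit first,
// the same scheme as Logger::AddNum above.
static void log_num(const char* label, uint64_t num, int base) {
  static const char kDigits[] = "0123456789abcdef";
  char space[22];                    // enough for 2^64 in base 10
  char* end = space + sizeof(space);
  char* pos = end;
  do {
    *--pos = kDigits[num % base];
    num /= base;
  } while (num > 0);
  raw_write(label, strlen(label));
  raw_write(pos, end - pos);
  raw_write("\n", 1);
}

int main() {
  log_num("bytes requested: ", 4096, 10);       // "bytes requested: 4096"
  log_num("page address: 0x", 0x7f00dead, 16);  // "page address: 0x7f00dead"
  return 0;
}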
(Note we cannot entirely -// control when tcmalloc is initialized, and the system may do some -// mallocs and frees before this routine is called.) It may be a -// noop. -// -// Every libc has its own way of doing this, and sometimes the compiler -// matters too, so we have a different file for each libc, and often -// for different compilers and OS's. - -#ifndef TCMALLOC_LIBC_OVERRIDE_INL_H_ -#define TCMALLOC_LIBC_OVERRIDE_INL_H_ - -#include "config.h" -#ifdef HAVE_FEATURES_H -#include // for __GLIBC__ -#endif -#include - -static void ReplaceSystemAlloc(); // defined in the .h files below - -// For windows, there are two ways to get tcmalloc. If we're -// patching, then src/windows/patch_function.cc will do the necessary -// overriding here. Otherwise, we doing the 'redefine' trick, where -// we remove malloc/new/etc from mscvcrt.dll, and just need to define -// them now. -#if defined(_WIN32) && defined(WIN32_DO_PATCHING) -void PatchWindowsFunctions(); // in src/windows/patch_function.cc -static void ReplaceSystemAlloc() { PatchWindowsFunctions(); } - -#elif defined(_WIN32) && !defined(WIN32_DO_PATCHING) -#include "libc_override_redefine.h" - -#elif defined(__APPLE__) -#include "libc_override_osx.h" - -#elif defined(__GLIBC__) -#include "libc_override_glibc.h" - -// Not all gcc systems necessarily support weak symbols, but all the -// ones I know of do, so for now just assume they all do. -#elif defined(__GNUC__) -#include "libc_override_gcc_and_weak.h" - -#else -#error Need to add support for your libc/OS here - -#endif - -#endif // TCMALLOC_LIBC_OVERRIDE_INL_H_ diff --git a/contrib/libtcmalloc/src/libc_override_gcc_and_weak.h b/contrib/libtcmalloc/src/libc_override_gcc_and_weak.h deleted file mode 100644 index 323d615d0ac..00000000000 --- a/contrib/libtcmalloc/src/libc_override_gcc_and_weak.h +++ /dev/null @@ -1,172 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -// --- -// Author: Craig Silverstein -// -// Used to override malloc routines on systems that define the -// memory allocation routines to be weak symbols in their libc -// (almost all unix-based systems are like this), on gcc, which -// suppports the 'alias' attribute. - -#ifndef TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ -#define TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ - -#ifdef HAVE_SYS_CDEFS_H -#include // for __THROW -#endif -#include - -#include "getenv_safe.h" // TCMallocGetenvSafe -#include "base/commandlineflags.h" - -#ifndef __THROW // I guess we're not on a glibc-like system -# define __THROW // __THROW is just an optimization, so ok to make it "" -#endif - -#ifndef __GNUC__ -# error libc_override_gcc_and_weak.h is for gcc distributions only. -#endif - -#define ALIAS(tc_fn) __attribute__ ((alias (#tc_fn), used)) - -void* operator new(size_t size) - ALIAS(tc_new); -void operator delete(void* p) noexcept - ALIAS(tc_delete); -void* operator new[](size_t size) - ALIAS(tc_newarray); -void operator delete[](void* p) noexcept - ALIAS(tc_deletearray); -void* operator new(size_t size, const std::nothrow_t& nt) noexcept - ALIAS(tc_new_nothrow); -void* operator new[](size_t size, const std::nothrow_t& nt) noexcept - ALIAS(tc_newarray_nothrow); -void operator delete(void* p, const std::nothrow_t& nt) noexcept - ALIAS(tc_delete_nothrow); -void operator delete[](void* p, const std::nothrow_t& nt) noexcept - ALIAS(tc_deletearray_nothrow); - -#if defined(ENABLE_SIZED_DELETE) - -void operator delete(void *p, size_t size) throw() - ALIAS(tc_delete_sized); -void operator delete[](void *p, size_t size) throw() - ALIAS(tc_deletearray_sized); - -#elif defined(ENABLE_DYNAMIC_SIZED_DELETE) && \ - (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 - -static void delegate_sized_delete(void *p, size_t s) throw() { - (operator delete)(p); -} - -static void delegate_sized_deletearray(void *p, size_t s) throw() { - (operator delete[])(p); -} - -extern "C" __attribute__((weak)) -int tcmalloc_sized_delete_enabled(void); - -static bool sized_delete_enabled(void) { - if (tcmalloc_sized_delete_enabled != 0) { - return !!tcmalloc_sized_delete_enabled(); - } - - const char *flag = TCMallocGetenvSafe("TCMALLOC_ENABLE_SIZED_DELETE"); - return tcmalloc::commandlineflags::StringToBool(flag, false); -} - -extern "C" { - -static void *resolve_delete_sized(void) { - if (sized_delete_enabled()) { - return reinterpret_cast(tc_delete_sized); - } - return reinterpret_cast(delegate_sized_delete); -} - -static void *resolve_deletearray_sized(void) { - if (sized_delete_enabled()) { - return reinterpret_cast(tc_deletearray_sized); - } - return reinterpret_cast(delegate_sized_deletearray); -} - -} - -void operator delete(void *p, size_t size) throw() - __attribute__((ifunc("resolve_delete_sized"))); -void operator delete[](void *p, size_t size) throw() - __attribute__((ifunc("resolve_deletearray_sized"))); - -#else /* !ENABLE_SIZED_DELETE && !ENABLE_DYN_SIZED_DELETE */ - -void operator delete(void *p, size_t size) throw() - ALIAS(tc_delete); -void operator delete[](void *p, size_t size) throw() - ALIAS(tc_deletearray); - -#endif /* !ENABLE_SIZED_DELETE && !ENABLE_DYN_SIZED_DELETE */ - -extern "C" { - void* malloc(size_t size) __THROW ALIAS(tc_malloc); - void free(void* ptr) __THROW ALIAS(tc_free); - void* realloc(void* ptr, size_t size) __THROW ALIAS(tc_realloc); - void* calloc(size_t n, size_t size) __THROW ALIAS(tc_calloc); - void cfree(void* ptr) __THROW ALIAS(tc_cfree); - void* memalign(size_t align, size_t s) __THROW 
ALIAS(tc_memalign); - void* valloc(size_t size) __THROW ALIAS(tc_valloc); - void* pvalloc(size_t size) __THROW ALIAS(tc_pvalloc); - int posix_memalign(void** r, size_t a, size_t s) __THROW - ALIAS(tc_posix_memalign); -#ifndef __UCLIBC__ - void malloc_stats(void) __THROW ALIAS(tc_malloc_stats); -#endif - int mallopt(int cmd, int value) __THROW ALIAS(tc_mallopt); -#ifdef HAVE_STRUCT_MALLINFO - struct mallinfo mallinfo(void) __THROW ALIAS(tc_mallinfo); -#endif - size_t malloc_size(void* p) __THROW ALIAS(tc_malloc_size); -#if defined(__ANDROID__) - size_t malloc_usable_size(const void* p) __THROW - ALIAS(tc_malloc_size); -#else - size_t malloc_usable_size(void* p) __THROW ALIAS(tc_malloc_size); -#endif -} // extern "C" - -#undef ALIAS - -// No need to do anything at tcmalloc-registration time: we do it all -// via overriding weak symbols (at link time). -static void ReplaceSystemAlloc() { } - -#endif // TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ diff --git a/contrib/libtcmalloc/src/libc_override_glibc.h b/contrib/libtcmalloc/src/libc_override_glibc.h deleted file mode 100644 index cc17df315c0..00000000000 --- a/contrib/libtcmalloc/src/libc_override_glibc.h +++ /dev/null @@ -1,92 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// Used to override malloc routines on systems that are using glibc. - -#ifndef TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ -#define TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ - -#include "config.h" -#include // for __GLIBC__ -#include - -#ifndef __GLIBC__ -# error libc_override_glibc.h is for glibc distributions only. -#endif - -// In glibc, the memory-allocation methods are weak symbols, so we can -// just override them with our own. If we're using gcc, we can use -// __attribute__((alias)) to do the overriding easily (exception: -// Mach-O, which doesn't support aliases). Otherwise we have to use a -// function call. 
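The "weak symbol plus alias attribute" mechanism the header above relies on can be shown in isolation. A toy sketch, assuming GCC on a glibc system; toy_malloc and toy_free are hypothetical stand-ins for tc_malloc/tc_free, and a real interposer would also have to cover calloc, realloc, posix_memalign and friends, exactly as the removed header does:

#include <stddef.h>

static char arena[1 << 20];          // 1 MiB bump arena, never reclaimed
static size_t used = 0;

extern "C" void* toy_malloc(size_t n) {
  n = (n + 15) & ~static_cast<size_t>(15);   // keep 16-byte alignment
  if (used + n > sizeof(arena)) return nullptr;
  void* p = arena + used;
  used += n;
  return p;
}

extern "C" void toy_free(void*) {}   // bump allocators never reclaim

// The trick itself: because libc's allocation entry points are weak,
// these strong aliases in the main binary take precedence at link time.
extern "C" void* malloc(size_t n) __attribute__((alias("toy_malloc"), used));
extern "C" void free(void* p) __attribute__((alias("toy_free"), used));

int main() {
  char* p = static_cast<char*>(malloc(32));  // lands in toy_malloc
  if (!p) return 1;
  p[0] = 'x';
  return 0;
}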
-#if !defined(__GNUC__) || defined(__MACH__) - -// This also defines ReplaceSystemAlloc(). -# include "libc_override_redefine.h" // defines functions malloc()/etc - -#else // #if !defined(__GNUC__) || defined(__MACH__) - -// If we get here, we're a gcc system, so do all the overriding we do -// with gcc. This does the overriding of all the 'normal' memory -// allocation. This also defines ReplaceSystemAlloc(). -# include "libc_override_gcc_and_weak.h" - -// We also have to do some glibc-specific overriding. Some library -// routines on RedHat 9 allocate memory using malloc() and free it -// using __libc_free() (or vice-versa). Since we provide our own -// implementations of malloc/free, we need to make sure that the -// __libc_XXX variants (defined as part of glibc) also point to the -// same implementations. Since it only matters for redhat, we -// do it inside the gcc #ifdef, since redhat uses gcc. -// TODO(csilvers): only do this if we detect we're an old enough glibc? - -#define ALIAS(tc_fn) __attribute__ ((alias (#tc_fn))) -extern "C" { - void* __libc_malloc(size_t size) ALIAS(tc_malloc); - void __libc_free(void* ptr) ALIAS(tc_free); - void* __libc_realloc(void* ptr, size_t size) ALIAS(tc_realloc); - void* __libc_calloc(size_t n, size_t size) ALIAS(tc_calloc); - void __libc_cfree(void* ptr) ALIAS(tc_cfree); - void* __libc_memalign(size_t align, size_t s) ALIAS(tc_memalign); - void* __libc_valloc(size_t size) ALIAS(tc_valloc); - void* __libc_pvalloc(size_t size) ALIAS(tc_pvalloc); - int __posix_memalign(void** r, size_t a, size_t s) ALIAS(tc_posix_memalign); -} // extern "C" -#undef ALIAS - -#endif // #if defined(__GNUC__) && !defined(__MACH__) - -// No need to write ReplaceSystemAlloc(); one of the #includes above -// did it for us. - -#endif // TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ diff --git a/contrib/libtcmalloc/src/libc_override_osx.h b/contrib/libtcmalloc/src/libc_override_osx.h deleted file mode 100644 index afd57d1560a..00000000000 --- a/contrib/libtcmalloc/src/libc_override_osx.h +++ /dev/null @@ -1,308 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// Used to override malloc routines on OS X systems. We use the -// malloc-zone functionality built into OS X to register our malloc -// routine. -// -// 1) We used to use the normal 'override weak libc malloc/etc' -// technique for OS X. This is not optimal because mach does not -// support the 'alias' attribute, so we had to have forwarding -// functions. It also does not work very well with OS X shared -// libraries (dylibs) -- in general, the shared libs don't use -// tcmalloc unless run with the DYLD_FORCE_FLAT_NAMESPACE envvar. -// -// 2) Another approach would be to use an interposition array: -// static const interpose_t interposers[] __attribute__((section("__DATA, __interpose"))) = { -// { (void *)tc_malloc, (void *)malloc }, -// { (void *)tc_free, (void *)free }, -// }; -// This requires the user to set the DYLD_INSERT_LIBRARIES envvar, so -// is not much better. -// -// 3) Registering a new malloc zone avoids all these issues: -// http://www.opensource.apple.com/source/Libc/Libc-583/include/malloc/malloc.h -// http://www.opensource.apple.com/source/Libc/Libc-583/gen/malloc.c -// If we make tcmalloc the default malloc zone (undocumented but -// possible) then all new allocs use it, even those in shared -// libraries. Allocs done before tcmalloc was installed, or in libs -// that aren't using tcmalloc for some reason, will correctly go -// through the malloc-zone interface when free-ing, and will pick up -// the libc free rather than tcmalloc free. So it should "never" -// cause a crash (famous last words). -// -// 4) The routines one must define for one's own malloc have changed -// between OS X versions. This requires some hoops on our part, but -// is only really annoying when it comes to posix_memalign. The right -// behavior there depends on what OS version tcmalloc was compiled on, -// but also what OS version the program is running on. For now, we -// punt and don't implement our own posix_memalign. Apps that really -// care can use tc_posix_memalign directly. - -#ifndef TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_ -#define TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_ - -#include "config.h" -#ifdef HAVE_FEATURES_H -#include -#endif -#include - -#if !defined(__APPLE__) -# error libc_override_glibc-osx.h is for OS X distributions only. -#endif - -#include -#include - -namespace tcmalloc { - void CentralCacheLockAll(); - void CentralCacheUnlockAll(); -} - -// from AvailabilityMacros.h -#if defined(MAC_OS_X_VERSION_10_6) && \ - MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 -extern "C" { - // This function is only available on 10.6 (and later) but the - // LibSystem headers do not use AvailabilityMacros.h to handle weak - // importing automatically. This prototype is a copy of the one in - // with the WEAK_IMPORT_ATTRBIUTE added. - extern malloc_zone_t *malloc_default_purgeable_zone(void) - WEAK_IMPORT_ATTRIBUTE; -} -#endif - -// We need to provide wrappers around all the libc functions. 
-namespace { -size_t mz_size(malloc_zone_t* zone, const void* ptr) { - if (MallocExtension::instance()->GetOwnership(ptr) != MallocExtension::kOwned) - return 0; // malloc_zone semantics: return 0 if we don't own the memory - - // TODO(csilvers): change this method to take a const void*, one day. - return MallocExtension::instance()->GetAllocatedSize(const_cast(ptr)); -} - -void* mz_malloc(malloc_zone_t* zone, size_t size) { - return tc_malloc(size); -} - -void* mz_calloc(malloc_zone_t* zone, size_t num_items, size_t size) { - return tc_calloc(num_items, size); -} - -void* mz_valloc(malloc_zone_t* zone, size_t size) { - return tc_valloc(size); -} - -void mz_free(malloc_zone_t* zone, void* ptr) { - return tc_free(ptr); -} - -void* mz_realloc(malloc_zone_t* zone, void* ptr, size_t size) { - return tc_realloc(ptr, size); -} - -void* mz_memalign(malloc_zone_t* zone, size_t align, size_t size) { - return tc_memalign(align, size); -} - -void mz_destroy(malloc_zone_t* zone) { - // A no-op -- we will not be destroyed! -} - -// malloc_introspection callbacks. I'm not clear on what all of these do. -kern_return_t mi_enumerator(task_t task, void *, - unsigned type_mask, vm_address_t zone_address, - memory_reader_t reader, - vm_range_recorder_t recorder) { - // Should enumerate all the pointers we have. Seems like a lot of work. - return KERN_FAILURE; -} - -size_t mi_good_size(malloc_zone_t *zone, size_t size) { - // I think it's always safe to return size, but we maybe could do better. - return size; -} - -boolean_t mi_check(malloc_zone_t *zone) { - return MallocExtension::instance()->VerifyAllMemory(); -} - -void mi_print(malloc_zone_t *zone, boolean_t verbose) { - int bufsize = 8192; - if (verbose) - bufsize = 102400; // I picked this size arbitrarily - char* buffer = new char[bufsize]; - MallocExtension::instance()->GetStats(buffer, bufsize); - fprintf(stdout, "%s", buffer); - delete[] buffer; -} - -void mi_log(malloc_zone_t *zone, void *address) { - // I don't think we support anything like this -} - -void mi_force_lock(malloc_zone_t *zone) { - tcmalloc::CentralCacheLockAll(); -} - -void mi_force_unlock(malloc_zone_t *zone) { - tcmalloc::CentralCacheUnlockAll(); -} - -void mi_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) { - // TODO(csilvers): figure out how to fill these out - stats->blocks_in_use = 0; - stats->size_in_use = 0; - stats->max_size_in_use = 0; - stats->size_allocated = 0; -} - -boolean_t mi_zone_locked(malloc_zone_t *zone) { - return false; // Hopefully unneeded by us! -} - -} // unnamed namespace - -// OS X doesn't have pvalloc, cfree, malloc_statc, etc, so we can just -// define our own. :-) OS X supplies posix_memalign in some versions -// but not others, either strongly or weakly linked, in a way that's -// difficult enough to code to correctly, that I just don't try to -// support either memalign() or posix_memalign(). If you need them -// and are willing to code to tcmalloc, you can use tc_posix_memalign(). -extern "C" { - void cfree(void* p) { tc_cfree(p); } - void* pvalloc(size_t s) { return tc_pvalloc(s); } - void malloc_stats(void) { tc_malloc_stats(); } - int mallopt(int cmd, int v) { return tc_mallopt(cmd, v); } - // No struct mallinfo on OS X, so don't define mallinfo(). - // An alias for malloc_size(), which OS X defines. 
- size_t malloc_usable_size(void* p) { return tc_malloc_size(p); } -} // extern "C" - -static malloc_zone_t *get_default_zone() { - malloc_zone_t **zones = NULL; - unsigned int num_zones = 0; - - /* - * On OSX 10.12, malloc_default_zone returns a special zone that is not - * present in the list of registered zones. That zone uses a "lite zone" - * if one is present (apparently enabled when malloc stack logging is - * enabled), or the first registered zone otherwise. In practice this - * means unless malloc stack logging is enabled, the first registered - * zone is the default. - * So get the list of zones to get the first one, instead of relying on - * malloc_default_zone. - */ - if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**) &zones, - &num_zones)) { - /* Reset the value in case the failure happened after it was set. */ - num_zones = 0; - } - - if (num_zones) - return zones[0]; - - return malloc_default_zone(); -} - - -static void ReplaceSystemAlloc() { - static malloc_introspection_t tcmalloc_introspection; - memset(&tcmalloc_introspection, 0, sizeof(tcmalloc_introspection)); - - tcmalloc_introspection.enumerator = &mi_enumerator; - tcmalloc_introspection.good_size = &mi_good_size; - tcmalloc_introspection.check = &mi_check; - tcmalloc_introspection.print = &mi_print; - tcmalloc_introspection.log = &mi_log; - tcmalloc_introspection.force_lock = &mi_force_lock; - tcmalloc_introspection.force_unlock = &mi_force_unlock; - - static malloc_zone_t tcmalloc_zone; - memset(&tcmalloc_zone, 0, sizeof(malloc_zone_t)); - - // Start with a version 4 zone which is used for OS X 10.4 and 10.5. - tcmalloc_zone.version = 4; - tcmalloc_zone.zone_name = "tcmalloc"; - tcmalloc_zone.size = &mz_size; - tcmalloc_zone.malloc = &mz_malloc; - tcmalloc_zone.calloc = &mz_calloc; - tcmalloc_zone.valloc = &mz_valloc; - tcmalloc_zone.free = &mz_free; - tcmalloc_zone.realloc = &mz_realloc; - tcmalloc_zone.destroy = &mz_destroy; - tcmalloc_zone.batch_malloc = NULL; - tcmalloc_zone.batch_free = NULL; - tcmalloc_zone.introspect = &tcmalloc_introspection; - - // from AvailabilityMacros.h -#if defined(MAC_OS_X_VERSION_10_6) && \ - MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 - // Switch to version 6 on OSX 10.6 to support memalign. - tcmalloc_zone.version = 6; - tcmalloc_zone.free_definite_size = NULL; - tcmalloc_zone.memalign = &mz_memalign; - tcmalloc_introspection.zone_locked = &mi_zone_locked; - - // Request the default purgable zone to force its creation. The - // current default zone is registered with the purgable zone for - // doing tiny and small allocs. Sadly, it assumes that the default - // zone is the szone implementation from OS X and will crash if it - // isn't. By creating the zone now, this will be true and changing - // the default zone won't cause a problem. This only needs to - // happen when actually running on OS X 10.6 and higher (note the - // ifdef above only checks if we were *compiled* with 10.6 or - // higher; at runtime we have to check if this symbol is defined.) - if (malloc_default_purgeable_zone) { - malloc_default_purgeable_zone(); - } -#endif - - // Register the tcmalloc zone. At this point, it will not be the - // default zone. - malloc_zone_register(&tcmalloc_zone); - - // Unregister and reregister the default zone. Unregistering swaps - // the specified zone with the last one registered which for the - // default zone makes the more recently registered zone the default - // zone. 
The default zone is then re-registered to ensure that - // allocations made from it earlier will be handled correctly. - // Things are not guaranteed to work that way, but it's how they work now. - malloc_zone_t *default_zone = get_default_zone(); - malloc_zone_unregister(default_zone); - malloc_zone_register(default_zone); -} - -#endif // TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_ diff --git a/contrib/libtcmalloc/src/libc_override_redefine.h b/contrib/libtcmalloc/src/libc_override_redefine.h deleted file mode 100644 index 72679ef38b8..00000000000 --- a/contrib/libtcmalloc/src/libc_override_redefine.h +++ /dev/null @@ -1,92 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// Used on systems that don't have their own definition of -// malloc/new/etc. (Typically this will be a windows msvcrt.dll that -// has been edited to remove the definitions.) We can just define our -// own as normal functions. -// -// This should also work on systems were all the malloc routines are -// defined as weak symbols, and there's no support for aliasing. 
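Back to the OS X zone dance that libc_override_osx.h performs above: register a zone, then unregister and re-register the old default so the newly registered zone is promoted. A condensed macOS-only sketch under the assumption that a delegating zone is enough for illustration; a zeroed introspection table is tolerable for a short demo but not for production, which is why the removed file fills its callbacks in:

#include <malloc/malloc.h>
#include <stdlib.h>
#include <string.h>

static malloc_zone_t* real_zone;   // the zone we delegate to

static size_t my_size(malloc_zone_t*, const void* p) { return real_zone->size(real_zone, p); }
static void* my_malloc(malloc_zone_t*, size_t n) { return real_zone->malloc(real_zone, n); }
static void* my_calloc(malloc_zone_t*, size_t c, size_t n) { return real_zone->calloc(real_zone, c, n); }
static void* my_valloc(malloc_zone_t*, size_t n) { return real_zone->valloc(real_zone, n); }
static void my_free(malloc_zone_t*, void* p) { real_zone->free(real_zone, p); }
static void* my_realloc(malloc_zone_t*, void* p, size_t n) { return real_zone->realloc(real_zone, p, n); }
static void my_destroy(malloc_zone_t*) {}

static malloc_introspection_t my_introspect;  // zero-filled: demo only
static malloc_zone_t my_zone;

static void install_zone() {
  real_zone = malloc_default_zone();
  memset(&my_zone, 0, sizeof(my_zone));
  my_zone.version = 4;               // lowest-common-denominator layout
  my_zone.zone_name = "demo_zone";
  my_zone.size = my_size;
  my_zone.malloc = my_malloc;
  my_zone.calloc = my_calloc;
  my_zone.valloc = my_valloc;
  my_zone.free = my_free;
  my_zone.realloc = my_realloc;
  my_zone.destroy = my_destroy;
  my_zone.introspect = &my_introspect;

  malloc_zone_register(&my_zone);
  // Unregistering swaps the old default with the last-registered zone,
  // which promotes my_zone; re-registering the old zone keeps frees into
  // it working -- the same trick as in the removed file.
  malloc_zone_unregister(real_zone);
  malloc_zone_register(real_zone);
}

int main() {
  install_zone();
  void* p = malloc(16);  // default zone is now my_zone, delegating onward
  free(p);               // ownership lookup still finds real_zone
  return p != nullptr ? 0 : 1;
}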
- -#ifndef TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ -#define TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ - -void* operator new(size_t size) { return tc_new(size); } -void operator delete(void* p) throw() { tc_delete(p); } -void* operator new[](size_t size) { return tc_newarray(size); } -void operator delete[](void* p) throw() { tc_deletearray(p); } -void* operator new(size_t size, const std::nothrow_t& nt) throw() { - return tc_new_nothrow(size, nt); -} -void* operator new[](size_t size, const std::nothrow_t& nt) throw() { - return tc_newarray_nothrow(size, nt); -} -void operator delete(void* ptr, const std::nothrow_t& nt) throw() { - return tc_delete_nothrow(ptr, nt); -} -void operator delete[](void* ptr, const std::nothrow_t& nt) throw() { - return tc_deletearray_nothrow(ptr, nt); -} - -#ifdef ENABLE_SIZED_DELETE -void operator delete(void* p, size_t s) throw() { tc_delete_sized(p, s); } -void operator delete[](void* p, size_t s) throw(){ tc_deletearray_sized(p); } -#endif - -extern "C" { - void* malloc(size_t s) { return tc_malloc(s); } - void free(void* p) { tc_free(p); } - void* realloc(void* p, size_t s) { return tc_realloc(p, s); } - void* calloc(size_t n, size_t s) { return tc_calloc(n, s); } - void cfree(void* p) { tc_cfree(p); } - void* memalign(size_t a, size_t s) { return tc_memalign(a, s); } - void* valloc(size_t s) { return tc_valloc(s); } - void* pvalloc(size_t s) { return tc_pvalloc(s); } - int posix_memalign(void** r, size_t a, size_t s) { - return tc_posix_memalign(r, a, s); - } - void malloc_stats(void) { tc_malloc_stats(); } - int mallopt(int cmd, int v) { return tc_mallopt(cmd, v); } -#ifdef HAVE_STRUCT_MALLINFO - struct mallinfo mallinfo(void) { return tc_mallinfo(); } -#endif - size_t malloc_size(void* p) { return tc_malloc_size(p); } - size_t malloc_usable_size(void* p) { return tc_malloc_size(p); } -} // extern "C" - -// No need to do anything at tcmalloc-registration time: we do it all -// via overriding weak symbols (at link time). -static void ReplaceSystemAlloc() { } - -#endif // TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ diff --git a/contrib/libtcmalloc/src/linked_list.h b/contrib/libtcmalloc/src/linked_list.h deleted file mode 100644 index 66a07410760..00000000000 --- a/contrib/libtcmalloc/src/linked_list.h +++ /dev/null @@ -1,103 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat -// -// Some very basic linked list functions for dealing with using void * as -// storage. - -#ifndef TCMALLOC_LINKED_LIST_H_ -#define TCMALLOC_LINKED_LIST_H_ - -#include - -namespace tcmalloc { - -inline void *SLL_Next(void *t) { - return *(reinterpret_cast(t)); -} - -inline void SLL_SetNext(void *t, void *n) { - *(reinterpret_cast(t)) = n; -} - -inline void SLL_Push(void **list, void *element) { - SLL_SetNext(element, *list); - *list = element; -} - -inline void *SLL_Pop(void **list) { - void *result = *list; - *list = SLL_Next(*list); - return result; -} - -// Remove N elements from a linked list to which head points. head will be -// modified to point to the new head. start and end will point to the first -// and last nodes of the range. Note that end will point to NULL after this -// function is called. -inline void SLL_PopRange(void **head, int N, void **start, void **end) { - if (N == 0) { - *start = NULL; - *end = NULL; - return; - } - - void *tmp = *head; - for (int i = 1; i < N; ++i) { - tmp = SLL_Next(tmp); - } - - *start = *head; - *end = tmp; - *head = SLL_Next(tmp); - // Unlink range from list. - SLL_SetNext(tmp, NULL); -} - -inline void SLL_PushRange(void **head, void *start, void *end) { - if (!start) return; - SLL_SetNext(end, *head); - *head = start; -} - -inline size_t SLL_Size(void *head) { - int count = 0; - while (head) { - count++; - head = SLL_Next(head); - } - return count; -} - -} // namespace tcmalloc - -#endif // TCMALLOC_LINKED_LIST_H_ diff --git a/contrib/libtcmalloc/src/malloc_extension.cc b/contrib/libtcmalloc/src/malloc_extension.cc deleted file mode 100644 index 13a06f4dbe8..00000000000 --- a/contrib/libtcmalloc/src/malloc_extension.cc +++ /dev/null @@ -1,388 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat - -#include "config.h" -#include -#include -#include -#if defined HAVE_STDINT_H -#include -#elif defined HAVE_INTTYPES_H -#include -#else -#include -#endif -#include -#include "base/dynamic_annotations.h" -#include "base/sysinfo.h" // for FillProcSelfMaps -#ifndef NO_HEAP_CHECK -#include "gperftools/heap-checker.h" -#endif -#include "gperftools/malloc_extension.h" -#include "gperftools/malloc_extension_c.h" -#include "maybe_threads.h" -#include "base/googleinit.h" - -using STL_NAMESPACE::string; -using STL_NAMESPACE::vector; - -static void DumpAddressMap(string* result) { - *result += "\nMAPPED_LIBRARIES:\n"; - // We keep doubling until we get a fit - const size_t old_resultlen = result->size(); - for (int amap_size = 10240; amap_size < 10000000; amap_size *= 2) { - result->resize(old_resultlen + amap_size); - bool wrote_all = false; - const int bytes_written = - tcmalloc::FillProcSelfMaps(&((*result)[old_resultlen]), amap_size, - &wrote_all); - if (wrote_all) { // we fit! - (*result)[old_resultlen + bytes_written] = '\0'; - result->resize(old_resultlen + bytes_written); - return; - } - } - result->reserve(old_resultlen); // just don't print anything -} - -// Note: this routine is meant to be called before threads are spawned. -void MallocExtension::Initialize() { - static bool initialize_called = false; - - if (initialize_called) return; - initialize_called = true; - -#ifdef __GLIBC__ - // GNU libc++ versions 3.3 and 3.4 obey the environment variables - // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively. Setting - // one of these variables forces the STL default allocator to call - // new() or delete() for each allocation or deletion. Otherwise - // the STL allocator tries to avoid the high cost of doing - // allocations by pooling memory internally. However, tcmalloc - // does allocations really fast, especially for the types of small - // items one sees in STL, so it's better off just using us. - // TODO: control whether we do this via an environment variable? - setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite*/); - setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite*/); - - // Now we need to make the setenv 'stick', which it may not do since - // the env is flakey before main() is called. But luckily stl only - // looks at this env var the first time it tries to do an alloc, and - // caches what it finds. So we just cause an stl alloc here. 
- string dummy("I need to be allocated"); - dummy += "!"; // so the definition of dummy isn't optimized out -#endif /* __GLIBC__ */ -} - -// SysAllocator implementation -SysAllocator::~SysAllocator() {} - -// Default implementation -- does nothing -MallocExtension::~MallocExtension() { } -bool MallocExtension::VerifyAllMemory() { return true; } -bool MallocExtension::VerifyNewMemory(const void* p) { return true; } -bool MallocExtension::VerifyArrayNewMemory(const void* p) { return true; } -bool MallocExtension::VerifyMallocMemory(const void* p) { return true; } - -bool MallocExtension::GetNumericProperty(const char* property, size_t* value) { - return false; -} - -bool MallocExtension::SetNumericProperty(const char* property, size_t value) { - return false; -} - -void MallocExtension::GetStats(char* buffer, int length) { - assert(length > 0); - buffer[0] = '\0'; -} - -bool MallocExtension::MallocMemoryStats(int* blocks, size_t* total, - int histogram[kMallocHistogramSize]) { - *blocks = 0; - *total = 0; - memset(histogram, 0, sizeof(*histogram) * kMallocHistogramSize); - return true; -} - -void** MallocExtension::ReadStackTraces(int* sample_period) { - return NULL; -} - -void** MallocExtension::ReadHeapGrowthStackTraces() { - return NULL; -} - -void MallocExtension::MarkThreadIdle() { - // Default implementation does nothing -} - -void MallocExtension::MarkThreadBusy() { - // Default implementation does nothing -} - -SysAllocator* MallocExtension::GetSystemAllocator() { - return NULL; -} - -void MallocExtension::SetSystemAllocator(SysAllocator *a) { - // Default implementation does nothing -} - -void MallocExtension::ReleaseToSystem(size_t num_bytes) { - // Default implementation does nothing -} - -void MallocExtension::ReleaseFreeMemory() { - ReleaseToSystem(static_cast(-1)); // SIZE_T_MAX -} - -void MallocExtension::SetMemoryReleaseRate(double rate) { - // Default implementation does nothing -} - -double MallocExtension::GetMemoryReleaseRate() { - return -1.0; -} - -size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) { - return size; -} - -size_t MallocExtension::GetAllocatedSize(const void* p) { - assert(GetOwnership(p) != kNotOwned); - return 0; -} - -MallocExtension::Ownership MallocExtension::GetOwnership(const void* p) { - return kUnknownOwnership; -} - -void MallocExtension::GetFreeListSizes( - vector* v) { - v->clear(); -} - -size_t MallocExtension::GetThreadCacheSize() { - return 0; -} - -void MallocExtension::MarkThreadTemporarilyIdle() { - // Default implementation does nothing -} - -// The current malloc extension object. - -static MallocExtension* current_instance; - -static void InitModule() { - if (current_instance != NULL) { - return; - } - current_instance = new MallocExtension; -#ifndef NO_HEAP_CHECK - HeapLeakChecker::IgnoreObject(current_instance); -#endif -} - -REGISTER_MODULE_INITIALIZER(malloc_extension_init, InitModule()) - -MallocExtension* MallocExtension::instance() { - InitModule(); - return current_instance; -} - -void MallocExtension::Register(MallocExtension* implementation) { - InitModule(); - // When running under valgrind, our custom malloc is replaced with - // valgrind's one and malloc extensions will not work. (Note: - // callers should be responsible for checking that they are the - // malloc that is really being run, before calling Register. This - // is just here as an extra sanity check.) 
- if (!RunningOnValgrind()) { - current_instance = implementation; - } -} - -// ----------------------------------------------------------------------- -// Heap sampling support -// ----------------------------------------------------------------------- - -namespace { - -// Accessors -uintptr_t Count(void** entry) { - return reinterpret_cast(entry[0]); -} -uintptr_t Size(void** entry) { - return reinterpret_cast(entry[1]); -} -uintptr_t Depth(void** entry) { - return reinterpret_cast(entry[2]); -} -void* PC(void** entry, int i) { - return entry[3+i]; -} - -void PrintCountAndSize(MallocExtensionWriter* writer, - uintptr_t count, uintptr_t size) { - char buf[100]; - snprintf(buf, sizeof(buf), - "%6" PRIu64 ": %8" PRIu64 " [%6" PRIu64 ": %8" PRIu64 "] @", - static_cast(count), - static_cast(size), - static_cast(count), - static_cast(size)); - writer->append(buf, strlen(buf)); -} - -void PrintHeader(MallocExtensionWriter* writer, - const char* label, void** entries) { - // Compute the total count and total size - uintptr_t total_count = 0; - uintptr_t total_size = 0; - for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { - total_count += Count(entry); - total_size += Size(entry); - } - - const char* const kTitle = "heap profile: "; - writer->append(kTitle, strlen(kTitle)); - PrintCountAndSize(writer, total_count, total_size); - writer->append(" ", 1); - writer->append(label, strlen(label)); - writer->append("\n", 1); -} - -void PrintStackEntry(MallocExtensionWriter* writer, void** entry) { - PrintCountAndSize(writer, Count(entry), Size(entry)); - - for (int i = 0; i < Depth(entry); i++) { - char buf[32]; - snprintf(buf, sizeof(buf), " %p", PC(entry, i)); - writer->append(buf, strlen(buf)); - } - writer->append("\n", 1); -} - -} - -void MallocExtension::GetHeapSample(MallocExtensionWriter* writer) { - int sample_period = 0; - void** entries = ReadStackTraces(&sample_period); - if (entries == NULL) { - const char* const kErrorMsg = - "This malloc implementation does not support sampling.\n" - "As of 2005/01/26, only tcmalloc supports sampling, and\n" - "you are probably running a binary that does not use\n" - "tcmalloc.\n"; - writer->append(kErrorMsg, strlen(kErrorMsg)); - return; - } - - char label[32]; - sprintf(label, "heap_v2/%d", sample_period); - PrintHeader(writer, label, entries); - for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { - PrintStackEntry(writer, entry); - } - delete[] entries; - - DumpAddressMap(writer); -} - -void MallocExtension::GetHeapGrowthStacks(MallocExtensionWriter* writer) { - void** entries = ReadHeapGrowthStackTraces(); - if (entries == NULL) { - const char* const kErrorMsg = - "This malloc implementation does not support " - "ReadHeapGrowthStackTraces().\n" - "As of 2005/09/27, only tcmalloc supports this, and you\n" - "are probably running a binary that does not use tcmalloc.\n"; - writer->append(kErrorMsg, strlen(kErrorMsg)); - return; - } - - // Do not canonicalize the stack entries, so that we get a - // time-ordered list of stack traces, which may be useful if the - // client wants to focus on the latest stack traces. - PrintHeader(writer, "growth", entries); - for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { - PrintStackEntry(writer, entry); - } - delete[] entries; - - DumpAddressMap(writer); -} - -void MallocExtension::Ranges(void* arg, RangeFunction func) { - // No callbacks by default -} - -// These are C shims that work on the current instance. 
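GetHeapSample and GetHeapGrowthStacks above both iterate with entry += 3 + Depth(entry), which only makes sense given the flat record layout ReadStackTraces() returns. A standalone sketch of that layout and traversal, using fabricated sample data:

#include <stdint.h>
#include <stdio.h>

// Records are laid out flat in machine words:
//   [count, size, depth, pc_1 ... pc_depth][...][0]
// A zero count terminates the array, so the stride is 3 + depth.
static void walk_samples(void** entries) {
  for (void** e = entries; (uintptr_t)e[0] != 0; e += 3 + (uintptr_t)e[2]) {
    uintptr_t count = (uintptr_t)e[0];
    uintptr_t size = (uintptr_t)e[1];
    uintptr_t depth = (uintptr_t)e[2];
    printf("%lu samples, %lu bytes, stack:", (unsigned long)count,
           (unsigned long)size);
    for (uintptr_t i = 0; i < depth; ++i) printf(" %p", e[3 + i]);
    printf("\n");
  }
}

int main() {
  void* demo[] = {
      (void*)2, (void*)4096, (void*)2, (void*)0x1000, (void*)0x2000,
      (void*)1, (void*)128,  (void*)1, (void*)0x3000,
      (void*)0  // terminator
  };
  walk_samples(demo);
  return 0;
}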
- -#define C_SHIM(fn, retval, paramlist, arglist) \ - extern "C" PERFTOOLS_DLL_DECL retval MallocExtension_##fn paramlist { \ - return MallocExtension::instance()->fn arglist; \ - } - -C_SHIM(VerifyAllMemory, int, (void), ()); -C_SHIM(VerifyNewMemory, int, (const void* p), (p)); -C_SHIM(VerifyArrayNewMemory, int, (const void* p), (p)); -C_SHIM(VerifyMallocMemory, int, (const void* p), (p)); -C_SHIM(MallocMemoryStats, int, - (int* blocks, size_t* total, int histogram[kMallocHistogramSize]), - (blocks, total, histogram)); - -C_SHIM(GetStats, void, - (char* buffer, int buffer_length), (buffer, buffer_length)); -C_SHIM(GetNumericProperty, int, - (const char* property, size_t* value), (property, value)); -C_SHIM(SetNumericProperty, int, - (const char* property, size_t value), (property, value)); - -C_SHIM(MarkThreadIdle, void, (void), ()); -C_SHIM(MarkThreadBusy, void, (void), ()); -C_SHIM(ReleaseFreeMemory, void, (void), ()); -C_SHIM(ReleaseToSystem, void, (size_t num_bytes), (num_bytes)); -C_SHIM(GetEstimatedAllocatedSize, size_t, (size_t size), (size)); -C_SHIM(GetAllocatedSize, size_t, (const void* p), (p)); -C_SHIM(GetThreadCacheSize, size_t, (void), ()); -C_SHIM(MarkThreadTemporarilyIdle, void, (void), ()); - -// Can't use the shim here because of the need to translate the enums. -extern "C" -MallocExtension_Ownership MallocExtension_GetOwnership(const void* p) { - return static_cast( - MallocExtension::instance()->GetOwnership(p)); -} diff --git a/contrib/libtcmalloc/src/malloc_hook-inl.h b/contrib/libtcmalloc/src/malloc_hook-inl.h deleted file mode 100644 index dbf4d46ed47..00000000000 --- a/contrib/libtcmalloc/src/malloc_hook-inl.h +++ /dev/null @@ -1,249 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat -// -// This has the implementation details of malloc_hook that are needed -// to use malloc-hook inside the tcmalloc system. 
It does not hold
-// any of the client-facing calls that are used to add new hooks.
-
-#ifndef _MALLOC_HOOK_INL_H_
-#define _MALLOC_HOOK_INL_H_
-
-#include <stddef.h>
-#include <sys/types.h>
-#include "base/atomicops.h"
-#include "base/basictypes.h"
-#include <gperftools/malloc_hook.h>
-
-#include "common.h" // for UNLIKELY
-
-namespace base { namespace internal {
-
-// Capacity of 8 means that HookList is 9 words.
-static const int kHookListCapacity = 8;
-// last entry is reserved for deprecated "singular" hooks. So we have
-// 7 "normal" hooks per list
-static const int kHookListMaxValues = 7;
-static const int kHookListSingularIdx = 7;
-
-// HookList: a class that provides synchronized insertions and removals and
-// lockless traversal. Most of the implementation is in malloc_hook.cc.
-template <typename T>
-struct PERFTOOLS_DLL_DECL HookList {
-  COMPILE_ASSERT(sizeof(T) <= sizeof(AtomicWord), T_should_fit_in_AtomicWord);
-
-  // Adds value to the list. Note that duplicates are allowed. Thread-safe and
-  // blocking (acquires hooklist_spinlock). Returns true on success; false
-  // otherwise (failures include invalid value and no space left).
-  bool Add(T value);
-
-  void FixupPrivEndLocked();
-
-  // Removes the first entry matching value from the list. Thread-safe and
-  // blocking (acquires hooklist_spinlock). Returns true on success; false
-  // otherwise (failures include invalid value and no value found).
-  bool Remove(T value);
-
-  // Store up to n values of the list in output_array, and return the number of
-  // elements stored. Thread-safe and non-blocking. This is fast (one memory
-  // access) if the list is empty.
-  int Traverse(T* output_array, int n) const;
-
-  // Fast inline implementation for fast path of Invoke*Hook.
-  bool empty() const {
-    return base::subtle::NoBarrier_Load(&priv_end) == 0;
-  }
-
-  // Used purely to handle deprecated singular hooks
-  T GetSingular() const {
-    const AtomicWord *place = &priv_data[kHookListSingularIdx];
-    return bit_cast<T>(base::subtle::NoBarrier_Load(place));
-  }
-
-  T ExchangeSingular(T new_val);
-
-  // This internal data is not private so that the class is an aggregate and can
-  // be initialized by the linker. Don't access this directly. Use the
-  // INIT_HOOK_LIST macro in malloc_hook.cc.
-
-  // One more than the index of the last valid element in priv_data. During
-  // 'Remove' this may be past the last valid element in priv_data, but
-  // subsequent values will be 0.
- // - // Index kHookListCapacity-1 is reserved as 'deprecated' single hook pointer - AtomicWord priv_end; - AtomicWord priv_data[kHookListCapacity]; -}; - -ATTRIBUTE_VISIBILITY_HIDDEN extern HookList new_hooks_; -ATTRIBUTE_VISIBILITY_HIDDEN extern HookList delete_hooks_; -ATTRIBUTE_VISIBILITY_HIDDEN extern HookList premmap_hooks_; -ATTRIBUTE_VISIBILITY_HIDDEN extern HookList mmap_hooks_; -ATTRIBUTE_VISIBILITY_HIDDEN extern HookList mmap_replacement_; -ATTRIBUTE_VISIBILITY_HIDDEN extern HookList munmap_hooks_; -ATTRIBUTE_VISIBILITY_HIDDEN extern HookList munmap_replacement_; -ATTRIBUTE_VISIBILITY_HIDDEN extern HookList mremap_hooks_; -ATTRIBUTE_VISIBILITY_HIDDEN extern HookList presbrk_hooks_; -ATTRIBUTE_VISIBILITY_HIDDEN extern HookList sbrk_hooks_; - -} } // namespace base::internal - -// The following method is DEPRECATED -inline MallocHook::NewHook MallocHook::GetNewHook() { - return base::internal::new_hooks_.GetSingular(); -} - -inline void MallocHook::InvokeNewHook(const void* p, size_t s) { - if (UNLIKELY(!base::internal::new_hooks_.empty())) { - InvokeNewHookSlow(p, s); - } -} - -// The following method is DEPRECATED -inline MallocHook::DeleteHook MallocHook::GetDeleteHook() { - return base::internal::delete_hooks_.GetSingular(); -} - -inline void MallocHook::InvokeDeleteHook(const void* p) { - if (UNLIKELY(!base::internal::delete_hooks_.empty())) { - InvokeDeleteHookSlow(p); - } -} - -// The following method is DEPRECATED -inline MallocHook::PreMmapHook MallocHook::GetPreMmapHook() { - return base::internal::premmap_hooks_.GetSingular(); -} - -inline void MallocHook::InvokePreMmapHook(const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset) { - if (!base::internal::premmap_hooks_.empty()) { - InvokePreMmapHookSlow(start, size, protection, flags, fd, offset); - } -} - -// The following method is DEPRECATED -inline MallocHook::MmapHook MallocHook::GetMmapHook() { - return base::internal::mmap_hooks_.GetSingular(); -} - -inline void MallocHook::InvokeMmapHook(const void* result, - const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset) { - if (!base::internal::mmap_hooks_.empty()) { - InvokeMmapHookSlow(result, start, size, protection, flags, fd, offset); - } -} - -inline bool MallocHook::InvokeMmapReplacement(const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset, - void** result) { - if (!base::internal::mmap_replacement_.empty()) { - return InvokeMmapReplacementSlow(start, size, - protection, flags, - fd, offset, - result); - } - return false; -} - -// The following method is DEPRECATED -inline MallocHook::MunmapHook MallocHook::GetMunmapHook() { - return base::internal::munmap_hooks_.GetSingular(); -} - -inline void MallocHook::InvokeMunmapHook(const void* p, size_t size) { - if (!base::internal::munmap_hooks_.empty()) { - InvokeMunmapHookSlow(p, size); - } -} - -inline bool MallocHook::InvokeMunmapReplacement( - const void* p, size_t size, int* result) { - if (!base::internal::mmap_replacement_.empty()) { - return InvokeMunmapReplacementSlow(p, size, result); - } - return false; -} - -// The following method is DEPRECATED -inline MallocHook::MremapHook MallocHook::GetMremapHook() { - return base::internal::mremap_hooks_.GetSingular(); -} - -inline void MallocHook::InvokeMremapHook(const void* result, - const void* old_addr, - size_t old_size, - size_t new_size, - int flags, - const void* new_addr) { - if (!base::internal::mremap_hooks_.empty()) { - 
InvokeMremapHookSlow(result, old_addr, old_size, new_size, flags, new_addr); - } -} - -// The following method is DEPRECATED -inline MallocHook::PreSbrkHook MallocHook::GetPreSbrkHook() { - return base::internal::presbrk_hooks_.GetSingular(); -} - -inline void MallocHook::InvokePreSbrkHook(ptrdiff_t increment) { - if (!base::internal::presbrk_hooks_.empty() && increment != 0) { - InvokePreSbrkHookSlow(increment); - } -} - -// The following method is DEPRECATED -inline MallocHook::SbrkHook MallocHook::GetSbrkHook() { - return base::internal::sbrk_hooks_.GetSingular(); -} - -inline void MallocHook::InvokeSbrkHook(const void* result, - ptrdiff_t increment) { - if (!base::internal::sbrk_hooks_.empty() && increment != 0) { - InvokeSbrkHookSlow(result, increment); - } -} - -#endif /* _MALLOC_HOOK_INL_H_ */ diff --git a/contrib/libtcmalloc/src/malloc_hook.cc b/contrib/libtcmalloc/src/malloc_hook.cc deleted file mode 100644 index f87da8abbeb..00000000000 --- a/contrib/libtcmalloc/src/malloc_hook.cc +++ /dev/null @@ -1,703 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat - -#include "config.h" - -// Disable the glibc prototype of mremap(), as older versions of the -// system headers define this function with only four arguments, -// whereas newer versions allow an optional fifth argument: -#ifdef HAVE_MMAP -# define mremap glibc_mremap -# include -# undef mremap -#endif - -#include -#ifdef HAVE_STDINT_H -#include -#endif -#include -#include "base/logging.h" -#include "base/spinlock.h" -#include "maybe_emergency_malloc.h" -#include "maybe_threads.h" -#include "malloc_hook-inl.h" -#include - -// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if -// you're porting to a system where you really can't get a stacktrace. -#ifdef NO_TCMALLOC_SAMPLES - // We use #define so code compiles even if you #include stacktrace.h somehow. 
-# define GetStackTrace(stack, depth, skip)  (0)
-#else
-# include <gperftools/stacktrace.h>
-#endif
-
-// __THROW is defined in glibc systems.  It means, counter-intuitively,
-// "This function will never throw an exception."  It's an optional
-// optimization tool, but we may need to use it to match glibc prototypes.
-#ifndef __THROW    // I guess we're not on a glibc system
-# define __THROW   // __THROW is just an optimization, so ok to make it ""
-#endif
-
-using std::copy;
-
-
-// Declaration of default weak initialization function, that can be overridden
-// by linking-in a strong definition (as heap-checker.cc does).  This is
-// extern "C" so that it doesn't trigger gold's --detect-odr-violations warning,
-// which only looks at C++ symbols.
-//
-// This function is declared here as weak, and defined later, rather than a more
-// straightforward simple weak definition, as a workaround for an icc compiler
-// issue (Intel reference 290819).  This issue causes icc to resolve weak
-// symbols too early, at compile rather than link time.  By declaring it (weak)
-// here, then defining it below after its use, we can avoid the problem.
-extern "C" {
-ATTRIBUTE_WEAK void MallocHook_InitAtFirstAllocation_HeapLeakChecker();
-}
-
-namespace {
-
-void RemoveInitialHooksAndCallInitializers();  // below.
-
-pthread_once_t once = PTHREAD_ONCE_INIT;
-
-// These hooks are installed in MallocHook as the only initial hooks.  The first
-// hook that is called will run RemoveInitialHooksAndCallInitializers (see the
-// definition below) and then redispatch to any malloc hooks installed by
-// RemoveInitialHooksAndCallInitializers.
-//
-// Note(llib): there is a possibility of a race in the event that there are
-// multiple threads running before the first allocation.  This is pretty
-// difficult to achieve, but if it is, then multiple threads may concurrently do
-// allocations.  The first caller will call
-// RemoveInitialHooksAndCallInitializers via one of the initial hooks.  A
-// concurrent allocation may, depending on timing, either:
-// * still have its initial malloc hook installed, run that and block on waiting
-//   for the first caller to finish its call to
-//   RemoveInitialHooksAndCallInitializers, and proceed normally.
-// * occur some time during the RemoveInitialHooksAndCallInitializers call, at
-//   which point there could be no initial hooks and the subsequent hooks that
-//   are about to be set up by RemoveInitialHooksAndCallInitializers haven't
-//   been installed yet.  I think the worst we can get is that some allocations
-//   will not get reported to some hooks set by the initializers called from
-//   RemoveInitialHooksAndCallInitializers.
-
-void InitialNewHook(const void* ptr, size_t size) {
-  perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers);
-  MallocHook::InvokeNewHook(ptr, size);
-}
-
-void InitialPreMMapHook(const void* start,
-                        size_t size,
-                        int protection,
-                        int flags,
-                        int fd,
-                        off_t offset) {
-  perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers);
-  MallocHook::InvokePreMmapHook(start, size, protection, flags, fd, offset);
-}
-
-void InitialPreSbrkHook(ptrdiff_t increment) {
-  perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers);
-  MallocHook::InvokePreSbrkHook(increment);
-}
-
-// This function is called at most once by one of the above initial malloc
-// hooks.  It removes all initial hooks and initializes all other clients that
-// want to get control at the very first memory allocation.  The initializers
-// may assume that the initial malloc hooks have been removed.  The initializers
-// may set up malloc hooks and allocate memory.
-void RemoveInitialHooksAndCallInitializers() {
-  RAW_CHECK(MallocHook::RemoveNewHook(&InitialNewHook), "");
-  RAW_CHECK(MallocHook::RemovePreMmapHook(&InitialPreMMapHook), "");
-  RAW_CHECK(MallocHook::RemovePreSbrkHook(&InitialPreSbrkHook), "");
-
-  // HeapLeakChecker is currently the only module that needs to get control on
-  // the first memory allocation, but one can add other modules by following the
-  // same weak/strong function pattern.
-  MallocHook_InitAtFirstAllocation_HeapLeakChecker();
-}
-
-}  // namespace
-
-// Weak default initialization function that must go after its use.
-extern "C" void MallocHook_InitAtFirstAllocation_HeapLeakChecker() {
-  // Do nothing.
-}
-
-namespace base { namespace internal {
-
-// This lock is shared between all implementations of HookList::Add & Remove.
-// The potential for contention is very small.  This needs to be a SpinLock and
-// not a Mutex since it's possible for Mutex locking to allocate memory (e.g.,
-// per-thread allocation in debug builds), which could cause infinite recursion.
-static SpinLock hooklist_spinlock(base::LINKER_INITIALIZED);
-
-template <typename T>
-bool HookList<T>::Add(T value_as_t) {
-  AtomicWord value = bit_cast<AtomicWord>(value_as_t);
-  if (value == 0) {
-    return false;
-  }
-  SpinLockHolder l(&hooklist_spinlock);
-  // Find the first slot in data that is 0.
-  int index = 0;
-  while ((index < kHookListMaxValues) &&
-         (base::subtle::NoBarrier_Load(&priv_data[index]) != 0)) {
-    ++index;
-  }
-  if (index == kHookListMaxValues) {
-    return false;
-  }
-  AtomicWord prev_num_hooks = base::subtle::Acquire_Load(&priv_end);
-  base::subtle::NoBarrier_Store(&priv_data[index], value);
-  if (prev_num_hooks <= index) {
-    base::subtle::NoBarrier_Store(&priv_end, index + 1);
-  }
-  return true;
-}
-
-template <typename T>
-void HookList<T>::FixupPrivEndLocked() {
-  AtomicWord hooks_end = base::subtle::NoBarrier_Load(&priv_end);
-  while ((hooks_end > 0) &&
-         (base::subtle::NoBarrier_Load(&priv_data[hooks_end - 1]) == 0)) {
-    --hooks_end;
-  }
-  base::subtle::NoBarrier_Store(&priv_end, hooks_end);
-}
-
-template <typename T>
-bool HookList<T>::Remove(T value_as_t) {
-  if (value_as_t == 0) {
-    return false;
-  }
-  SpinLockHolder l(&hooklist_spinlock);
-  AtomicWord hooks_end = base::subtle::NoBarrier_Load(&priv_end);
-  int index = 0;
-  while (index < hooks_end && value_as_t != bit_cast<T>(
-             base::subtle::NoBarrier_Load(&priv_data[index]))) {
-    ++index;
-  }
-  if (index == hooks_end) {
-    return false;
-  }
-  base::subtle::NoBarrier_Store(&priv_data[index], 0);
-  FixupPrivEndLocked();
-  return true;
-}
-
-template <typename T>
-int HookList<T>::Traverse(T* output_array, int n) const {
-  AtomicWord hooks_end = base::subtle::Acquire_Load(&priv_end);
-  int actual_hooks_end = 0;
-  for (int i = 0; i < hooks_end && n > 0; ++i) {
-    AtomicWord data = base::subtle::Acquire_Load(&priv_data[i]);
-    if (data != 0) {
-      *output_array++ = bit_cast<T>(data);
-      ++actual_hooks_end;
-      --n;
-    }
-  }
-  return actual_hooks_end;
-}
-
-template <typename T>
-T HookList<T>::ExchangeSingular(T value_as_t) {
-  AtomicWord value = bit_cast<AtomicWord>(value_as_t);
-  AtomicWord old_value;
-  SpinLockHolder l(&hooklist_spinlock);
-  old_value = base::subtle::NoBarrier_Load(&priv_data[kHookListSingularIdx]);
-  base::subtle::NoBarrier_Store(&priv_data[kHookListSingularIdx], value);
-  if (value != 0) {
-    base::subtle::NoBarrier_Store(&priv_end, kHookListSingularIdx + 1);
-  } else {
-    FixupPrivEndLocked();
-  }
-  return bit_cast<T>(old_value);
-}
-
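[Reviewer's aside, not part of the diff] The HookList protocol above is easy to lose in diff form: writers serialize on hooklist_spinlock, while readers never lock and only take acquire-ordered snapshots. Below is a minimal stand-alone analogue using std::atomic; the names MiniHookList/Add/Traverse are invented (the original uses perftools' AtomicWord and SpinLock), and it should be compiled as C++20 so the atomic array value-initializes:

    #include <atomic>
    #include <mutex>

    // Simplified analogue of HookList<T>: fixed capacity, mutex-guarded
    // writers, lock-free readers. 'end' is one past the last slot that may
    // be non-empty, mirroring priv_end above.
    template <typename T, int kCapacity = 7>
    struct MiniHookList {
      std::atomic<int> end{0};
      std::atomic<T> data[kCapacity] = {};  // C++20: value-initialized (empty)
      std::mutex mu;                        // serializes Add/Remove only

      bool Add(T value) {
        if (value == T{}) return false;
        std::lock_guard<std::mutex> l(mu);
        for (int i = 0; i < kCapacity; ++i) {
          if (data[i].load(std::memory_order_relaxed) == T{}) {
            data[i].store(value, std::memory_order_release);
            if (end.load(std::memory_order_relaxed) <= i)
              end.store(i + 1, std::memory_order_release);
            return true;
          }
        }
        return false;  // list full, like hitting kHookListMaxValues above
      }

      // Lock-free snapshot, the moral equivalent of HookList::Traverse.
      int Traverse(T* out, int n) const {
        int limit = end.load(std::memory_order_acquire);
        int found = 0;
        for (int i = 0; i < limit && found < n; ++i) {
          T v = data[i].load(std::memory_order_acquire);
          if (v != T{}) out[found++] = v;
        }
        return found;
      }
    };

The one ordering that matters is the release store of the slot before the release store of end, paired with the acquire loads in Traverse: a reader that observes the bumped end is guaranteed to also see the stored hook. A reader racing with Add may simply miss the new hook for one invocation, which the original code tolerates as well.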
-// Initialize a HookList (optionally with the given initial_value in index 0).
-#define INIT_HOOK_LIST { 0 }
-#define INIT_HOOK_LIST_WITH_VALUE(initial_value) \
-  { 1, { reinterpret_cast<AtomicWord>(initial_value) } }
-
-// Explicit instantiation for malloc_hook_test.cc.  This ensures all the
-// methods are instantiated.
-template struct HookList<MallocHook::NewHook>;
-
-HookList<MallocHook::NewHook> new_hooks_ =
-    INIT_HOOK_LIST_WITH_VALUE(&InitialNewHook);
-HookList<MallocHook::DeleteHook> delete_hooks_ = INIT_HOOK_LIST;
-HookList<MallocHook::PreMmapHook> premmap_hooks_ =
-    INIT_HOOK_LIST_WITH_VALUE(&InitialPreMMapHook);
-HookList<MallocHook::MmapHook> mmap_hooks_ = INIT_HOOK_LIST;
-HookList<MallocHook::MunmapHook> munmap_hooks_ = INIT_HOOK_LIST;
-HookList<MallocHook::MremapHook> mremap_hooks_ = INIT_HOOK_LIST;
-HookList<MallocHook::PreSbrkHook> presbrk_hooks_ =
-    INIT_HOOK_LIST_WITH_VALUE(InitialPreSbrkHook);
-HookList<MallocHook::SbrkHook> sbrk_hooks_ = INIT_HOOK_LIST;
-
-// These lists contain either 0 or 1 hooks.
-HookList<MallocHook::MmapReplacement> mmap_replacement_ = { 0 };
-HookList<MallocHook::MunmapReplacement> munmap_replacement_ = { 0 };
-
-#undef INIT_HOOK_LIST_WITH_VALUE
-#undef INIT_HOOK_LIST
-
-} }  // namespace base::internal
-
-using base::internal::kHookListMaxValues;
-using base::internal::new_hooks_;
-using base::internal::delete_hooks_;
-using base::internal::premmap_hooks_;
-using base::internal::mmap_hooks_;
-using base::internal::mmap_replacement_;
-using base::internal::munmap_hooks_;
-using base::internal::munmap_replacement_;
-using base::internal::mremap_hooks_;
-using base::internal::presbrk_hooks_;
-using base::internal::sbrk_hooks_;
-
-// These are available as C bindings as well as C++, hence their
-// definition outside the MallocHook class.
-extern "C"
-int MallocHook_AddNewHook(MallocHook_NewHook hook) {
-  RAW_VLOG(10, "AddNewHook(%p)", hook);
-  return new_hooks_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemoveNewHook(MallocHook_NewHook hook) {
-  RAW_VLOG(10, "RemoveNewHook(%p)", hook);
-  return new_hooks_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook) {
-  RAW_VLOG(10, "AddDeleteHook(%p)", hook);
-  return delete_hooks_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook) {
-  RAW_VLOG(10, "RemoveDeleteHook(%p)", hook);
-  return delete_hooks_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook) {
-  RAW_VLOG(10, "AddPreMmapHook(%p)", hook);
-  return premmap_hooks_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook) {
-  RAW_VLOG(10, "RemovePreMmapHook(%p)", hook);
-  return premmap_hooks_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook) {
-  RAW_VLOG(10, "SetMmapReplacement(%p)", hook);
-  // NOTE this is a best effort CHECK.  Concurrent sets could succeed since
-  // this test is outside of the Add spin lock.
- RAW_CHECK(mmap_replacement_.empty(), "Only one MMapReplacement is allowed."); - return mmap_replacement_.Add(hook); -} - -extern "C" -int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook) { - RAW_VLOG(10, "RemoveMmapReplacement(%p)", hook); - return mmap_replacement_.Remove(hook); -} - -extern "C" -int MallocHook_AddMmapHook(MallocHook_MmapHook hook) { - RAW_VLOG(10, "AddMmapHook(%p)", hook); - return mmap_hooks_.Add(hook); -} - -extern "C" -int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook) { - RAW_VLOG(10, "RemoveMmapHook(%p)", hook); - return mmap_hooks_.Remove(hook); -} - -extern "C" -int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook) { - RAW_VLOG(10, "AddMunmapHook(%p)", hook); - return munmap_hooks_.Add(hook); -} - -extern "C" -int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook) { - RAW_VLOG(10, "RemoveMunmapHook(%p)", hook); - return munmap_hooks_.Remove(hook); -} - -extern "C" -int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook) { - RAW_VLOG(10, "SetMunmapReplacement(%p)", hook); - // NOTE this is a best effort CHECK. Concurrent sets could succeed since - // this test is outside of the Add spin lock. - RAW_CHECK(munmap_replacement_.empty(), - "Only one MunmapReplacement is allowed."); - return munmap_replacement_.Add(hook); -} - -extern "C" -int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook) { - RAW_VLOG(10, "RemoveMunmapReplacement(%p)", hook); - return munmap_replacement_.Remove(hook); -} - -extern "C" -int MallocHook_AddMremapHook(MallocHook_MremapHook hook) { - RAW_VLOG(10, "AddMremapHook(%p)", hook); - return mremap_hooks_.Add(hook); -} - -extern "C" -int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook) { - RAW_VLOG(10, "RemoveMremapHook(%p)", hook); - return mremap_hooks_.Remove(hook); -} - -extern "C" -int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook) { - RAW_VLOG(10, "AddPreSbrkHook(%p)", hook); - return presbrk_hooks_.Add(hook); -} - -extern "C" -int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook) { - RAW_VLOG(10, "RemovePreSbrkHook(%p)", hook); - return presbrk_hooks_.Remove(hook); -} - -extern "C" -int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook) { - RAW_VLOG(10, "AddSbrkHook(%p)", hook); - return sbrk_hooks_.Add(hook); -} - -extern "C" -int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook) { - RAW_VLOG(10, "RemoveSbrkHook(%p)", hook); - return sbrk_hooks_.Remove(hook); -} - -// The code below is DEPRECATED. 
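[Reviewer's aside, not part of the diff] Before the deprecated singular-hook block that follows, a hedged usage sketch of the additive C bindings defined above. The header and the MallocHook_Add*/Remove* names are the real gperftools ones carried in this contrib copy; the counting hooks themselves are invented for illustration:

    #include <atomic>
    #include <cstddef>
    #include <gperftools/malloc_hook_c.h>  // the C bindings shown above

    static std::atomic<long> g_live_allocs{0};

    // Hooks run inside malloc/free: keep them tiny and allocation-free.
    static void OnNew(const void* /*ptr*/, size_t /*size*/) {
      g_live_allocs.fetch_add(1, std::memory_order_relaxed);
    }
    static void OnDelete(const void* /*ptr*/) {
      g_live_allocs.fetch_sub(1, std::memory_order_relaxed);
    }

    void InstallCounters() {
      // Both return non-zero on success; several hooks can coexist,
      // up to kHookListMaxValues.
      MallocHook_AddNewHook(&OnNew);
      MallocHook_AddDeleteHook(&OnDelete);
    }

    void RemoveCounters() {
      MallocHook_RemoveNewHook(&OnNew);
      MallocHook_RemoveDeleteHook(&OnDelete);
    }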
-extern "C" -MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook) { - RAW_VLOG(10, "SetNewHook(%p)", hook); - return new_hooks_.ExchangeSingular(hook); -} - -extern "C" -MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook) { - RAW_VLOG(10, "SetDeleteHook(%p)", hook); - return delete_hooks_.ExchangeSingular(hook); -} - -extern "C" -MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook) { - RAW_VLOG(10, "SetPreMmapHook(%p)", hook); - return premmap_hooks_.ExchangeSingular(hook); -} - -extern "C" -MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook) { - RAW_VLOG(10, "SetMmapHook(%p)", hook); - return mmap_hooks_.ExchangeSingular(hook); -} - -extern "C" -MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook) { - RAW_VLOG(10, "SetMunmapHook(%p)", hook); - return munmap_hooks_.ExchangeSingular(hook); -} - -extern "C" -MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook) { - RAW_VLOG(10, "SetMremapHook(%p)", hook); - return mremap_hooks_.ExchangeSingular(hook); -} - -extern "C" -MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook) { - RAW_VLOG(10, "SetPreSbrkHook(%p)", hook); - return presbrk_hooks_.ExchangeSingular(hook); -} - -extern "C" -MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook) { - RAW_VLOG(10, "SetSbrkHook(%p)", hook); - return sbrk_hooks_.ExchangeSingular(hook); -} -// End of DEPRECATED code section. - -// Note: embedding the function calls inside the traversal of HookList would be -// very confusing, as it is legal for a hook to remove itself and add other -// hooks. Doing traversal first, and then calling the hooks ensures we only -// call the hooks registered at the start. -#define INVOKE_HOOKS(HookType, hook_list, args) do { \ - HookType hooks[kHookListMaxValues]; \ - int num_hooks = hook_list.Traverse(hooks, kHookListMaxValues); \ - for (int i = 0; i < num_hooks; ++i) { \ - (*hooks[i])args; \ - } \ - } while (0) - -// There should only be one replacement. Return the result of the first -// one, or false if there is none. 
-#define INVOKE_REPLACEMENT(HookType, hook_list, args) do { \ - HookType hooks[kHookListMaxValues]; \ - int num_hooks = hook_list.Traverse(hooks, kHookListMaxValues); \ - return (num_hooks > 0 && (*hooks[0])args); \ - } while (0) - - -void MallocHook::InvokeNewHookSlow(const void* p, size_t s) { - if (tcmalloc::IsEmergencyPtr(p)) { - return; - } - INVOKE_HOOKS(NewHook, new_hooks_, (p, s)); -} - -void MallocHook::InvokeDeleteHookSlow(const void* p) { - if (tcmalloc::IsEmergencyPtr(p)) { - return; - } - INVOKE_HOOKS(DeleteHook, delete_hooks_, (p)); -} - -void MallocHook::InvokePreMmapHookSlow(const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset) { - INVOKE_HOOKS(PreMmapHook, premmap_hooks_, (start, size, protection, flags, fd, - offset)); -} - -void MallocHook::InvokeMmapHookSlow(const void* result, - const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset) { - INVOKE_HOOKS(MmapHook, mmap_hooks_, (result, start, size, protection, flags, - fd, offset)); -} - -bool MallocHook::InvokeMmapReplacementSlow(const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset, - void** result) { - INVOKE_REPLACEMENT(MmapReplacement, mmap_replacement_, - (start, size, protection, flags, fd, offset, result)); -} - -void MallocHook::InvokeMunmapHookSlow(const void* p, size_t s) { - INVOKE_HOOKS(MunmapHook, munmap_hooks_, (p, s)); -} - -bool MallocHook::InvokeMunmapReplacementSlow(const void* p, - size_t s, - int* result) { - INVOKE_REPLACEMENT(MunmapReplacement, munmap_replacement_, (p, s, result)); -} - -void MallocHook::InvokeMremapHookSlow(const void* result, - const void* old_addr, - size_t old_size, - size_t new_size, - int flags, - const void* new_addr) { - INVOKE_HOOKS(MremapHook, mremap_hooks_, (result, old_addr, old_size, new_size, - flags, new_addr)); -} - -void MallocHook::InvokePreSbrkHookSlow(ptrdiff_t increment) { - INVOKE_HOOKS(PreSbrkHook, presbrk_hooks_, (increment)); -} - -void MallocHook::InvokeSbrkHookSlow(const void* result, ptrdiff_t increment) { - INVOKE_HOOKS(SbrkHook, sbrk_hooks_, (result, increment)); -} - -#undef INVOKE_HOOKS - -#ifndef NO_TCMALLOC_SAMPLES - -DEFINE_ATTRIBUTE_SECTION_VARS(google_malloc); -DECLARE_ATTRIBUTE_SECTION_VARS(google_malloc); - // actual functions are in debugallocation.cc or tcmalloc.cc -DEFINE_ATTRIBUTE_SECTION_VARS(malloc_hook); -DECLARE_ATTRIBUTE_SECTION_VARS(malloc_hook); - // actual functions are in this file, malloc_hook.cc, and low_level_alloc.cc - -#define ADDR_IN_ATTRIBUTE_SECTION(addr, name) \ - (reinterpret_cast(ATTRIBUTE_SECTION_START(name)) <= \ - reinterpret_cast(addr) && \ - reinterpret_cast(addr) < \ - reinterpret_cast(ATTRIBUTE_SECTION_STOP(name))) - -// Return true iff 'caller' is a return address within a function -// that calls one of our hooks via MallocHook:Invoke*. -// A helper for GetCallerStackTrace. -static inline bool InHookCaller(const void* caller) { - return ADDR_IN_ATTRIBUTE_SECTION(caller, google_malloc) || - ADDR_IN_ATTRIBUTE_SECTION(caller, malloc_hook); - // We can use one section for everything except tcmalloc_or_debug - // due to its special linkage mode, which prevents merging of the sections. 
-}
-
-#undef ADDR_IN_ATTRIBUTE_SECTION
-
-static bool checked_sections = false;
-
-static inline void CheckInHookCaller() {
-  if (!checked_sections) {
-    INIT_ATTRIBUTE_SECTION_VARS(google_malloc);
-    if (ATTRIBUTE_SECTION_START(google_malloc) ==
-        ATTRIBUTE_SECTION_STOP(google_malloc)) {
-      RAW_LOG(ERROR, "google_malloc section is missing, "
-                     "thus InHookCaller is broken!");
-    }
-    INIT_ATTRIBUTE_SECTION_VARS(malloc_hook);
-    if (ATTRIBUTE_SECTION_START(malloc_hook) ==
-        ATTRIBUTE_SECTION_STOP(malloc_hook)) {
-      RAW_LOG(ERROR, "malloc_hook section is missing, "
-                     "thus InHookCaller is broken!");
-    }
-    checked_sections = true;
-  }
-}
-
-#endif // !NO_TCMALLOC_SAMPLES
-
-// We can improve behavior/compactness of this function
-// if we pass a generic test function (with a generic arg)
-// into the implementations for GetStackTrace instead of the skip_count.
-extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth,
-                                              int skip_count) {
-#if defined(NO_TCMALLOC_SAMPLES)
-  return 0;
-#elif !defined(HAVE_ATTRIBUTE_SECTION_START)
-  // Fall back to GetStackTrace and good old but fragile frame skip counts.
-  // Note: this path is inaccurate when a hook is not called directly by an
-  // allocation function but is daisy-chained through another hook,
-  // search for MallocHook::(Get|Set|Invoke)* to find such cases.
-  return GetStackTrace(result, max_depth, skip_count + int(DEBUG_MODE));
-  // due to -foptimize-sibling-calls in opt mode
-  // there's no need for extra frame skip here then
-#else
-  CheckInHookCaller();
-  // MallocHook caller determination via InHookCaller works, use it:
-  static const int kMaxSkip = 32 + 6 + 3;
-  // Constant tuned to do just one GetStackTrace call below in practice
-  // and not get many frames that we don't actually need:
-  // currently max passed max_depth is 32,
-  // max passed/needed skip_count is 6
-  // and 3 is to account for some hook daisy chaining.
-  static const int kStackSize = kMaxSkip + 1;
-  void* stack[kStackSize];
-  int depth = GetStackTrace(stack, kStackSize, 1);  // skip this function frame
-  if (depth == 0)  // silently propagate cases when GetStackTrace does not work
-    return 0;
-  for (int i = 0; i < depth; ++i) {  // stack[0] is our immediate caller
-    if (InHookCaller(stack[i])) {
-      RAW_VLOG(10, "Found hooked allocator at %d: %p <- %p",
-               i, stack[i], stack[i+1]);
-      i += 1;  // skip hook caller frame
-      depth -= i;  // correct depth
-      if (depth > max_depth) depth = max_depth;
-      copy(stack + i, stack + i + depth, result);
-      if (depth < max_depth && depth + i == kStackSize) {
-        // get frames for the missing depth
-        depth +=
-            GetStackTrace(result + depth, max_depth - depth, 1 + kStackSize);
-      }
-      return depth;
-    }
-  }
-  RAW_LOG(WARNING, "Hooked allocator frame not found, returning empty trace");
-  // If this happens try increasing kMaxSkip
-  // or else something must be wrong with InHookCaller,
-  // e.g. for every section used in InHookCaller
-  // all functions in that section must be inside the same library.
-  return 0;
-#endif
-}
-
-// On systems where we know how, we override mmap/munmap/mremap/sbrk
-// to provide support for calling the related hooks (in addition,
-// of course, to doing what these functions normally do).
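[Reviewer's aside, not part of the diff] Before the per-OS overrides announced above: InHookCaller and CheckInHookCaller ultimately rest on linker-synthesized section boundary symbols. A minimal sketch of the same trick, assuming GCC/Clang on ELF with GNU-compatible linkers; the section name my_hooks and both functions are invented:

    #include <cstdint>

    // The linker synthesizes these for any section whose name is a valid C
    // identifier, once something references them; weak so an empty section
    // still links.
    extern "C" {
    extern char __start_my_hooks[] __attribute__((weak));
    extern char __stop_my_hooks[] __attribute__((weak));
    }

    // Placing a function in the section is what ATTRIBUTE_SECTION does above.
    __attribute__((section("my_hooks"), noinline))
    void HookCallerEntry() { /* would dispatch to registered hooks */ }

    // The moral equivalent of ADDR_IN_ATTRIBUTE_SECTION: a return address
    // falls inside [__start, __stop) iff it came from a function placed there.
    static bool AddrInHookSection(const void* addr) {
      uintptr_t a = reinterpret_cast<uintptr_t>(addr);
      return reinterpret_cast<uintptr_t>(__start_my_hooks) <= a &&
             a < reinterpret_cast<uintptr_t>(__stop_my_hooks);
    }

This is also why the RAW_LOG above warns when a section is empty: if nothing landed in the section, the start and stop symbols coincide and the bounds test can never succeed.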
- -#if defined(__linux) -# include "malloc_hook_mmap_linux.h" - -#elif defined(__FreeBSD__) -# include "malloc_hook_mmap_freebsd.h" - -#else - -/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, - int flags, int fd, off_t offset) { - void* result; - if (!MallocHook::InvokeMmapReplacement( - start, length, prot, flags, fd, offset, &result)) { - result = mmap(start, length, prot, flags, fd, offset); - } - return result; -} - -/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { - int result; - if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { - result = munmap(start, length); - } - return result; -} - -#endif diff --git a/contrib/libtcmalloc/src/malloc_hook_mmap_freebsd.h b/contrib/libtcmalloc/src/malloc_hook_mmap_freebsd.h deleted file mode 100644 index 8575dcc7c08..00000000000 --- a/contrib/libtcmalloc/src/malloc_hook_mmap_freebsd.h +++ /dev/null @@ -1,135 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Override mmap/munmap/mremap/sbrk to provide support for calling the -// related hooks (in addition, of course, to doing what these -// functions normally do). - -#ifndef __FreeBSD__ -# error Should only be including malloc_hook_mmap_freebsd.h on FreeBSD systems. -#endif - -#include -#include -#include -#include -#include - -// Make sure mmap doesn't get #define'd away by -#undef mmap - -// According to the FreeBSD documentation, use syscall if you do not -// need 64-bit alignment otherwise use __syscall. Indeed, syscall -// doesn't work correctly in most situations on 64-bit. It's return -// type is 'int' so for things like SYS_mmap, it actually truncates -// the returned address to 32-bits. 
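[Reviewer's aside, not part of the diff] A toy demonstration of the truncation the comment above warns about; it involves no real syscall, all names are invented, and it needs a 64-bit build to show the effect:

    #include <cstdint>
    #include <cstdio>

    // Models FreeBSD's 'int syscall(int, ...)' return path.
    static int truncating_return(void* p) {
      return static_cast<int>(reinterpret_cast<intptr_t>(p));
    }

    int main() {
      void* high = reinterpret_cast<void*>(UINT64_C(0x7fff12340000));
      int r = truncating_return(high);
      void* back = reinterpret_cast<void*>(static_cast<intptr_t>(r));
      // On a 64-bit build these differ: the top 32 bits of the address are
      // gone, which is exactly why SYS_mmap must go through __syscall.
      std::printf("before: %p  after int round-trip: %p\n", high, back);
    }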
-#if defined(__amd64__) || defined(__x86_64__) -# define MALLOC_HOOK_SYSCALL __syscall -#else -# define MALLOC_HOOK_SYSCALL syscall -#endif - - -extern "C" { - void* mmap(void *start, size_t length,int prot, int flags, - int fd, off_t offset) __THROW - ATTRIBUTE_SECTION(malloc_hook); - int munmap(void* start, size_t length) __THROW - ATTRIBUTE_SECTION(malloc_hook); - void* sbrk(intptr_t increment) __THROW - ATTRIBUTE_SECTION(malloc_hook); -} - -static inline void* do_mmap(void *start, size_t length, - int prot, int flags, - int fd, off_t offset) __THROW { - return (void *)MALLOC_HOOK_SYSCALL(SYS_mmap, - start, length, prot, flags, fd, offset); -} - -static inline void* do_sbrk(intptr_t increment) { - static void *(*libc_sbrk)(intptr_t); - if (libc_sbrk == NULL) - libc_sbrk = (void *(*)(intptr_t))dlsym(RTLD_NEXT, "sbrk"); - - return libc_sbrk(increment); -} - - -extern "C" void* mmap(void *start, size_t length, int prot, int flags, - int fd, off_t offset) __THROW { - MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); - void *result; - if (!MallocHook::InvokeMmapReplacement( - start, length, prot, flags, fd, offset, &result)) { - result = do_mmap(start, length, prot, flags, fd, - static_cast(offset)); // avoid sign extension - } - MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); - return result; -} - -extern "C" int munmap(void* start, size_t length) __THROW { - MallocHook::InvokeMunmapHook(start, length); - int result; - if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { - result = MALLOC_HOOK_SYSCALL(SYS_munmap, start, length); - } - - return result; -} - -extern "C" void* sbrk(intptr_t increment) __THROW { - MallocHook::InvokePreSbrkHook(increment); - void *result = do_sbrk(increment); - MallocHook::InvokeSbrkHook(result, increment); - return result; -} - -/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, - int flags, int fd, off_t offset) { - void* result; - if (!MallocHook::InvokeMmapReplacement( - start, length, prot, flags, fd, offset, &result)) { - result = do_mmap(start, length, prot, flags, fd, offset); - } - - return result; -} - -/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { - int result; - if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { - result = MALLOC_HOOK_SYSCALL(SYS_munmap, start, length); - } - return result; -} - -#undef MALLOC_HOOK_SYSCALL diff --git a/contrib/libtcmalloc/src/malloc_hook_mmap_linux.h b/contrib/libtcmalloc/src/malloc_hook_mmap_linux.h deleted file mode 100644 index 4b1386185bc..00000000000 --- a/contrib/libtcmalloc/src/malloc_hook_mmap_linux.h +++ /dev/null @@ -1,238 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-
-// We define mmap() and mmap64(), which somewhat reimplements libc's mmap
-// syscall stubs.  Unfortunately libc only exports the stubs via weak symbols
-// (which we're overriding with our mmap64() and mmap() wrappers) so we can't
-// just call through to them.
-
-#ifndef __linux
-# error Should only be including malloc_hook_mmap_linux.h on linux systems.
-#endif
-
-#include <unistd.h>
-#include <syscall.h>
-#include <sys/mman.h>
-#include <errno.h>
-#include "base/linux_syscall_support.h"
-
-// The x86-32 case and the x86-64 case differ:
-// 32b has a mmap2() syscall, 64b does not.
-// 64b and 32b have different calling conventions for mmap().
-
-// I test for 64-bit first so I don't have to do things like
-// '#if (defined(__mips__) && !defined(__MIPS64__))' as a mips32 check.
-#if defined(__x86_64__) || defined(__PPC64__) || defined(__aarch64__) || (defined(_MIPS_SIM) && _MIPS_SIM == _ABI64) || defined(__s390__)
-
-static inline void* do_mmap64(void *start, size_t length,
-                              int prot, int flags,
-                              int fd, __off64_t offset) __THROW {
-  return sys_mmap(start, length, prot, flags, fd, offset);
-}
-
-#define MALLOC_HOOK_HAVE_DO_MMAP64 1
-
-#elif defined(__i386__) || defined(__PPC__) || defined(__mips__) || \
-      defined(__arm__)
-
-static inline void* do_mmap64(void *start, size_t length,
-                              int prot, int flags,
-                              int fd, __off64_t offset) __THROW {
-  void *result;
-
-  // Try mmap2() unless it's not supported
-  static bool have_mmap2 = true;
-  if (have_mmap2) {
-    static int pagesize = 0;
-    if (!pagesize) pagesize = getpagesize();
-
-    // Check that the offset is page aligned
-    if (offset & (pagesize - 1)) {
-      result = MAP_FAILED;
-      errno = EINVAL;
-      goto out;
-    }
-
-    result = (void *)syscall(SYS_mmap2,
-                             start, length, prot, flags, fd,
-                             (off_t) (offset / pagesize));
-    if (result != MAP_FAILED || errno != ENOSYS)  goto out;
-
-    // We don't have mmap2() after all - don't bother trying it in future
-    have_mmap2 = false;
-  }
-
-  if (((off_t)offset) != offset) {
-    // If we're trying to map a 64-bit offset, fail now since we don't
-    // have 64-bit mmap() support.
-    result = MAP_FAILED;
-    errno = EINVAL;
-    goto out;
-  }
-
-#ifdef __NR_mmap
-  {
-    // Fall back to old 32-bit offset mmap() call
-    // Old syscall interface cannot handle six args, so pass in an array
-    int32 args[6] = { (int32) start, (int32) length, prot, flags, fd,
-                      (int32)(off_t) offset };
-    result = (void *)syscall(SYS_mmap, args);
-  }
-#else
-  // Some Linux ports like ARM EABI Linux have no mmap, just mmap2.
- result = MAP_FAILED; -#endif - - out: - return result; -} - -#define MALLOC_HOOK_HAVE_DO_MMAP64 1 - -#endif // #if defined(__x86_64__) - - -#ifdef MALLOC_HOOK_HAVE_DO_MMAP64 - -// We use do_mmap64 abstraction to put MallocHook::InvokeMmapHook -// calls right into mmap and mmap64, so that the stack frames in the caller's -// stack are at the same offsets for all the calls of memory allocating -// functions. - -// Put all callers of MallocHook::Invoke* in this module into -// malloc_hook section, -// so that MallocHook::GetCallerStackTrace can function accurately: - -// Make sure mmap doesn't get #define'd away by -# undef mmap - -extern "C" { - void* mmap64(void *start, size_t length, int prot, int flags, - int fd, __off64_t offset ) __THROW - ATTRIBUTE_SECTION(malloc_hook); - void* mmap(void *start, size_t length,int prot, int flags, - int fd, off_t offset) __THROW - ATTRIBUTE_SECTION(malloc_hook); - int munmap(void* start, size_t length) __THROW - ATTRIBUTE_SECTION(malloc_hook); - void* mremap(void* old_addr, size_t old_size, size_t new_size, - int flags, ...) __THROW - ATTRIBUTE_SECTION(malloc_hook); - void* sbrk(ptrdiff_t increment) __THROW - ATTRIBUTE_SECTION(malloc_hook); -} - -extern "C" void* mmap64(void *start, size_t length, int prot, int flags, - int fd, __off64_t offset) __THROW { - MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); - void *result; - if (!MallocHook::InvokeMmapReplacement( - start, length, prot, flags, fd, offset, &result)) { - result = do_mmap64(start, length, prot, flags, fd, offset); - } - MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); - return result; -} - -# if !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH) - -extern "C" void* mmap(void *start, size_t length, int prot, int flags, - int fd, off_t offset) __THROW { - MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); - void *result; - if (!MallocHook::InvokeMmapReplacement( - start, length, prot, flags, fd, offset, &result)) { - result = do_mmap64(start, length, prot, flags, fd, - static_cast(offset)); // avoid sign extension - } - MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); - return result; -} - -# endif // !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH) - -extern "C" int munmap(void* start, size_t length) __THROW { - MallocHook::InvokeMunmapHook(start, length); - int result; - if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { - result = sys_munmap(start, length); - } - return result; -} - -extern "C" void* mremap(void* old_addr, size_t old_size, size_t new_size, - int flags, ...) 
__THROW { - va_list ap; - va_start(ap, flags); - void *new_address = va_arg(ap, void *); - va_end(ap); - void* result = sys_mremap(old_addr, old_size, new_size, flags, new_address); - MallocHook::InvokeMremapHook(result, old_addr, old_size, new_size, flags, - new_address); - return result; -} - -#ifndef __UCLIBC__ -// libc's version: -extern "C" void* __sbrk(ptrdiff_t increment); - -extern "C" void* sbrk(ptrdiff_t increment) __THROW { - MallocHook::InvokePreSbrkHook(increment); - void *result = __sbrk(increment); - MallocHook::InvokeSbrkHook(result, increment); - return result; -} - -#endif - -/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, - int flags, int fd, off_t offset) { - void* result; - if (!MallocHook::InvokeMmapReplacement( - start, length, prot, flags, fd, offset, &result)) { - result = do_mmap64(start, length, prot, flags, fd, offset); - } - return result; -} - -/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { - int result; - if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { - result = syscall(SYS_munmap, start, length); - } - return result; -} - -#undef MALLOC_HOOK_HAVE_DO_MMAP64 - -#endif // #ifdef MALLOC_HOOK_HAVE_DO_MMAP64 diff --git a/contrib/libtcmalloc/src/maybe_emergency_malloc.h b/contrib/libtcmalloc/src/maybe_emergency_malloc.h deleted file mode 100644 index 250ecf01a3f..00000000000 --- a/contrib/libtcmalloc/src/maybe_emergency_malloc.h +++ /dev/null @@ -1,55 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2014, gperftools Contributors -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
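[Reviewer's aside, not part of the diff] The small header deleted next is the conditional-stub idiom in miniature: one API, a real implementation behind a feature macro, and inline no-op stubs otherwise, so call sites never need #ifdefs. A generic sketch of the idiom, with every name invented; it compiles as-is when TRACING_ENABLED is undefined:

    #include <cstddef>
    #include <new>

    #ifdef TRACING_ENABLED
    # include "tracing_impl.h"  // hypothetical real implementation
    #else
    namespace tracing {
    inline void OnAlloc(const void* /*p*/, std::size_t /*n*/) {}     // no-op
    inline bool IsTraced(const void* /*p*/) { return false; }        // folds away
    }  // namespace tracing
    #endif

    // Call sites stay unconditional; with the stubs the optimizer erases them.
    inline void* TracedAlloc(std::size_t n) {
      void* p = ::operator new(n);
      tracing::OnAlloc(p, n);
      return p;
    }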
- -#ifndef MAYBE_EMERGENCY_MALLOC_H -#define MAYBE_EMERGENCY_MALLOC_H - -#include "config.h" - -#ifdef ENABLE_EMERGENCY_MALLOC - -#include "emergency_malloc.h" - -#else - -namespace tcmalloc { - static inline void *EmergencyMalloc(size_t size) {return NULL;} - static inline void EmergencyFree(void *p) {} - static inline void *EmergencyCalloc(size_t n, size_t elem_size) {return NULL;} - static inline void *EmergencyRealloc(void *old_ptr, size_t new_size) {return NULL;} - - static inline bool IsEmergencyPtr(const void *_ptr) { - return false; - } -} - -#endif // ENABLE_EMERGENCY_MALLOC - -#endif diff --git a/contrib/libtcmalloc/src/maybe_threads.cc b/contrib/libtcmalloc/src/maybe_threads.cc deleted file mode 100644 index acfc99a5ae5..00000000000 --- a/contrib/libtcmalloc/src/maybe_threads.cc +++ /dev/null @@ -1,171 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Paul Menage -// -// Some wrappers for pthread functions so that we can be LD_PRELOADed -// against non-pthreads apps. -// -// This module will behave very strangely if some pthreads functions -// exist and others don't. - -#include "config.h" -#include -#include // for memcmp -#include // for __isthreaded on FreeBSD -// We don't actually need strings. But including this header seems to -// stop the compiler trying to short-circuit our pthreads existence -// tests and claiming that the address of a function is always -// non-zero. I have no idea why ... -#include -#include "maybe_threads.h" -#include "base/basictypes.h" -#include "base/logging.h" - -// __THROW is defined in glibc systems. It means, counter-intuitively, -// "This function will never throw an exception." It's an optional -// optimization tool, but we may need to use it to match glibc prototypes. 
-#ifndef __THROW    // I guess we're not on a glibc system
-# define __THROW   // __THROW is just an optimization, so ok to make it ""
-#endif
-
-// These are the methods we're going to conditionally include.
-extern "C" {
-  int pthread_key_create (pthread_key_t*, void (*)(void*))
-      __THROW ATTRIBUTE_WEAK;
-  int pthread_key_delete (pthread_key_t)
-      __THROW ATTRIBUTE_WEAK;
-  void *pthread_getspecific(pthread_key_t)
-      __THROW ATTRIBUTE_WEAK;
-  int pthread_setspecific(pthread_key_t, const void*)
-      __THROW ATTRIBUTE_WEAK;
-  int pthread_once(pthread_once_t *, void (*)(void))
-      ATTRIBUTE_WEAK;
-  int pthread_atfork(void (*__prepare) (void),
-                     void (*__parent) (void),
-                     void (*__child) (void))
-      __THROW ATTRIBUTE_WEAK;
-}
-
-#define MAX_PERTHREAD_VALS 16
-static void *perftools_pthread_specific_vals[MAX_PERTHREAD_VALS];
-static int next_key;
-
-// NOTE: it's similar to bit_cast defined in basic_types.h with
-// exception of ignoring sizes mismatch
-template <typename T1, typename T2>
-static T2 memcpy_cast(const T1 &input) {
-  T2 output;
-  size_t s = sizeof(input);
-  if (sizeof(output) < s) {
-    s = sizeof(output);
-  }
-  memcpy(&output, &input, s);
-  return output;
-}
-
-int perftools_pthread_key_create(pthread_key_t *key,
-                                 void (*destr_function) (void *)) {
-  if (pthread_key_create) {
-    return pthread_key_create(key, destr_function);
-  } else {
-    assert(next_key < MAX_PERTHREAD_VALS);
-    *key = memcpy_cast<int, pthread_key_t>(next_key++);
-    return 0;
-  }
-}
-
-int perftools_pthread_key_delete(pthread_key_t key) {
-  if (pthread_key_delete) {
-    return pthread_key_delete(key);
-  } else {
-    return 0;
-  }
-}
-
-void *perftools_pthread_getspecific(pthread_key_t key) {
-  if (pthread_getspecific) {
-    return pthread_getspecific(key);
-  } else {
-    return perftools_pthread_specific_vals[memcpy_cast<pthread_key_t, int>(key)];
-  }
-}
-
-int perftools_pthread_setspecific(pthread_key_t key, void *val) {
-  if (pthread_setspecific) {
-    return pthread_setspecific(key, val);
-  } else {
-    perftools_pthread_specific_vals[memcpy_cast<pthread_key_t, int>(key)] = val;
-    return 0;
-  }
-}
-
-
-static pthread_once_t pthread_once_init = PTHREAD_ONCE_INIT;
-int perftools_pthread_once(pthread_once_t *ctl,
-                           void (*init_routine) (void)) {
-#ifdef __FreeBSD__
-  // On __FreeBSD__, calling pthread_once on a system that is not
-  // linked with -pthread is silently a noop. :-(  Luckily, we have a
-  // workaround: FreeBSD exposes __isthreaded in <stdio.h>, which is
-  // set to 1 when the first thread is spawned.  So on those systems,
-  // we can use our own separate pthreads-once mechanism, which is
-  // used until __isthreaded is 1 (which will never be true if the app
-  // is not linked with -pthread).
- static bool pthread_once_ran_before_threads = false; - if (pthread_once_ran_before_threads) { - return 0; - } - if (!__isthreaded) { - init_routine(); - pthread_once_ran_before_threads = true; - return 0; - } -#endif - if (pthread_once) { - return pthread_once(ctl, init_routine); - } else { - if (memcmp(ctl, &pthread_once_init, sizeof(*ctl)) == 0) { - init_routine(); - ++*(char*)(ctl); // make it so it's no longer equal to init - } - return 0; - } -} - -void perftools_pthread_atfork(void (*before)(), - void (*parent_after)(), - void (*child_after)()) { - if (pthread_atfork) { - int rv = pthread_atfork(before, parent_after, child_after); - CHECK(rv == 0); - } -} diff --git a/contrib/libtcmalloc/src/maybe_threads.h b/contrib/libtcmalloc/src/maybe_threads.h deleted file mode 100644 index c6cfdf7d158..00000000000 --- a/contrib/libtcmalloc/src/maybe_threads.h +++ /dev/null @@ -1,61 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Paul Menage - -//------------------------------------------------------------------- -// Some wrappers for pthread functions so that we can be LD_PRELOADed -// against non-pthreads apps. -//------------------------------------------------------------------- - -#ifndef GOOGLE_MAYBE_THREADS_H_ -#define GOOGLE_MAYBE_THREADS_H_ - -#ifdef HAVE_PTHREAD -#include -#endif - -int perftools_pthread_key_create(pthread_key_t *key, - void (*destr_function) (void *)); -int perftools_pthread_key_delete(pthread_key_t key); -void *perftools_pthread_getspecific(pthread_key_t key); -int perftools_pthread_setspecific(pthread_key_t key, void *val); -int perftools_pthread_once(pthread_once_t *ctl, - void (*init_routine) (void)); - -// Our wrapper for pthread_atfork. Does _nothing_ when there are no -// threads. See static_vars.cc:SetupAtForkLocksHandler for only user -// of this. 
-void perftools_pthread_atfork(void (*before)(), - void (*parent_after)(), - void (*child_after)()); - -#endif /* GOOGLE_MAYBE_THREADS_H_ */ diff --git a/contrib/libtcmalloc/src/memfs_malloc.cc b/contrib/libtcmalloc/src/memfs_malloc.cc deleted file mode 100644 index 419ef24e43b..00000000000 --- a/contrib/libtcmalloc/src/memfs_malloc.cc +++ /dev/null @@ -1,272 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Arun Sharma -// -// A tcmalloc system allocator that uses a memory based filesystem such as -// tmpfs or hugetlbfs -// -// Since these only exist on linux, we only register this allocator there. - -#ifdef __linux - -#include "config.h" -#include // for errno, EINVAL -#include // for PRId64 -#include // for PATH_MAX -#include // for size_t, NULL -#ifdef HAVE_STDINT_H -#include // for int64_t, uintptr_t -#endif -#include // for snprintf -#include // for mkstemp -#include // for strerror -#include // for mmap, MAP_FAILED, etc -#include // for fstatfs, statfs -#include // for ftruncate, off_t, unlink -#include // for operator new -#include - -#include -#include "base/basictypes.h" -#include "base/googleinit.h" -#include "base/sysinfo.h" -#include "internal_logging.h" - -// TODO(sanjay): Move the code below into the tcmalloc namespace -using tcmalloc::kLog; -using tcmalloc::kCrash; -using tcmalloc::Log; -using std::string; - -DEFINE_string(memfs_malloc_path, EnvToString("TCMALLOC_MEMFS_MALLOC_PATH", ""), - "Path where hugetlbfs or tmpfs is mounted. The caller is " - "responsible for ensuring that the path is unique and does " - "not conflict with another process"); -DEFINE_int64(memfs_malloc_limit_mb, - EnvToInt("TCMALLOC_MEMFS_LIMIT_MB", 0), - "Limit total allocation size to the " - "specified number of MiB. 
0 == no limit."); -DEFINE_bool(memfs_malloc_abort_on_fail, - EnvToBool("TCMALLOC_MEMFS_ABORT_ON_FAIL", false), - "abort() whenever memfs_malloc fails to satisfy an allocation " - "for any reason."); -DEFINE_bool(memfs_malloc_ignore_mmap_fail, - EnvToBool("TCMALLOC_MEMFS_IGNORE_MMAP_FAIL", false), - "Ignore failures from mmap"); -DEFINE_bool(memfs_malloc_map_private, - EnvToBool("TCMALLOC_MEMFS_MAP_PRIVATE", false), - "Use MAP_PRIVATE with mmap"); - -// Hugetlbfs based allocator for tcmalloc -class HugetlbSysAllocator: public SysAllocator { -public: - explicit HugetlbSysAllocator(SysAllocator* fallback) - : failed_(true), // To disable allocator until Initialize() is called. - big_page_size_(0), - hugetlb_fd_(-1), - hugetlb_base_(0), - fallback_(fallback) { - } - - void* Alloc(size_t size, size_t *actual_size, size_t alignment); - bool Initialize(); - - bool failed_; // Whether failed to allocate memory. - -private: - void* AllocInternal(size_t size, size_t *actual_size, size_t alignment); - - int64 big_page_size_; - int hugetlb_fd_; // file descriptor for hugetlb - off_t hugetlb_base_; - - SysAllocator* fallback_; // Default system allocator to fall back to. -}; -static union { - char buf[sizeof(HugetlbSysAllocator)]; - void *ptr; -} hugetlb_space; - -// No locking needed here since we assume that tcmalloc calls -// us with an internal lock held (see tcmalloc/system-alloc.cc). -void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, - size_t alignment) { - if (failed_) { - return fallback_->Alloc(size, actual_size, alignment); - } - - // We don't respond to allocation requests smaller than big_page_size_ unless - // the caller is ok to take more than they asked for. Used by MetaDataAlloc. - if (actual_size == NULL && size < big_page_size_) { - return fallback_->Alloc(size, actual_size, alignment); - } - - // Enforce huge page alignment. Be careful to deal with overflow. - size_t new_alignment = alignment; - if (new_alignment < big_page_size_) new_alignment = big_page_size_; - size_t aligned_size = ((size + new_alignment - 1) / - new_alignment) * new_alignment; - if (aligned_size < size) { - return fallback_->Alloc(size, actual_size, alignment); - } - - void* result = AllocInternal(aligned_size, actual_size, new_alignment); - if (result != NULL) { - return result; - } - Log(kLog, __FILE__, __LINE__, - "HugetlbSysAllocator: (failed, allocated)", failed_, hugetlb_base_); - if (FLAGS_memfs_malloc_abort_on_fail) { - Log(kCrash, __FILE__, __LINE__, - "memfs_malloc_abort_on_fail is set"); - } - return fallback_->Alloc(size, actual_size, alignment); -} - -void* HugetlbSysAllocator::AllocInternal(size_t size, size_t* actual_size, - size_t alignment) { - // Ask for extra memory if alignment > pagesize - size_t extra = 0; - if (alignment > big_page_size_) { - extra = alignment - big_page_size_; - } - - // Test if this allocation would put us over the limit. - off_t limit = FLAGS_memfs_malloc_limit_mb*1024*1024; - if (limit > 0 && hugetlb_base_ + size + extra > limit) { - // Disable the allocator when there's less than one page left. - if (limit - hugetlb_base_ < big_page_size_) { - Log(kLog, __FILE__, __LINE__, "reached memfs_malloc_limit_mb"); - failed_ = true; - } - else { - Log(kLog, __FILE__, __LINE__, - "alloc too large (size, bytes left)", size, limit-hugetlb_base_); - } - return NULL; - } - - // This is not needed for hugetlbfs, but needed for tmpfs. Annoyingly - // hugetlbfs returns EINVAL for ftruncate. 
-  int ret = ftruncate(hugetlb_fd_, hugetlb_base_ + size + extra);
-  if (ret != 0 && errno != EINVAL) {
-    Log(kLog, __FILE__, __LINE__,
-        "ftruncate failed", strerror(errno));
-    failed_ = true;
-    return NULL;
-  }
-
-  // Note: size + extra does not overflow since:
-  //            size + alignment < (1<<NBITS).
-  // and        extra <= alignment
-  // therefore  size + extra < (1<<NBITS)
-  void* result;
-  result = mmap(0, size + extra, PROT_WRITE|PROT_READ,
-                FLAGS_memfs_malloc_map_private ? MAP_PRIVATE : MAP_SHARED,
-                hugetlb_fd_, hugetlb_base_);
-  if (result == reinterpret_cast<void*>(MAP_FAILED)) {
-    if (!FLAGS_memfs_malloc_ignore_mmap_fail) {
-      Log(kLog, __FILE__, __LINE__,
-          "mmap failed (size, error)", size + extra, strerror(errno));
-      failed_ = true;
-    }
-    return NULL;
-  }
-  uintptr_t ptr = reinterpret_cast<uintptr_t>(result);
-
-  // Adjust the return memory so it is aligned
-  size_t adjust = 0;
-  if ((ptr & (alignment - 1)) != 0) {
-    adjust = alignment - (ptr & (alignment - 1));
-  }
-  ptr += adjust;
-  hugetlb_base_ += (size + extra);
-
-  if (actual_size) {
-    *actual_size = size + extra - adjust;
-  }
-
-  return reinterpret_cast<void*>(ptr);
-}
-
-bool HugetlbSysAllocator::Initialize() {
-  char path[PATH_MAX];
-  const int pathlen = FLAGS_memfs_malloc_path.size();
-  if (pathlen + 8 > sizeof(path)) {
-    Log(kCrash, __FILE__, __LINE__, "XX fatal: memfs_malloc_path too long");
-    return false;
-  }
-  memcpy(path, FLAGS_memfs_malloc_path.data(), pathlen);
-  memcpy(path + pathlen, ".XXXXXX", 8);  // Also copies terminating \0
-
-  int hugetlb_fd = mkstemp(path);
-  if (hugetlb_fd == -1) {
-    Log(kLog, __FILE__, __LINE__,
-        "warning: unable to create memfs_malloc_path",
-        path, strerror(errno));
-    return false;
-  }
-
-  // Cleanup memory on process exit
-  if (unlink(path) == -1) {
-    Log(kCrash, __FILE__, __LINE__,
-        "fatal: error unlinking memfs_malloc_path", path, strerror(errno));
-    return false;
-  }
-
-  // Use fstatfs to figure out the default page size for memfs
-  struct statfs sfs;
-  if (fstatfs(hugetlb_fd, &sfs) == -1) {
-    Log(kCrash, __FILE__, __LINE__,
-        "fatal: error fstatfs of memfs_malloc_path", strerror(errno));
-    return false;
-  }
-  int64 page_size = sfs.f_bsize;
-
-  hugetlb_fd_ = hugetlb_fd;
-  big_page_size_ = page_size;
-  failed_ = false;
-  return true;
-}
-
-REGISTER_MODULE_INITIALIZER(memfs_malloc, {
-  if (FLAGS_memfs_malloc_path.length()) {
-    SysAllocator* alloc = MallocExtension::instance()->GetSystemAllocator();
-    HugetlbSysAllocator* hp =
-        new (hugetlb_space.buf) HugetlbSysAllocator(alloc);
-    if (hp->Initialize()) {
-      MallocExtension::instance()->SetSystemAllocator(hp);
-    }
-  }
-});
-
-#endif   /* ifdef __linux */
diff --git a/contrib/libtcmalloc/src/memory_region_map.cc b/contrib/libtcmalloc/src/memory_region_map.cc
deleted file mode 100644
index 841d6f3cf85..00000000000
--- a/contrib/libtcmalloc/src/memory_region_map.cc
+++ /dev/null
@@ -1,831 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Maxim Lifantsev
- */
-
-//
-// Background and key design points of MemoryRegionMap.
-//
-// MemoryRegionMap is a low-level module with quite atypical requirements that
-// result in some degree of non-triviality of the implementation and design.
-//
-// MemoryRegionMap collects info about *all* memory regions created with
-// mmap, munmap, mremap, sbrk.
-// The key word above is 'all': all that are happening in a process
-// during its lifetime, frequently starting even before global object
-// constructor execution.
-//
-// This is needed by the primary client of MemoryRegionMap:
-// HeapLeakChecker uses the regions and the associated stack traces
-// to figure out what part of the memory is the heap:
-// if MemoryRegionMap were to miss some (early) regions, leak checking would
-// stop working correctly.
-//
-// To accomplish the goal of functioning before/during global object
-// constructor execution MemoryRegionMap is done as a singleton service
-// that relies on its own on-demand initialized static constructor-less data,
-// and only relies on other low-level modules that can also function properly
-// even before global object constructors run.
-//
-// Accomplishing the goal of collecting data about all mmap, munmap, mremap,
-// sbrk occurrences is more involved: conceptually to do this one needs to
-// record some bits of data in particular about any mmap or sbrk call,
-// but to do that one needs to allocate memory for that data at some point,
-// but all memory allocations in the end themselves come from an mmap
-// or sbrk call (that's how the address space of the process grows).
-//
-// Also note that we need to do all the above recording from
-// within an mmap/sbrk hook which is sometimes/frequently made by a memory
-// allocator, including the allocator MemoryRegionMap itself must rely on.
-// In the case of heap-checker usage this includes even the very first
-// mmap/sbrk call happening in the program: heap-checker gets activated due to
-// a link-time installed mmap/sbrk hook and it initializes MemoryRegionMap
-// and asks it to record info about this very first call right from that
-// very first hook invocation.
-//
-// MemoryRegionMap is doing its memory allocations via LowLevelAlloc:
-// unlike the more complex standard memory allocator, LowLevelAlloc cooperates
-// with MemoryRegionMap by not holding any of its own locks while it calls mmap
-// to get memory, thus we are able to call LowLevelAlloc from
-// our mmap/sbrk hooks without causing a deadlock in it.
-// For the same reason of deadlock prevention the locking in MemoryRegionMap
-// itself is write-recursive, which is an exception to Google's mutex usage.
-//
-// We still need to break the infinite cycle of mmap calling our hook,
-// which asks LowLevelAlloc for memory to record this mmap,
-// which (sometimes) causes mmap, which calls our hook, and so on.
-// We do this as follows: on a recursive call of MemoryRegionMap's
-// mmap/sbrk/mremap hook we record the data about the allocation in a
-// static fixed-sized stack (saved_regions and saved_buckets), when the
-// recursion unwinds but before returning from the outer hook call we unwind
-// this stack and move the data from saved_regions and saved_buckets to its
-// permanent place in the RegionSet and "bucket_table" respectively,
-// which can cause more allocations and mmap-s and recursion and unwinding,
-// but the whole process ends eventually due to the fact that for the small
-// allocations we are doing LowLevelAlloc reuses one mmap call and parcels out
-// the memory it created to satisfy several of our allocation requests.
-//
-
-// ========================================================================= //
-
-#include <config.h>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
-#ifdef HAVE_MMAP
-#include <sys/mman.h>
-#elif !defined(MAP_FAILED)
-#define MAP_FAILED -1  // the only thing we need from mman.h
-#endif
-#ifdef HAVE_PTHREAD
-#include <pthread.h>   // for pthread_t, pthread_self()
-#endif
-#include <stddef.h>
-
-#include <algorithm>
-#include <set>
-
-#include "memory_region_map.h"
-
-#include "base/googleinit.h"
-#include "base/logging.h"
-#include "base/low_level_alloc.h"
-#include "malloc_hook-inl.h"
-
-#include <gperftools/stacktrace.h>
-#include <gperftools/malloc_hook.h>
-
-// MREMAP_FIXED is a linux extension.  How it's used in this file,
-// setting it to 0 is equivalent to saying, "This feature isn't
-// supported", which is right.
-#ifndef MREMAP_FIXED
-# define MREMAP_FIXED  0
-#endif
-
-using std::max;
-
-// ========================================================================= //
-
-int MemoryRegionMap::client_count_ = 0;
-int MemoryRegionMap::max_stack_depth_ = 0;
-MemoryRegionMap::RegionSet* MemoryRegionMap::regions_ = NULL;
-LowLevelAlloc::Arena* MemoryRegionMap::arena_ = NULL;
-SpinLock MemoryRegionMap::lock_(SpinLock::LINKER_INITIALIZED);
-SpinLock MemoryRegionMap::owner_lock_(  // ACQUIRED_AFTER(lock_)
-    SpinLock::LINKER_INITIALIZED);
-int MemoryRegionMap::recursion_count_ = 0;  // GUARDED_BY(owner_lock_)
-pthread_t MemoryRegionMap::lock_owner_tid_;  // GUARDED_BY(owner_lock_)
-int64 MemoryRegionMap::map_size_ = 0;
-int64 MemoryRegionMap::unmap_size_ = 0;
-HeapProfileBucket** MemoryRegionMap::bucket_table_ = NULL;  // GUARDED_BY(lock_)
-int MemoryRegionMap::num_buckets_ = 0;  // GUARDED_BY(lock_)
-int MemoryRegionMap::saved_buckets_count_ = 0;  // GUARDED_BY(lock_)
-HeapProfileBucket MemoryRegionMap::saved_buckets_[20];  // GUARDED_BY(lock_)
-
-// GUARDED_BY(lock_)
-const void* MemoryRegionMap::saved_buckets_keys_[20][kMaxStackDepth];
-
-// ========================================================================= //
-
-// Simple hook into execution of global object constructors,
-// so that we do not call pthread_self() when it does not yet work.
-static bool libpthread_initialized = false; -REGISTER_MODULE_INITIALIZER(libpthread_initialized_setter, - libpthread_initialized = true); - -static inline bool current_thread_is(pthread_t should_be) { - // Before main() runs, there's only one thread, so we're always that thread - if (!libpthread_initialized) return true; - // this starts working only sometime well into global constructor execution: - return pthread_equal(pthread_self(), should_be); -} - -// ========================================================================= // - -// Constructor-less place-holder to store a RegionSet in. -union MemoryRegionMap::RegionSetRep { - char rep[sizeof(RegionSet)]; - void* align_it; // do not need a better alignment for 'rep' than this - RegionSet* region_set() { return reinterpret_cast(rep); } -}; - -// The bytes where MemoryRegionMap::regions_ will point to. -// We use RegionSetRep with noop c-tor so that global construction -// does not interfere. -static MemoryRegionMap::RegionSetRep regions_rep; - -// ========================================================================= // - -// Has InsertRegionLocked been called recursively -// (or rather should we *not* use regions_ to record a hooked mmap). -static bool recursive_insert = false; - -void MemoryRegionMap::Init(int max_stack_depth, bool use_buckets) { - RAW_VLOG(10, "MemoryRegionMap Init"); - RAW_CHECK(max_stack_depth >= 0, ""); - // Make sure we don't overflow the memory in region stacks: - RAW_CHECK(max_stack_depth <= kMaxStackDepth, - "need to increase kMaxStackDepth?"); - Lock(); - client_count_ += 1; - max_stack_depth_ = max(max_stack_depth_, max_stack_depth); - if (client_count_ > 1) { - // not first client: already did initialization-proper - Unlock(); - RAW_VLOG(10, "MemoryRegionMap Init increment done"); - return; - } - // Set our hooks and make sure they were installed: - RAW_CHECK(MallocHook::AddMmapHook(&MmapHook), ""); - RAW_CHECK(MallocHook::AddMremapHook(&MremapHook), ""); - RAW_CHECK(MallocHook::AddSbrkHook(&SbrkHook), ""); - RAW_CHECK(MallocHook::AddMunmapHook(&MunmapHook), ""); - // We need to set recursive_insert since the NewArena call itself - // will already do some allocations with mmap which our hooks will catch - // recursive_insert allows us to buffer info about these mmap calls. - // Note that Init() can be (and is) sometimes called - // already from within an mmap/sbrk hook. - recursive_insert = true; - arena_ = LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena()); - recursive_insert = false; - HandleSavedRegionsLocked(&InsertRegionLocked); // flush the buffered ones - // Can't instead use HandleSavedRegionsLocked(&DoInsertRegionLocked) before - // recursive_insert = false; as InsertRegionLocked will also construct - // regions_ on demand for us. 
- if (use_buckets) { - const int table_bytes = kHashTableSize * sizeof(*bucket_table_); - recursive_insert = true; - bucket_table_ = static_cast( - MyAllocator::Allocate(table_bytes)); - recursive_insert = false; - memset(bucket_table_, 0, table_bytes); - num_buckets_ = 0; - } - Unlock(); - RAW_VLOG(10, "MemoryRegionMap Init done"); -} - -bool MemoryRegionMap::Shutdown() { - RAW_VLOG(10, "MemoryRegionMap Shutdown"); - Lock(); - RAW_CHECK(client_count_ > 0, ""); - client_count_ -= 1; - if (client_count_ != 0) { // not last client; need not really shutdown - Unlock(); - RAW_VLOG(10, "MemoryRegionMap Shutdown decrement done"); - return true; - } - if (bucket_table_ != NULL) { - for (int i = 0; i < kHashTableSize; i++) { - for (HeapProfileBucket* curr = bucket_table_[i]; curr != 0; /**/) { - HeapProfileBucket* bucket = curr; - curr = curr->next; - MyAllocator::Free(bucket->stack, 0); - MyAllocator::Free(bucket, 0); - } - } - MyAllocator::Free(bucket_table_, 0); - num_buckets_ = 0; - bucket_table_ = NULL; - } - RAW_CHECK(MallocHook::RemoveMmapHook(&MmapHook), ""); - RAW_CHECK(MallocHook::RemoveMremapHook(&MremapHook), ""); - RAW_CHECK(MallocHook::RemoveSbrkHook(&SbrkHook), ""); - RAW_CHECK(MallocHook::RemoveMunmapHook(&MunmapHook), ""); - if (regions_) regions_->~RegionSet(); - regions_ = NULL; - bool deleted_arena = LowLevelAlloc::DeleteArena(arena_); - if (deleted_arena) { - arena_ = 0; - } else { - RAW_LOG(WARNING, "Can't delete LowLevelAlloc arena: it's being used"); - } - Unlock(); - RAW_VLOG(10, "MemoryRegionMap Shutdown done"); - return deleted_arena; -} - -bool MemoryRegionMap::IsRecordingLocked() { - RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); - return client_count_ > 0; -} - -// Invariants (once libpthread_initialized is true): -// * While lock_ is not held, recursion_count_ is 0 (and -// lock_owner_tid_ is the previous owner, but we don't rely on -// that). -// * recursion_count_ and lock_owner_tid_ are only written while -// both lock_ and owner_lock_ are held. They may be read under -// just owner_lock_. -// * At entry and exit of Lock() and Unlock(), the current thread -// owns lock_ iff pthread_equal(lock_owner_tid_, pthread_self()) -// && recursion_count_ > 0. 
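The invariant list above maps almost line-for-line onto a small re-entrant lock; the deleted implementation follows next. For comparison, here is the same idea sketched with std::mutex and std::thread::id standing in for SpinLock and pthread_t (illustrative only, not the original class):

    #include <cassert>
    #include <mutex>
    #include <thread>

    class RecursiveLock {
     public:
      void Lock() {
        {
          std::lock_guard<std::mutex> g(owner_lock_);
          if (count_ > 0 && owner_ == std::this_thread::get_id()) {
            ++count_;  // already held by this thread: nest one level deeper
            return;
          }
        }
        lock_.lock();  // not the owner: contend for the real lock
        std::lock_guard<std::mutex> g(owner_lock_);
        owner_ = std::this_thread::get_id();
        count_ = 1;
      }
      void Unlock() {
        std::lock_guard<std::mutex> g(owner_lock_);
        assert(count_ > 0 && owner_ == std::this_thread::get_id());
        if (--count_ == 0) lock_.unlock();  // release only at the outermost Unlock
      }
     private:
      std::mutex lock_;        // the lock being made write-recursive
      std::mutex owner_lock_;  // protects owner_ and count_
      std::thread::id owner_;
      int count_ = 0;
    };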
-void MemoryRegionMap::Lock() { - { - SpinLockHolder l(&owner_lock_); - if (recursion_count_ > 0 && current_thread_is(lock_owner_tid_)) { - RAW_CHECK(lock_.IsHeld(), "Invariants violated"); - recursion_count_++; - RAW_CHECK(recursion_count_ <= 5, - "recursive lock nesting unexpectedly deep"); - return; - } - } - lock_.Lock(); - { - SpinLockHolder l(&owner_lock_); - RAW_CHECK(recursion_count_ == 0, - "Last Unlock didn't reset recursion_count_"); - if (libpthread_initialized) - lock_owner_tid_ = pthread_self(); - recursion_count_ = 1; - } -} - -void MemoryRegionMap::Unlock() { - SpinLockHolder l(&owner_lock_); - RAW_CHECK(recursion_count_ > 0, "unlock when not held"); - RAW_CHECK(lock_.IsHeld(), - "unlock when not held, and recursion_count_ is wrong"); - RAW_CHECK(current_thread_is(lock_owner_tid_), "unlock by non-holder"); - recursion_count_--; - if (recursion_count_ == 0) { - lock_.Unlock(); - } -} - -bool MemoryRegionMap::LockIsHeld() { - SpinLockHolder l(&owner_lock_); - return lock_.IsHeld() && current_thread_is(lock_owner_tid_); -} - -const MemoryRegionMap::Region* -MemoryRegionMap::DoFindRegionLocked(uintptr_t addr) { - RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); - if (regions_ != NULL) { - Region sample; - sample.SetRegionSetKey(addr); - RegionSet::iterator region = regions_->lower_bound(sample); - if (region != regions_->end()) { - RAW_CHECK(addr <= region->end_addr, ""); - if (region->start_addr <= addr && addr < region->end_addr) { - return &(*region); - } - } - } - return NULL; -} - -bool MemoryRegionMap::FindRegion(uintptr_t addr, Region* result) { - Lock(); - const Region* region = DoFindRegionLocked(addr); - if (region != NULL) *result = *region; // create it as an independent copy - Unlock(); - return region != NULL; -} - -bool MemoryRegionMap::FindAndMarkStackRegion(uintptr_t stack_top, - Region* result) { - Lock(); - const Region* region = DoFindRegionLocked(stack_top); - if (region != NULL) { - RAW_VLOG(10, "Stack at %p is inside region %p..%p", - reinterpret_cast(stack_top), - reinterpret_cast(region->start_addr), - reinterpret_cast(region->end_addr)); - const_cast(region)->set_is_stack(); // now we know - // cast is safe (set_is_stack does not change the set ordering key) - *result = *region; // create *result as an independent copy - } - Unlock(); - return region != NULL; -} - -HeapProfileBucket* MemoryRegionMap::GetBucket(int depth, - const void* const key[]) { - RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); - // Make hash-value - uintptr_t hash = 0; - for (int i = 0; i < depth; i++) { - hash += reinterpret_cast(key[i]); - hash += hash << 10; - hash ^= hash >> 6; - } - hash += hash << 3; - hash ^= hash >> 11; - - // Lookup stack trace in table - unsigned int hash_index = (static_cast(hash)) % kHashTableSize; - for (HeapProfileBucket* bucket = bucket_table_[hash_index]; - bucket != 0; - bucket = bucket->next) { - if ((bucket->hash == hash) && (bucket->depth == depth) && - std::equal(key, key + depth, bucket->stack)) { - return bucket; - } - } - - // Create new bucket - const size_t key_size = sizeof(key[0]) * depth; - HeapProfileBucket* bucket; - if (recursive_insert) { // recursion: save in saved_buckets_ - const void** key_copy = saved_buckets_keys_[saved_buckets_count_]; - std::copy(key, key + depth, key_copy); - bucket = &saved_buckets_[saved_buckets_count_]; - memset(bucket, 0, sizeof(*bucket)); - ++saved_buckets_count_; - bucket->stack = key_copy; - bucket->next = NULL; - } else { - recursive_insert = true; - const void** key_copy = 
static_cast( - MyAllocator::Allocate(key_size)); - recursive_insert = false; - std::copy(key, key + depth, key_copy); - recursive_insert = true; - bucket = static_cast( - MyAllocator::Allocate(sizeof(HeapProfileBucket))); - recursive_insert = false; - memset(bucket, 0, sizeof(*bucket)); - bucket->stack = key_copy; - bucket->next = bucket_table_[hash_index]; - } - bucket->hash = hash; - bucket->depth = depth; - bucket_table_[hash_index] = bucket; - ++num_buckets_; - return bucket; -} - -MemoryRegionMap::RegionIterator MemoryRegionMap::BeginRegionLocked() { - RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); - RAW_CHECK(regions_ != NULL, ""); - return regions_->begin(); -} - -MemoryRegionMap::RegionIterator MemoryRegionMap::EndRegionLocked() { - RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); - RAW_CHECK(regions_ != NULL, ""); - return regions_->end(); -} - -inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { - RAW_VLOG(12, "Inserting region %p..%p from %p", - reinterpret_cast(region.start_addr), - reinterpret_cast(region.end_addr), - reinterpret_cast(region.caller())); - RegionSet::const_iterator i = regions_->lower_bound(region); - if (i != regions_->end() && i->start_addr <= region.start_addr) { - RAW_DCHECK(region.end_addr <= i->end_addr, ""); // lower_bound ensures this - return; // 'region' is a subset of an already recorded region; do nothing - // We can be stricter and allow this only when *i has been created via - // an mmap with MAP_NORESERVE flag set. - } - if (DEBUG_MODE) { - RAW_CHECK(i == regions_->end() || !region.Overlaps(*i), - "Wow, overlapping memory regions"); - Region sample; - sample.SetRegionSetKey(region.start_addr); - i = regions_->lower_bound(sample); - RAW_CHECK(i == regions_->end() || !region.Overlaps(*i), - "Wow, overlapping memory regions"); - } - region.AssertIsConsistent(); // just making sure - // This inserts and allocates permanent storage for region - // and its call stack data: it's safe to do it now: - regions_->insert(region); - RAW_VLOG(12, "Inserted region %p..%p :", - reinterpret_cast(region.start_addr), - reinterpret_cast(region.end_addr)); - if (VLOG_IS_ON(12)) LogAllLocked(); -} - -// These variables are local to MemoryRegionMap::InsertRegionLocked() -// and MemoryRegionMap::HandleSavedRegionsLocked() -// and are file-level to ensure that they are initialized at load time. - -// Number of unprocessed region inserts. -static int saved_regions_count = 0; - -// Unprocessed inserts (must be big enough to hold all allocations that can -// be caused by a InsertRegionLocked call). -// Region has no constructor, so that c-tor execution does not interfere -// with the any-time use of the static memory behind saved_regions. -static MemoryRegionMap::Region saved_regions[20]; - -inline void MemoryRegionMap::HandleSavedRegionsLocked( - void (*insert_func)(const Region& region)) { - while (saved_regions_count > 0) { - // Making a local-var copy of the region argument to insert_func - // including its stack (w/o doing any memory allocations) is important: - // in many cases the memory in saved_regions - // will get written-to during the (*insert_func)(r) call below. 
- Region r = saved_regions[--saved_regions_count]; - (*insert_func)(r); - } -} - -void MemoryRegionMap::RestoreSavedBucketsLocked() { - RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); - while (saved_buckets_count_ > 0) { - HeapProfileBucket bucket = saved_buckets_[--saved_buckets_count_]; - unsigned int hash_index = - static_cast(bucket.hash) % kHashTableSize; - bool is_found = false; - for (HeapProfileBucket* curr = bucket_table_[hash_index]; - curr != 0; - curr = curr->next) { - if ((curr->hash == bucket.hash) && (curr->depth == bucket.depth) && - std::equal(bucket.stack, bucket.stack + bucket.depth, curr->stack)) { - curr->allocs += bucket.allocs; - curr->alloc_size += bucket.alloc_size; - curr->frees += bucket.frees; - curr->free_size += bucket.free_size; - is_found = true; - break; - } - } - if (is_found) continue; - - const size_t key_size = sizeof(bucket.stack[0]) * bucket.depth; - const void** key_copy = static_cast( - MyAllocator::Allocate(key_size)); - std::copy(bucket.stack, bucket.stack + bucket.depth, key_copy); - HeapProfileBucket* new_bucket = static_cast( - MyAllocator::Allocate(sizeof(HeapProfileBucket))); - memset(new_bucket, 0, sizeof(*new_bucket)); - new_bucket->hash = bucket.hash; - new_bucket->depth = bucket.depth; - new_bucket->stack = key_copy; - new_bucket->next = bucket_table_[hash_index]; - bucket_table_[hash_index] = new_bucket; - ++num_buckets_; - } -} - -inline void MemoryRegionMap::InsertRegionLocked(const Region& region) { - RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); - // We can be called recursively, because RegionSet constructor - // and DoInsertRegionLocked() (called below) can call the allocator. - // recursive_insert tells us if that's the case. When this happens, - // region insertion information is recorded in saved_regions[], - // and taken into account when the recursion unwinds. - // Do the insert: - if (recursive_insert) { // recursion: save in saved_regions - RAW_VLOG(12, "Saving recursive insert of region %p..%p from %p", - reinterpret_cast(region.start_addr), - reinterpret_cast(region.end_addr), - reinterpret_cast(region.caller())); - RAW_CHECK(saved_regions_count < arraysize(saved_regions), ""); - // Copy 'region' to saved_regions[saved_regions_count] - // together with the contents of its call_stack, - // then increment saved_regions_count. - saved_regions[saved_regions_count++] = region; - } else { // not a recusrive call - if (regions_ == NULL) { // init regions_ - RAW_VLOG(12, "Initializing region set"); - regions_ = regions_rep.region_set(); - recursive_insert = true; - new(regions_) RegionSet(); - HandleSavedRegionsLocked(&DoInsertRegionLocked); - recursive_insert = false; - } - recursive_insert = true; - // Do the actual insertion work to put new regions into regions_: - DoInsertRegionLocked(region); - HandleSavedRegionsLocked(&DoInsertRegionLocked); - recursive_insert = false; - } -} - -// We strip out different number of stack frames in debug mode -// because less inlining happens in that case -#ifdef NDEBUG -static const int kStripFrames = 1; -#else -static const int kStripFrames = 3; -#endif - -void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) { - // Record start/end info about this memory acquisition call in a new region: - Region region; - region.Create(start, size); - // First get the call stack info into the local varible 'region': - int depth = 0; - // NOTE: libunwind also does mmap and very much likely while holding - // it's own lock(s). 
So some threads may first take libunwind lock, - // and then take region map lock (necessary to record mmap done from - // inside libunwind). On the other hand other thread(s) may do - // normal mmap. Which would call this method to record it. Which - // would then proceed with installing that record to region map - // while holding region map lock. That may cause mmap from our own - // internal allocators, so attempt to unwind in this case may cause - // reverse order of taking libuwind and region map locks. Which is - // obvious deadlock. - // - // Thankfully, we can easily detect if we're holding region map lock - // and avoid recording backtrace in this (rare and largely - // irrelevant) case. By doing this we "declare" that thread needing - // both locks must take region map lock last. In other words we do - // not allow taking libuwind lock when we already have region map - // lock. Note, this is generally impossible when somebody tries to - // mix cpu profiling and heap checking/profiling, because cpu - // profiler grabs backtraces at arbitrary places. But at least such - // combination is rarer and less relevant. - if (max_stack_depth_ > 0 && !LockIsHeld()) { - depth = MallocHook::GetCallerStackTrace(const_cast(region.call_stack), - max_stack_depth_, kStripFrames + 1); - } - region.set_call_stack_depth(depth); // record stack info fully - RAW_VLOG(10, "New global region %p..%p from %p", - reinterpret_cast(region.start_addr), - reinterpret_cast(region.end_addr), - reinterpret_cast(region.caller())); - // Note: none of the above allocates memory. - Lock(); // recursively lock - map_size_ += size; - InsertRegionLocked(region); - // This will (eventually) allocate storage for and copy over the stack data - // from region.call_stack_data_ that is pointed by region.call_stack(). - if (bucket_table_ != NULL) { - HeapProfileBucket* b = GetBucket(depth, region.call_stack); - ++b->allocs; - b->alloc_size += size; - if (!recursive_insert) { - recursive_insert = true; - RestoreSavedBucketsLocked(); - recursive_insert = false; - } - } - Unlock(); -} - -void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { - Lock(); - if (recursive_insert) { - // First remove the removed region from saved_regions, if it's - // there, to prevent overrunning saved_regions in recursive - // map/unmap call sequences, and also from later inserting regions - // which have already been unmapped. - uintptr_t start_addr = reinterpret_cast(start); - uintptr_t end_addr = start_addr + size; - int put_pos = 0; - int old_count = saved_regions_count; - for (int i = 0; i < old_count; ++i, ++put_pos) { - Region& r = saved_regions[i]; - if (r.start_addr == start_addr && r.end_addr == end_addr) { - // An exact match, so it's safe to remove. - RecordRegionRemovalInBucket(r.call_stack_depth, r.call_stack, size); - --saved_regions_count; - --put_pos; - RAW_VLOG(10, ("Insta-Removing saved region %p..%p; " - "now have %d saved regions"), - reinterpret_cast(start_addr), - reinterpret_cast(end_addr), - saved_regions_count); - } else { - if (put_pos < i) { - saved_regions[put_pos] = saved_regions[i]; - } - } - } - } - if (regions_ == NULL) { // We must have just unset the hooks, - // but this thread was already inside the hook. 
- Unlock(); - return; - } - if (!recursive_insert) { - HandleSavedRegionsLocked(&InsertRegionLocked); - } - // first handle adding saved regions if any - uintptr_t start_addr = reinterpret_cast(start); - uintptr_t end_addr = start_addr + size; - // subtract start_addr, end_addr from all the regions - RAW_VLOG(10, "Removing global region %p..%p; have %" PRIuS " regions", - reinterpret_cast(start_addr), - reinterpret_cast(end_addr), - regions_->size()); - Region sample; - sample.SetRegionSetKey(start_addr); - // Only iterate over the regions that might overlap start_addr..end_addr: - for (RegionSet::iterator region = regions_->lower_bound(sample); - region != regions_->end() && region->start_addr < end_addr; - /*noop*/) { - RAW_VLOG(13, "Looking at region %p..%p", - reinterpret_cast(region->start_addr), - reinterpret_cast(region->end_addr)); - if (start_addr <= region->start_addr && - region->end_addr <= end_addr) { // full deletion - RAW_VLOG(12, "Deleting region %p..%p", - reinterpret_cast(region->start_addr), - reinterpret_cast(region->end_addr)); - RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, - region->end_addr - region->start_addr); - RegionSet::iterator d = region; - ++region; - regions_->erase(d); - continue; - } else if (region->start_addr < start_addr && - end_addr < region->end_addr) { // cutting-out split - RAW_VLOG(12, "Splitting region %p..%p in two", - reinterpret_cast(region->start_addr), - reinterpret_cast(region->end_addr)); - RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, - end_addr - start_addr); - // Make another region for the start portion: - // The new region has to be the start portion because we can't - // just modify region->end_addr as it's the sorting key. - Region r = *region; - r.set_end_addr(start_addr); - InsertRegionLocked(r); - // cut *region from start: - const_cast(*region).set_start_addr(end_addr); - } else if (end_addr > region->start_addr && - start_addr <= region->start_addr) { // cut from start - RAW_VLOG(12, "Start-chopping region %p..%p", - reinterpret_cast(region->start_addr), - reinterpret_cast(region->end_addr)); - RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, - end_addr - region->start_addr); - const_cast(*region).set_start_addr(end_addr); - } else if (start_addr > region->start_addr && - start_addr < region->end_addr) { // cut from end - RAW_VLOG(12, "End-chopping region %p..%p", - reinterpret_cast(region->start_addr), - reinterpret_cast(region->end_addr)); - RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, - region->end_addr - start_addr); - // Can't just modify region->end_addr (it's the sorting key): - Region r = *region; - r.set_end_addr(start_addr); - RegionSet::iterator d = region; - ++region; - // It's safe to erase before inserting since r is independent of *d: - // r contains an own copy of the call stack: - regions_->erase(d); - InsertRegionLocked(r); - continue; - } - ++region; - } - RAW_VLOG(12, "Removed region %p..%p; have %" PRIuS " regions", - reinterpret_cast(start_addr), - reinterpret_cast(end_addr), - regions_->size()); - if (VLOG_IS_ON(12)) LogAllLocked(); - unmap_size_ += size; - Unlock(); -} - -void MemoryRegionMap::RecordRegionRemovalInBucket(int depth, - const void* const stack[], - size_t size) { - RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); - if (bucket_table_ == NULL) return; - HeapProfileBucket* b = GetBucket(depth, stack); - ++b->frees; - b->free_size += size; -} - -void 
MemoryRegionMap::MmapHook(const void* result, - const void* start, size_t size, - int prot, int flags, - int fd, off_t offset) { - // TODO(maxim): replace all 0x%" PRIxS " by %p when RAW_VLOG uses a safe - // snprintf reimplementation that does not malloc to pretty-print NULL - RAW_VLOG(10, "MMap = 0x%" PRIxPTR " of %" PRIuS " at %" PRIu64 " " - "prot %d flags %d fd %d offs %" PRId64, - reinterpret_cast(result), size, - reinterpret_cast(start), prot, flags, fd, - static_cast(offset)); - if (result != reinterpret_cast(MAP_FAILED) && size != 0) { - RecordRegionAddition(result, size); - } -} - -void MemoryRegionMap::MunmapHook(const void* ptr, size_t size) { - RAW_VLOG(10, "MUnmap of %p %" PRIuS "", ptr, size); - if (size != 0) { - RecordRegionRemoval(ptr, size); - } -} - -void MemoryRegionMap::MremapHook(const void* result, - const void* old_addr, size_t old_size, - size_t new_size, int flags, - const void* new_addr) { - RAW_VLOG(10, "MRemap = 0x%" PRIxPTR " of 0x%" PRIxPTR " %" PRIuS " " - "to %" PRIuS " flags %d new_addr=0x%" PRIxPTR, - (uintptr_t)result, (uintptr_t)old_addr, - old_size, new_size, flags, - flags & MREMAP_FIXED ? (uintptr_t)new_addr : 0); - if (result != reinterpret_cast(-1)) { - RecordRegionRemoval(old_addr, old_size); - RecordRegionAddition(result, new_size); - } -} - -void MemoryRegionMap::SbrkHook(const void* result, ptrdiff_t increment) { - RAW_VLOG(10, "Sbrk = 0x%" PRIxPTR " of %" PRIdS "", (uintptr_t)result, increment); - if (result != reinterpret_cast(-1)) { - if (increment > 0) { - void* new_end = sbrk(0); - RecordRegionAddition(result, reinterpret_cast(new_end) - - reinterpret_cast(result)); - } else if (increment < 0) { - void* new_end = sbrk(0); - RecordRegionRemoval(new_end, reinterpret_cast(result) - - reinterpret_cast(new_end)); - } - } -} - -void MemoryRegionMap::LogAllLocked() { - RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); - RAW_LOG(INFO, "List of regions:"); - uintptr_t previous = 0; - for (RegionSet::const_iterator r = regions_->begin(); - r != regions_->end(); ++r) { - RAW_LOG(INFO, "Memory region 0x%" PRIxPTR "..0x%" PRIxPTR " " - "from 0x%" PRIxPTR " stack=%d", - r->start_addr, r->end_addr, r->caller(), r->is_stack); - RAW_CHECK(previous < r->end_addr, "wow, we messed up the set order"); - // this must be caused by uncontrolled recursive operations on regions_ - previous = r->end_addr; - } - RAW_LOG(INFO, "End of regions list"); -} diff --git a/contrib/libtcmalloc/src/memory_region_map.h b/contrib/libtcmalloc/src/memory_region_map.h deleted file mode 100644 index ec388e1cc54..00000000000 --- a/contrib/libtcmalloc/src/memory_region_map.h +++ /dev/null @@ -1,413 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -/* Copyright (c) 2006, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Maxim Lifantsev
- */
-
-#ifndef BASE_MEMORY_REGION_MAP_H_
-#define BASE_MEMORY_REGION_MAP_H_
-
-#include <config.h>
-
-#ifdef HAVE_PTHREAD
-#include <pthread.h>
-#endif
-#include <stddef.h>
-#include <set>
-#include "base/stl_allocator.h"
-#include "base/spinlock.h"
-#include "base/thread_annotations.h"
-#include "base/low_level_alloc.h"
-#include "heap-profile-stats.h"
-
-// TODO(maxim): add a unittest:
-//  execute a bunch of mmaps and compare memory map with what strace logs
-//  execute a bunch of mmap/munmap and compare memory map with
-//  own accounting of what those mmaps generated
-
-// Thread-safe class to collect and query the map of all memory regions
-// in a process that have been created with mmap, munmap, mremap, sbrk.
-// For each memory region, we keep track of (and provide to users)
-// the stack trace that allocated that memory region.
-// The recorded stack trace depth is bounded by
-// a user-supplied max_stack_depth parameter of Init().
-// After initialization with Init()
-// (which can happen even before global object constructor execution)
-// we collect the map by installing and monitoring MallocHook-s
-// to mmap, munmap, mremap, sbrk.
-// At any time one can query this map via provided interface.
-// For more details on the design of MemoryRegionMap
-// see the comment at the top of our .cc file.
-class MemoryRegionMap {
- private:
-  // Max call stack recording depth supported by Init().  Set it to be
-  // high enough for all our clients.  Note: we do not define storage
-  // for this (doing that requires special handling in windows), so
-  // don't take the address of it!
-  static const int kMaxStackDepth = 32;
-
-  // Size of the hash table of buckets.  A structure of the bucket table is
-  // described in heap-profile-stats.h.
-  static const int kHashTableSize = 179999;
-
- public:
-  // interface ================================================================
-
-  // Every client of MemoryRegionMap must call Init() before first use,
-  // and Shutdown() after last use.  This allows us to reference count
-  // this (singleton) class properly.  MemoryRegionMap assumes it's the
-  // only client of MallocHooks, so a client can only register other
-  // MallocHooks after calling Init() and must unregister them before
-  // calling Shutdown().
-
-  // Initialize this module to record memory allocation stack traces.
-  // Stack traces that have more than "max_stack_depth" frames
-  // are automatically shrunk to "max_stack_depth" when they are recorded.
-  // Init() can be called more than once w/o harm, largest max_stack_depth
-  // will be the effective one.
-  // When "use_buckets" is true, then counts of mmap and munmap sizes will be
-  // recorded with each stack trace.
If Init() is called more than once, then - // counting will be effective after any call contained "use_buckets" of true. - // It will install mmap, munmap, mremap, sbrk hooks - // and initialize arena_ and our hook and locks, hence one can use - // MemoryRegionMap::Lock()/Unlock() to manage the locks. - // Uses Lock/Unlock inside. - static void Init(int max_stack_depth, bool use_buckets); - - // Try to shutdown this module undoing what Init() did. - // Returns true iff could do full shutdown (or it was not attempted). - // Full shutdown is attempted when the number of Shutdown() calls equals - // the number of Init() calls. - static bool Shutdown(); - - // Return true if MemoryRegionMap is initialized and recording, i.e. when - // then number of Init() calls are more than the number of Shutdown() calls. - static bool IsRecordingLocked(); - - // Locks to protect our internal data structures. - // These also protect use of arena_ if our Init() has been done. - // The lock is recursive. - static void Lock() EXCLUSIVE_LOCK_FUNCTION(lock_); - static void Unlock() UNLOCK_FUNCTION(lock_); - - // Returns true when the lock is held by this thread (for use in RAW_CHECK-s). - static bool LockIsHeld(); - - // Locker object that acquires the MemoryRegionMap::Lock - // for the duration of its lifetime (a C++ scope). - class LockHolder { - public: - LockHolder() { Lock(); } - ~LockHolder() { Unlock(); } - private: - DISALLOW_COPY_AND_ASSIGN(LockHolder); - }; - - // A memory region that we know about through malloc_hook-s. - // This is essentially an interface through which MemoryRegionMap - // exports the collected data to its clients. Thread-compatible. - struct Region { - uintptr_t start_addr; // region start address - uintptr_t end_addr; // region end address - int call_stack_depth; // number of caller stack frames that we saved - const void* call_stack[kMaxStackDepth]; // caller address stack array - // filled to call_stack_depth size - bool is_stack; // does this region contain a thread's stack: - // a user of MemoryRegionMap supplies this info - - // Convenience accessor for call_stack[0], - // i.e. (the program counter of) the immediate caller - // of this region's allocation function, - // but it also returns NULL when call_stack_depth is 0, - // i.e whe we weren't able to get the call stack. - // This usually happens in recursive calls, when the stack-unwinder - // calls mmap() which in turn calls the stack-unwinder. - uintptr_t caller() const { - return reinterpret_cast(call_stack_depth >= 1 - ? call_stack[0] : NULL); - } - - // Return true iff this region overlaps region x. 
- bool Overlaps(const Region& x) const { - return start_addr < x.end_addr && end_addr > x.start_addr; - } - - private: // helpers for MemoryRegionMap - friend class MemoryRegionMap; - - // The ways we create Region-s: - void Create(const void* start, size_t size) { - start_addr = reinterpret_cast(start); - end_addr = start_addr + size; - is_stack = false; // not a stack till marked such - call_stack_depth = 0; - AssertIsConsistent(); - } - void set_call_stack_depth(int depth) { - RAW_DCHECK(call_stack_depth == 0, ""); // only one such set is allowed - call_stack_depth = depth; - AssertIsConsistent(); - } - - // The ways we modify Region-s: - void set_is_stack() { is_stack = true; } - void set_start_addr(uintptr_t addr) { - start_addr = addr; - AssertIsConsistent(); - } - void set_end_addr(uintptr_t addr) { - end_addr = addr; - AssertIsConsistent(); - } - - // Verifies that *this contains consistent data, crashes if not the case. - void AssertIsConsistent() const { - RAW_DCHECK(start_addr < end_addr, ""); - RAW_DCHECK(call_stack_depth >= 0 && - call_stack_depth <= kMaxStackDepth, ""); - } - - // Post-default construction helper to make a Region suitable - // for searching in RegionSet regions_. - void SetRegionSetKey(uintptr_t addr) { - // make sure *this has no usable data: - if (DEBUG_MODE) memset(this, 0xFF, sizeof(*this)); - end_addr = addr; - } - - // Note: call_stack[kMaxStackDepth] as a member lets us make Region - // a simple self-contained struct with correctly behaving bit-vise copying. - // This simplifies the code of this module but wastes some memory: - // in most-often use case of this module (leak checking) - // only one call_stack element out of kMaxStackDepth is actually needed. - // Making the storage for call_stack variable-sized, - // substantially complicates memory management for the Region-s: - // as they need to be created and manipulated for some time - // w/o any memory allocations, yet are also given out to the users. - }; - - // Find the region that covers addr and write its data into *result if found, - // in which case *result gets filled so that it stays fully functional - // even when the underlying region gets removed from MemoryRegionMap. - // Returns success. Uses Lock/Unlock inside. - static bool FindRegion(uintptr_t addr, Region* result); - - // Find the region that contains stack_top, mark that region as - // a stack region, and write its data into *result if found, - // in which case *result gets filled so that it stays fully functional - // even when the underlying region gets removed from MemoryRegionMap. - // Returns success. Uses Lock/Unlock inside. - static bool FindAndMarkStackRegion(uintptr_t stack_top, Region* result); - - // Iterate over the buckets which store mmap and munmap counts per stack - // trace. It calls "callback" for each bucket, and passes "arg" to it. - template - static void IterateBuckets(void (*callback)(const HeapProfileBucket*, Type), - Type arg); - - // Get the bucket whose caller stack trace is "key". The stack trace is - // used to a depth of "depth" at most. The requested bucket is created if - // needed. - // The bucket table is described in heap-profile-stats.h. 
-  static HeapProfileBucket* GetBucket(int depth, const void* const key[]);
-
- private:  // our internal types ==============================================
-
-  // Region comparator for sorting with STL
-  struct RegionCmp {
-    bool operator()(const Region& x, const Region& y) const {
-      return x.end_addr < y.end_addr;
-    }
-  };
-
-  // We allocate STL objects in our own arena.
-  struct MyAllocator {
-    static void *Allocate(size_t n) {
-      return LowLevelAlloc::AllocWithArena(n, arena_);
-    }
-    static void Free(const void *p, size_t /* n */) {
-      LowLevelAlloc::Free(const_cast<void*>(p));
-    }
-  };
-
-  // Set of the memory regions
-  typedef std::set<Region, RegionCmp,
-                   STL_Allocator<Region, MyAllocator> > RegionSet;
-
- public:  // more in-depth interface ==========================================
-
-  // STL iterator with values of Region
-  typedef RegionSet::const_iterator RegionIterator;
-
-  // Return the begin/end iterators to all the regions.
-  // These need Lock/Unlock protection around their whole usage (loop).
-  // Even when the same thread causes modifications during such a loop
-  // (which are permitted due to recursive locking)
-  // the loop iterator will still be valid as long as its region
-  // has not been deleted, but EndRegionLocked should be
-  // re-evaluated whenever the set of regions has changed.
-  static RegionIterator BeginRegionLocked();
-  static RegionIterator EndRegionLocked();
-
-  // Return the accumulated sizes of mapped and unmapped regions.
-  static int64 MapSize() { return map_size_; }
-  static int64 UnmapSize() { return unmap_size_; }
-
-  // Effectively private type from our .cc =================================
-  // public to let us declare global objects:
-  union RegionSetRep;
-
- private:
-  // representation ===========================================================
-
-  // Counter of clients of this module that have called Init().
-  static int client_count_;
-
-  // Maximal number of caller stack frames to save (>= 0).
-  static int max_stack_depth_;
-
-  // Arena used for our allocations in regions_.
-  static LowLevelAlloc::Arena* arena_;
-
-  // Set of the mmap/sbrk/mremap-ed memory regions
-  // To be accessed *only* when Lock() is held.
-  // Hence we protect the non-recursive lock used inside of arena_
-  // with our recursive Lock(). This lets a user prevent deadlocks
-  // when threads are stopped by TCMalloc_ListAllProcessThreads at random spots
-  // simply by acquiring our recursive Lock() before that.
-  static RegionSet* regions_;
-
-  // Lock to protect regions_ and buckets_ variables and the data behind.
-  static SpinLock lock_;
-  // Lock to protect the recursive lock itself.
-  static SpinLock owner_lock_;
-
-  // Recursion count for the recursive lock.
-  static int recursion_count_;
-  // The thread id of the thread that's inside the recursive lock.
-  static pthread_t lock_owner_tid_;
-
-  // Total size of all mapped pages so far
-  static int64 map_size_;
-  // Total size of all unmapped pages so far
-  static int64 unmap_size_;
-
-  // Bucket hash table which is described in heap-profile-stats.h.
-  static HeapProfileBucket** bucket_table_ GUARDED_BY(lock_);
-  static int num_buckets_ GUARDED_BY(lock_);
-
-  // The following members are local to MemoryRegionMap::GetBucket()
-  // and MemoryRegionMap::HandleSavedBucketsLocked()
-  // and are file-level to ensure that they are initialized at load time.
-  //
-  // These are used as temporary storage to break the infinite cycle of mmap
-  // calling our hook which (sometimes) causes mmap.  It must be a static
-  // fixed-size array.  The size 20 is just an expected value for safety.
- // The details are described in memory_region_map.cc. - - // Number of unprocessed bucket inserts. - static int saved_buckets_count_ GUARDED_BY(lock_); - - // Unprocessed inserts (must be big enough to hold all mmaps that can be - // caused by a GetBucket call). - // Bucket has no constructor, so that c-tor execution does not interfere - // with the any-time use of the static memory behind saved_buckets. - static HeapProfileBucket saved_buckets_[20] GUARDED_BY(lock_); - - static const void* saved_buckets_keys_[20][kMaxStackDepth] GUARDED_BY(lock_); - - // helpers ================================================================== - - // Helper for FindRegion and FindAndMarkStackRegion: - // returns the region covering 'addr' or NULL; assumes our lock_ is held. - static const Region* DoFindRegionLocked(uintptr_t addr); - - // Verifying wrapper around regions_->insert(region) - // To be called to do InsertRegionLocked's work only! - inline static void DoInsertRegionLocked(const Region& region); - // Handle regions saved by InsertRegionLocked into a tmp static array - // by calling insert_func on them. - inline static void HandleSavedRegionsLocked( - void (*insert_func)(const Region& region)); - - // Restore buckets saved in a tmp static array by GetBucket to the bucket - // table where all buckets eventually should be. - static void RestoreSavedBucketsLocked(); - - // Wrapper around DoInsertRegionLocked - // that handles the case of recursive allocator calls. - inline static void InsertRegionLocked(const Region& region); - - // Record addition of a memory region at address "start" of size "size" - // (called from our mmap/mremap/sbrk hooks). - static void RecordRegionAddition(const void* start, size_t size); - // Record deletion of a memory region at address "start" of size "size" - // (called from our munmap/mremap/sbrk hooks). - static void RecordRegionRemoval(const void* start, size_t size); - - // Record deletion of a memory region of size "size" in a bucket whose - // caller stack trace is "key". The stack trace is used to a depth of - // "depth" at most. - static void RecordRegionRemovalInBucket(int depth, - const void* const key[], - size_t size); - - // Hooks for MallocHook - static void MmapHook(const void* result, - const void* start, size_t size, - int prot, int flags, - int fd, off_t offset); - static void MunmapHook(const void* ptr, size_t size); - static void MremapHook(const void* result, const void* old_addr, - size_t old_size, size_t new_size, int flags, - const void* new_addr); - static void SbrkHook(const void* result, ptrdiff_t increment); - - // Log all memory regions; Useful for debugging only. - // Assumes Lock() is held - static void LogAllLocked(); - - DISALLOW_COPY_AND_ASSIGN(MemoryRegionMap); -}; - -template -void MemoryRegionMap::IterateBuckets( - void (*callback)(const HeapProfileBucket*, Type), Type callback_arg) { - for (int index = 0; index < kHashTableSize; index++) { - for (HeapProfileBucket* bucket = bucket_table_[index]; - bucket != NULL; - bucket = bucket->next) { - callback(bucket, callback_arg); - } - } -} - -#endif // BASE_MEMORY_REGION_MAP_H_ diff --git a/contrib/libtcmalloc/src/packed-cache-inl.h b/contrib/libtcmalloc/src/packed-cache-inl.h deleted file mode 100644 index 09462608ece..00000000000 --- a/contrib/libtcmalloc/src/packed-cache-inl.h +++ /dev/null @@ -1,239 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2007, Google Inc. -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Geoff Pike -// -// This file provides a minimal cache that can hold a pair -// with little if any wasted space. The types of the key and value -// must be unsigned integral types or at least have unsigned semantics -// for >>, casting, and similar operations. -// -// Synchronization is not provided. However, the cache is implemented -// as an array of cache entries whose type is chosen at compile time. -// If a[i] is atomic on your hardware for the chosen array type then -// raciness will not necessarily lead to bugginess. The cache entries -// must be large enough to hold a partial key and a value packed -// together. The partial keys are bit strings of length -// kKeybits - kHashbits, and the values are bit strings of length kValuebits. -// -// In an effort to use minimal space, every cache entry represents -// some pair; the class provides no way to mark a cache -// entry as empty or uninitialized. In practice, you may want to have -// reserved keys or values to get around this limitation. For example, in -// tcmalloc's PageID-to-sizeclass cache, a value of 0 is used as -// "unknown sizeclass." -// -// Usage Considerations -// -------------------- -// -// kHashbits controls the size of the cache. The best value for -// kHashbits will of course depend on the application. Perhaps try -// tuning the value of kHashbits by measuring different values on your -// favorite benchmark. Also remember not to be a pig; other -// programs that need resources may suffer if you are. -// -// The main uses for this class will be when performance is -// critical and there's a convenient type to hold the cache's -// entries. As described above, the number of bits required -// for a cache entry is (kKeybits - kHashbits) + kValuebits. Suppose -// kKeybits + kValuebits is 43. Then it probably makes sense to -// chose kHashbits >= 11 so that cache entries fit in a uint32. -// -// On the other hand, suppose kKeybits = kValuebits = 64. Then -// using this class may be less worthwhile. 
You'll probably
-// be using 128 bits for each entry anyway, so maybe just pick
-// a hash function, H, and use an array indexed by H(key):
-//    void Put(K key, V value) { a_[H(key)] = pair<K, V>(key, value); }
-//    V GetOrDefault(K key, V default) { const pair<K, V> &p = a_[H(key)]; ... }
-//    etc.
-//
-// Further Details
-// ---------------
-//
-// For caches used only by one thread, the following is true:
-// 1. For a cache c,
-//      (c.Put(key, value), c.GetOrDefault(key, 0)) == value
-//    and
-//      (c.Put(key, value), <...>, c.GetOrDefault(key, 0)) == value
-//    if the elided code contains no c.Put calls.
-//
-// 2. Has(key) will return false if no <key, value> pair with that key
-//    has ever been Put.  However, a newly initialized cache will have
-//    some <key, value> pairs already present.  When you create a new
-//    cache, you must specify an "initial value."  The initialization
-//    procedure is equivalent to Clear(initial_value), which is
-//    equivalent to Put(k, initial_value) for all keys k from 0 to
-//    2^kHashbits - 1.
-//
-// 3. If key and key' differ then the only way Put(key, value) may
-//    cause Has(key') to change is that Has(key') may change from true to
-//    false. Furthermore, a Put() call that doesn't change Has(key')
-//    doesn't change GetOrDefault(key', ...) either.
-//
-// Implementation details:
-//
-// This is a direct-mapped cache with 2^kHashbits entries; the hash
-// function simply takes the low bits of the key.  We store whole keys
-// if a whole key plus a whole value fits in an entry.  Otherwise, an
-// entry is the high bits of a key and a value, packed together.
-// E.g., a 20 bit key and a 7 bit value only require a uint16 for each
-// entry if kHashbits >= 11.
-//
-// Alternatives to this scheme will be added as needed.
-
-#ifndef TCMALLOC_PACKED_CACHE_INL_H_
-#define TCMALLOC_PACKED_CACHE_INL_H_
-
-#include "config.h"
-#include <stddef.h>                     // for size_t
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for uintptr_t
-#endif
-#include "base/basictypes.h"
-#include "internal_logging.h"
-
-// A safe way of doing "(1 << n) - 1" -- without worrying about overflow
-// Note this will all be resolved to a constant expression at compile-time
-#define N_ONES_(IntType, N)                                     \
-  ( (N) == 0 ? 0 : ((static_cast<IntType>(1) << ((N)-1))-1 +    \
-                    (static_cast<IntType>(1) << ((N)-1))) )
-
-// The types K and V provide upper bounds on the number of valid keys
-// and values, but we explicitly require the keys to be less than
-// 2^kKeybits and the values to be less than 2^kValuebits.  The size of
-// the table is controlled by kHashbits, and the type of each entry in
-// the cache is T.  See also the big comment at the top of the file.
-template <int kKeybits, typename T>
-class PackedCache {
- public:
-  typedef uintptr_t K;
-  typedef size_t V;
-#ifdef TCMALLOC_SMALL_BUT_SLOW
-  // Decrease the size map cache if running in the small memory mode.
- static const int kHashbits = 12; -#else - static const int kHashbits = 16; -#endif - static const int kValuebits = 7; - static const bool kUseWholeKeys = kKeybits + kValuebits <= 8 * sizeof(T); - - explicit PackedCache(V initial_value) { - COMPILE_ASSERT(kKeybits <= sizeof(K) * 8, key_size); - COMPILE_ASSERT(kValuebits <= sizeof(V) * 8, value_size); - COMPILE_ASSERT(kHashbits <= kKeybits, hash_function); - COMPILE_ASSERT(kKeybits - kHashbits + kValuebits <= kTbits, - entry_size_must_be_big_enough); - Clear(initial_value); - } - - void Put(K key, V value) { - ASSERT(key == (key & kKeyMask)); - ASSERT(value == (value & kValueMask)); - array_[Hash(key)] = KeyToUpper(key) | value; - } - - bool Has(K key) const { - ASSERT(key == (key & kKeyMask)); - return KeyMatch(array_[Hash(key)], key); - } - - V GetOrDefault(K key, V default_value) const { - // As with other code in this class, we touch array_ as few times - // as we can. Assuming entries are read atomically (e.g., their - // type is uintptr_t on most hardware) then certain races are - // harmless. - ASSERT(key == (key & kKeyMask)); - T entry = array_[Hash(key)]; - return KeyMatch(entry, key) ? EntryToValue(entry) : default_value; - } - - void Clear(V value) { - ASSERT(value == (value & kValueMask)); - for (int i = 0; i < 1 << kHashbits; i++) { - ASSERT(kUseWholeKeys || KeyToUpper(i) == 0); - array_[i] = kUseWholeKeys ? (value | KeyToUpper(i)) : value; - } - } - - private: - // We are going to pack a value and the upper part of a key (or a - // whole key) into an entry of type T. The UPPER type is for the - // upper part of a key, after the key has been masked and shifted - // for inclusion in an entry. - typedef T UPPER; - - static V EntryToValue(T t) { return t & kValueMask; } - - // If we have space for a whole key, we just shift it left. - // Otherwise kHashbits determines where in a K to find the upper - // part of the key, and kValuebits determines where in the entry to - // put it. - static UPPER KeyToUpper(K k) { - if (kUseWholeKeys) { - return static_cast(k) << kValuebits; - } else { - const int shift = kHashbits - kValuebits; - // Assume kHashbits >= kValuebits. It'd be easy to lift this assumption. - return static_cast(k >> shift) & kUpperMask; - } - } - - static size_t Hash(K key) { - return static_cast(key) & N_ONES_(size_t, kHashbits); - } - - // Does the entry match the relevant part of the given key? - static bool KeyMatch(T entry, K key) { - return kUseWholeKeys ? - (entry >> kValuebits == key) : - ((KeyToUpper(key) ^ entry) & kUpperMask) == 0; - } - - static const int kTbits = 8 * sizeof(T); - static const int kUpperbits = kUseWholeKeys ? kKeybits : kKeybits - kHashbits; - - // For masking a K. - static const K kKeyMask = N_ONES_(K, kKeybits); - - // For masking a T. - static const T kUpperMask = N_ONES_(T, kUpperbits) << kValuebits; - - // For masking a V or a T. - static const V kValueMask = N_ONES_(V, kValuebits); - - // array_ is the cache. Its elements are volatile because any - // thread can write any array element at any time. - volatile T array_[1 << kHashbits]; -}; - -#undef N_ONES_ - -#endif // TCMALLOC_PACKED_CACHE_INL_H_ diff --git a/contrib/libtcmalloc/src/page_heap.cc b/contrib/libtcmalloc/src/page_heap.cc deleted file mode 100644 index f1915623308..00000000000 --- a/contrib/libtcmalloc/src/page_heap.cc +++ /dev/null @@ -1,682 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. 
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-
-#include "config.h"
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>                   // for PRIuPTR
-#endif
-#include <errno.h>                      // for ENOMEM, errno
-#include <gperftools/malloc_extension.h>      // for MallocRange, etc
-#include "base/basictypes.h"
-#include "base/commandlineflags.h"
-#include "internal_logging.h"  // for ASSERT, TCMalloc_Printer, etc
-#include "page_heap_allocator.h"  // for PageHeapAllocator
-#include "static_vars.h"       // for Static
-#include "system-alloc.h"      // for TCMalloc_SystemAlloc, etc
-
-DEFINE_double(tcmalloc_release_rate,
-              EnvToDouble("TCMALLOC_RELEASE_RATE", 1.0),
-              "Rate at which we release unused memory to the system.  "
-              "Zero means we never release memory back to the system.  "
-              "Increase this flag to return memory faster; decrease it "
-              "to return memory slower.  Reasonable rates are in the "
-              "range [0,10]");
-
-DEFINE_int64(tcmalloc_heap_limit_mb,
-             EnvToInt("TCMALLOC_HEAP_LIMIT_MB", 0),
-             "Limit total size of the process heap to the "
-             "specified number of MiB. "
-             "When we approach the limit the memory is released "
-             "to the system more aggressively (more minor page faults). "
-             "Zero means to allocate as long as system allows.");
-
-namespace tcmalloc {
-
-PageHeap::PageHeap()
-    : pagemap_(MetaDataAlloc),
-      pagemap_cache_(0),
-      scavenge_counter_(0),
-      // Start scavenging at kMaxPages list
-      release_index_(kMaxPages),
-      aggressive_decommit_(false) {
-  COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
-  DLL_Init(&large_.normal);
-  DLL_Init(&large_.returned);
-  for (int i = 0; i < kMaxPages; i++) {
-    DLL_Init(&free_[i].normal);
-    DLL_Init(&free_[i].returned);
-  }
-}
-
-Span* PageHeap::SearchFreeAndLargeLists(Length n) {
-  ASSERT(Check());
-  ASSERT(n > 0);
-
-  // Find first size >= n that has a non-empty list
-  for (Length s = n; s < kMaxPages; s++) {
-    Span* ll = &free_[s].normal;
-    // If we're lucky, ll is non-empty, meaning it has a suitable span.
- if (!DLL_IsEmpty(ll)) { - ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST); - return Carve(ll->next, n); - } - // Alternatively, maybe there's a usable returned span. - ll = &free_[s].returned; - if (!DLL_IsEmpty(ll)) { - // We did not call EnsureLimit before, to avoid releasing the span - // that will be taken immediately back. - // Calling EnsureLimit here is not very expensive, as it fails only if - // there is no more normal spans (and it fails efficiently) - // or SystemRelease does not work (there is probably no returned spans). - if (EnsureLimit(n)) { - // ll may have became empty due to coalescing - if (!DLL_IsEmpty(ll)) { - ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST); - return Carve(ll->next, n); - } - } - } - } - // No luck in free lists, our last chance is in a larger class. - return AllocLarge(n); // May be NULL -} - -static const size_t kForcedCoalesceInterval = 128*1024*1024; - -Span* PageHeap::New(Length n) { - ASSERT(Check()); - ASSERT(n > 0); - - Span* result = SearchFreeAndLargeLists(n); - if (result != NULL) - return result; - - if (stats_.free_bytes != 0 && stats_.unmapped_bytes != 0 - && stats_.free_bytes + stats_.unmapped_bytes >= stats_.system_bytes / 4 - && (stats_.system_bytes / kForcedCoalesceInterval - != (stats_.system_bytes + (n << kPageShift)) / kForcedCoalesceInterval)) { - // We're about to grow heap, but there are lots of free pages. - // tcmalloc's design decision to keep unmapped and free spans - // separately and never coalesce them means that sometimes there - // can be free pages span of sufficient size, but it consists of - // "segments" of different type so page heap search cannot find - // it. In order to prevent growing heap and wasting memory in such - // case we're going to unmap all free pages. So that all free - // spans are maximally coalesced. - // - // We're also limiting 'rate' of going into this path to be at - // most once per 128 megs of heap growth. Otherwise programs that - // grow heap frequently (and that means by small amount) could be - // penalized with higher count of minor page faults. - // - // See also large_heap_fragmentation_unittest.cc and - // https://code.google.com/p/gperftools/issues/detail?id=368 - ReleaseAtLeastNPages(static_cast(0x7fffffff)); - - // then try again. If we are forced to grow heap because of large - // spans fragmentation and not because of problem described above, - // then at the very least we've just unmapped free but - // insufficiently big large spans back to OS. So in case of really - // unlucky memory fragmentation we'll be consuming virtual address - // space, but not real memory - result = SearchFreeAndLargeLists(n); - if (result != NULL) return result; - } - - // Grow the heap and try again. - if (!GrowHeap(n)) { - ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); - ASSERT(Check()); - // underlying SysAllocator likely set ENOMEM but we can get here - // due to EnsureLimit so we set it here too. - // - // Setting errno to ENOMEM here allows us to avoid dealing with it - // in fast-path. - errno = ENOMEM; - return NULL; - } - return SearchFreeAndLargeLists(n); -} - -Span* PageHeap::AllocLarge(Length n) { - // find the best span (closest to n in size). - // The following loops implements address-ordered best-fit. 
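The forced-coalescing branch in New() above applies two gates before unmapping everything: a quarter or more of the heap must be idle, and the growth must cross a 128 MiB boundary of total heap size, which bounds how often the expensive path runs. A sketch of just that predicate:

```cpp
#include <cstdint>
#include <cstdio>

static const uint64_t kForcedCoalesceInterval = 128ull * 1024 * 1024;

// Returns true when growing by grow_bytes should first force-coalesce
// all free spans (lots of idle memory, and a 128 MiB boundary crossed).
bool ShouldForceCoalesce(uint64_t free_bytes, uint64_t unmapped_bytes,
                         uint64_t system_bytes, uint64_t grow_bytes) {
  if (free_bytes == 0 || unmapped_bytes == 0) return false;
  if (free_bytes + unmapped_bytes < system_bytes / 4) return false;
  return system_bytes / kForcedCoalesceInterval !=
         (system_bytes + grow_bytes) / kForcedCoalesceInterval;
}

int main() {
  // ~1 GiB heap, 300 MiB idle, growing by 8 MiB across a 128 MiB boundary:
  printf("%d\n", ShouldForceCoalesce(150ull << 20, 150ull << 20,
                                     (1ull << 30) - (4ull << 20),
                                     8ull << 20));  // 1
}
```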
- Span *best = NULL; - - // Search through normal list - for (Span* span = large_.normal.next; - span != &large_.normal; - span = span->next) { - if (span->length >= n) { - if ((best == NULL) - || (span->length < best->length) - || ((span->length == best->length) && (span->start < best->start))) { - best = span; - ASSERT(best->location == Span::ON_NORMAL_FREELIST); - } - } - } - - Span *bestNormal = best; - - // Search through released list in case it has a better fit - for (Span* span = large_.returned.next; - span != &large_.returned; - span = span->next) { - if (span->length >= n) { - if ((best == NULL) - || (span->length < best->length) - || ((span->length == best->length) && (span->start < best->start))) { - best = span; - ASSERT(best->location == Span::ON_RETURNED_FREELIST); - } - } - } - - if (best == bestNormal) { - return best == NULL ? NULL : Carve(best, n); - } - - // best comes from returned list. - - if (EnsureLimit(n, false)) { - return Carve(best, n); - } - - if (EnsureLimit(n, true)) { - // best could have been destroyed by coalescing. - // bestNormal is not a best-fit, and it could be destroyed as well. - // We retry, the limit is already ensured: - return AllocLarge(n); - } - - // If bestNormal existed, EnsureLimit would succeeded: - ASSERT(bestNormal == NULL); - // We are not allowed to take best from returned list. - return NULL; -} - -Span* PageHeap::Split(Span* span, Length n) { - ASSERT(0 < n); - ASSERT(n < span->length); - ASSERT(span->location == Span::IN_USE); - ASSERT(span->sizeclass == 0); - Event(span, 'T', n); - - const int extra = span->length - n; - Span* leftover = NewSpan(span->start + n, extra); - ASSERT(leftover->location == Span::IN_USE); - Event(leftover, 'U', extra); - RecordSpan(leftover); - pagemap_.set(span->start + n - 1, span); // Update map from pageid to span - span->length = n; - - return leftover; -} - -void PageHeap::CommitSpan(Span* span) { - TCMalloc_SystemCommit(reinterpret_cast(span->start << kPageShift), - static_cast(span->length << kPageShift)); - stats_.committed_bytes += span->length << kPageShift; -} - -bool PageHeap::DecommitSpan(Span* span) { - bool rv = TCMalloc_SystemRelease(reinterpret_cast(span->start << kPageShift), - static_cast(span->length << kPageShift)); - if (rv) { - stats_.committed_bytes -= span->length << kPageShift; - } - - return rv; -} - -Span* PageHeap::Carve(Span* span, Length n) { - ASSERT(n > 0); - ASSERT(span->location != Span::IN_USE); - const int old_location = span->location; - RemoveFromFreeList(span); - span->location = Span::IN_USE; - Event(span, 'A', n); - - const int extra = span->length - n; - ASSERT(extra >= 0); - if (extra > 0) { - Span* leftover = NewSpan(span->start + n, extra); - leftover->location = old_location; - Event(leftover, 'S', extra); - RecordSpan(leftover); - - // The previous span of |leftover| was just splitted -- no need to - // coalesce them. The next span of |leftover| was not previously coalesced - // with |span|, i.e. is NULL or has got location other than |old_location|. -#ifndef NDEBUG - const PageID p = leftover->start; - const Length len = leftover->length; - Span* next = GetDescriptor(p+len); - ASSERT (next == NULL || - next->location == Span::IN_USE || - next->location != leftover->location); -#endif - - PrependToFreeList(leftover); // Skip coalescing - no candidates possible - span->length = n; - pagemap_.set(span->start + n - 1, span); - } - ASSERT(Check()); - if (old_location == Span::ON_RETURNED_FREELIST) { - // We need to recommit this address space. 
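The two loops in AllocLarge() above implement address-ordered best-fit: among spans large enough, take the shortest, and break length ties by lower start address. The selection rule in isolation:

```cpp
#include <cstdio>
#include <vector>

// Address-ordered best-fit over a flat list (the removed code walks two
// intrusive doubly linked lists instead).
struct SpanLite { unsigned start, length; };

const SpanLite* BestFit(const std::vector<SpanLite>& spans, unsigned n) {
  const SpanLite* best = nullptr;
  for (const SpanLite& s : spans) {
    if (s.length < n) continue;
    if (best == nullptr || s.length < best->length ||
        (s.length == best->length && s.start < best->start))
      best = &s;
  }
  return best;
}

int main() {
  std::vector<SpanLite> spans = {{100, 8}, {40, 5}, {200, 5}};
  const SpanLite* b = BestFit(spans, 4);
  printf("start=%u length=%u\n", b->start, b->length);  // start=40 length=5
}
```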
- CommitSpan(span); - } - ASSERT(span->location == Span::IN_USE); - ASSERT(span->length == n); - ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); - return span; -} - -void PageHeap::Delete(Span* span) { - ASSERT(Check()); - ASSERT(span->location == Span::IN_USE); - ASSERT(span->length > 0); - ASSERT(GetDescriptor(span->start) == span); - ASSERT(GetDescriptor(span->start + span->length - 1) == span); - const Length n = span->length; - span->sizeclass = 0; - span->sample = 0; - span->location = Span::ON_NORMAL_FREELIST; - Event(span, 'D', span->length); - MergeIntoFreeList(span); // Coalesces if possible - IncrementalScavenge(n); - ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); - ASSERT(Check()); -} - -bool PageHeap::MayMergeSpans(Span *span, Span *other) { - if (aggressive_decommit_) { - return other->location != Span::IN_USE; - } - return span->location == other->location; -} - -void PageHeap::MergeIntoFreeList(Span* span) { - ASSERT(span->location != Span::IN_USE); - - // Coalesce -- we guarantee that "p" != 0, so no bounds checking - // necessary. We do not bother resetting the stale pagemap - // entries for the pieces we are merging together because we only - // care about the pagemap entries for the boundaries. - // - // Note: depending on aggressive_decommit_ mode we allow only - // similar spans to be coalesced. - // - // The following applies if aggressive_decommit_ is enabled: - // - // Note that the adjacent spans we merge into "span" may come out of a - // "normal" (committed) list, and cleanly merge with our IN_USE span, which - // is implicitly committed. If the adjacents spans are on the "returned" - // (decommitted) list, then we must get both spans into the same state before - // or after we coalesce them. The current code always decomits. This is - // achieved by blindly decommitting the entire coalesced region, which may - // include any combination of committed and decommitted spans, at the end of - // the method. - - // TODO(jar): "Always decommit" causes some extra calls to commit when we are - // called in GrowHeap() during an allocation :-/. We need to eval the cost of - // that oscillation, and possibly do something to reduce it. - - // TODO(jar): We need a better strategy for deciding to commit, or decommit, - // based on memory usage and free heap sizes. - - uint64_t temp_committed = 0; - - const PageID p = span->start; - const Length n = span->length; - Span* prev = GetDescriptor(p-1); - if (prev != NULL && MayMergeSpans(span, prev)) { - // Merge preceding span into this span - ASSERT(prev->start + prev->length == p); - const Length len = prev->length; - if (aggressive_decommit_ && prev->location == Span::ON_RETURNED_FREELIST) { - // We're about to put the merge span into the returned freelist and call - // DecommitSpan() on it, which will mark the entire span including this - // one as released and decrease stats_.committed_bytes by the size of the - // merged span. To make the math work out we temporarily increase the - // stats_.committed_bytes amount. 
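MergeIntoFreeList() above relies on the pagemap holding valid entries only at each span's first and last page, so looking up (start-1) and (start+length) finds free neighbours. A sketch of that boundary-based coalescing, using a std::map as a stand-in for the pagemap:

```cpp
#include <cstdio>
#include <map>

struct Node { unsigned start, length; bool free; };

// Merge s with free neighbours found via boundary lookups, then
// re-record s's own boundaries.
void Coalesce(std::map<unsigned, Node*>& boundary, Node* s) {
  auto it = boundary.find(s->start - 1);     // page just before us
  if (it != boundary.end() && it->second->free) {
    Node* prev = it->second;
    boundary.erase(prev->start);
    boundary.erase(prev->start + prev->length - 1);
    s->start -= prev->length;
    s->length += prev->length;
    delete prev;
  }
  it = boundary.find(s->start + s->length);  // page just after us
  if (it != boundary.end() && it->second->free) {
    Node* next = it->second;
    boundary.erase(next->start);
    boundary.erase(next->start + next->length - 1);
    s->length += next->length;
    delete next;
  }
  boundary[s->start] = s;
  boundary[s->start + s->length - 1] = s;
}

int main() {
  std::map<unsigned, Node*> b;
  Node* left = new Node{0, 4, true};
  b[0] = left; b[3] = left;
  Node* mid = new Node{4, 2, true};
  Coalesce(b, mid);
  printf("start=%u length=%u\n", mid->start, mid->length);  // start=0 length=6
}
```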
- temp_committed = prev->length << kPageShift; - } - RemoveFromFreeList(prev); - DeleteSpan(prev); - span->start -= len; - span->length += len; - pagemap_.set(span->start, span); - Event(span, 'L', len); - } - Span* next = GetDescriptor(p+n); - if (next != NULL && MayMergeSpans(span, next)) { - // Merge next span into this span - ASSERT(next->start == p+n); - const Length len = next->length; - if (aggressive_decommit_ && next->location == Span::ON_RETURNED_FREELIST) { - // See the comment below 'if (prev->location ...' for explanation. - temp_committed += next->length << kPageShift; - } - RemoveFromFreeList(next); - DeleteSpan(next); - span->length += len; - pagemap_.set(span->start + span->length - 1, span); - Event(span, 'R', len); - } - - if (aggressive_decommit_) { - if (DecommitSpan(span)) { - span->location = Span::ON_RETURNED_FREELIST; - stats_.committed_bytes += temp_committed; - } else { - ASSERT(temp_committed == 0); - } - } - PrependToFreeList(span); -} - -void PageHeap::PrependToFreeList(Span* span) { - ASSERT(span->location != Span::IN_USE); - SpanList* list = (span->length < kMaxPages) ? &free_[span->length] : &large_; - if (span->location == Span::ON_NORMAL_FREELIST) { - stats_.free_bytes += (span->length << kPageShift); - DLL_Prepend(&list->normal, span); - } else { - stats_.unmapped_bytes += (span->length << kPageShift); - DLL_Prepend(&list->returned, span); - } -} - -void PageHeap::RemoveFromFreeList(Span* span) { - ASSERT(span->location != Span::IN_USE); - if (span->location == Span::ON_NORMAL_FREELIST) { - stats_.free_bytes -= (span->length << kPageShift); - } else { - stats_.unmapped_bytes -= (span->length << kPageShift); - } - DLL_Remove(span); -} - -void PageHeap::IncrementalScavenge(Length n) { - // Fast path; not yet time to release memory - scavenge_counter_ -= n; - if (scavenge_counter_ >= 0) return; // Not yet time to scavenge - - const double rate = FLAGS_tcmalloc_release_rate; - if (rate <= 1e-6) { - // Tiny release rate means that releasing is disabled. - scavenge_counter_ = kDefaultReleaseDelay; - return; - } - - Length released_pages = ReleaseAtLeastNPages(1); - - if (released_pages == 0) { - // Nothing to scavenge, delay for a while. - scavenge_counter_ = kDefaultReleaseDelay; - } else { - // Compute how long to wait until we return memory. - // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages - // after releasing one page. - const double mult = 1000.0 / rate; - double wait = mult * static_cast(released_pages); - if (wait > kMaxReleaseDelay) { - // Avoid overflow and bound to reasonable range. - wait = kMaxReleaseDelay; - } - scavenge_counter_ = static_cast(wait); - } -} - -Length PageHeap::ReleaseLastNormalSpan(SpanList* slist) { - Span* s = slist->normal.prev; - ASSERT(s->location == Span::ON_NORMAL_FREELIST); - - if (DecommitSpan(s)) { - RemoveFromFreeList(s); - const Length n = s->length; - s->location = Span::ON_RETURNED_FREELIST; - MergeIntoFreeList(s); // Coalesces if possible. - return n; - } - - return 0; -} - -Length PageHeap::ReleaseAtLeastNPages(Length num_pages) { - Length released_pages = 0; - - // Round robin through the lists of free spans, releasing the last - // span in each list. Stop after releasing at least num_pages - // or when there is nothing more to release. - while (released_pages < num_pages && stats_.free_bytes > 0) { - for (int i = 0; i < kMaxPages+1 && released_pages < num_pages; - i++, release_index_++) { - if (release_index_ > kMaxPages) release_index_ = 0; - SpanList* slist = (release_index_ == kMaxPages) ? 
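IncrementalScavenge() above converts the release rate into a waiting period: with the default rate of 1.0, releasing one page delays the next release until roughly 1000 more pages have been freed. That computation in isolation:

```cpp
#include <cstdio>

static const double kMaxReleaseDelay = 1 << 20;  // cap, as in the removed code

// Pages to wait after releasing `released_pages` at rate `rate`:
// FLAGS_tcmalloc_release_rate == 1 means wait 1000 freed pages per
// released page.
long NextScavengeDelay(double rate, long released_pages) {
  double wait = (1000.0 / rate) * static_cast<double>(released_pages);
  if (wait > kMaxReleaseDelay) wait = kMaxReleaseDelay;
  return static_cast<long>(wait);
}

int main() {
  printf("%ld\n", NextScavengeDelay(1.0, 1));   // 1000: default rate
  printf("%ld\n", NextScavengeDelay(10.0, 1));  // 100: release 10x faster
}
```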
- &large_ : &free_[release_index_]; - if (!DLL_IsEmpty(&slist->normal)) { - Length released_len = ReleaseLastNormalSpan(slist); - // Some systems do not support release - if (released_len == 0) return released_pages; - released_pages += released_len; - } - } - } - return released_pages; -} - -bool PageHeap::EnsureLimit(Length n, bool withRelease) -{ - Length limit = (FLAGS_tcmalloc_heap_limit_mb*1024*1024) >> kPageShift; - if (limit == 0) return true; //there is no limit - - // We do not use stats_.system_bytes because it does not take - // MetaDataAllocs into account. - Length takenPages = TCMalloc_SystemTaken >> kPageShift; - //XXX takenPages may be slightly bigger than limit for two reasons: - //* MetaDataAllocs ignore the limit (it is not easy to handle - // out of memory there) - //* sys_alloc may round allocation up to huge page size, - // although smaller limit was ensured - - ASSERT(takenPages >= stats_.unmapped_bytes >> kPageShift); - takenPages -= stats_.unmapped_bytes >> kPageShift; - - if (takenPages + n > limit && withRelease) { - takenPages -= ReleaseAtLeastNPages(takenPages + n - limit); - } - - return takenPages + n <= limit; -} - -void PageHeap::RegisterSizeClass(Span* span, size_t sc) { - // Associate span object with all interior pages as well - ASSERT(span->location == Span::IN_USE); - ASSERT(GetDescriptor(span->start) == span); - ASSERT(GetDescriptor(span->start+span->length-1) == span); - Event(span, 'C', sc); - span->sizeclass = sc; - for (Length i = 1; i < span->length-1; i++) { - pagemap_.set(span->start+i, span); - } -} - -void PageHeap::GetSmallSpanStats(SmallSpanStats* result) { - for (int s = 0; s < kMaxPages; s++) { - result->normal_length[s] = DLL_Length(&free_[s].normal); - result->returned_length[s] = DLL_Length(&free_[s].returned); - } -} - -void PageHeap::GetLargeSpanStats(LargeSpanStats* result) { - result->spans = 0; - result->normal_pages = 0; - result->returned_pages = 0; - for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) { - result->normal_pages += s->length;; - result->spans++; - } - for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) { - result->returned_pages += s->length; - result->spans++; - } -} - -bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) { - Span* span = reinterpret_cast(pagemap_.Next(start)); - if (span == NULL) { - return false; - } - r->address = span->start << kPageShift; - r->length = span->length << kPageShift; - r->fraction = 0; - switch (span->location) { - case Span::IN_USE: - r->type = base::MallocRange::INUSE; - r->fraction = 1; - if (span->sizeclass > 0) { - // Only some of the objects in this span may be in use. - const size_t osize = Static::sizemap()->class_to_size(span->sizeclass); - r->fraction = (1.0 * osize * span->refcount) / r->length; - } - break; - case Span::ON_NORMAL_FREELIST: - r->type = base::MallocRange::FREE; - break; - case Span::ON_RETURNED_FREELIST: - r->type = base::MallocRange::UNMAPPED; - break; - default: - r->type = base::MallocRange::UNKNOWN; - break; - } - return true; -} - -static void RecordGrowth(size_t growth) { - StackTrace* t = Static::stacktrace_allocator()->New(); - t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 3); - t->size = growth; - t->stack[kMaxStackDepth-1] = reinterpret_cast(Static::growth_stacks()); - Static::set_growth_stacks(t); -} - -bool PageHeap::GrowHeap(Length n) { - ASSERT(kMaxPages >= kMinSystemAlloc); - if (n > kMaxValidPages) return false; - Length ask = (n>kMinSystemAlloc) ? 
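EnsureLimit() above converts the MiB limit to pages and charges only pages actually resident (taken from the system minus those already returned). A sketch of that accounting, with an illustrative 4 KiB page size:

```cpp
#include <cstdint>
#include <cstdio>

static const int kPageShift = 12;  // illustrative: 4 KiB pages

// Would allocating n_pages more stay within the heap limit?
bool WithinHeapLimit(uint64_t limit_mb, uint64_t taken_bytes,
                     uint64_t unmapped_bytes, uint64_t n_pages) {
  if (limit_mb == 0) return true;  // zero means "no limit"
  uint64_t limit_pages = (limit_mb * 1024 * 1024) >> kPageShift;
  uint64_t taken_pages = (taken_bytes - unmapped_bytes) >> kPageShift;
  return taken_pages + n_pages <= limit_pages;
}

int main() {
  // 64 MiB limit; 60 MiB taken, of which 8 MiB was returned to the OS.
  printf("%d\n", WithinHeapLimit(64, 60ull << 20, 8ull << 20, 1024));  // 1: 4 MiB fits
  printf("%d\n", WithinHeapLimit(64, 60ull << 20, 8ull << 20, 8192));  // 0: 32 MiB doesn't
}
```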
n : static_cast(kMinSystemAlloc); - size_t actual_size; - void* ptr = NULL; - if (EnsureLimit(ask)) { - ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); - } - if (ptr == NULL) { - if (n < ask) { - // Try growing just "n" pages - ask = n; - if (EnsureLimit(ask)) { - ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); - } - } - if (ptr == NULL) return false; - } - ask = actual_size >> kPageShift; - RecordGrowth(ask << kPageShift); - - uint64_t old_system_bytes = stats_.system_bytes; - stats_.system_bytes += (ask << kPageShift); - stats_.committed_bytes += (ask << kPageShift); - const PageID p = reinterpret_cast(ptr) >> kPageShift; - ASSERT(p > 0); - - // If we have already a lot of pages allocated, just pre allocate a bunch of - // memory for the page map. This prevents fragmentation by pagemap metadata - // when a program keeps allocating and freeing large blocks. - - if (old_system_bytes < kPageMapBigAllocationThreshold - && stats_.system_bytes >= kPageMapBigAllocationThreshold) { - pagemap_.PreallocateMoreMemory(); - } - - // Make sure pagemap_ has entries for all of the new pages. - // Plus ensure one before and one after so coalescing code - // does not need bounds-checking. - if (pagemap_.Ensure(p-1, ask+2)) { - // Pretend the new area is allocated and then Delete() it to cause - // any necessary coalescing to occur. - Span* span = NewSpan(p, ask); - RecordSpan(span); - Delete(span); - ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); - ASSERT(Check()); - return true; - } else { - // We could not allocate memory within "pagemap_" - // TODO: Once we can return memory to the system, return the new span - return false; - } -} - -bool PageHeap::Check() { - ASSERT(free_[0].normal.next == &free_[0].normal); - ASSERT(free_[0].returned.next == &free_[0].returned); - return true; -} - -bool PageHeap::CheckExpensive() { - bool result = Check(); - CheckList(&large_.normal, kMaxPages, 1000000000, Span::ON_NORMAL_FREELIST); - CheckList(&large_.returned, kMaxPages, 1000000000, Span::ON_RETURNED_FREELIST); - for (Length s = 1; s < kMaxPages; s++) { - CheckList(&free_[s].normal, s, s, Span::ON_NORMAL_FREELIST); - CheckList(&free_[s].returned, s, s, Span::ON_RETURNED_FREELIST); - } - return result; -} - -bool PageHeap::CheckList(Span* list, Length min_pages, Length max_pages, - int freelist) { - for (Span* s = list->next; s != list; s = s->next) { - CHECK_CONDITION(s->location == freelist); // NORMAL or RETURNED - CHECK_CONDITION(s->length >= min_pages); - CHECK_CONDITION(s->length <= max_pages); - CHECK_CONDITION(GetDescriptor(s->start) == s); - CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); - } - return true; -} - -} // namespace tcmalloc diff --git a/contrib/libtcmalloc/src/page_heap.h b/contrib/libtcmalloc/src/page_heap.h deleted file mode 100644 index 89fab81da69..00000000000 --- a/contrib/libtcmalloc/src/page_heap.h +++ /dev/null @@ -1,316 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
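GrowHeap() above sizes its system request as max(n, kMinSystemAlloc) to amortize system-call overhead, then retries with exactly n pages if the larger request fails. A sketch of that fallback, with a toy allocator standing in for TCMalloc_SystemAlloc:

```cpp
#include <cstdio>

static const long kMinSystemAlloc = 256;  // illustrative

long SystemAllocSketch(long ask, long available) {  // stand-in for sbrk/mmap
  return ask <= available ? ask : 0;
}

// Ask big first; if that fails and we asked for more than needed,
// retry with exactly n pages.
long GrowHeapSketch(long n, long available) {
  long ask = n > kMinSystemAlloc ? n : kMinSystemAlloc;
  long got = SystemAllocSketch(ask, available);
  if (got == 0 && n < ask) got = SystemAllocSketch(n, available);
  return got;
}

int main() {
  printf("%ld\n", GrowHeapSketch(4, 1000));  // 256: rounded up to the minimum
  printf("%ld\n", GrowHeapSketch(4, 100));   // 4: big ask failed, small one fits
}
```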
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat - -#ifndef TCMALLOC_PAGE_HEAP_H_ -#define TCMALLOC_PAGE_HEAP_H_ - -#include "config.h" -#include // for size_t -#ifdef HAVE_STDINT_H -#include // for uint64_t, int64_t, uint16_t -#endif -#include -#include "base/basictypes.h" -#include "common.h" -#include "packed-cache-inl.h" -#include "pagemap.h" -#include "span.h" - -// We need to dllexport PageHeap just for the unittest. MSVC complains -// that we don't dllexport the PageHeap members, but we don't need to -// test those, so I just suppress this warning. -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable:4251) -#endif - -// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if -// you're porting to a system where you really can't get a stacktrace. -// Because we control the definition of GetStackTrace, all clients of -// GetStackTrace should #include us rather than stacktrace.h. -#ifdef NO_TCMALLOC_SAMPLES - // We use #define so code compiles even if you #include stacktrace.h somehow. -# define GetStackTrace(stack, depth, skip) (0) -#else -# include -#endif - -namespace base { -struct MallocRange; -} - -namespace tcmalloc { - -// ------------------------------------------------------------------------- -// Map from page-id to per-page data -// ------------------------------------------------------------------------- - -// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines. -// We also use a simple one-level cache for hot PageID-to-sizeclass mappings, -// because sometimes the sizeclass is all the information we need. - -// Selector class -- general selector uses 3-level map -template class MapSelector { - public: - typedef TCMalloc_PageMap3 Type; - typedef PackedCache CacheType; -}; - -// A two-level map for 32-bit machines -template <> class MapSelector<32> { - public: - typedef TCMalloc_PageMap2<32-kPageShift> Type; - typedef PackedCache<32-kPageShift, uint16_t> CacheType; -}; - -// ------------------------------------------------------------------------- -// Page-level allocator -// * Eager coalescing -// -// Heap for page-level allocation. We allow allocating and freeing a -// contiguous runs of pages (called a "span"). 
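Note that the MapSelector typedefs in the removed header lost their template arguments in this rendering of the diff; upstream gperftools selects a two-level map for 32-bit builds and the three-level radix tree otherwise. A sketch of the compile-time specialization pattern (names abbreviated):

```cpp
#include <cstdio>

// Pick a pagemap depth from the address width at compile time.
template <int BITS> struct MapSelectorSketch {
  static const int kLevels = 3;  // wide address space: three-level radix tree
};
template <> struct MapSelectorSketch<32> {
  static const int kLevels = 2;  // 32-bit: a two-level map is enough
};

int main() {
  printf("levels on this build: %d\n",
         MapSelectorSketch<8 * sizeof(void*)>::kLevels);
}
```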
-// ------------------------------------------------------------------------- - -class PERFTOOLS_DLL_DECL PageHeap { - public: - PageHeap(); - - // Allocate a run of "n" pages. Returns zero if out of memory. - // Caller should not pass "n == 0" -- instead, n should have - // been rounded up already. - Span* New(Length n); - - // Delete the span "[p, p+n-1]". - // REQUIRES: span was returned by earlier call to New() and - // has not yet been deleted. - void Delete(Span* span); - - // Mark an allocated span as being used for small objects of the - // specified size-class. - // REQUIRES: span was returned by an earlier call to New() - // and has not yet been deleted. - void RegisterSizeClass(Span* span, size_t sc); - - // Split an allocated span into two spans: one of length "n" pages - // followed by another span of length "span->length - n" pages. - // Modifies "*span" to point to the first span of length "n" pages. - // Returns a pointer to the second span. - // - // REQUIRES: "0 < n < span->length" - // REQUIRES: span->location == IN_USE - // REQUIRES: span->sizeclass == 0 - Span* Split(Span* span, Length n); - - // Return the descriptor for the specified page. Returns NULL if - // this PageID was not allocated previously. - inline Span* GetDescriptor(PageID p) const { - return reinterpret_cast(pagemap_.get(p)); - } - - // If this page heap is managing a range with starting page # >= start, - // store info about the range in *r and return true. Else return false. - bool GetNextRange(PageID start, base::MallocRange* r); - - // Page heap statistics - struct Stats { - Stats() : system_bytes(0), free_bytes(0), unmapped_bytes(0), committed_bytes(0) {} - uint64_t system_bytes; // Total bytes allocated from system - uint64_t free_bytes; // Total bytes on normal freelists - uint64_t unmapped_bytes; // Total bytes on returned freelists - uint64_t committed_bytes; // Bytes committed, always <= system_bytes_. - - }; - inline Stats stats() const { return stats_; } - - struct SmallSpanStats { - // For each free list of small spans, the length (in spans) of the - // normal and returned free lists for that size. - int64 normal_length[kMaxPages]; - int64 returned_length[kMaxPages]; - }; - void GetSmallSpanStats(SmallSpanStats* result); - - // Stats for free large spans (i.e., spans with more than kMaxPages pages). - struct LargeSpanStats { - int64 spans; // Number of such spans - int64 normal_pages; // Combined page length of normal large spans - int64 returned_pages; // Combined page length of unmapped spans - }; - void GetLargeSpanStats(LargeSpanStats* result); - - bool Check(); - // Like Check() but does some more comprehensive checking. - bool CheckExpensive(); - bool CheckList(Span* list, Length min_pages, Length max_pages, - int freelist); // ON_NORMAL_FREELIST or ON_RETURNED_FREELIST - - // Try to release at least num_pages for reuse by the OS. Returns - // the actual number of pages released, which may be less than - // num_pages if there weren't enough pages to release. The result - // may also be larger than num_pages since page_heap might decide to - // release one large range instead of fragmenting it into two - // smaller released and unreleased ranges. - Length ReleaseAtLeastNPages(Length num_pages); - - // Return 0 if we have no information, or else the correct sizeclass for p. - // Reads and writes to pagemap_cache_ do not require locking. 
- // The entries are 64 bits on 64-bit hardware and 16 bits on - // 32-bit hardware, and we don't mind raciness as long as each read of - // an entry yields a valid entry, not a partially updated entry. - size_t GetSizeClassIfCached(PageID p) const { - return pagemap_cache_.GetOrDefault(p, 0); - } - void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); } - - bool GetAggressiveDecommit(void) {return aggressive_decommit_;} - void SetAggressiveDecommit(bool aggressive_decommit) { - aggressive_decommit_ = aggressive_decommit; - } - - private: - // Allocates a big block of memory for the pagemap once we reach more than - // 128MB - static const size_t kPageMapBigAllocationThreshold = 128 << 20; - - // Minimum number of pages to fetch from system at a time. Must be - // significantly bigger than kBlockSize to amortize system-call - // overhead, and also to reduce external fragementation. Also, we - // should keep this value big because various incarnations of Linux - // have small limits on the number of mmap() regions per - // address-space. - // REQUIRED: kMinSystemAlloc <= kMaxPages; - static const int kMinSystemAlloc = kMaxPages; - - // Never delay scavenging for more than the following number of - // deallocated pages. With 4K pages, this comes to 4GB of - // deallocation. - static const int kMaxReleaseDelay = 1 << 20; - - // If there is nothing to release, wait for so many pages before - // scavenging again. With 4K pages, this comes to 1GB of memory. - static const int kDefaultReleaseDelay = 1 << 18; - - // Pick the appropriate map and cache types based on pointer size - typedef MapSelector::Type PageMap; - typedef MapSelector::CacheType PageMapCache; - PageMap pagemap_; - mutable PageMapCache pagemap_cache_; - - // We segregate spans of a given size into two circular linked - // lists: one for normal spans, and one for spans whose memory - // has been returned to the system. - struct SpanList { - Span normal; - Span returned; - }; - - // List of free spans of length >= kMaxPages - SpanList large_; - - // Array mapping from span length to a doubly linked list of free spans - SpanList free_[kMaxPages]; - - // Statistics on system, free, and unmapped bytes - Stats stats_; - - Span* SearchFreeAndLargeLists(Length n); - - bool GrowHeap(Length n); - - // REQUIRES: span->length >= n - // REQUIRES: span->location != IN_USE - // Remove span from its free list, and move any leftover part of - // span into appropriate free lists. Also update "span" to have - // length exactly "n" and mark it as non-free so it can be returned - // to the client. After all that, decrease free_pages_ by n and - // return span. - Span* Carve(Span* span, Length n); - - void RecordSpan(Span* span) { - pagemap_.set(span->start, span); - if (span->length > 1) { - pagemap_.set(span->start + span->length - 1, span); - } - } - - // Allocate a large span of length == n. If successful, returns a - // span of exactly the specified length. Else, returns NULL. - Span* AllocLarge(Length n); - - // Coalesce span with neighboring spans if possible, prepend to - // appropriate free list, and adjust stats. - void MergeIntoFreeList(Span* span); - - // Commit the span. - void CommitSpan(Span* span); - - // Decommit the span. - bool DecommitSpan(Span* span); - - // Prepends span to appropriate free list, and adjusts stats. - void PrependToFreeList(Span* span); - - // Removes span from its free list, and adjust stats. - void RemoveFromFreeList(Span* span); - - // Incrementally release some memory to the system. 
- // IncrementalScavenge(n) is called whenever n pages are freed. - void IncrementalScavenge(Length n); - - // Release the last span on the normal portion of this list. - // Return the length of that span or zero if release failed. - Length ReleaseLastNormalSpan(SpanList* slist); - - // Checks if we are allowed to take more memory from the system. - // If limit is reached and allowRelease is true, tries to release - // some unused spans. - bool EnsureLimit(Length n, bool allowRelease = true); - - bool MayMergeSpans(Span *span, Span *other); - - // Number of pages to deallocate before doing more scavenging - int64_t scavenge_counter_; - - // Index of last free list where we released memory to the OS. - int release_index_; - - bool aggressive_decommit_; -}; - -} // namespace tcmalloc - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -#endif // TCMALLOC_PAGE_HEAP_H_ diff --git a/contrib/libtcmalloc/src/page_heap_allocator.h b/contrib/libtcmalloc/src/page_heap_allocator.h deleted file mode 100644 index 892d1c1abe3..00000000000 --- a/contrib/libtcmalloc/src/page_heap_allocator.h +++ /dev/null @@ -1,114 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat - -#ifndef TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ -#define TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ - -#include // for NULL, size_t - -#include "common.h" // for MetaDataAlloc -#include "internal_logging.h" // for ASSERT - -namespace tcmalloc { - -// Simple allocator for objects of a specified type. External locking -// is required before accessing one of these objects. -template -class PageHeapAllocator { - public: - // We use an explicit Init function because these variables are statically - // allocated and their constructors might not have run by the time some - // other static variable tries to allocate memory. 
- void Init() { - ASSERT(sizeof(T) <= kAllocIncrement); - inuse_ = 0; - free_area_ = NULL; - free_avail_ = 0; - free_list_ = NULL; - // Reserve some space at the beginning to avoid fragmentation. - Delete(New()); - } - - T* New() { - // Consult free list - void* result; - if (free_list_ != NULL) { - result = free_list_; - free_list_ = *(reinterpret_cast(result)); - } else { - if (free_avail_ < sizeof(T)) { - // Need more room. We assume that MetaDataAlloc returns - // suitably aligned memory. - free_area_ = reinterpret_cast(MetaDataAlloc(kAllocIncrement)); - if (free_area_ == NULL) { - Log(kCrash, __FILE__, __LINE__, - "FATAL ERROR: Out of memory trying to allocate internal " - "tcmalloc data (bytes, object-size)", - kAllocIncrement, sizeof(T)); - } - free_avail_ = kAllocIncrement; - } - result = free_area_; - free_area_ += sizeof(T); - free_avail_ -= sizeof(T); - } - inuse_++; - return reinterpret_cast(result); - } - - void Delete(T* p) { - *(reinterpret_cast(p)) = free_list_; - free_list_ = p; - inuse_--; - } - - int inuse() const { return inuse_; } - - private: - // How much to allocate from system at a time - static const int kAllocIncrement = 128 << 10; - - // Free area from which to carve new objects - char* free_area_; - size_t free_avail_; - - // Free list of already carved objects - void* free_list_; - - // Number of allocated but unfreed objects - int inuse_; -}; - -} // namespace tcmalloc - -#endif // TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ diff --git a/contrib/libtcmalloc/src/pagemap.h b/contrib/libtcmalloc/src/pagemap.h deleted file mode 100644 index dd9442313af..00000000000 --- a/contrib/libtcmalloc/src/pagemap.h +++ /dev/null @@ -1,324 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat -// -// A data structure used by the caching malloc. It maps from page# to -// a pointer that contains info about that page. 
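PageHeapAllocator's New/Delete above (whose casts also lost their template arguments in this rendering) implement a classic carve-from-arena allocator with an intrusive free list: freed objects store the next pointer inside themselves. A self-contained sketch, using plain malloc as a stand-in for MetaDataAlloc:

```cpp
#include <cstdio>
#include <cstdlib>

// Fixed-size-object allocator: carve from a big arena block, recycle
// freed objects through an intrusive free list. Requires
// sizeof(T) >= sizeof(void*).
template <class T>
class ArenaAllocatorSketch {
 public:
  void Init() { free_area_ = nullptr; free_avail_ = 0; free_list_ = nullptr; }
  T* New() {
    if (free_list_ != nullptr) {            // reuse a freed object first
      void* r = free_list_;
      free_list_ = *reinterpret_cast<void**>(r);
      return reinterpret_cast<T*>(r);
    }
    if (free_avail_ < sizeof(T)) {          // refill the arena
      free_area_ = static_cast<char*>(malloc(kAllocIncrement));
      free_avail_ = kAllocIncrement;
    }
    char* r = free_area_;
    free_area_ += sizeof(T);
    free_avail_ -= sizeof(T);
    return reinterpret_cast<T*>(r);
  }
  void Delete(T* p) {                       // push onto the intrusive list
    *reinterpret_cast<void**>(p) = free_list_;
    free_list_ = p;
  }

 private:
  static const size_t kAllocIncrement = 128 << 10;
  char* free_area_;
  size_t free_avail_;
  void* free_list_;
};

int main() {
  ArenaAllocatorSketch<double> a;
  a.Init();
  double* x = a.New();
  a.Delete(x);
  printf("reused: %d\n", x == a.New());  // 1: freed object comes back first
}
```

As the comment above notes, the real class uses an explicit Init() rather than a constructor because it is instantiated statically, before constructors are guaranteed to have run.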
We use two -// representations: one for 32-bit addresses, and another for 64 bit -// addresses. Both representations provide the same interface. The -// first representation is implemented as a flat array, the seconds as -// a three-level radix tree that strips away approximately 1/3rd of -// the bits every time. -// -// The BITS parameter should be the number of bits required to hold -// a page number. E.g., with 32 bit pointers and 4K pages (i.e., -// page offset fits in lower 12 bits), BITS == 20. - -#ifndef TCMALLOC_PAGEMAP_H_ -#define TCMALLOC_PAGEMAP_H_ - -#include "config.h" - -#include // for NULL, size_t -#include // for memset -#if defined HAVE_STDINT_H -#include -#elif defined HAVE_INTTYPES_H -#include -#else -#include -#endif -#include "internal_logging.h" // for ASSERT - -// Single-level array -template -class TCMalloc_PageMap1 { - private: - static const int LENGTH = 1 << BITS; - - void** array_; - - public: - typedef uintptr_t Number; - - explicit TCMalloc_PageMap1(void* (*allocator)(size_t)) { - array_ = reinterpret_cast((*allocator)(sizeof(void*) << BITS)); - memset(array_, 0, sizeof(void*) << BITS); - } - - // Ensure that the map contains initialized entries "x .. x+n-1". - // Returns true if successful, false if we could not allocate memory. - bool Ensure(Number x, size_t n) { - // Nothing to do since flat array was allocated at start. All - // that's left is to check for overflow (that is, we don't want to - // ensure a number y where array_[y] would be an out-of-bounds - // access). - return n <= LENGTH - x; // an overflow-free way to do "x + n <= LENGTH" - } - - void PreallocateMoreMemory() {} - - // Return the current value for KEY. Returns NULL if not yet set, - // or if k is out of range. - void* get(Number k) const { - if ((k >> BITS) > 0) { - return NULL; - } - return array_[k]; - } - - // REQUIRES "k" is in range "[0,2^BITS-1]". - // REQUIRES "k" has been ensured before. - // - // Sets the value 'v' for key 'k'. - void set(Number k, void* v) { - array_[k] = v; - } - - // Return the first non-NULL pointer found in this map for - // a page number >= k. Returns NULL if no such number is found. - void* Next(Number k) const { - while (k < (1 << BITS)) { - if (array_[k] != NULL) return array_[k]; - k++; - } - return NULL; - } -}; - -// Two-level radix tree -template -class TCMalloc_PageMap2 { - private: - // Put 32 entries in the root and (2^BITS)/32 entries in each leaf. 
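TCMalloc_PageMap1's Ensure() above uses an overflow-free range test: instead of "x + n <= LENGTH", which can wrap, it checks "n <= LENGTH - x". A sketch of the check (with an explicit in-range guard on x added, which the original leaves to its callers):

```cpp
#include <cstdint>
#include <cstdio>

static const int kBits = 20;  // illustrative
static const uintptr_t kLength = uintptr_t(1) << kBits;

// Can entries [x, x+n) fit in a table of kLength slots, without the
// addition x + n ever overflowing?
bool EnsureSketch(uintptr_t x, size_t n) {
  if (x >= kLength) return false;
  return n <= kLength - x;  // kLength - x cannot underflow here
}

int main() {
  printf("%d\n", EnsureSketch(kLength - 4, 4));  // 1: last four entries fit
  printf("%d\n", EnsureSketch(kLength - 4, 5));  // 0: would run off the end
}
```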
- static const int ROOT_BITS = 5; - static const int ROOT_LENGTH = 1 << ROOT_BITS; - - static const int LEAF_BITS = BITS - ROOT_BITS; - static const int LEAF_LENGTH = 1 << LEAF_BITS; - - // Leaf node - struct Leaf { - void* values[LEAF_LENGTH]; - }; - - Leaf* root_[ROOT_LENGTH]; // Pointers to 32 child nodes - void* (*allocator_)(size_t); // Memory allocator - - public: - typedef uintptr_t Number; - - explicit TCMalloc_PageMap2(void* (*allocator)(size_t)) { - allocator_ = allocator; - memset(root_, 0, sizeof(root_)); - } - - void* get(Number k) const { - const Number i1 = k >> LEAF_BITS; - const Number i2 = k & (LEAF_LENGTH-1); - if ((k >> BITS) > 0 || root_[i1] == NULL) { - return NULL; - } - return root_[i1]->values[i2]; - } - - void set(Number k, void* v) { - const Number i1 = k >> LEAF_BITS; - const Number i2 = k & (LEAF_LENGTH-1); - ASSERT(i1 < ROOT_LENGTH); - root_[i1]->values[i2] = v; - } - - bool Ensure(Number start, size_t n) { - for (Number key = start; key <= start + n - 1; ) { - const Number i1 = key >> LEAF_BITS; - - // Check for overflow - if (i1 >= ROOT_LENGTH) - return false; - - // Make 2nd level node if necessary - if (root_[i1] == NULL) { - Leaf* leaf = reinterpret_cast((*allocator_)(sizeof(Leaf))); - if (leaf == NULL) return false; - memset(leaf, 0, sizeof(*leaf)); - root_[i1] = leaf; - } - - // Advance key past whatever is covered by this leaf node - key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; - } - return true; - } - - void PreallocateMoreMemory() { - // Allocate enough to keep track of all possible pages - Ensure(0, 1 << BITS); - } - - void* Next(Number k) const { - while (k < (1 << BITS)) { - const Number i1 = k >> LEAF_BITS; - Leaf* leaf = root_[i1]; - if (leaf != NULL) { - // Scan forward in leaf - for (Number i2 = k & (LEAF_LENGTH - 1); i2 < LEAF_LENGTH; i2++) { - if (leaf->values[i2] != NULL) { - return leaf->values[i2]; - } - } - } - // Skip to next top-level entry - k = (i1 + 1) << LEAF_BITS; - } - return NULL; - } -}; - -// Three-level radix tree -template -class TCMalloc_PageMap3 { - private: - // How many bits should we consume at each interior level - static const int INTERIOR_BITS = (BITS + 2) / 3; // Round-up - static const int INTERIOR_LENGTH = 1 << INTERIOR_BITS; - - // How many bits should we consume at leaf level - static const int LEAF_BITS = BITS - 2*INTERIOR_BITS; - static const int LEAF_LENGTH = 1 << LEAF_BITS; - - // Interior node - struct Node { - Node* ptrs[INTERIOR_LENGTH]; - }; - - // Leaf node - struct Leaf { - void* values[LEAF_LENGTH]; - }; - - Node* root_; // Root of radix tree - void* (*allocator_)(size_t); // Memory allocator - - Node* NewNode() { - Node* result = reinterpret_cast((*allocator_)(sizeof(Node))); - if (result != NULL) { - memset(result, 0, sizeof(*result)); - } - return result; - } - - public: - typedef uintptr_t Number; - - explicit TCMalloc_PageMap3(void* (*allocator)(size_t)) { - allocator_ = allocator; - root_ = NewNode(); - } - - void* get(Number k) const { - const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); - const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); - const Number i3 = k & (LEAF_LENGTH-1); - if ((k >> BITS) > 0 || - root_->ptrs[i1] == NULL || root_->ptrs[i1]->ptrs[i2] == NULL) { - return NULL; - } - return reinterpret_cast(root_->ptrs[i1]->ptrs[i2])->values[i3]; - } - - void set(Number k, void* v) { - ASSERT(k >> BITS == 0); - const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); - const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); - const Number i3 = k & (LEAF_LENGTH-1); - 
reinterpret_cast(root_->ptrs[i1]->ptrs[i2])->values[i3] = v; - } - - bool Ensure(Number start, size_t n) { - for (Number key = start; key <= start + n - 1; ) { - const Number i1 = key >> (LEAF_BITS + INTERIOR_BITS); - const Number i2 = (key >> LEAF_BITS) & (INTERIOR_LENGTH-1); - - // Check for overflow - if (i1 >= INTERIOR_LENGTH || i2 >= INTERIOR_LENGTH) - return false; - - // Make 2nd level node if necessary - if (root_->ptrs[i1] == NULL) { - Node* n = NewNode(); - if (n == NULL) return false; - root_->ptrs[i1] = n; - } - - // Make leaf node if necessary - if (root_->ptrs[i1]->ptrs[i2] == NULL) { - Leaf* leaf = reinterpret_cast((*allocator_)(sizeof(Leaf))); - if (leaf == NULL) return false; - memset(leaf, 0, sizeof(*leaf)); - root_->ptrs[i1]->ptrs[i2] = reinterpret_cast(leaf); - } - - // Advance key past whatever is covered by this leaf node - key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; - } - return true; - } - - void PreallocateMoreMemory() { - } - - void* Next(Number k) const { - while (k < (Number(1) << BITS)) { - const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); - const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); - if (root_->ptrs[i1] == NULL) { - // Advance to next top-level entry - k = (i1 + 1) << (LEAF_BITS + INTERIOR_BITS); - } else { - Leaf* leaf = reinterpret_cast(root_->ptrs[i1]->ptrs[i2]); - if (leaf != NULL) { - for (Number i3 = (k & (LEAF_LENGTH-1)); i3 < LEAF_LENGTH; i3++) { - if (leaf->values[i3] != NULL) { - return leaf->values[i3]; - } - } - } - // Advance to next interior entry - k = ((k >> LEAF_BITS) + 1) << LEAF_BITS; - } - } - return NULL; - } -}; - -#endif // TCMALLOC_PAGEMAP_H_ diff --git a/contrib/libtcmalloc/src/raw_printer.cc b/contrib/libtcmalloc/src/raw_printer.cc deleted file mode 100644 index 3cf028eeae0..00000000000 --- a/contrib/libtcmalloc/src/raw_printer.cc +++ /dev/null @@ -1,72 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
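The three-level TCMalloc_PageMap3 above cuts a page number into two interior indices and one leaf index, each level stripping roughly a third of the BITS. The index arithmetic in isolation, with illustrative widths for BITS = 35:

```cpp
#include <cstdint>
#include <cstdio>

static const int kBits = 35;                              // illustrative
static const int kInteriorBits = (kBits + 2) / 3;         // 12, rounded up
static const int kLeafBits = kBits - 2 * kInteriorBits;   // 11

int main() {
  uintptr_t k = uintptr_t(0x123456789) & ((uintptr_t(1) << kBits) - 1);
  uintptr_t i1 = k >> (kLeafBits + kInteriorBits);               // top level
  uintptr_t i2 = (k >> kLeafBits) & ((uintptr_t(1) << kInteriorBits) - 1);
  uintptr_t i3 = k & ((uintptr_t(1) << kLeafBits) - 1);          // leaf slot
  printf("i1=%llu i2=%llu i3=%llu\n", (unsigned long long)i1,
         (unsigned long long)i2, (unsigned long long)i3);
}
```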
- -// --- -// Author: sanjay@google.com (Sanjay Ghemawat) - -#include -#include -#include -#include "raw_printer.h" -#include "base/logging.h" - -namespace base { - -RawPrinter::RawPrinter(char* buf, int length) - : base_(buf), - ptr_(buf), - limit_(buf + length - 1) { - RAW_DCHECK(length > 0, ""); - *ptr_ = '\0'; - *limit_ = '\0'; -} - -void RawPrinter::Printf(const char* format, ...) { - if (limit_ > ptr_) { - va_list ap; - va_start(ap, format); - int avail = limit_ - ptr_; - // We pass avail+1 to vsnprintf() since that routine needs room - // to store the trailing \0. - const int r = perftools_vsnprintf(ptr_, avail+1, format, ap); - va_end(ap); - if (r < 0) { - // Perhaps an old glibc that returns -1 on truncation? - ptr_ = limit_; - } else if (r > avail) { - // Truncation - ptr_ = limit_; - } else { - ptr_ += r; - } - } -} - -} diff --git a/contrib/libtcmalloc/src/raw_printer.h b/contrib/libtcmalloc/src/raw_printer.h deleted file mode 100644 index 9288bb5eeaa..00000000000 --- a/contrib/libtcmalloc/src/raw_printer.h +++ /dev/null @@ -1,90 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat -// -// A printf() wrapper that writes into a fixed length buffer. -// Useful in low-level code that does not want to use allocating -// routines like StringPrintf(). -// -// The implementation currently uses vsnprintf(). This seems to -// be fine for use in many low-level contexts, but we may need to -// rethink this decision if we hit a problem with it calling -// down into malloc() etc. - -#ifndef BASE_RAW_PRINTER_H_ -#define BASE_RAW_PRINTER_H_ - -#include -#include "base/basictypes.h" - -namespace base { - -class RawPrinter { - public: - // REQUIRES: "length > 0" - // Will printf any data added to this into "buf[0,length-1]" and - // will arrange to always keep buf[] null-terminated. 
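RawPrinter::Printf above handles both vsnprintf conventions: a negative return (old libcs) and a return larger than the available space (C99 truncation) each mean "the buffer is now full". A sketch of that handling with plain vsnprintf in place of perftools_vsnprintf:

```cpp
#include <cstdarg>
#include <cstdio>

// Append formatted output to [ptr, limit); *limit is reserved for '\0'.
// Returns the new write position, or limit when the buffer is full.
char* BoundedPrintf(char* ptr, char* limit, const char* format, ...) {
  if (limit <= ptr) return ptr;
  va_list ap;
  va_start(ap, format);
  int avail = limit - ptr;
  int r = vsnprintf(ptr, avail + 1, format, ap);  // +1: room for the '\0'
  va_end(ap);
  if (r < 0 || r > avail) return limit;  // old-libc error or truncation
  return ptr + r;
}

int main() {
  char buf[8];
  char* end = BoundedPrintf(buf, buf + sizeof(buf) - 1, "%d pages", 42);
  printf("%s (full=%d)\n", buf, end == buf + sizeof(buf) - 1);
  // "42 page (full=1)": output silently truncated to fit
}
```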
- RawPrinter(char* buf, int length); - - // Return the number of bytes that have been appended to the string - // so far. Does not count any bytes that were dropped due to overflow. - int length() const { return (ptr_ - base_); } - - // Return the number of bytes that can be added to this. - int space_left() const { return (limit_ - ptr_); } - - // Format the supplied arguments according to the "format" string - // and append to this. Will silently truncate the output if it does - // not fit. - void Printf(const char* format, ...) -#ifdef HAVE___ATTRIBUTE__ - __attribute__ ((__format__ (__printf__, 2, 3))) -#endif -; - - private: - // We can write into [ptr_ .. limit_-1]. - // *limit_ is also writable, but reserved for a terminating \0 - // in case we overflow. - // - // Invariants: *ptr_ == \0 - // Invariants: *limit_ == \0 - char* base_; // Initial pointer - char* ptr_; // Where should we write next - char* limit_; // One past last non-\0 char we can write - - DISALLOW_COPY_AND_ASSIGN(RawPrinter); -}; - -} - -#endif // BASE_RAW_PRINTER_H_ diff --git a/contrib/libtcmalloc/src/sampler.cc b/contrib/libtcmalloc/src/sampler.cc deleted file mode 100644 index cc711123340..00000000000 --- a/contrib/libtcmalloc/src/sampler.cc +++ /dev/null @@ -1,131 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// All Rights Reserved. -// -// Author: Daniel Ford - -#include "sampler.h" - -#include // For min() -#include -#include "base/commandlineflags.h" - -using std::min; - -// The approximate gap in bytes between sampling actions. -// I.e., we take one sample approximately once every -// tcmalloc_sample_parameter bytes of allocation -// i.e. about once every 512KB if value is 1<<19. 
-#ifdef NO_TCMALLOC_SAMPLES -DEFINE_int64(tcmalloc_sample_parameter, 0, - "Unused: code is compiled with NO_TCMALLOC_SAMPLES"); -#else -DEFINE_int64(tcmalloc_sample_parameter, - EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 0), - "The approximate gap in bytes between sampling actions. " - "This must be between 1 and 2^58."); -#endif - -namespace tcmalloc { - -// Statics for Sampler -double Sampler::log_table_[1<(i+0.5)/(1<(reinterpret_cast(this)); - if (rnd_ == 0) { - rnd_ = 1; - } - } - // Step it forward 20 times for good measure - for (int i = 0; i < 20; i++) { - rnd_ = NextRandom(rnd_); - } - // Initialize counter - bytes_until_sample_ = PickNextSamplingPoint(); -} - -// Initialize the Statics for the Sampler class -void Sampler::InitStatics() { - PopulateFastLog2Table(); -} - -// Generates a geometric variable with the specified mean (512K by default). -// This is done by generating a random number between 0 and 1 and applying -// the inverse cumulative distribution function for an exponential. -// Specifically: Let m be the inverse of the sample period, then -// the probability distribution function is m*exp(-mx) so the CDF is -// p = 1 - exp(-mx), so -// q = 1 - p = exp(-mx) -// log_e(q) = -mx -// -log_e(q)/m = x -// log_2(q) * (-log_e(2) * 1/m) = x -// In the code, q is actually in the range 1 to 2**26, hence the -26 below -size_t Sampler::PickNextSamplingPoint() { - rnd_ = NextRandom(rnd_); - // Take the top 26 bits as the random number - // (This plus the 1<<58 sampling bound give a max possible step of - // 5194297183973780480 bytes.) - const uint64_t prng_mod_power = 48; // Number of bits in prng - // The uint32_t cast is to prevent a (hard-to-reproduce) NAN - // under piii debug for some binaries. - double q = static_cast(rnd_ >> (prng_mod_power - 26)) + 1.0; - // Put the computed p-value through the CDF of a geometric. - // For faster performance (save ~1/20th exec time), replace - // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705) - // The value 26.000705 is used rather than 26 to compensate - // for inaccuracies in FastLog2 which otherwise result in a - // negative answer. - return static_cast(min(0.0, (FastLog2(q) - 26)) * (-log(2.0) - * FLAGS_tcmalloc_sample_parameter) + 1); -} - -} // namespace tcmalloc diff --git a/contrib/libtcmalloc/src/sampler.h b/contrib/libtcmalloc/src/sampler.h deleted file mode 100644 index eb316d7493d..00000000000 --- a/contrib/libtcmalloc/src/sampler.h +++ /dev/null @@ -1,180 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
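PickNextSamplingPoint above draws q uniformly from the top 26 PRNG bits and inverts the exponential CDF to get a geometrically distributed byte count with the configured mean. A condensed sketch of the same computation, with std::log2 in place of the table-driven FastLog2:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Next sampling point: q in (1, 2^26], x = -ln(q / 2^26) * mean.
size_t NextSamplingPoint(uint64_t rnd, double mean_bytes) {
  const int prng_bits = 48;  // bits produced by the removed code's PRNG
  double q = static_cast<double>(rnd >> (prng_bits - 26)) + 1.0;
  double x = std::min(0.0, std::log2(q) - 26) * (-std::log(2.0) * mean_bytes);
  return static_cast<size_t>(x) + 1;
}

int main() {
  // A median draw (q ~ 2^25) with the default 512 KiB mean lands near
  // ln(2) * 512 KiB, i.e. about 363 KiB:
  printf("%zu\n", NextSamplingPoint(uint64_t(1) << 47, 512 * 1024));
}
```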
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// All Rights Reserved.
-//
-// Author: Daniel Ford
-
-#ifndef TCMALLOC_SAMPLER_H_
-#define TCMALLOC_SAMPLER_H_
-
-#include "config.h"
-#include <stddef.h>                     // for size_t
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for uint64_t, uint32_t, int32_t
-#endif
-#include <string.h>                     // for memcpy
-#include "base/basictypes.h"            // for ASSERT
-#include "internal_logging.h"           // for ASSERT
-
-namespace tcmalloc {
-
-//-------------------------------------------------------------------
-// Sampler to decide when to create a sample trace for an allocation
-// Not thread safe: Each thread should have its own sampler object.
-// Caller must use external synchronization if used
-// from multiple threads.
-//
-// With 512K average sample step (the default):
-//  the probability of sampling a 4K allocation is about 0.00778
-//  the probability of sampling a 1MB allocation is about 0.865
-//  the probability of sampling a 1GB allocation is about 1.00000
-// In general, the probability of sampling an allocation of size X
-// given a flag value of Y (default 1M) is:
-//  1 - e^(-X/Y)
-//
-// With 128K average sample step:
-//  the probability of sampling a 1MB allocation is about 0.99966
-//  the probability of sampling a 1GB allocation is about 1.0
-//  (about 1 - 2**(-26))
-// With 1M average sample step:
-//  the probability of sampling a 4K allocation is about 0.00390
-//  the probability of sampling a 1MB allocation is about 0.632
-//  the probability of sampling a 1GB allocation is about 1.0
-//
-// The sampler works by representing memory as a long stream from
-// which allocations are taken. Some of the bytes in this stream are
-// marked and if an allocation includes a marked byte then it is
-// sampled. Bytes are marked according to a Poisson point process
-// with each byte being marked independently with probability
-// p = 1/tcmalloc_sample_parameter.  This makes the probability
-// of sampling an allocation of X bytes equal to the CDF of
-// a geometric with mean tcmalloc_sample_parameter. (ie. the
-// probability that at least one byte in the range is marked). This
-// is accurately given by the CDF of the corresponding exponential
-// distribution : 1 - e^(-X/tcmalloc_sample_parameter_)
-// Independence of the byte marking ensures independence of
-// the sampling of each allocation.
-//
-// This scheme is implemented by noting that, starting from any
-// fixed place, the number of bytes until the next marked byte
-// is geometrically distributed. This number is recorded as
-// bytes_until_sample_.  Every allocation subtracts from this
-// number until it is less than 0. When this happens the current
-// allocation is sampled.
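
The byte-countdown scheme the paragraph above describes is easy to model in isolation. The toy class below is a sketch written for this review — PickNext() is stubbed out with a constant, where the real sampler draws a geometric variable — and is not code from the tree.

    #include <cstddef>

    // Toy mirror of the countdown: every allocation is charged against
    // bytes_until_sample_; crossing below zero triggers a sample and a
    // fresh draw for the next sampling point.
    class CountdownSampler {
     public:
      explicit CountdownSampler(size_t mean_gap)
          : mean_gap_(mean_gap), bytes_until_sample_(mean_gap) {}

      bool SampleAllocation(size_t k) {
        if (static_cast<ptrdiff_t>(bytes_until_sample_ -= k) < 0) {
          bytes_until_sample_ = PickNext();
          return true;  // caller records a stack trace for this allocation
        }
        return false;
      }

     private:
      size_t PickNext() const { return mean_gap_; }  // stub: constant gap
      size_t mean_gap_;
      size_t bytes_until_sample_;
    };
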
-//
-// When an allocation occurs, bytes_until_sample_ is reset to
-// a new independently sampled geometric number of bytes. The
-// memoryless property of the point process means that this may
-// be taken as the number of bytes after the end of the current
-// allocation until the next marked byte. This ensures that
-// very large allocations which would intersect many marked bytes
-// only result in a single call to PickNextSamplingPoint.
-//-------------------------------------------------------------------
-
-class PERFTOOLS_DLL_DECL Sampler {
- public:
-  // Initialize this sampler.
-  // Passing a seed of 0 gives a non-deterministic
-  // seed value given by casting the object ("this")
-  void Init(uint32_t seed);
-  void Cleanup();
-
-  // Record allocation of "k" bytes.  Return true iff allocation
-  // should be sampled
-  bool SampleAllocation(size_t k);
-
-  // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter)
-  size_t PickNextSamplingPoint();
-
-  // Initialize the statics for the Sampler class
-  static void InitStatics();
-
-  // Returns the current sample period
-  int GetSamplePeriod();
-
-  // The following are public for the purposes of testing
-  static uint64_t NextRandom(uint64_t rnd_);  // Returns the next prng value
-  static double FastLog2(const double & d);   // Computes Log2(x) quickly
-  static void PopulateFastLog2Table();        // Populate the lookup table
-
- private:
-  size_t bytes_until_sample_;  // Bytes until we sample next
-  uint64_t rnd_;               // Cheap random number generator
-
-  // Statics for the fast log
-  // Note that this code may not depend on anything in //util
-  // hence the duplication of functionality here
-  static const int kFastlogNumBits = 10;
-  static const int kFastlogMask = (1 << kFastlogNumBits) - 1;
-  static double log_table_[1<<kFastlogNumBits];
-};
-
-inline bool Sampler::SampleAllocation(size_t k) {
-  if (static_cast<ssize_t>(bytes_until_sample_ -= k) < 0) {
-    bytes_until_sample_ = PickNextSamplingPoint();
-    return true;
-  } else {
-    return false;
-  }
-}
-
-// Inline functions which are public for testing purposes
-
-// Returns the next prng value.
-// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48
-// This is the lrand64 generator.
-inline uint64_t Sampler::NextRandom(uint64_t rnd) {
-  const uint64_t prng_mult = 0x5DEECE66DULL;
-  const uint64_t prng_add = 0xB;
-  const int prng_mod_power = 48;
-  const uint64_t prng_mod_mask =
-      ~((~static_cast<uint64_t>(0)) << prng_mod_power);
-  return (prng_mult * rnd + prng_add) & prng_mod_mask;
-}
-
-// Adapted from //util/math/fastmath.[h|cc] by Noam Shazeer
-// This mimics the VeryFastLog2 code in those files
-inline double Sampler::FastLog2(const double & d) {
-  ASSERT(d>0);
-  COMPILE_ASSERT(sizeof(d) == sizeof(uint64_t), DoubleMustBe64Bits);
-  uint64_t x;
-  memcpy(&x, &d, sizeof(x));   // we depend on the compiler inlining this
-  const uint32_t x_high = x >> 32;
-  const uint32_t y = x_high >> (20 - kFastlogNumBits) & kFastlogMask;
-  const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
-  return exponent + log_table_[y];
-}
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_SAMPLER_H_
diff --git a/contrib/libtcmalloc/src/span.cc b/contrib/libtcmalloc/src/span.cc
deleted file mode 100644
index 5f7ae436086..00000000000
--- a/contrib/libtcmalloc/src/span.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
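
Aside: the NextRandom/FastLog2 pair restored above can be exercised standalone. The sketch below re-derives the table-driven log2 with invented names (kBits, table) to show how the exponent bits plus a small mantissa table approximate log2(x); it is illustrative only, not code from the tree.

    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static const int kBits = 10;          // matches kFastlogNumBits
    static double table[1 << kBits];

    void Populate() {
      for (int i = 0; i < (1 << kBits); i++)
        table[i] = std::log2(1.0 + (i + 0.5) / (1 << kBits));
    }

    double FastLog2(double d) {
      uint64_t x;
      std::memcpy(&x, &d, sizeof(x));     // reinterpret the double's bits
      const uint32_t hi = x >> 32;
      const uint32_t y = (hi >> (20 - kBits)) & ((1 << kBits) - 1);
      const int32_t exponent = ((hi >> 20) & 0x7FF) - 1023;
      return exponent + table[y];         // exponent + table'd log2(mantissa)
    }

    int main() {
      Populate();
      std::printf("FastLog2(1000)=%.6f vs log2=%.6f\n",
                  FastLog2(1000.0), std::log2(1000.0));
      return 0;
    }
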
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-
-#include "config.h"
-#include "span.h"
-
-#include <string.h>                   // for NULL, memset
-
-#include "internal_logging.h"         // for ASSERT
-#include "page_heap_allocator.h"      // for PageHeapAllocator
-#include "static_vars.h"              // for Static
-
-namespace tcmalloc {
-
-#ifdef SPAN_HISTORY
-void Event(Span* span, char op, int v = 0) {
-  span->history[span->nexthistory] = op;
-  span->value[span->nexthistory] = v;
-  span->nexthistory++;
-  if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
-}
-#endif
-
-Span* NewSpan(PageID p, Length len) {
-  Span* result = Static::span_allocator()->New();
-  memset(result, 0, sizeof(*result));
-  result->start = p;
-  result->length = len;
-#ifdef SPAN_HISTORY
-  result->nexthistory = 0;
-#endif
-  return result;
-}
-
-void DeleteSpan(Span* span) {
-#ifndef NDEBUG
-  // In debug mode, trash the contents of deleted Spans
-  memset(span, 0x3f, sizeof(*span));
-#endif
-  Static::span_allocator()->Delete(span);
-}
-
-void DLL_Init(Span* list) {
-  list->next = list;
-  list->prev = list;
-}
-
-void DLL_Remove(Span* span) {
-  span->prev->next = span->next;
-  span->next->prev = span->prev;
-  span->prev = NULL;
-  span->next = NULL;
-}
-
-int DLL_Length(const Span* list) {
-  int result = 0;
-  for (Span* s = list->next; s != list; s = s->next) {
-    result++;
-  }
-  return result;
-}
-
-void DLL_Prepend(Span* list, Span* span) {
-  ASSERT(span->next == NULL);
-  ASSERT(span->prev == NULL);
-  span->next = list->next;
-  span->prev = list;
-  list->next->prev = span;
-  list->next = span;
-}
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/span.h b/contrib/libtcmalloc/src/span.h
deleted file mode 100644
index 3fe30ba33d0..00000000000
--- a/contrib/libtcmalloc/src/span.h
+++ /dev/null
@@ -1,102 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
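
A note on the span.cc list primitives deleted above: they form a textbook intrusive circular doubly-linked list with a sentinel node. A self-contained miniature (Node standing in for Span; written for this review, not taken from the tree):

    #include <cassert>
    #include <cstddef>

    struct Node { Node* next; Node* prev; };

    // Empty list: the sentinel points at itself.
    void Init(Node* list) { list->next = list; list->prev = list; }

    // Splice `n` in right after the sentinel, O(1).
    void Prepend(Node* list, Node* n) {
      n->next = list->next;
      n->prev = list;
      list->next->prev = n;
      list->next = n;
    }

    // Unlink `n` from whatever list it is on, O(1).
    void Remove(Node* n) {
      n->prev->next = n->next;
      n->next->prev = n->prev;
      n->prev = n->next = nullptr;
    }

    int main() {
      Node list, a, b;
      Init(&list);
      Prepend(&list, &a);
      Prepend(&list, &b);  // order: b, a
      assert(list.next == &b && b.next == &a && a.next == &list);
      Remove(&b);
      assert(list.next == &a);
      return 0;
    }
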
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// A Span is a contiguous run of pages.
-
-#ifndef TCMALLOC_SPAN_H_
-#define TCMALLOC_SPAN_H_
-
-#include "config.h"
-#include "common.h"
-
-namespace tcmalloc {
-
-// Information kept for a span (a contiguous run of pages).
-struct Span {
-  PageID start;          // Starting page number
-  Length length;         // Number of pages in span
-  Span* next;            // Used when in linked list
-  Span* prev;            // Used when in linked list
-  void* objects;         // Linked list of free objects
-  unsigned int refcount : 16;   // Number of non-free objects
-  unsigned int sizeclass : 8;   // Size-class for small objects (or 0)
-  unsigned int location : 2;    // Is the span on a freelist, and if so, which?
-  unsigned int sample : 1;      // Sampled object?
-
-#undef SPAN_HISTORY
-#ifdef SPAN_HISTORY
-  // For debugging, we can keep a log of events per span
-  int nexthistory;
-  char history[64];
-  int value[64];
-#endif
-
-  // What freelist the span is on: IN_USE if on none, or normal or returned
-  enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST };
-};
-
-#ifdef SPAN_HISTORY
-void Event(Span* span, char op, int v = 0);
-#else
-#define Event(s,o,v) ((void) 0)
-#endif
-
-// Allocator/deallocator for spans
-Span* NewSpan(PageID p, Length len);
-void DeleteSpan(Span* span);
-
-// -------------------------------------------------------------------------
-// Doubly linked list of spans.
-// -------------------------------------------------------------------------
-
-// Initialize *list to an empty list.
-void DLL_Init(Span* list);
-
-// Remove 'span' from the linked list in which it resides, updating the
-// pointers of adjacent Spans and setting span's next and prev to NULL.
-void DLL_Remove(Span* span);
-
-// Return true iff "list" is empty.
-inline bool DLL_IsEmpty(const Span* list) {
-  return list->next == list;
-}
-
-// Add span to the front of list.
-void DLL_Prepend(Span* list, Span* span);
-
-// Return the length of the linked list.  O(n)
-int DLL_Length(const Span* list);
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_SPAN_H_
diff --git a/contrib/libtcmalloc/src/stack_trace_table.cc b/contrib/libtcmalloc/src/stack_trace_table.cc
deleted file mode 100644
index 049cca524b5..00000000000
--- a/contrib/libtcmalloc/src/stack_trace_table.cc
+++ /dev/null
@@ -1,160 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2009, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Andrew Fikes
-
-#include "config.h"
-#include "stack_trace_table.h"
-#include <string.h>                     // for NULL, memset
-#include "base/spinlock.h"              // for SpinLockHolder
-#include "common.h"                     // for StackTrace
-#include "internal_logging.h"           // for ASSERT, Log
-#include "page_heap_allocator.h"        // for PageHeapAllocator
-#include "static_vars.h"                // for Static
-
-namespace tcmalloc {
-
-bool StackTraceTable::Bucket::KeyEqual(uintptr_t h,
-                                       const StackTrace& t) const {
-  const bool eq = (this->hash == h && this->trace.depth == t.depth);
-  for (int i = 0; eq && i < t.depth; ++i) {
-    if (this->trace.stack[i] != t.stack[i]) {
-      return false;
-    }
-  }
-  return eq;
-}
-
-StackTraceTable::StackTraceTable()
-    : error_(false),
-      depth_total_(0),
-      bucket_total_(0),
-      table_(new Bucket*[kHashTableSize]()) {
-  memset(table_, 0, kHashTableSize * sizeof(Bucket*));
-}
-
-StackTraceTable::~StackTraceTable() {
-  delete[] table_;
-}
-
-void StackTraceTable::AddTrace(const StackTrace& t) {
-  if (error_) {
-    return;
-  }
-
-  // Hash function borrowed from base/heap-profile-table.cc
-  uintptr_t h = 0;
-  for (int i = 0; i < t.depth; ++i) {
-    h += reinterpret_cast<uintptr_t>(t.stack[i]);
-    h += h << 10;
-    h ^= h >> 6;
-  }
-  h += h << 3;
-  h ^= h >> 11;
-
-  const int idx = h % kHashTableSize;
-
-  Bucket* b = table_[idx];
-  while (b != NULL && !b->KeyEqual(h, t)) {
-    b = b->next;
-  }
-  if (b != NULL) {
-    b->count++;
-    b->trace.size += t.size;  // keep cumulative size
-  } else {
-    depth_total_ += t.depth;
-    bucket_total_++;
-    b = Static::bucket_allocator()->New();
-    if (b == NULL) {
-      Log(kLog, __FILE__, __LINE__,
-          "tcmalloc: could not allocate bucket", sizeof(*b));
-      error_ = true;
-    } else {
-      b->hash = h;
-      b->trace = t;
-      b->count = 1;
-      b->next = table_[idx];
-      table_[idx] = b;
-    }
-  }
-}
-
-void** StackTraceTable::ReadStackTracesAndClear() {
-  if (error_) {
-    return NULL;
-  }
-
-  // Allocate output array
-  const int out_len = bucket_total_ * 3 + depth_total_ + 1;
-  void** out = new void*[out_len];
-  if (out == NULL) {
-    Log(kLog, __FILE__, __LINE__,
-        "tcmalloc: allocation failed for stack traces",
-        out_len * sizeof(*out));
-    return NULL;
-  }
-
-  // Fill output array
-  int idx = 0;
-  for (int i = 0; i < kHashTableSize; ++i) {
-    Bucket* b = table_[i];
-    while (b != NULL) {
-      out[idx++] = reinterpret_cast<void*>(static_cast<uintptr_t>(b->count));
-      out[idx++] = reinterpret_cast<void*>(b->trace.size);  // cumulative size
-      out[idx++] = reinterpret_cast<void*>(b->trace.depth);
-      for (int d = 0; d < b->trace.depth; ++d) {
-        out[idx++] = b->trace.stack[d];
-      }
-      b = b->next;
-    }
-  }
-  out[idx++] = NULL;
-  ASSERT(idx == out_len);
-
-  // Clear state
-  error_ = false;
-  depth_total_ = 0;
-  bucket_total_ = 0;
-  SpinLockHolder h(Static::pageheap_lock());
-  for (int i = 0; i < kHashTableSize; ++i) {
-    Bucket* b = table_[i];
-    while (b != NULL) {
-      Bucket* next = b->next;
-      Static::bucket_allocator()->Delete(b);
-      b = next;
-    }
-    table_[i] = NULL;
-  }
-
-  return out;
-}
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/stack_trace_table.h b/contrib/libtcmalloc/src/stack_trace_table.h
deleted file mode 100644
index 66ed5d92822..00000000000
--- a/contrib/libtcmalloc/src/stack_trace_table.h
+++ /dev/null
@@ -1,92 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2009, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Andrew Fikes
-//
-// Utility class for coalescing sampled stack traces.  Not thread-safe.
-
-#ifndef TCMALLOC_STACK_TRACE_TABLE_H_
-#define TCMALLOC_STACK_TRACE_TABLE_H_
-
-#include "config.h"
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for uintptr_t
-#endif
-#include "common.h"
-
-namespace tcmalloc {
-
-class PERFTOOLS_DLL_DECL StackTraceTable {
- public:
-  // REQUIRES: L < pageheap_lock
-  StackTraceTable();
-  ~StackTraceTable();
-
-  // Adds stack trace "t" to table.
-  //
-  // REQUIRES: L >= pageheap_lock
-  void AddTrace(const StackTrace& t);
-
-  // Returns stack traces formatted per MallocExtension guidelines.
-  // May return NULL on error.  Clears state before returning.
-  //
-  // REQUIRES: L < pageheap_lock
-  void** ReadStackTracesAndClear();
-
-  // Exposed for PageHeapAllocator
-  struct Bucket {
-    // Key
-    uintptr_t hash;
-    StackTrace trace;
-
-    // Payload
-    int count;
-    Bucket* next;
-
-    bool KeyEqual(uintptr_t h, const StackTrace& t) const;
-  };
-
-  // For testing
-  int depth_total() const { return depth_total_; }
-  int bucket_total() const { return bucket_total_; }
-
- private:
-  static const int kHashTableSize = 1 << 14;  // => table_ is 128k
-
-  bool error_;
-  int depth_total_;
-  int bucket_total_;
-  Bucket** table_;
-};
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_STACK_TRACE_TABLE_H_
diff --git a/contrib/libtcmalloc/src/stacktrace.cc b/contrib/libtcmalloc/src/stacktrace.cc
deleted file mode 100644
index 88c8b15946d..00000000000
--- a/contrib/libtcmalloc/src/stacktrace.cc
+++ /dev/null
@@ -1,339 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// Produce stack trace.
-//
-// There are three different ways we can try to get the stack trace:
-//
-// 1) Our hand-coded stack-unwinder.  This depends on a certain stack
-//    layout, which is used by gcc (and those systems using a
-//    gcc-compatible ABI) on x86 systems, at least since gcc 2.95.
-//    It uses the frame pointer to do its work.
-//
-// 2) The libunwind library.  This is still in development, and as a
-//    separate library adds a new dependency, but doesn't need a frame
-//    pointer.  It also doesn't call malloc.
-//
-// 3) The gdb unwinder -- also the one used by the c++ exception code.
-//    It's obviously well-tested, but has a fatal flaw: it can call
-//    malloc() from the unwinder.  This is a problem because we're
-//    trying to use the unwinder to instrument malloc().
-//
-// Note: if you add a new implementation here, make sure it works
-// correctly when GetStackTrace() is called with max_depth == 0.
-// Some code may do that.
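
Stepping back to the StackTraceTable removal above: the flattened array its ReadStackTracesAndClear() builds (three header words per bucket — count, cumulative size, depth — then the PCs, then a NULL terminator, matching out_len = bucket_total_ * 3 + depth_total_ + 1) can be decoded as sketched below. Illustrative code written for this review, not from the deleted sources.

    #include <cstdint>
    #include <cstdio>

    // Walk the bucket records until the NULL terminator; the count slot
    // of a real bucket is always >= 1, so NULL unambiguously ends the array.
    void DumpTraces(void** data) {
      int i = 0;
      while (data[i] != nullptr) {
        long count = static_cast<long>(reinterpret_cast<intptr_t>(data[i++]));
        long size  = static_cast<long>(reinterpret_cast<intptr_t>(data[i++]));
        long depth = static_cast<long>(reinterpret_cast<intptr_t>(data[i++]));
        std::printf("%ld samples, %ld bytes, %ld frames:", count, size, depth);
        for (long d = 0; d < depth; d++) {
          std::printf(" %p", data[i++]);
        }
        std::printf("\n");
      }
    }
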
-
-#include "config.h"
-#include <stdlib.h>       // for getenv
-#include <string.h>       // for strcmp
-#include <stdio.h>        // for fprintf
-#include "gperftools/stacktrace.h"
-#include "base/commandlineflags.h"
-#include "base/googleinit.h"
-
-
-// we're using plain struct and not class to avoid any possible issues
-// during initialization. Struct of pointers is easy to init at
-// link-time.
-struct GetStackImplementation {
-  int (*GetStackFramesPtr)(void** result, int* sizes, int max_depth,
-                           int skip_count);
-
-  int (*GetStackFramesWithContextPtr)(void** result, int* sizes, int max_depth,
-                                      int skip_count, const void *uc);
-
-  int (*GetStackTracePtr)(void** result, int max_depth,
-                          int skip_count);
-
-  int (*GetStackTraceWithContextPtr)(void** result, int max_depth,
-                                     int skip_count, const void *uc);
-
-  const char *name;
-};
-
-#if HAVE_DECL_BACKTRACE
-#define STACKTRACE_INL_HEADER "stacktrace_generic-inl.h"
-#define GST_SUFFIX generic
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_generic
-#endif
-
-#ifdef HAVE_UNWIND_BACKTRACE
-#define STACKTRACE_INL_HEADER "stacktrace_libgcc-inl.h"
-#define GST_SUFFIX libgcc
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_libgcc
-#endif
-
-// libunwind uses __thread so we check for both libunwind.h and
-// __thread support
-#if defined(HAVE_LIBUNWIND_H) && defined(HAVE_TLS)
-#define STACKTRACE_INL_HEADER "stacktrace_libunwind-inl.h"
-#define GST_SUFFIX libunwind
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_libunwind
-#endif // HAVE_LIBUNWIND_H
-
-#if defined(__i386__) || defined(__x86_64__)
-#define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h"
-#define GST_SUFFIX x86
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_x86
-#endif // i386 || x86_64
-
-#if defined(__ppc__) || defined(__PPC__)
-#if defined(__linux__)
-#define STACKTRACE_INL_HEADER "stacktrace_powerpc-linux-inl.h"
-#else
-#define STACKTRACE_INL_HEADER "stacktrace_powerpc-darwin-inl.h"
-#endif
-#define GST_SUFFIX ppc
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_ppc
-#endif
-
-#if defined(__arm__)
-#define STACKTRACE_INL_HEADER "stacktrace_arm-inl.h"
-#define GST_SUFFIX arm
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_arm
-#endif
-
-#ifdef TCMALLOC_ENABLE_INSTRUMENT_STACKTRACE
-#define STACKTRACE_INL_HEADER "stacktrace_instrument-inl.h"
-#define GST_SUFFIX instrument
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_instrument
-#endif
-
-// The Windows case -- probably cygwin and mingw will use one of the
-// x86-includes above, but if not, we can fall back to windows intrinsics.
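
The selection machinery above boils down to a table of named function-pointer bundles with one bundle chosen as the default. A cut-down model, with stand-in backends invented for illustration (the real table and the TCMALLOC_STACKTRACE_METHOD override appear later in this file):

    #include <cstdio>
    #include <cstring>

    struct Impl {
      int (*GetStackTrace)(void** result, int max_depth, int skip_count);
      const char* name;
    };

    static int FramePointerUnwind(void**, int, int) { return 0; }  // stub
    static int GenericUnwind(void**, int, int) { return 0; }       // stub

    static Impl impl_fp      = { FramePointerUnwind, "frame-pointer" };
    static Impl impl_generic = { GenericUnwind, "generic" };
    static Impl* all[]       = { &impl_fp, &impl_generic, nullptr };
    static Impl* chosen      = &impl_fp;  // compile-time preference

    // Mirrors the env-var override: pick a backend by name if it exists.
    void ChooseByName(const char* want) {
      for (Impl** p = all; *p != nullptr; p++) {
        if (std::strcmp((*p)->name, want) == 0) { chosen = *p; return; }
      }
      std::fprintf(stderr, "unknown stacktrace method: %s\n", want);
    }
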
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) -#define STACKTRACE_INL_HEADER "stacktrace_win32-inl.h" -#define GST_SUFFIX win32 -#include "stacktrace_impl_setup-inl.h" -#undef GST_SUFFIX -#undef STACKTRACE_INL_HEADER -#define HAVE_GST_win32 -#endif - -static GetStackImplementation *all_impls[] = { -#ifdef HAVE_GST_libgcc - &impl__libgcc, -#endif -#ifdef HAVE_GST_generic - &impl__generic, -#endif -#ifdef HAVE_GST_libunwind - &impl__libunwind, -#endif -#ifdef HAVE_GST_x86 - &impl__x86, -#endif -#ifdef HAVE_GST_arm - &impl__arm, -#endif -#ifdef HAVE_GST_ppc - &impl__ppc, -#endif -#ifdef HAVE_GST_instrument - &impl__instrument, -#endif -#ifdef HAVE_GST_win32 - &impl__win32, -#endif - NULL -}; - -// ppc and i386 implementations prefer arch-specific asm implementations. -// arm's asm implementation is broken -#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || defined(__PPC__) -#if !defined(NO_FRAME_POINTER) -#define TCMALLOC_DONT_PREFER_LIBUNWIND -#endif -#endif - -static bool get_stack_impl_inited; - -#if defined(HAVE_GST_instrument) -static GetStackImplementation *get_stack_impl = &impl__instrument; -#elif defined(HAVE_GST_win32) -static GetStackImplementation *get_stack_impl = &impl__win32; -#elif defined(HAVE_GST_x86) && defined(TCMALLOC_DONT_PREFER_LIBUNWIND) -static GetStackImplementation *get_stack_impl = &impl__x86; -#elif defined(HAVE_GST_ppc) && defined(TCMALLOC_DONT_PREFER_LIBUNWIND) -static GetStackImplementation *get_stack_impl = &impl__ppc; -#elif defined(HAVE_GST_libunwind) -static GetStackImplementation *get_stack_impl = &impl__libunwind; -#elif defined(HAVE_GST_libgcc) -static GetStackImplementation *get_stack_impl = &impl__libgcc; -#elif defined(HAVE_GST_generic) -static GetStackImplementation *get_stack_impl = &impl__generic; -#elif defined(HAVE_GST_arm) -static GetStackImplementation *get_stack_impl = &impl__arm; -#elif 0 -// This is for the benefit of code analysis tools that may have -// trouble with the computed #include above. 
-# include "stacktrace_x86-inl.h" -# include "stacktrace_libunwind-inl.h" -# include "stacktrace_generic-inl.h" -# include "stacktrace_powerpc-inl.h" -# include "stacktrace_win32-inl.h" -# include "stacktrace_arm-inl.h" -# include "stacktrace_instrument-inl.h" -#else -#error Cannot calculate stack trace: will need to write for your environment -#endif - -static int ATTRIBUTE_NOINLINE frame_forcer(int rv) { - return rv; -} - -static void init_default_stack_impl_inner(void); - -namespace tcmalloc { - bool EnterStacktraceScope(void); - void LeaveStacktraceScope(void); -} - -namespace { - using tcmalloc::EnterStacktraceScope; - using tcmalloc::LeaveStacktraceScope; - - class StacktraceScope { - bool stacktrace_allowed; - public: - StacktraceScope() { - stacktrace_allowed = true; - stacktrace_allowed = EnterStacktraceScope(); - } - bool IsStacktraceAllowed() { - return stacktrace_allowed; - } - ~StacktraceScope() { - if (stacktrace_allowed) { - LeaveStacktraceScope(); - } - } - }; -} - -PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth, - int skip_count) { - StacktraceScope scope; - if (!scope.IsStacktraceAllowed()) { - return 0; - } - init_default_stack_impl_inner(); - return frame_forcer(get_stack_impl->GetStackFramesPtr(result, sizes, max_depth, skip_count)); -} - -PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth, - int skip_count, const void *uc) { - StacktraceScope scope; - if (!scope.IsStacktraceAllowed()) { - return 0; - } - init_default_stack_impl_inner(); - return frame_forcer(get_stack_impl->GetStackFramesWithContextPtr( - result, sizes, max_depth, - skip_count, uc)); -} - -PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth, - int skip_count) { - StacktraceScope scope; - if (!scope.IsStacktraceAllowed()) { - return 0; - } - init_default_stack_impl_inner(); - return frame_forcer(get_stack_impl->GetStackTracePtr(result, max_depth, skip_count)); -} - -PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth, - int skip_count, const void *uc) { - StacktraceScope scope; - if (!scope.IsStacktraceAllowed()) { - return 0; - } - init_default_stack_impl_inner(); - return frame_forcer(get_stack_impl->GetStackTraceWithContextPtr( - result, max_depth, skip_count, uc)); -} - -static void init_default_stack_impl_inner(void) { - if (get_stack_impl_inited) { - return; - } - get_stack_impl_inited = true; - char *val = getenv("TCMALLOC_STACKTRACE_METHOD"); - if (!val || !*val) { - return; - } - for (GetStackImplementation **p = all_impls; *p; p++) { - GetStackImplementation *c = *p; - if (strcmp(c->name, val) == 0) { - get_stack_impl = c; - return; - } - } - fprintf(stderr, "Unknown or unsupported stacktrace method requested: %s. 
Ignoring it\n", val); -} - -static void init_default_stack_impl(void) { - init_default_stack_impl_inner(); - if (EnvToBool("TCMALLOC_STACKTRACE_METHOD_VERBOSE", false)) { - fprintf(stderr, "Chosen stacktrace method is %s\nSupported methods:\n", get_stack_impl->name); - for (GetStackImplementation **p = all_impls; *p; p++) { - GetStackImplementation *c = *p; - fprintf(stderr, "* %s\n", c->name); - } - fputs("\n", stderr); - } -} - -REGISTER_MODULE_INITIALIZER(stacktrace_init_default_stack_impl, init_default_stack_impl()); diff --git a/contrib/libtcmalloc/src/stacktrace_arm-inl.h b/contrib/libtcmalloc/src/stacktrace_arm-inl.h deleted file mode 100644 index 1586b8fec62..00000000000 --- a/contrib/libtcmalloc/src/stacktrace_arm-inl.h +++ /dev/null @@ -1,148 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Doug Kwan -// This is inspired by Craig Silverstein's PowerPC stacktrace code. -// - -#ifndef BASE_STACKTRACE_ARM_INL_H_ -#define BASE_STACKTRACE_ARM_INL_H_ -// Note: this file is included into stacktrace.cc more than once. -// Anything that should only be defined once should be here: - -#include // for uintptr_t -#include "base/basictypes.h" // for NULL -#include - -// WARNING: -// This only works if all your code is in either ARM or THUMB mode. With -// interworking, the frame pointer of the caller can either be in r11 (ARM -// mode) or r7 (THUMB mode). A callee only saves the frame pointer of its -// mode in a fixed location on its stack frame. If the caller is a different -// mode, there is no easy way to find the frame pointer. It can either be -// still in the designated register or saved on stack along with other callee -// saved registers. - -// Given a pointer to a stack frame, locate and return the calling -// stackframe, or return NULL if no stackframe can be found. 
-// checks (the strictness of which is controlled by the boolean parameter
-// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
-template<bool STRICT_UNWINDING>
-static void **NextStackFrame(void **old_sp) {
-  void **new_sp = (void**) old_sp[-1];
-
-  // Check that the transition from frame pointer old_sp to frame
-  // pointer new_sp isn't clearly bogus
-  if (STRICT_UNWINDING) {
-    // With the stack growing downwards, older stack frame must be
-    // at a greater address than the current one.
-    if (new_sp <= old_sp) return NULL;
-    // Assume stack frames larger than 100,000 bytes are bogus.
-    if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
-  } else {
-    // In the non-strict mode, allow discontiguous stack frames.
-    // (alternate-signal-stacks for example).
-    if (new_sp == old_sp) return NULL;
-    // And allow frames up to about 1MB.
-    if ((new_sp > old_sp)
-        && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return NULL;
-  }
-  if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
-  return new_sp;
-}
-
-// This ensures that GetStackTrace sets up the Link Register properly.
-#ifdef __GNUC__
-void StacktraceArmDummyFunction() __attribute__((noinline));
-void StacktraceArmDummyFunction() { __asm__ volatile(""); }
-#else
-# error StacktraceArmDummyFunction() needs to be ported to this platform.
-#endif
-#endif  // BASE_STACKTRACE_ARM_INL_H_
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
-
-// The following 4 functions are generated from the code below:
-//   GetStack{Trace,Frames}()
-//   GetStack{Trace,Frames}WithContext()
-//
-// These functions take the following args:
-//   void** result: the stack-trace, as an array
-//   int* sizes: the size of each stack frame, as an array
-//               (GetStackFrames* only)
-//   int max_depth: the size of the result (and sizes) array(s)
-//   int skip_count: how many stack pointers to skip before storing in result
-//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
-static int GET_STACK_TRACE_OR_FRAMES {
-#ifdef __GNUC__
-  void **sp = reinterpret_cast<void**>(__builtin_frame_address(0));
-#else
-# error reading stack pointer not yet supported on this platform.
-#endif
-
-  // On ARM, the return address is stored in the link register (r14).
-  // This is not saved on the stack frame of a leaf function.  To
-  // simplify code that reads return addresses, we call a dummy
-  // function so that the return address of this function is also
-  // stored in the stack frame.  This works at least for gcc.
-  StacktraceArmDummyFunction();
-
-  skip_count++;  // skip parent frame due to indirection in stacktrace.cc
-
-  int n = 0;
-  while (sp && n < max_depth) {
-    // The GetStackFrames routine is called when we are in some
-    // informational context (the failure signal handler for example).
-    // Use the non-strict unwinding rules to produce a stack trace
-    // that is as complete as possible (even if it contains a few bogus
-    // entries in some rare cases).
-    void **next_sp = NextStackFrame<false>(sp);
-
-    if (skip_count > 0) {
-      skip_count--;
-    } else {
-      result[n] = *sp;
-
-#if IS_STACK_FRAMES
-      if (next_sp > sp) {
-        sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
-      } else {
-        // A frame-size of 0 is used to indicate unknown frame size.
-        sizes[n] = 0;
-      }
-#endif
-      n++;
-    }
-    sp = next_sp;
-  }
-  return n;
-}
diff --git a/contrib/libtcmalloc/src/stacktrace_generic-inl.h b/contrib/libtcmalloc/src/stacktrace_generic-inl.h
deleted file mode 100644
index 7d7c22d9e45..00000000000
--- a/contrib/libtcmalloc/src/stacktrace_generic-inl.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// Portable implementation - just use glibc
-//
-// Note:  The glibc implementation may cause a call to malloc.
-// This can cause a deadlock in HeapProfiler.
-
-#ifndef BASE_STACKTRACE_GENERIC_INL_H_
-#define BASE_STACKTRACE_GENERIC_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
-
-#include <execinfo.h>
-#include <string.h>
-#include "gperftools/stacktrace.h"
-#endif  // BASE_STACKTRACE_GENERIC_INL_H_
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
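
For context, the generic implementation above is a thin wrapper over glibc's backtrace(3). Used directly it looks like the sketch below; note the same caveat as in the header comment — backtrace_symbols() calls malloc, which is exactly why tcmalloc cannot rely on this path while instrumenting malloc itself.

    #include <execinfo.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main() {
      void* frames[64];
      int n = backtrace(frames, 64);           // fill frames[] with PCs
      char** names = backtrace_symbols(frames, n);  // note: mallocs its result
      for (int i = 0; i < n; i++) {
        printf("%s\n", names != NULL ? names[i] : "(unknown)");
      }
      free(names);
      return 0;
    }
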
-
-// The following 4 functions are generated from the code below:
-//   GetStack{Trace,Frames}()
-//   GetStack{Trace,Frames}WithContext()
-//
-// These functions take the following args:
-//   void** result: the stack-trace, as an array
-//   int* sizes: the size of each stack frame, as an array
-//               (GetStackFrames* only)
-//   int max_depth: the size of the result (and sizes) array(s)
-//   int skip_count: how many stack pointers to skip before storing in result
-//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
-static int GET_STACK_TRACE_OR_FRAMES {
-  static const int kStackLength = 64;
-  void * stack[kStackLength];
-  int size;
-
-  size = backtrace(stack, kStackLength);
-  skip_count += 2;  // we want to skip the current frame and its parent as well
-  int result_count = size - skip_count;
-  if (result_count < 0)
-    result_count = 0;
-  if (result_count > max_depth)
-    result_count = max_depth;
-  for (int i = 0; i < result_count; i++)
-    result[i] = stack[i + skip_count];
-
-#if IS_STACK_FRAMES
-  // No implementation for finding out the stack frame sizes yet.
-  memset(sizes, 0, sizeof(*sizes) * result_count);
-#endif
-
-  return result_count;
-}
diff --git a/contrib/libtcmalloc/src/stacktrace_impl_setup-inl.h b/contrib/libtcmalloc/src/stacktrace_impl_setup-inl.h
deleted file mode 100644
index 698c5b38196..00000000000
--- a/contrib/libtcmalloc/src/stacktrace_impl_setup-inl.h
+++ /dev/null
@@ -1,94 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// NOTE: this is NOT to be #include-d normally. It's internal
-// implementation detail of stacktrace.cc
-//
-
-// Copyright (c) 2014, gperftools Contributors.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
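
What this header generates: instantiated with, say, GST_SUFFIX = x86, the token-pasting macros that follow below stamp out four suffixed entry points plus a descriptor. Roughly — a reconstruction for illustration, not literal preprocessor output:

    static int GetStackTrace_x86(void** result, int max_depth, int skip_count);
    static int GetStackFrames_x86(void** result, int* sizes, int max_depth,
                                  int skip_count);
    static int GetStackTraceWithContext_x86(void** result, int max_depth,
                                            int skip_count, const void* ucp);
    static int GetStackFramesWithContext_x86(void** result, int* sizes,
                                             int max_depth, int skip_count,
                                             const void* ucp);

    static GetStackImplementation impl__x86 = {
      GetStackFrames_x86,
      GetStackFramesWithContext_x86,
      GetStackTrace_x86,
      GetStackTraceWithContext_x86,
      "x86"  // SIS_STRINGIFY(GST_SUFFIX)
    };

This is how the impl__x86, impl__libunwind, etc. entries referenced by all_impls in stacktrace.cc come into existence.
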
- -// --- -// Author: Aliaksey Kandratsenka -// -// based on stacktrace.cc and stacktrace_config.h by Sanjay Ghemawat -// and Paul Pluzhnikov from Google Inc - -#define SIS_CONCAT2(a, b) a##b -#define SIS_CONCAT(a, b) SIS_CONCAT2(a,b) - -#define SIS_STRINGIFY(a) SIS_STRINGIFY2(a) -#define SIS_STRINGIFY2(a) #a - -#define IS_STACK_FRAMES 0 -#define IS_WITH_CONTEXT 0 -#define GET_STACK_TRACE_OR_FRAMES \ - SIS_CONCAT(GetStackTrace_, GST_SUFFIX)(void **result, int max_depth, int skip_count) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - -#define IS_STACK_FRAMES 1 -#define IS_WITH_CONTEXT 0 -#define GET_STACK_TRACE_OR_FRAMES \ - SIS_CONCAT(GetStackFrames_, GST_SUFFIX)(void **result, int *sizes, int max_depth, int skip_count) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - -#define IS_STACK_FRAMES 0 -#define IS_WITH_CONTEXT 1 -#define GET_STACK_TRACE_OR_FRAMES \ - SIS_CONCAT(GetStackTraceWithContext_, GST_SUFFIX)(void **result, int max_depth, \ - int skip_count, const void *ucp) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - -#define IS_STACK_FRAMES 1 -#define IS_WITH_CONTEXT 1 -#define GET_STACK_TRACE_OR_FRAMES \ - SIS_CONCAT(GetStackFramesWithContext_, GST_SUFFIX)(void **result, int *sizes, int max_depth, \ - int skip_count, const void *ucp) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - -static GetStackImplementation SIS_CONCAT(impl__,GST_SUFFIX) = { - SIS_CONCAT(GetStackFrames_, GST_SUFFIX), - SIS_CONCAT(GetStackFramesWithContext_, GST_SUFFIX), - SIS_CONCAT(GetStackTrace_, GST_SUFFIX), - SIS_CONCAT(GetStackTraceWithContext_, GST_SUFFIX), - SIS_STRINGIFY(GST_SUFFIX) -}; - -#undef SIS_CONCAT2 -#undef SIS_CONCAT diff --git a/contrib/libtcmalloc/src/stacktrace_instrument-inl.h b/contrib/libtcmalloc/src/stacktrace_instrument-inl.h deleted file mode 100644 index c631765c8a2..00000000000 --- a/contrib/libtcmalloc/src/stacktrace_instrument-inl.h +++ /dev/null @@ -1,155 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2013, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Jean Lee
-// based on gcc Code-Gen-Options "-finstrument-functions" listed in
-// http://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html .
-// Should run configure with CXXFLAGS="-finstrument-functions".
-
-// This file is a backtrace implementation for systems where:
-// * The glibc implementation of backtrace() may cause a call to malloc,
-//   and cause a deadlock in HeapProfiler.
-// * The libunwind implementation prints no backtrace.
-
-// The backtrace arrays are stored in the "thread_back_trace" variable.
-// Using thread-local storage instead might be better and would save memory.
-
-#ifndef BASE_STACKTRACE_INSTRUMENT_INL_H_
-#define BASE_STACKTRACE_INSTRUMENT_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
-
-#include <execinfo.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#include "gperftools/stacktrace.h"
-
-#define gettid() syscall(__NR_gettid)
-#ifndef __x86_64__
-#define MAX_THREAD (32768)
-#else
-#define MAX_THREAD (65536)
-#endif
-#define MAX_DEPTH (30)
-#define ATTRIBUTE_NOINSTRUMENT __attribute__ ((no_instrument_function))
-
-typedef struct {
-  int stack_depth;
-  void* frame[MAX_DEPTH];
-} BACK_TRACE;
-
-static BACK_TRACE thread_back_trace[MAX_THREAD];
-extern "C" {
-void __cyg_profile_func_enter(void *func_address,
-                              void *call_site) ATTRIBUTE_NOINSTRUMENT;
-void __cyg_profile_func_enter(void *func_address, void *call_site) {
-  (void)func_address;
-
-  BACK_TRACE* backtrace = thread_back_trace + gettid();
-  int stack_depth = backtrace->stack_depth;
-  backtrace->stack_depth = stack_depth + 1;
-  if ( stack_depth >= MAX_DEPTH ) {
-    return;
-  }
-  backtrace->frame[stack_depth] = call_site;
-}
-
-void __cyg_profile_func_exit(void *func_address,
-                             void *call_site) ATTRIBUTE_NOINSTRUMENT;
-void __cyg_profile_func_exit(void *func_address, void *call_site) {
-  (void)func_address;
-  (void)call_site;
-
-  BACK_TRACE* backtrace = thread_back_trace + gettid();
-  int stack_depth = backtrace->stack_depth;
-  backtrace->stack_depth = stack_depth - 1;
-  if ( stack_depth >= MAX_DEPTH ) {
-    return;
-  }
-  backtrace->frame[stack_depth] = 0;
-}
-}  // extern "C"
-
-static int cyg_backtrace(void **buffer, int size) {
-  BACK_TRACE* backtrace = thread_back_trace + gettid();
-  int stack_depth = backtrace->stack_depth;
-  if ( stack_depth >= MAX_DEPTH ) {
-    stack_depth = MAX_DEPTH;
-  }
-  int nSize = (size > stack_depth) ? stack_depth : size;
-  for (int i = 0; i < nSize; i++) {
-    buffer[i] = backtrace->frame[nSize - i - 1];
-  }
-
-  return nSize;
-}
-
-#endif  // BASE_STACKTRACE_INSTRUMENT_INL_H_
-
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
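
The __cyg_profile hooks above are the standard gcc -finstrument-functions callbacks. A minimal standalone demonstration, invented for this review (compile with g++ -finstrument-functions):

    #include <cstdio>

    extern "C" {
    // The hooks themselves must be excluded from instrumentation,
    // otherwise they would recurse into themselves.
    void __cyg_profile_func_enter(void*, void*)
        __attribute__((no_instrument_function));
    void __cyg_profile_func_exit(void*, void*)
        __attribute__((no_instrument_function));

    void __cyg_profile_func_enter(void* fn, void* call_site) {
      std::printf("enter %p (called from %p)\n", fn, call_site);
    }
    void __cyg_profile_func_exit(void* fn, void* call_site) {
      std::printf("exit  %p (called from %p)\n", fn, call_site);
    }
    }

    int add(int a, int b) { return a + b; }  // instrumented automatically

    int main() { return add(2, 3) == 5 ? 0 : 1; }
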
- -// The following 4 functions are generated from the code below: -// GetStack{Trace,Frames}() -// GetStack{Trace,Frames}WithContext() -// -// These functions take the following args: -// void** result: the stack-trace, as an array -// int* sizes: the size of each stack frame, as an array -// (GetStackFrames* only) -// int max_depth: the size of the result (and sizes) array(s) -// int skip_count: how many stack pointers to skip before storing in result -// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) -static int GET_STACK_TRACE_OR_FRAMES { - static const int kStackLength = 64; - void * stack[kStackLength]; - int size; - memset(stack, 0, sizeof(stack)); - - size = cyg_backtrace(stack, kStackLength); - skip_count += 2; // we want to skip the current and parent frame as well - int result_count = size - skip_count; - if (result_count < 0) - result_count = 0; - if (result_count > max_depth) - result_count = max_depth; - for (int i = 0; i < result_count; i++) - result[i] = stack[i + skip_count]; - -#if IS_STACK_FRAMES - // No implementation for finding out the stack frame sizes yet. - memset(sizes, 0, sizeof(*sizes) * result_count); -#endif - - return result_count; -} diff --git a/contrib/libtcmalloc/src/stacktrace_libgcc-inl.h b/contrib/libtcmalloc/src/stacktrace_libgcc-inl.h deleted file mode 100644 index ce9cf5196ad..00000000000 --- a/contrib/libtcmalloc/src/stacktrace_libgcc-inl.h +++ /dev/null @@ -1,111 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2016, gperftools Contributors -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// This file implements backtrace capturing via libgcc's -// _Unwind_Backtrace. This generally works almost always. It will fail -// sometimes when we're trying to capture backtrace from signal -// handler (i.e. in cpu profiler) while some C++ code is throwing -// exception. - -#ifndef BASE_STACKTRACE_LIBGCC_INL_H_ -#define BASE_STACKTRACE_LIBGCC_INL_H_ -// Note: this file is included into stacktrace.cc more than once. 
-// Anything that should only be defined once should be here:
-
-extern "C" {
-#include <assert.h>
-#include <string.h>   // for memset()
-}
-
-#include <unwind.h>
-
-#include "gperftools/stacktrace.h"
-
-struct libgcc_backtrace_data {
-  void **array;
-  int skip;
-  int pos;
-  int limit;
-};
-
-static _Unwind_Reason_Code libgcc_backtrace_helper(struct _Unwind_Context *ctx,
-                                                   void *_data) {
-  libgcc_backtrace_data *data =
-      reinterpret_cast<libgcc_backtrace_data *>(_data);
-
-  if (data->skip > 0) {
-    data->skip--;
-    return _URC_NO_REASON;
-  }
-
-  if (data->pos < data->limit) {
-    void *ip = reinterpret_cast<void *>(_Unwind_GetIP(ctx));
-    data->array[data->pos++] = ip;
-  }
-
-  return _URC_NO_REASON;
-}
-
-#endif  // BASE_STACKTRACE_LIBGCC_INL_H_
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
-
-// The following 4 functions are generated from the code below:
-//   GetStack{Trace,Frames}()
-//   GetStack{Trace,Frames}WithContext()
-//
-// These functions take the following args:
-//   void** result: the stack-trace, as an array
-//   int* sizes: the size of each stack frame, as an array
-//               (GetStackFrames* only)
-//   int max_depth: the size of the result (and sizes) array(s)
-//   int skip_count: how many stack pointers to skip before storing in result
-//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
-static int GET_STACK_TRACE_OR_FRAMES {
-  libgcc_backtrace_data data;
-  data.array = result;
-  // we're also skipping current and parent's frame
-  data.skip = skip_count + 2;
-  data.pos = 0;
-  data.limit = max_depth;
-
-  _Unwind_Backtrace(libgcc_backtrace_helper, &data);
-
-  if (data.pos > 1 && data.array[data.pos - 1] == NULL)
-    --data.pos;
-
-#if IS_STACK_FRAMES
-  // No implementation for finding out the stack frame sizes.
-  memset(sizes, 0, sizeof(*sizes) * data.pos);
-#endif
-
-  return data.pos;
-}
diff --git a/contrib/libtcmalloc/src/stacktrace_libunwind-inl.h b/contrib/libtcmalloc/src/stacktrace_libunwind-inl.h
deleted file mode 100644
index e8257af6c2c..00000000000
--- a/contrib/libtcmalloc/src/stacktrace_libunwind-inl.h
+++ /dev/null
@@ -1,152 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Arun Sharma -// -// Produce stack trace using libunwind - -#ifndef BASE_STACKTRACE_LIBINWIND_INL_H_ -#define BASE_STACKTRACE_LIBINWIND_INL_H_ -// Note: this file is included into stacktrace.cc more than once. -// Anything that should only be defined once should be here: - -// We only need local unwinder. -#define UNW_LOCAL_ONLY - -extern "C" { -#include -#include // for memset() -#include -} -#include "gperftools/stacktrace.h" - -#include "base/basictypes.h" -#include "base/logging.h" - -// Sometimes, we can try to get a stack trace from within a stack -// trace, because libunwind can call mmap (maybe indirectly via an -// internal mmap based memory allocator), and that mmap gets trapped -// and causes a stack-trace request. If were to try to honor that -// recursive request, we'd end up with infinite recursion or deadlock. -// Luckily, it's safe to ignore those subsequent traces. In such -// cases, we return 0 to indicate the situation. -static __thread int recursive ATTR_INITIAL_EXEC; - -#if defined(TCMALLOC_ENABLE_UNWIND_FROM_UCONTEXT) && (defined(__i386__) || defined(__x86_64__)) && defined(__GNU_LIBRARY__) -#define BASE_STACKTRACE_UNW_CONTEXT_IS_UCONTEXT 1 -#endif - -#endif // BASE_STACKTRACE_LIBINWIND_INL_H_ - -// Note: this part of the file is included several times. -// Do not put globals below. - -// The following 4 functions are generated from the code below: -// GetStack{Trace,Frames}() -// GetStack{Trace,Frames}WithContext() -// -// These functions take the following args: -// void** result: the stack-trace, as an array -// int* sizes: the size of each stack frame, as an array -// (GetStackFrames* only) -// int max_depth: the size of the result (and sizes) array(s) -// int skip_count: how many stack pointers to skip before storing in result -// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) -static int GET_STACK_TRACE_OR_FRAMES { - void *ip; - int n = 0; - unw_cursor_t cursor; - unw_context_t uc; -#if IS_STACK_FRAMES - unw_word_t sp = 0, next_sp = 0; -#endif - - if (recursive) { - return 0; - } - ++recursive; - -#if (IS_WITH_CONTEXT && defined(BASE_STACKTRACE_UNW_CONTEXT_IS_UCONTEXT)) - if (ucp) { - uc = *(static_cast(const_cast(ucp))); - /* this is a bit weird. profiler.cc calls us with signal's ucontext - * yet passing us 2 as skip_count and essentially assuming we won't - * use ucontext. 
*/ - /* In order to fix that I'm going to assume that if ucp is - * non-null we're asked to ignore skip_count in case we're - * able to use ucp */ - skip_count = 0; - } else { - unw_getcontext(&uc); - skip_count += 2; // Do not include current and parent frame - } -#else - unw_getcontext(&uc); - skip_count += 2; // Do not include current and parent frame -#endif - - /*int ret =*/ unw_init_local(&cursor, &uc); - //assert(ret >= 0); - - while (skip_count--) { - if (unw_step(&cursor) <= 0) { - goto out; - } -#if IS_STACK_FRAMES - if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) { - goto out; - } -#endif - } - - while (n < max_depth) { - if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) { - break; - } -#if IS_STACK_FRAMES - sizes[n] = 0; -#endif - result[n++] = ip; - if (unw_step(&cursor) <= 0) { - break; - } -#if IS_STACK_FRAMES - sp = next_sp; - if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp) , 0) { - break; - } - sizes[n - 1] = next_sp - sp; -#endif - } -out: - --recursive; - return n; -} diff --git a/contrib/libtcmalloc/src/stacktrace_x86-inl.h b/contrib/libtcmalloc/src/stacktrace_x86-inl.h deleted file mode 100644 index 46eb5d82d71..00000000000 --- a/contrib/libtcmalloc/src/stacktrace_x86-inl.h +++ /dev/null @@ -1,354 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat -// -// Produce stack trace - -#ifndef BASE_STACKTRACE_X86_INL_H_ -#define BASE_STACKTRACE_X86_INL_H_ -// Note: this file is included into stacktrace.cc more than once. -// Anything that should only be defined once should be here: - -#include "config.h" -#include // for NULL -#include -#if defined(HAVE_SYS_UCONTEXT_H) -#include -#elif defined(HAVE_UCONTEXT_H) -#include // for ucontext_t -#elif defined(HAVE_CYGWIN_SIGNAL_H) -// cygwin/signal.h has a buglet where it uses pthread_attr_t without -// #including itself. So we have to do it. 
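The libunwind implementation deleted above reduces to a short local-unwind loop. A minimal sketch of that loop, assuming only the public libunwind API (link with -lunwind; no signal-context or recursion handling):

#define UNW_LOCAL_ONLY
#include <libunwind.h>

static int CaptureBacktrace(void** result, int max_depth, int skip_count) {
  unw_context_t uc;
  unw_cursor_t cursor;
  int n = 0;
  unw_getcontext(&uc);                 // return value ignored, as in the original
  if (unw_init_local(&cursor, &uc) < 0)
    return 0;
  while (skip_count-- >= 0) {          // skip our own frame plus the callers asked for
    if (unw_step(&cursor) <= 0)
      return 0;
  }
  while (n < max_depth) {
    unw_word_t ip;
    if (unw_get_reg(&cursor, UNW_REG_IP, &ip) < 0)
      break;
    result[n++] = reinterpret_cast<void*>(ip);
    if (unw_step(&cursor) <= 0)
      break;
  }
  return n;
}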
-# ifdef HAVE_PTHREAD -# include -# endif -#include -typedef ucontext ucontext_t; -#endif -#ifdef HAVE_STDINT_H -#include // for uintptr_t -#endif -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_MMAP -#include // for msync -#include "base/vdso_support.h" -#endif - -#include "gperftools/stacktrace.h" - -#if defined(__linux__) && defined(__i386__) && defined(__ELF__) && defined(HAVE_MMAP) -// Count "push %reg" instructions in VDSO __kernel_vsyscall(), -// preceding "syscall" or "sysenter". -// If __kernel_vsyscall uses frame pointer, answer 0. -// -// kMaxBytes tells how many instruction bytes of __kernel_vsyscall -// to analyze before giving up. Up to kMaxBytes+1 bytes of -// instructions could be accessed. -// -// Here are known __kernel_vsyscall instruction sequences: -// -// SYSENTER (linux-2.6.26/arch/x86/vdso/vdso32/sysenter.S). -// Used on Intel. -// 0xffffe400 <__kernel_vsyscall+0>: push %ecx -// 0xffffe401 <__kernel_vsyscall+1>: push %edx -// 0xffffe402 <__kernel_vsyscall+2>: push %ebp -// 0xffffe403 <__kernel_vsyscall+3>: mov %esp,%ebp -// 0xffffe405 <__kernel_vsyscall+5>: sysenter -// -// SYSCALL (see linux-2.6.26/arch/x86/vdso/vdso32/syscall.S). -// Used on AMD. -// 0xffffe400 <__kernel_vsyscall+0>: push %ebp -// 0xffffe401 <__kernel_vsyscall+1>: mov %ecx,%ebp -// 0xffffe403 <__kernel_vsyscall+3>: syscall -// -// i386 (see linux-2.6.26/arch/x86/vdso/vdso32/int80.S) -// 0xffffe400 <__kernel_vsyscall+0>: int $0x80 -// 0xffffe401 <__kernel_vsyscall+1>: ret -// -static const int kMaxBytes = 10; - -// We use assert()s instead of DCHECK()s -- this is too low level -// for DCHECK(). - -static int CountPushInstructions(const unsigned char *const addr) { - int result = 0; - for (int i = 0; i < kMaxBytes; ++i) { - if (addr[i] == 0x89) { - // "mov reg,reg" - if (addr[i + 1] == 0xE5) { - // Found "mov %esp,%ebp". - return 0; - } - ++i; // Skip register encoding byte. - } else if (addr[i] == 0x0F && - (addr[i + 1] == 0x34 || addr[i + 1] == 0x05)) { - // Found "sysenter" or "syscall". - return result; - } else if ((addr[i] & 0xF0) == 0x50) { - // Found "push %reg". - ++result; - } else if (addr[i] == 0xCD && addr[i + 1] == 0x80) { - // Found "int $0x80" - assert(result == 0); - return 0; - } else { - // Unexpected instruction. - assert(0 == "unexpected instruction in __kernel_vsyscall"); - return 0; - } - } - // Unexpected: didn't find SYSENTER or SYSCALL in - // [__kernel_vsyscall, __kernel_vsyscall + kMaxBytes) interval. - assert(0 == "did not find SYSENTER or SYSCALL in __kernel_vsyscall"); - return 0; -} -#endif - -// Given a pointer to a stack frame, locate and return the calling -// stackframe, or return NULL if no stackframe can be found. Perform sanity -// checks (the strictness of which is controlled by the boolean parameter -// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned. -template -static void **NextStackFrame(void **old_sp, const void *uc) { - void **new_sp = (void **) *old_sp; - -#if defined(__linux__) && defined(__i386__) && defined(HAVE_VDSO_SUPPORT) - if (WITH_CONTEXT && uc != NULL) { - // How many "push %reg" instructions are there at __kernel_vsyscall? - // This is constant for a given kernel and processor, so compute - // it only once. - static int num_push_instructions = -1; // Sentinel: not computed yet. - // Initialize with sentinel value: __kernel_rt_sigreturn can not possibly - // be there. 
- static const unsigned char *kernel_rt_sigreturn_address = NULL; - static const unsigned char *kernel_vsyscall_address = NULL; - if (num_push_instructions == -1) { - base::VDSOSupport vdso; - if (vdso.IsPresent()) { - base::VDSOSupport::SymbolInfo rt_sigreturn_symbol_info; - base::VDSOSupport::SymbolInfo vsyscall_symbol_info; - if (!vdso.LookupSymbol("__kernel_rt_sigreturn", "LINUX_2.5", - STT_FUNC, &rt_sigreturn_symbol_info) || - !vdso.LookupSymbol("__kernel_vsyscall", "LINUX_2.5", - STT_FUNC, &vsyscall_symbol_info) || - rt_sigreturn_symbol_info.address == NULL || - vsyscall_symbol_info.address == NULL) { - // Unexpected: 32-bit VDSO is present, yet one of the expected - // symbols is missing or NULL. - assert(0 == "VDSO is present, but doesn't have expected symbols"); - num_push_instructions = 0; - } else { - kernel_rt_sigreturn_address = - reinterpret_cast( - rt_sigreturn_symbol_info.address); - kernel_vsyscall_address = - reinterpret_cast( - vsyscall_symbol_info.address); - num_push_instructions = - CountPushInstructions(kernel_vsyscall_address); - } - } else { - num_push_instructions = 0; - } - } - if (num_push_instructions != 0 && kernel_rt_sigreturn_address != NULL && - old_sp[1] == kernel_rt_sigreturn_address) { - const ucontext_t *ucv = static_cast(uc); - // This kernel does not use frame pointer in its VDSO code, - // and so %ebp is not suitable for unwinding. - void **const reg_ebp = - reinterpret_cast(ucv->uc_mcontext.gregs[REG_EBP]); - const unsigned char *const reg_eip = - reinterpret_cast(ucv->uc_mcontext.gregs[REG_EIP]); - if (new_sp == reg_ebp && - kernel_vsyscall_address <= reg_eip && - reg_eip - kernel_vsyscall_address < kMaxBytes) { - // We "stepped up" to __kernel_vsyscall, but %ebp is not usable. - // Restore from 'ucv' instead. - void **const reg_esp = - reinterpret_cast(ucv->uc_mcontext.gregs[REG_ESP]); - // Check that alleged %esp is not NULL and is reasonably aligned. - if (reg_esp && - ((uintptr_t)reg_esp & (sizeof(reg_esp) - 1)) == 0) { - // Check that alleged %esp is actually readable. This is to prevent - // "double fault" in case we hit the first fault due to e.g. stack - // corruption. - // - // page_size is linker-initalized to avoid async-unsafe locking - // that GCC would otherwise insert (__cxa_guard_acquire etc). - static int page_size; - if (page_size == 0) { - // First time through. - page_size = getpagesize(); - } - void *const reg_esp_aligned = - reinterpret_cast( - (uintptr_t)(reg_esp + num_push_instructions - 1) & - ~(page_size - 1)); - if (msync(reg_esp_aligned, page_size, MS_ASYNC) == 0) { - // Alleged %esp is readable, use it for further unwinding. - new_sp = reinterpret_cast( - reg_esp[num_push_instructions - 1]); - } - } - } - } - } -#endif - - // Check that the transition from frame pointer old_sp to frame - // pointer new_sp isn't clearly bogus - if (STRICT_UNWINDING) { - // With the stack growing downwards, older stack frame must be - // at a greater address that the current one. - if (new_sp <= old_sp) return NULL; - // Assume stack frames larger than 100,000 bytes are bogus. - if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL; - } else { - // In the non-strict mode, allow discontiguous stack frames. - // (alternate-signal-stacks for example). - if (new_sp == old_sp) return NULL; - if (new_sp > old_sp) { - // And allow frames upto about 1MB. 
- const uintptr_t delta = (uintptr_t)new_sp - (uintptr_t)old_sp; - const uintptr_t acceptable_delta = 1000000; - if (delta > acceptable_delta) { - return NULL; - } - } - } - if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL; -#ifdef __i386__ - // On 64-bit machines, the stack pointer can be very close to - // 0xffffffff, so we explicitly check for a pointer into the - // last two pages in the address space - if ((uintptr_t)new_sp >= 0xffffe000) return NULL; -#endif -#ifdef HAVE_MMAP - if (!STRICT_UNWINDING) { - // Lax sanity checks cause a crash on AMD-based machines with - // VDSO-enabled kernels. - // Make an extra sanity check to insure new_sp is readable. - // Note: NextStackFrame() is only called while the program - // is already on its last leg, so it's ok to be slow here. - static int page_size = getpagesize(); - void *new_sp_aligned = (void *)((uintptr_t)new_sp & ~(page_size - 1)); - if (msync(new_sp_aligned, page_size, MS_ASYNC) == -1) - return NULL; - } -#endif - return new_sp; -} - -#endif // BASE_STACKTRACE_X86_INL_H_ - -// Note: this part of the file is included several times. -// Do not put globals below. - -// The following 4 functions are generated from the code below: -// GetStack{Trace,Frames}() -// GetStack{Trace,Frames}WithContext() -// -// These functions take the following args: -// void** result: the stack-trace, as an array -// int* sizes: the size of each stack frame, as an array -// (GetStackFrames* only) -// int max_depth: the size of the result (and sizes) array(s) -// int skip_count: how many stack pointers to skip before storing in result -// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) - -static int GET_STACK_TRACE_OR_FRAMES { - void **sp; -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ - // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. - // It's always correct on llvm, and the techniques below aren't (in - // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), - // so we also prefer __builtin_frame_address when running under llvm. - sp = reinterpret_cast(__builtin_frame_address(0)); -#elif defined(__i386__) - // Stack frame format: - // sp[0] pointer to previous frame - // sp[1] caller address - // sp[2] first argument - // ... - // NOTE: This will break under llvm, since result is a copy and not in sp[2] - sp = (void **)&result - 2; -#elif defined(__x86_64__) - unsigned long rbp; - // Move the value of the register %rbp into the local variable rbp. - // We need 'volatile' to prevent this instruction from getting moved - // around during optimization to before function prologue is done. - // An alternative way to achieve this - // would be (before this __asm__ instruction) to call Noop() defined as - // static void Noop() __attribute__ ((noinline)); // prevent inlining - // static void Noop() { asm(""); } // prevent optimizing-away - __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); - // Arguments are passed in registers on x86-64, so we can't just - // offset from &result - sp = (void **) rbp; -#else -# error Using stacktrace_x86-inl.h on a non x86 architecture! -#endif - - skip_count++; // skip parent's frame due to indirection in stacktrace.cc - - int n = 0; - while (sp && n < max_depth) { - if (*(sp+1) == reinterpret_cast(0)) { - // In 64-bit code, we often see a frame that - // points to itself and has a return address of 0. 
-      break;
-    }
-#if !IS_WITH_CONTEXT
-    const void *const ucp = NULL;
-#endif
-    void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp);
-    if (skip_count > 0) {
-      skip_count--;
-    } else {
-      result[n] = *(sp+1);
-#if IS_STACK_FRAMES
-      if (next_sp > sp) {
-        sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
-      } else {
-        // A frame-size of 0 is used to indicate unknown frame size.
-        sizes[n] = 0;
-      }
-#endif
-      n++;
-    }
-    sp = next_sp;
-  }
-  return n;
-}
diff --git a/contrib/libtcmalloc/src/static_vars.cc b/contrib/libtcmalloc/src/static_vars.cc
deleted file mode 100644
index 1e29d339996..00000000000
--- a/contrib/libtcmalloc/src/static_vars.cc
+++ /dev/null
@@ -1,125 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Ken Ashcraft
-
-#include "config.h"
-#include "static_vars.h"
-#include <stddef.h>                     // for NULL
-#include <new>                          // for operator new
-#ifdef HAVE_PTHREAD
-#include <pthread.h>                    // for pthread_atfork
-#endif
-#include "internal_logging.h"  // for CHECK_CONDITION
-#include "common.h"
-#include "sampler.h"           // for Sampler
-#include "getenv_safe.h"       // TCMallocGetenvSafe
-#include "base/googleinit.h"
-#include "maybe_threads.h"
-
-namespace tcmalloc {
-
-#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
-// The following two functions are registered via pthread_atfork to make
-// sure the central_cache locks remain in a consistent state in the forked
-// version of the thread.
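The fork handlers defined just below follow the classic pthread_atfork discipline: take every allocator lock in the prepare hook so the child never inherits a lock that was held mid-operation. A minimal sketch of that pattern with a single stand-in lock:

#include <pthread.h>
#include <mutex>

static std::mutex alloc_lock;   // stands in for the pageheap/central-cache locks

static void LockAll()   { alloc_lock.lock(); }
static void UnlockAll() { alloc_lock.unlock(); }

void SetupForkHandler() {
  pthread_atfork(LockAll,      // prepare: runs in the parent before fork()
                 UnlockAll,    // parent:  runs in the parent after fork()
                 UnlockAll);   // child:   runs in the child after fork()
}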
- -void CentralCacheLockAll() -{ - Static::pageheap_lock()->Lock(); - for (int i = 0; i < kNumClasses; ++i) - Static::central_cache()[i].Lock(); -} - -void CentralCacheUnlockAll() -{ - for (int i = 0; i < kNumClasses; ++i) - Static::central_cache()[i].Unlock(); - Static::pageheap_lock()->Unlock(); -} -#endif - -SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED); -SizeMap Static::sizemap_; -CentralFreeListPadded Static::central_cache_[kNumClasses]; -PageHeapAllocator Static::span_allocator_; -PageHeapAllocator Static::stacktrace_allocator_; -Span Static::sampled_objects_; -PageHeapAllocator Static::bucket_allocator_; -StackTrace* Static::growth_stacks_ = NULL; -PageHeap* Static::pageheap_ = NULL; - - -void Static::InitStaticVars() { - sizemap_.Init(); - span_allocator_.Init(); - span_allocator_.New(); // Reduce cache conflicts - span_allocator_.New(); // Reduce cache conflicts - stacktrace_allocator_.Init(); - bucket_allocator_.Init(); - // Do a bit of sanitizing: make sure central_cache is aligned properly - CHECK_CONDITION((sizeof(central_cache_[0]) % 64) == 0); - for (int i = 0; i < kNumClasses; ++i) { - central_cache_[i].Init(i); - } - - // It's important to have PageHeap allocated, not in static storage, - // so that HeapLeakChecker does not consider all the byte patterns stored - // in is caches as pointers that are sources of heap object liveness, - // which leads to it missing some memory leaks. - pageheap_ = new (MetaDataAlloc(sizeof(PageHeap))) PageHeap; - - bool aggressive_decommit = - tcmalloc::commandlineflags::StringToBool( - TCMallocGetenvSafe("TCMALLOC_AGGRESSIVE_DECOMMIT"), true); - - pageheap_->SetAggressiveDecommit(aggressive_decommit); - - DLL_Init(&sampled_objects_); - Sampler::InitStatics(); -} - - -#if defined(HAVE_FORK) && defined(HAVE_PTHREAD) && !defined(__APPLE__) - -static inline -void SetupAtForkLocksHandler() -{ - perftools_pthread_atfork( - CentralCacheLockAll, // parent calls before fork - CentralCacheUnlockAll, // parent calls after fork - CentralCacheUnlockAll); // child calls after fork -} -REGISTER_MODULE_INITIALIZER(tcmalloc_fork_handler, SetupAtForkLocksHandler()); - -#endif - -} // namespace tcmalloc diff --git a/contrib/libtcmalloc/src/static_vars.h b/contrib/libtcmalloc/src/static_vars.h deleted file mode 100644 index d6dfa334ab2..00000000000 --- a/contrib/libtcmalloc/src/static_vars.h +++ /dev/null @@ -1,115 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Ken Ashcraft -// -// Static variables shared by multiple classes. - -#ifndef TCMALLOC_STATIC_VARS_H_ -#define TCMALLOC_STATIC_VARS_H_ - -#include "config.h" -#include "base/spinlock.h" -#include "central_freelist.h" -#include "common.h" -#include "page_heap.h" -#include "page_heap_allocator.h" -#include "span.h" -#include "stack_trace_table.h" - -namespace tcmalloc { - -class Static { - public: - // Linker initialized, so this lock can be accessed at any time. - static SpinLock* pageheap_lock() { return &pageheap_lock_; } - - // Must be called before calling any of the accessors below. - static void InitStaticVars(); - - // Central cache -- an array of free-lists, one per size-class. - // We have a separate lock per free-list to reduce contention. - static CentralFreeListPadded* central_cache() { return central_cache_; } - - static SizeMap* sizemap() { return &sizemap_; } - - ////////////////////////////////////////////////////////////////////// - // In addition to the explicit initialization comment, the variables below - // must be protected by pageheap_lock. - - // Page-level allocator. - static PageHeap* pageheap() { return pageheap_; } - - static PageHeapAllocator* span_allocator() { return &span_allocator_; } - - static PageHeapAllocator* stacktrace_allocator() { - return &stacktrace_allocator_; - } - - static StackTrace* growth_stacks() { return growth_stacks_; } - static void set_growth_stacks(StackTrace* s) { growth_stacks_ = s; } - - // State kept for sampled allocations (/pprof/heap support) - static Span* sampled_objects() { return &sampled_objects_; } - static PageHeapAllocator* bucket_allocator() { - return &bucket_allocator_; - } - - // Check if InitStaticVars() has been run. - static bool IsInited() { return pageheap() != NULL; } - - private: - static SpinLock pageheap_lock_; - - // These static variables require explicit initialization. We cannot - // count on their constructors to do any initialization because other - // static variables may try to allocate memory before these variables - // can run their constructors. - - static SizeMap sizemap_; - static CentralFreeListPadded central_cache_[kNumClasses]; - static PageHeapAllocator span_allocator_; - static PageHeapAllocator stacktrace_allocator_; - static Span sampled_objects_; - static PageHeapAllocator bucket_allocator_; - - // Linked list of stack traces recorded every time we allocated memory - // from the system. Useful for finding allocation sites that cause - // increase in the footprint of the system. The linked list pointer - // is stored in trace->stack[kMaxStackDepth-1]. 
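The Static class above leans on two ideas: every member is either linker-initialized or set up by an explicit InitStaticVars() call, so nothing depends on C++ static-constructor ordering. A toy sketch of the same pattern (hypothetical names, not tcmalloc code):

#include <new>

struct PageHeapStub { int pages = 0; };   // stand-in for the real page heap

class GlobalState {
 public:
  // Must be called once, explicitly, before any accessor below.
  static void Init() { heap_ = new (storage_) PageHeapStub; }
  static bool IsInited() { return heap_ != nullptr; }
  static PageHeapStub* heap() { return heap_; }

 private:
  // Zero-initialized at link time: safe to inspect before Init() runs and
  // immune to static-initialization-order problems.
  static PageHeapStub* heap_;
  alignas(PageHeapStub) static char storage_[sizeof(PageHeapStub)];
};

PageHeapStub* GlobalState::heap_ = nullptr;
alignas(PageHeapStub) char GlobalState::storage_[sizeof(PageHeapStub)];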
- static StackTrace* growth_stacks_; - - static PageHeap* pageheap_; -}; - -} // namespace tcmalloc - -#endif // TCMALLOC_STATIC_VARS_H_ diff --git a/contrib/libtcmalloc/src/symbolize.cc b/contrib/libtcmalloc/src/symbolize.cc deleted file mode 100644 index a27106e8bce..00000000000 --- a/contrib/libtcmalloc/src/symbolize.cc +++ /dev/null @@ -1,285 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2009, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// This forks out to pprof to do the actual symbolizing. We might -// be better off writing our own in C++. - -#include "config.h" -#include "symbolize.h" -#include -#ifdef HAVE_UNISTD_H -#include // for write() -#endif -#ifdef HAVE_SYS_SOCKET_H -#include // for socketpair() -- needed by Symbolize -#endif -#ifdef HAVE_SYS_WAIT_H -#include // for wait() -- needed by Symbolize -#endif -#ifdef HAVE_POLL_H -#include -#endif -#ifdef __MACH__ -#include // for GetProgramInvocationName() -#include // for PATH_MAX -#endif -#if defined(__CYGWIN__) || defined(__CYGWIN32__) -#include // for get_osfhandle() -#endif -#include -#include "base/commandlineflags.h" -#include "base/logging.h" -#include "base/sysinfo.h" - -using std::string; -using tcmalloc::DumpProcSelfMaps; // from sysinfo.h - - -DEFINE_string(symbolize_pprof, - EnvToString("PPROF_PATH", "pprof"), - "Path to pprof to call for reporting function names."); - -// heap_profile_table_pprof may be referenced after destructors are -// called (since that's when leak-checking is done), so we make -// a more-permanent copy that won't ever get destroyed. -static string* g_pprof_path = new string(FLAGS_symbolize_pprof); - -// Returns NULL if we're on an OS where we can't get the invocation name. -// Using a static var is ok because we're not called from a thread. 
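SymbolTable::Symbolize() below does its work by forking pprof and speaking to it over a socket pair. The core idea — hand raw addresses to an external tool and read names back — can be shown far more simply with popen(3) and addr2line (a simplified stand-in, not tcmalloc's protocol):

#include <cstdio>
#include <cinttypes>
#include <string>

// Resolve one address in the current executable. Assumes a Linux system
// with addr2line on PATH and /proc/self/exe readable.
static std::string SymbolizeOne(uintptr_t addr) {
  char cmd[256];
  std::snprintf(cmd, sizeof(cmd),
                "addr2line -f -e /proc/self/exe 0x%" PRIxPTR, addr);
  FILE* p = popen(cmd, "r");
  if (!p) return "";
  char name[512] = {0};
  if (!std::fgets(name, sizeof(name), p)) name[0] = '\0';
  pclose(p);
  return name;   // first output line is the function name
}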
-static const char* GetProgramInvocationName() { -#if defined(HAVE_PROGRAM_INVOCATION_NAME) -#ifdef __UCLIBC__ - extern const char* program_invocation_name; // uclibc provides this -#else - extern char* program_invocation_name; // gcc provides this -#endif - return program_invocation_name; -#elif defined(__MACH__) - // We don't want to allocate memory for this since we may be - // calculating it when memory is corrupted. - static char program_invocation_name[PATH_MAX]; - if (program_invocation_name[0] == '\0') { // first time calculating - uint32_t length = sizeof(program_invocation_name); - if (_NSGetExecutablePath(program_invocation_name, &length)) - return NULL; - } - return program_invocation_name; -#else - return NULL; // figure out a way to get argv[0] -#endif -} - -// Prints an error message when you can't run Symbolize(). -static void PrintError(const char* reason) { - RAW_LOG(ERROR, - "*** WARNING: Cannot convert addresses to symbols in output below.\n" - "*** Reason: %s\n" - "*** If you cannot fix this, try running pprof directly.\n", - reason); -} - -void SymbolTable::Add(const void* addr) { - symbolization_table_[addr] = ""; -} - -const char* SymbolTable::GetSymbol(const void* addr) { - return symbolization_table_[addr]; -} - -// Updates symbolization_table with the pointers to symbol names corresponding -// to its keys. The symbol names are stored in out, which is allocated and -// freed by the caller of this routine. -// Note that the forking/etc is not thread-safe or re-entrant. That's -// ok for the purpose we need -- reporting leaks detected by heap-checker -// -- but be careful if you decide to use this routine for other purposes. -// Returns number of symbols read on error. If can't symbolize, returns 0 -// and emits an error message about why. -int SymbolTable::Symbolize() { -#if !defined(HAVE_UNISTD_H) || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H) - PrintError("Perftools does not know how to call a sub-process on this O/S"); - return 0; -#else - const char* argv0 = GetProgramInvocationName(); - if (argv0 == NULL) { // can't call symbolize if we can't figure out our name - PrintError("Cannot figure out the name of this executable (argv0)"); - return 0; - } - if (access(g_pprof_path->c_str(), R_OK) != 0) { - PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)"); - return 0; - } - - // All this work is to do two-way communication. ugh. - int *child_in = NULL; // file descriptors - int *child_out = NULL; // for now, we don't worry about child_err - int child_fds[5][2]; // socketpair may be called up to five times below - - // The client program may close its stdin and/or stdout and/or stderr - // thus allowing socketpair to reuse file descriptors 0, 1 or 2. - // In this case the communication between the forked processes may be broken - // if either the parent or the child tries to close or duplicate these - // descriptors. The loop below produces two pairs of file descriptors, each - // greater than 2 (stderr). 
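Stripped of the error handling, the two-way plumbing described above is the standard socketpair/fork/dup2 recipe; a self-contained sketch that echoes data through a child process:

#include <sys/socket.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstdio>

int main() {
  int fds[2];
  if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == -1) return 1;
  pid_t pid = fork();
  if (pid == 0) {                  // child: wire fds[0] to stdin and stdout
    close(fds[1]);
    dup2(fds[0], 0);
    dup2(fds[0], 1);
    execlp("cat", "cat", (char*)NULL);   // echo everything back
    _exit(127);
  }
  close(fds[0]);                   // parent: talk over fds[1]
  write(fds[1], "ping\n", 5);
  char buf[16] = {0};
  read(fds[1], buf, sizeof(buf) - 1);
  std::printf("child echoed: %s", buf);
  close(fds[1]);
  waitpid(pid, NULL, 0);
  return 0;
}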
- for (int i = 0; i < 5; i++) { - if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) { - for (int j = 0; j < i; j++) { - close(child_fds[j][0]); - close(child_fds[j][1]); - PrintError("Cannot create a socket pair"); - } - return 0; - } else { - if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) { - if (child_in == NULL) { - child_in = child_fds[i]; - } else { - child_out = child_fds[i]; - for (int j = 0; j < i; j++) { - if (child_fds[j] == child_in) continue; - close(child_fds[j][0]); - close(child_fds[j][1]); - } - break; - } - } - } - } - - switch (fork()) { - case -1: { // error - close(child_in[0]); - close(child_in[1]); - close(child_out[0]); - close(child_out[1]); - PrintError("Unknown error calling fork()"); - return 0; - } - case 0: { // child - close(child_in[1]); // child uses the 0's, parent uses the 1's - close(child_out[1]); // child uses the 0's, parent uses the 1's - close(0); - close(1); - if (dup2(child_in[0], 0) == -1) _exit(1); - if (dup2(child_out[0], 1) == -1) _exit(2); - // Unset vars that might cause trouble when we fork - unsetenv("CPUPROFILE"); - unsetenv("HEAPPROFILE"); - unsetenv("HEAPCHECK"); - unsetenv("PERFTOOLS_VERBOSE"); - execlp(g_pprof_path->c_str(), g_pprof_path->c_str(), - "--symbols", argv0, NULL); - _exit(3); // if execvp fails, it's bad news for us - } - default: { // parent - close(child_in[0]); // child uses the 0's, parent uses the 1's - close(child_out[0]); // child uses the 0's, parent uses the 1's -#ifdef HAVE_POLL_H - // Waiting for 1ms seems to give the OS time to notice any errors. - poll(0, 0, 1); - // For maximum safety, we check to make sure the execlp - // succeeded before trying to write. (Otherwise we'll get a - // SIGPIPE.) For systems without poll.h, we'll just skip this - // check, and trust that the user set PPROF_PATH correctly! - struct pollfd pfd = { child_in[1], POLLOUT, 0 }; - if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) || - (pfd.revents & (POLLHUP|POLLERR))) { - PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)"); - return 0; - } -#endif -#if defined(__CYGWIN__) || defined(__CYGWIN32__) - // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd. Convert. - const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]); - DumpProcSelfMaps(symbols_handle); -#else - DumpProcSelfMaps(child_in[1]); // what pprof expects on stdin -#endif - - // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each - // address to feed to pprof. 
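The 24-byte estimate is easy to verify: each line sent to pprof is "0x", at most 2*sizeof(void*) hex digits, and a newline, i.e. 11 characters on a 32-bit target and 19 on a 64-bit one, so 24 leaves slack either way:

#include <cinttypes>
#include <cstdio>

int main() {
  char line[24];
  uintptr_t addr = ~static_cast<uintptr_t>(0);   // worst-case address
  int len = std::snprintf(line, sizeof(line), "0x%" PRIxPTR "\n", addr);
  std::printf("worst-case line length: %d\n", len);   // prints 11 (32-bit) or 19 (64-bit)
  return 0;
}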
- const int kOutBufSize = 24 * symbolization_table_.size(); - char *pprof_buffer = new char[kOutBufSize]; - int written = 0; - for (SymbolMap::const_iterator iter = symbolization_table_.begin(); - iter != symbolization_table_.end(); ++iter) { - written += snprintf(pprof_buffer + written, kOutBufSize - written, - // pprof expects format to be 0xXXXXXX - "0x%" PRIxPTR "\n", reinterpret_cast(iter->first)); - } - write(child_in[1], pprof_buffer, strlen(pprof_buffer)); - close(child_in[1]); // that's all we need to write - - const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size(); - int total_bytes_read = 0; - delete[] symbol_buffer_; - symbol_buffer_ = new char[kSymbolBufferSize]; - memset(symbol_buffer_, '\0', kSymbolBufferSize); - while (1) { - int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read, - kSymbolBufferSize - total_bytes_read); - if (bytes_read < 0) { - close(child_out[1]); - PrintError("Cannot read data from pprof"); - return 0; - } else if (bytes_read == 0) { - close(child_out[1]); - wait(NULL); - break; - } else { - total_bytes_read += bytes_read; - } - } - // We have successfully read the output of pprof into out. Make sure - // the last symbol is full (we can tell because it ends with a \n). - if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n') - return 0; - // make the symbolization_table_ values point to the output vector - SymbolMap::iterator fill = symbolization_table_.begin(); - int num_symbols = 0; - const char *current_name = symbol_buffer_; - for (int i = 0; i < total_bytes_read; i++) { - if (symbol_buffer_[i] == '\n') { - fill->second = current_name; - symbol_buffer_[i] = '\0'; - current_name = symbol_buffer_ + i + 1; - fill++; - num_symbols++; - } - } - return num_symbols; - } - } - PrintError("Unkown error (should never occur!)"); - return 0; // shouldn't be reachable -#endif -} diff --git a/contrib/libtcmalloc/src/symbolize.h b/contrib/libtcmalloc/src/symbolize.h deleted file mode 100644 index 728d073308a..00000000000 --- a/contrib/libtcmalloc/src/symbolize.h +++ /dev/null @@ -1,84 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2009, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein - -#ifndef TCMALLOC_SYMBOLIZE_H_ -#define TCMALLOC_SYMBOLIZE_H_ - -#include "config.h" -#ifdef HAVE_STDINT_H -#include // for uintptr_t -#endif -#include // for NULL -#include - -using std::map; - -// SymbolTable encapsulates the address operations necessary for stack trace -// symbolization. A common use-case is to Add() the addresses from one or -// several stack traces to a table, call Symbolize() once and use GetSymbol() -// to get the symbol names for pretty-printing the stack traces. -class SymbolTable { - public: - SymbolTable() - : symbol_buffer_(NULL) {} - ~SymbolTable() { - delete[] symbol_buffer_; - } - - // Adds an address to the table. This may overwrite a currently known symbol - // name, so Add() should not generally be called after Symbolize(). - void Add(const void* addr); - - // Returns the symbol name for addr, if the given address was added before - // the last successful call to Symbolize(). Otherwise may return an empty - // c-string. - const char* GetSymbol(const void* addr); - - // Obtains the symbol names for the addresses stored in the table and returns - // the number of addresses actually symbolized. - int Symbolize(); - - private: - typedef map SymbolMap; - - // An average size of memory allocated for a stack trace symbol. - static const int kSymbolSize = 1024; - - // Map from addresses to symbol names. - SymbolMap symbolization_table_; - - // Pointer to the buffer that stores the symbol names. - char *symbol_buffer_; -}; - -#endif // TCMALLOC_SYMBOLIZE_H_ diff --git a/contrib/libtcmalloc/src/system-alloc.cc b/contrib/libtcmalloc/src/system-alloc.cc deleted file mode 100644 index 1356513c1ce..00000000000 --- a/contrib/libtcmalloc/src/system-alloc.cc +++ /dev/null @@ -1,567 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat - -#include "config.h" -#include // for EAGAIN, errno -#include // for open, O_RDWR -#include // for size_t, NULL, ptrdiff_t -#if defined HAVE_STDINT_H -#include // for uintptr_t, intptr_t -#elif defined HAVE_INTTYPES_H -#include -#else -#include -#endif -#ifdef HAVE_MMAP -#include // for munmap, mmap, MADV_DONTNEED, etc -#endif -#ifdef HAVE_UNISTD_H -#include // for sbrk, getpagesize, off_t -#endif -#include // for operator new -#include -#include "base/basictypes.h" -#include "base/commandlineflags.h" -#include "base/spinlock.h" // for SpinLockHolder, SpinLock, etc -#include "common.h" -#include "internal_logging.h" - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-function" - -// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old -// form of the name instead. -#ifndef MAP_ANONYMOUS -# define MAP_ANONYMOUS MAP_ANON -#endif - -// Linux added support for MADV_FREE in 4.5 but we aren't ready to use it -// yet. Among other things, using compile-time detection leads to poor -// results when compiling on a system with MADV_FREE and running on a -// system without it. See https://github.com/gperftools/gperftools/issues/780. -#if defined(__linux__) && defined(MADV_FREE) && !defined(TCMALLOC_USE_MADV_FREE) -# undef MADV_FREE -#endif - -// MADV_FREE is specifically designed for use by malloc(), but only -// FreeBSD supports it; in linux we fall back to the somewhat inferior -// MADV_DONTNEED. -#if !defined(MADV_FREE) && defined(MADV_DONTNEED) -# define MADV_FREE MADV_DONTNEED -#endif - -// Solaris has a bug where it doesn't declare madvise() for C++. -// http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0 -#if defined(__sun) && defined(__SVR4) -# include // for caddr_t - extern "C" { extern int madvise(caddr_t, size_t, int); } -#endif - -// Set kDebugMode mode so that we can have use C++ conditionals -// instead of preprocessor conditionals. -#ifdef NDEBUG -static const bool kDebugMode = false; -#else -static const bool kDebugMode = true; -#endif - -// TODO(sanjay): Move the code below into the tcmalloc namespace -using tcmalloc::kLog; -using tcmalloc::Log; - -// Anonymous namespace to avoid name conflicts on "CheckAddressBits". -namespace { - -// Check that no bit is set at position ADDRESS_BITS or higher. -template bool CheckAddressBits(uintptr_t ptr) { - return (ptr >> ADDRESS_BITS) == 0; -} - -// Specialize for the bit width of a pointer to avoid undefined shift. -template <> bool CheckAddressBits<8 * sizeof(void*)>(uintptr_t ptr) { - return true; -} - -} // Anonymous namespace to avoid name conflicts on "CheckAddressBits". 
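The odd-looking specialization above exists because shifting an integer by its full bit-width is undefined behavior in C++, so "ptr >> 64" cannot appear even in a branch that is never taken when ADDRESS_BITS equals the pointer width. The trick in isolation:

#include <cstdint>
#include <cstdio>

template <int ADDRESS_BITS>
bool CheckAddressBitsSketch(uintptr_t ptr) {
  return (ptr >> ADDRESS_BITS) == 0;   // safe: ADDRESS_BITS < pointer width
}

// The full-width shift would be UB, so that case is specialized away.
template <>
bool CheckAddressBitsSketch<8 * sizeof(void*)>(uintptr_t) {
  return true;
}

int main() {
  std::printf("%d\n", CheckAddressBitsSketch<48>(uintptr_t{1} << 40));            // 1
  std::printf("%d\n", CheckAddressBitsSketch<8 * sizeof(void*)>(~uintptr_t{0}));  // 1
  return 0;
}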
- -COMPILE_ASSERT(kAddressBits <= 8 * sizeof(void*), - address_bits_larger_than_pointer_size); - -static SpinLock spinlock(SpinLock::LINKER_INITIALIZED); - -#if defined(HAVE_MMAP) || defined(MADV_FREE) -// Page size is initialized on demand (only needed for mmap-based allocators) -static size_t pagesize = 0; -#endif - -// The current system allocator -SysAllocator* sys_alloc = NULL; - -// Number of bytes taken from system. -size_t TCMalloc_SystemTaken = 0; - -// Configuration parameters. -DEFINE_int32(malloc_devmem_start, - EnvToInt("TCMALLOC_DEVMEM_START", 0), - "Physical memory starting location in MB for /dev/mem allocation." - " Setting this to 0 disables /dev/mem allocation"); -DEFINE_int32(malloc_devmem_limit, - EnvToInt("TCMALLOC_DEVMEM_LIMIT", 0), - "Physical memory limit location in MB for /dev/mem allocation." - " Setting this to 0 means no limit."); -DEFINE_bool(malloc_skip_sbrk, - EnvToBool("TCMALLOC_SKIP_SBRK", false), - "Whether sbrk can be used to obtain memory."); -DEFINE_bool(malloc_skip_mmap, - EnvToBool("TCMALLOC_SKIP_MMAP", false), - "Whether mmap can be used to obtain memory."); -DEFINE_bool(malloc_disable_memory_release, - EnvToBool("TCMALLOC_DISABLE_MEMORY_RELEASE", false), - "Whether MADV_FREE/MADV_DONTNEED should be used" - " to return unused memory to the system."); - -// static allocators -class SbrkSysAllocator : public SysAllocator { -public: - SbrkSysAllocator() : SysAllocator() { - } - void* Alloc(size_t size, size_t *actual_size, size_t alignment); -}; -static union { - char buf[sizeof(SbrkSysAllocator)]; - void *ptr; -} sbrk_space; - -class MmapSysAllocator : public SysAllocator { -public: - MmapSysAllocator() : SysAllocator() { - } - void* Alloc(size_t size, size_t *actual_size, size_t alignment); -}; -static union { - char buf[sizeof(MmapSysAllocator)]; - void *ptr; -} mmap_space; - -class DevMemSysAllocator : public SysAllocator { -public: - DevMemSysAllocator() : SysAllocator() { - } - void* Alloc(size_t size, size_t *actual_size, size_t alignment); -}; - -class DefaultSysAllocator : public SysAllocator { - public: - DefaultSysAllocator() : SysAllocator() { - for (int i = 0; i < kMaxAllocators; i++) { - failed_[i] = true; - allocs_[i] = NULL; - names_[i] = NULL; - } - } - void SetChildAllocator(SysAllocator* alloc, unsigned int index, - const char* name) { - if (index < kMaxAllocators && alloc != NULL) { - allocs_[index] = alloc; - failed_[index] = false; - names_[index] = name; - } - } - void* Alloc(size_t size, size_t *actual_size, size_t alignment); - - private: - static const int kMaxAllocators = 2; - bool failed_[kMaxAllocators]; - SysAllocator* allocs_[kMaxAllocators]; - const char* names_[kMaxAllocators]; -}; -static union { - char buf[sizeof(DefaultSysAllocator)]; - void *ptr; -} default_space; -static const char sbrk_name[] = "SbrkSysAllocator"; -static const char mmap_name[] = "MmapSysAllocator"; - - -void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, - size_t alignment) { -#if !defined(HAVE_SBRK) || defined(__UCLIBC__) - return NULL; -#else - // Check if we should use sbrk allocation. - // FLAGS_malloc_skip_sbrk starts out as false (its uninitialized - // state) and eventually gets initialized to the specified value. Note - // that this code runs for a while before the flags are initialized. - // That means that even if this flag is set to true, some (initial) - // memory will be allocated with sbrk before the flag takes effect. 
- if (FLAGS_malloc_skip_sbrk) { - return NULL; - } - - // sbrk will release memory if passed a negative number, so we do - // a strict check here - if (static_cast(size + alignment) < 0) return NULL; - - // This doesn't overflow because TCMalloc_SystemAlloc has already - // tested for overflow at the alignment boundary. - size = ((size + alignment - 1) / alignment) * alignment; - - // "actual_size" indicates that the bytes from the returned pointer - // p up to and including (p + actual_size - 1) have been allocated. - if (actual_size) { - *actual_size = size; - } - - // Check that we we're not asking for so much more memory that we'd - // wrap around the end of the virtual address space. (This seems - // like something sbrk() should check for us, and indeed opensolaris - // does, but glibc does not: - // http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/lib/libc/port/sys/sbrk.c?a=true - // http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/libc/misc/sbrk.c?rev=1.1.2.1&content-type=text/plain&cvsroot=glibc - // Without this check, sbrk may succeed when it ought to fail.) - if (reinterpret_cast(sbrk(0)) + size < size) { - return NULL; - } - - void* result = sbrk(size); - if (result == reinterpret_cast(-1)) { - return NULL; - } - - // Is it aligned? - uintptr_t ptr = reinterpret_cast(result); - if ((ptr & (alignment-1)) == 0) return result; - - // Try to get more memory for alignment - size_t extra = alignment - (ptr & (alignment-1)); - void* r2 = sbrk(extra); - if (reinterpret_cast(r2) == (ptr + size)) { - // Contiguous with previous result - return reinterpret_cast(ptr + extra); - } - - // Give up and ask for "size + alignment - 1" bytes so - // that we can find an aligned region within it. - result = sbrk(size + alignment - 1); - if (result == reinterpret_cast(-1)) { - return NULL; - } - ptr = reinterpret_cast(result); - if ((ptr & (alignment-1)) != 0) { - ptr += alignment - (ptr & (alignment-1)); - } - return reinterpret_cast(ptr); -#endif // HAVE_SBRK -} - -void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, - size_t alignment) { -#ifndef HAVE_MMAP - return NULL; -#else - // Check if we should use mmap allocation. - // FLAGS_malloc_skip_mmap starts out as false (its uninitialized - // state) and eventually gets initialized to the specified value. Note - // that this code runs for a while before the flags are initialized. - // Chances are we never get here before the flags are initialized since - // sbrk is used until the heap is exhausted (before mmap is used). - if (FLAGS_malloc_skip_mmap) { - return NULL; - } - - // Enforce page alignment - if (pagesize == 0) pagesize = getpagesize(); - if (alignment < pagesize) alignment = pagesize; - size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; - if (aligned_size < size) { - return NULL; - } - size = aligned_size; - - // "actual_size" indicates that the bytes from the returned pointer - // p up to and including (p + actual_size - 1) have been allocated. 
-  if (actual_size) {
-    *actual_size = size;
-  }
-
-  // Ask for extra memory if alignment > pagesize
-  size_t extra = 0;
-  if (alignment > pagesize) {
-    extra = alignment - pagesize;
-  }
-
-  // Note: size + extra does not overflow since:
-  //            size + alignment < (1<<NBITS).
-  // and        extra <= alignment
-  // therefore  size + extra < (1<<NBITS)
-  void* result = mmap(NULL, size + extra,
-                      PROT_READ|PROT_WRITE,
-                      MAP_PRIVATE|MAP_ANONYMOUS,
-                      -1, 0);
-  if (result == reinterpret_cast<void*>(MAP_FAILED)) {
-    return NULL;
-  }
-
-  // Adjust the return memory so it is aligned
-  uintptr_t ptr = reinterpret_cast<uintptr_t>(result);
-  size_t adjust = 0;
-  if ((ptr & (alignment - 1)) != 0) {
-    adjust = alignment - (ptr & (alignment - 1));
-  }
-
-  // Return the unused memory to the system
-  if (adjust > 0) {
-    munmap(reinterpret_cast<void*>(ptr), adjust);
-  }
-  if (adjust < extra) {
-    munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust);
-  }
-
-  ptr += adjust;
-  return reinterpret_cast<void*>(ptr);
-#endif  // HAVE_MMAP
-}
-
-void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size,
-                                size_t alignment) {
-#ifndef HAVE_MMAP
-  return NULL;
-#else
-  static bool initialized = false;
-  static off_t physmem_base;   // next physical memory address to allocate
-  static off_t physmem_limit;  // maximum physical address allowed
-  static int physmem_fd;       // file descriptor for /dev/mem
-
-  // Check if we should use /dev/mem allocation.  Note that it may take
-  // a while to get this flag initialized, so meanwhile we fall back to
-  // the next allocator.  (It looks like 7MB gets allocated before
-  // this flag gets initialized -khr.)
-  if (FLAGS_malloc_devmem_start == 0) {
-    // NOTE: not a devmem_failure - we'd like TCMalloc_SystemAlloc to
-    // try us again next time.
-    return NULL;
-  }
-
-  if (!initialized) {
-    physmem_fd = open("/dev/mem", O_RDWR);
-    if (physmem_fd < 0) {
-      return NULL;
-    }
-    physmem_base = FLAGS_malloc_devmem_start*1024LL*1024LL;
-    physmem_limit = FLAGS_malloc_devmem_limit*1024LL*1024LL;
-    initialized = true;
-  }
-
-  // Enforce page alignment
-  if (pagesize == 0) pagesize = getpagesize();
-  if (alignment < pagesize) alignment = pagesize;
-  size_t aligned_size = ((size + alignment - 1) / alignment) * alignment;
-  if (aligned_size < size) {
-    return NULL;
-  }
-  size = aligned_size;
-
-  // "actual_size" indicates that the bytes from the returned pointer
-  // p up to and including (p + actual_size - 1) have been allocated.
-  if (actual_size) {
-    *actual_size = size;
-  }
-
-  // Ask for extra memory if alignment > pagesize
-  size_t extra = 0;
-  if (alignment > pagesize) {
-    extra = alignment - pagesize;
-  }
-
-  // check to see if we have any memory left
-  if (physmem_limit != 0 &&
-      ((size + extra) > (physmem_limit - physmem_base))) {
-    return NULL;
-  }
-
-  // Note: size + extra does not overflow since:
-  //            size + alignment < (1<<NBITS).
-  // and        extra <= alignment
-  // therefore  size + extra < (1<<NBITS)
-  void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ,
-                      MAP_SHARED, physmem_fd, physmem_base);
-  if (result == reinterpret_cast<void*>(MAP_FAILED)) {
-    return NULL;
-  }
-  uintptr_t ptr = reinterpret_cast<uintptr_t>(result);
-
-  // Adjust the return memory so it is aligned
-  size_t adjust = 0;
-  if ((ptr & (alignment - 1)) != 0) {
-    adjust = alignment - (ptr & (alignment - 1));
-  }
-
-  // Return the unused virtual memory to the system
-  if (adjust > 0) {
-    munmap(reinterpret_cast<void*>(ptr), adjust);
-  }
-  if (adjust < extra) {
-    munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust);
-  }
-
-  ptr += adjust;
-  physmem_base += adjust + size;
-
-  return reinterpret_cast<void*>(ptr);
-#endif  // HAVE_MMAP
-}
-
-void* DefaultSysAllocator::Alloc(size_t size, size_t *actual_size,
-                                 size_t alignment) {
-  for (int i = 0; i < kMaxAllocators; i++) {
-    if (!failed_[i] && allocs_[i] != NULL) {
-      void* result = allocs_[i]->Alloc(size, actual_size, alignment);
-      if (result != NULL) {
-        return result;
-      }
-      failed_[i] = true;
-    }
-  }
-  // After both failed, reset "failed_" to false so that a single failed
-  // allocation won't make the allocator never work again.
-  for (int i = 0; i < kMaxAllocators; i++) {
-    failed_[i] = false;
-  }
-  return NULL;
-}
-
-ATTRIBUTE_WEAK ATTRIBUTE_NOINLINE
-SysAllocator *tc_get_sysalloc_override(SysAllocator *def)
-{
-  return def;
-}
-
-static bool system_alloc_inited = false;
-void InitSystemAllocators(void) {
-  MmapSysAllocator *mmap = new (mmap_space.buf) MmapSysAllocator();
-  SbrkSysAllocator *sbrk = new (sbrk_space.buf) SbrkSysAllocator();
-
-  // In 64-bit debug mode, place the mmap allocator first since it
-  // allocates pointers that do not fit in 32 bits and therefore gives
-  // us better testing of code's 64-bit correctness.  It also leads to
-  // less false negatives in heap-checking code.  (Numbers are less
-  // likely to look like pointers and therefore the conservative gc in
-  // the heap-checker is less likely to misinterpret a number as a
-  // pointer).
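MmapSysAllocator and DevMemSysAllocator share one technique: over-allocate by alignment - pagesize, then munmap the misaligned head and the unused tail. A self-contained sketch of that trim (assumes size is a multiple of the page size):

#include <sys/mman.h>
#include <unistd.h>
#include <cstdint>
#include <cstdio>

static void* AlignedMmap(size_t size, size_t alignment) {
  size_t pagesize = static_cast<size_t>(getpagesize());
  size_t extra = (alignment > pagesize) ? alignment - pagesize : 0;
  void* raw = mmap(NULL, size + extra, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (raw == MAP_FAILED) return NULL;
  uintptr_t ptr = reinterpret_cast<uintptr_t>(raw);
  size_t adjust = (ptr & (alignment - 1)) ? alignment - (ptr & (alignment - 1)) : 0;
  if (adjust > 0)
    munmap(raw, adjust);                                                   // misaligned head
  if (adjust < extra)
    munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust);  // unused tail
  return reinterpret_cast<void*>(ptr + adjust);
}

int main() {
  void* p = AlignedMmap(1 << 20, 1 << 16);   // 1 MiB aligned to 64 KiB
  std::printf("%p\n", p);
  return 0;
}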
- DefaultSysAllocator *sdef = new (default_space.buf) DefaultSysAllocator(); - if (kDebugMode && sizeof(void*) > 4) { - sdef->SetChildAllocator(mmap, 0, mmap_name); - sdef->SetChildAllocator(sbrk, 1, sbrk_name); - } else { - sdef->SetChildAllocator(sbrk, 0, sbrk_name); - sdef->SetChildAllocator(mmap, 1, mmap_name); - } - - sys_alloc = tc_get_sysalloc_override(sdef); -} - -void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, - size_t alignment) { - // Discard requests that overflow - if (size + alignment < size) return NULL; - - SpinLockHolder lock_holder(&spinlock); - - if (!system_alloc_inited) { - InitSystemAllocators(); - system_alloc_inited = true; - } - - // Enforce minimum alignment - if (alignment < sizeof(MemoryAligner)) alignment = sizeof(MemoryAligner); - - size_t actual_size_storage; - if (actual_size == NULL) { - actual_size = &actual_size_storage; - } - - void* result = sys_alloc->Alloc(size, actual_size, alignment); - if (result != NULL) { - CHECK_CONDITION( - CheckAddressBits( - reinterpret_cast(result) + *actual_size - 1)); - TCMalloc_SystemTaken += *actual_size; - } - return result; -} - -bool TCMalloc_SystemRelease(void* start, size_t length) { -#ifdef MADV_FREE - if (FLAGS_malloc_devmem_start) { - // It's not safe to use MADV_FREE/MADV_DONTNEED if we've been - // mapping /dev/mem for heap memory. - return false; - } - if (FLAGS_malloc_disable_memory_release) return false; - if (pagesize == 0) pagesize = getpagesize(); - const size_t pagemask = pagesize - 1; - - size_t new_start = reinterpret_cast(start); - size_t end = new_start + length; - size_t new_end = end; - - // Round up the starting address and round down the ending address - // to be page aligned: - new_start = (new_start + pagesize - 1) & ~pagemask; - new_end = new_end & ~pagemask; - - ASSERT((new_start & pagemask) == 0); - ASSERT((new_end & pagemask) == 0); - ASSERT(new_start >= reinterpret_cast(start)); - ASSERT(new_end <= end); - - if (new_end > new_start) { - int result; - do { - result = madvise(reinterpret_cast(new_start), - new_end - new_start, MADV_FREE); - } while (result == -1 && errno == EAGAIN); - - return result != -1; - } -#endif - return false; -} - -void TCMalloc_SystemCommit(void* start, size_t length) { - // Nothing to do here. TCMalloc_SystemRelease does not alter pages - // such that they need to be re-committed before they can be used by the - // application. -} - -#pragma GCC diagnostic pop diff --git a/contrib/libtcmalloc/src/system-alloc.h b/contrib/libtcmalloc/src/system-alloc.h deleted file mode 100644 index 2c06c183d63..00000000000 --- a/contrib/libtcmalloc/src/system-alloc.h +++ /dev/null @@ -1,92 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat -// -// Routine that uses sbrk/mmap to allocate memory from the system. -// Useful for implementing malloc. - -#ifndef TCMALLOC_SYSTEM_ALLOC_H_ -#define TCMALLOC_SYSTEM_ALLOC_H_ - -#include "config.h" -#include // for size_t - -class SysAllocator; - -// REQUIRES: "alignment" is a power of two or "0" to indicate default alignment -// -// Allocate and return "N" bytes of zeroed memory. -// -// If actual_bytes is NULL then the returned memory is exactly the -// requested size. If actual bytes is non-NULL then the allocator -// may optionally return more bytes than asked for (i.e. return an -// entire "huge" page if a huge page allocator is in use). -// -// The returned pointer is a multiple of "alignment" if non-zero. The -// returned pointer will always be aligned suitably for holding a -// void*, double, or size_t. In addition, if this platform defines -// CACHELINE_ALIGNED, the return pointer will always be cacheline -// aligned. -// -// Returns NULL when out of memory. -extern PERFTOOLS_DLL_DECL -void* TCMalloc_SystemAlloc(size_t bytes, size_t *actual_bytes, - size_t alignment = 0); - -// This call is a hint to the operating system that the pages -// contained in the specified range of memory will not be used for a -// while, and can be released for use by other processes or the OS. -// Pages which are released in this way may be destroyed (zeroed) by -// the OS. The benefit of this function is that it frees memory for -// use by the system, the cost is that the pages are faulted back into -// the address space next time they are touched, which can impact -// performance. (Only pages fully covered by the memory region will -// be released, partial pages will not.) -// -// Returns false if release failed or not supported. -extern PERFTOOLS_DLL_DECL -bool TCMalloc_SystemRelease(void* start, size_t length); - -// Called to ressurect memory which has been previously released -// to the system via TCMalloc_SystemRelease. An attempt to -// commit a page that is already committed does not cause this -// function to fail. -extern PERFTOOLS_DLL_DECL -void TCMalloc_SystemCommit(void* start, size_t length); - -// The current system allocator. -extern PERFTOOLS_DLL_DECL SysAllocator* sys_alloc; - -// Number of bytes taken from system. -extern PERFTOOLS_DLL_DECL size_t TCMalloc_SystemTaken; - -#endif /* TCMALLOC_SYSTEM_ALLOC_H_ */ diff --git a/contrib/libtcmalloc/src/tcmalloc.cc b/contrib/libtcmalloc/src/tcmalloc.cc deleted file mode 100644 index b52524b1361..00000000000 --- a/contrib/libtcmalloc/src/tcmalloc.cc +++ /dev/null @@ -1,1842 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat -// -// A malloc that uses a per-thread cache to satisfy small malloc requests. -// (The time for malloc/free of a small object drops from 300 ns to 50 ns.) -// -// See doc/tcmalloc.html for a high-level -// description of how this malloc works. -// -// SYNCHRONIZATION -// 1. The thread-specific lists are accessed without acquiring any locks. -// This is safe because each such list is only accessed by one thread. -// 2. We have a lock per central free-list, and hold it while manipulating -// the central free list for a particular size. -// 3. The central page allocator is protected by "pageheap_lock". -// 4. The pagemap (which maps from page-number to descriptor), -// can be read without holding any locks, and written while holding -// the "pageheap_lock". -// 5. To improve performance, a subset of the information one can get -// from the pagemap is cached in a data structure, pagemap_cache_, -// that atomically reads and writes its entries. This cache can be -// read and written without locking. -// -// This multi-threaded access to the pagemap is safe for fairly -// subtle reasons. We basically assume that when an object X is -// allocated by thread A and deallocated by thread B, there must -// have been appropriate synchronization in the handoff of object -// X from thread A to thread B. The same logic applies to pagemap_cache_. -// -// THE PAGEID-TO-SIZECLASS CACHE -// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache -// returns 0 for a particular PageID then that means "no information," not that -// the sizeclass is 0. The cache may have stale information for pages that do -// not hold the beginning of any free()'able object. Staleness is eliminated -// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and -// do_memalign() for all other relevant pages. -// -// PAGEMAP -// ------- -// Page map contains a mapping from page id to Span. 
-//
-// If Span s occupies pages [p..q],
-// pagemap[p] == s
-// pagemap[q] == s
-// pagemap[p+1..q-1] are undefined
-// pagemap[p-1] and pagemap[q+1] are defined:
-// NULL if the corresponding page is not yet in the address space.
-// Otherwise it points to a Span. This span may be free
-// or allocated. If free, it is in one of pageheap's freelist.
-//
-// TODO: Bias reclamation to larger addresses
-// TODO: implement mallinfo/mallopt
-// TODO: Better testing
-//
-// 9/28/2003 (new page-level allocator replaces ptmalloc2):
-// * malloc/free of small objects goes from ~300 ns to ~50 ns.
-// * allocation of a reasonably complicated struct
-// goes from about 1100 ns to about 300 ns.

-#include "config.h"
-#include <gperftools/tcmalloc.h>
-
-#include <errno.h> // for ENOMEM, EINVAL, errno
-#if defined HAVE_STDINT_H
-#include <stdint.h>
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>
-#else
-#include <sys/types.h>
-#endif
-#include <stddef.h> // for size_t, NULL
-#include <stdlib.h> // for getenv
-#include <string.h> // for strcmp, memset, strlen, etc
-#ifdef HAVE_UNISTD_H
-#include <unistd.h> // for getpagesize, write, etc
-#endif
-#include <algorithm> // for max, min
-#include <limits> // for numeric_limits
-#include <new> // for nothrow_t (ptr only), etc
-#include <vector> // for vector
-
-#include <gperftools/malloc_extension.h>
-#include <gperftools/malloc_hook.h> // for MallocHook
-#include "base/basictypes.h" // for int64
-#include "base/commandlineflags.h" // for RegisterFlagValidator, etc
-#include "base/dynamic_annotations.h" // for RunningOnValgrind
-#include "base/spinlock.h" // for SpinLockHolder
-#include "central_freelist.h" // for CentralFreeListPadded
-#include "common.h" // for StackTrace, kPageShift, etc
-#include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc
-#include "linked_list.h" // for SLL_SetNext
-#include "malloc_hook-inl.h" // for MallocHook::InvokeNewHook, etc
-#include "page_heap.h" // for PageHeap, PageHeap::Stats
-#include "page_heap_allocator.h" // for PageHeapAllocator
-#include "span.h" // for Span, DLL_Prepend, etc
-#include "stack_trace_table.h" // for StackTraceTable
-#include "static_vars.h" // for Static
-#include "system-alloc.h" // for DumpSystemAllocatorStats, etc
-#include "tcmalloc_guard.h" // for TCMallocGuard
-#include "thread_cache.h" // for ThreadCache
-
-#ifdef __clang__
-// clang's apparent focus on code size somehow causes it to ignore
-// normal inline directives even for few functions which inlining is
-// key for performance. In order to get performance of clang's
-// generated code closer to normal, we're forcing inlining via
-// attribute.
-#define ALWAYS_INLINE inline __attribute__((always_inline))
-#else
-#define ALWAYS_INLINE inline
-#endif
-
-#include "maybe_emergency_malloc.h"
-
-#if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS)
-# define WIN32_DO_PATCHING 1
-#endif
-
-// Some windows file somewhere (at least on cygwin) #define's small (!)
-#undef small
-
-using STL_NAMESPACE::max;
-using STL_NAMESPACE::numeric_limits;
-using STL_NAMESPACE::vector;
-
-#include "libc_override.h"
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-function"
-
-using tcmalloc::AlignmentForSize;
-using tcmalloc::kLog;
-using tcmalloc::kCrash;
-using tcmalloc::kCrashWithStats;
-using tcmalloc::Log;
-using tcmalloc::PageHeap;
-using tcmalloc::PageHeapAllocator;
-using tcmalloc::SizeMap;
-using tcmalloc::Span;
-using tcmalloc::StackTrace;
-using tcmalloc::Static;
-using tcmalloc::ThreadCache;
-
-DECLARE_double(tcmalloc_release_rate);
-
-// For windows, the printf we use to report large allocs is
-// potentially dangerous: it could cause a malloc that would cause an
-// infinite loop. So by default we set the threshold to a huge number
-// on windows, so this bad situation will never trigger. You can
-// always set TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD manually if you
-// want this functionality.
-#ifdef _WIN32
-const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 62;
-#else
-const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 30;
-#endif
-DEFINE_int64(tcmalloc_large_alloc_report_threshold,
- EnvToInt64("TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD",
- kDefaultLargeAllocReportThreshold),
- "Allocations larger than this value cause a stack "
- "trace to be dumped to stderr. The threshold for "
- "dumping stack traces is increased by a factor of 1.125 "
- "every time we print a message so that the threshold "
- "automatically goes up by a factor of ~1000 every 60 "
- "messages. This bounds the amount of extra logging "
- "generated by this flag. Default value of this flag "
- "is very large and therefore you should see no extra "
- "logging unless the flag is overridden. Set to 0 to "
- "disable reporting entirely.");
-
-
-// We already declared these functions in tcmalloc.h, but we have to
-// declare them again to give them an ATTRIBUTE_SECTION: we want to
-// put all callers of MallocHook::Invoke* in this module into
-// ATTRIBUTE_SECTION(google_malloc) section, so that
-// MallocHook::GetCallerStackTrace can function accurately.
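// Aside: the section trick described above relies on a GNU-linker behavior:
// for a section whose name is a valid C identifier (google_malloc is), the
// linker synthesizes __start_<name>/__stop_<name> bounds symbols. A minimal
// sketch of how a stack-trace skipper can use them, assuming a GCC-compatible
// toolchain (the function name is invented for illustration):
extern "C" char __start_google_malloc[];
extern "C" char __stop_google_malloc[];

static inline bool InGoogleMallocSection(const void* pc) {
  // A return address inside [start, stop) belongs to an allocator frame and
  // should be skipped when attributing an allocation to its caller.
  return pc >= static_cast<const void*>(__start_google_malloc) &&
         pc < static_cast<const void*>(__stop_google_malloc);
}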
-#ifndef _WIN32 // windows doesn't have attribute_section, so don't bother -extern "C" { - void* tc_malloc(size_t size) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - void tc_free(void* ptr) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - void tc_cfree(void* ptr) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - - void* tc_memalign(size_t __alignment, size_t __size) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - int tc_posix_memalign(void** ptr, size_t align, size_t size) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - void* tc_valloc(size_t __size) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - void* tc_pvalloc(size_t __size) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - - void tc_malloc_stats(void) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - int tc_mallopt(int cmd, int value) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); -#ifdef HAVE_STRUCT_MALLINFO - struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); -#endif - - void* tc_new(size_t size) - ATTRIBUTE_SECTION(google_malloc); - void tc_delete(void* p) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - void* tc_newarray(size_t size) - ATTRIBUTE_SECTION(google_malloc); - void tc_deletearray(void* p) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - - // And the nothrow variants of these: - void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - // Surprisingly, standard C++ library implementations use a - // nothrow-delete internally. See, eg: - // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html - void tc_delete_nothrow(void* ptr, const std::nothrow_t&) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); - - // Some non-standard extensions that we support. - - // This is equivalent to - // OS X: malloc_size() - // glibc: malloc_usable_size() - // Windows: _msize() - size_t tc_malloc_size(void* p) PERFTOOLS_THROW - ATTRIBUTE_SECTION(google_malloc); -} // extern "C" -#endif // #ifndef _WIN32 - -// ----------------------- IMPLEMENTATION ------------------------------- - -static int tc_new_mode = 0; // See tc_set_new_mode(). - -// Routines such as free() and realloc() catch some erroneous pointers -// passed to them, and invoke the below when they do. (An erroneous pointer -// won't be caught if it's within a valid span or a stale span for which -// the pagemap cache has a non-zero sizeclass.) This is a cheap (source-editing -// required) kind of exception handling for these routines. 
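// Aside: the tc_* symbols declared above are plain C entry points. Once the
// libc_override machinery (included earlier) takes effect they are what
// malloc/free resolve to, but they can also be called directly; a small
// usage sketch, assuming <gperftools/tcmalloc.h> is on the include path:
#include <gperftools/tcmalloc.h>
#include <cstdio>

int main() {
  void* p = tc_malloc(64);
  // tc_malloc_size reports the usable (size-class rounded) size,
  // so this may print a value larger than 64.
  printf("usable size: %zu\n", tc_malloc_size(p));
  p = tc_realloc(p, 256);
  tc_free(p);
  return 0;
}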
-namespace { -void InvalidFree(void* ptr) { - if (tcmalloc::IsEmergencyPtr(ptr)) { - tcmalloc::EmergencyFree(ptr); - return; - } - Log(kCrash, __FILE__, __LINE__, "Attempt to free invalid pointer", ptr); -} - -size_t InvalidGetSizeForRealloc(const void* old_ptr) { - Log(kCrash, __FILE__, __LINE__, - "Attempt to realloc invalid pointer", old_ptr); - return 0; -} - -size_t InvalidGetAllocatedSize(const void* ptr) { - Log(kCrash, __FILE__, __LINE__, - "Attempt to get the size of an invalid pointer", ptr); - return 0; -} -} // unnamed namespace - -// Extract interesting stats -struct TCMallocStats { - uint64_t thread_bytes; // Bytes in thread caches - uint64_t central_bytes; // Bytes in central cache - uint64_t transfer_bytes; // Bytes in central transfer cache - uint64_t metadata_bytes; // Bytes alloced for metadata - PageHeap::Stats pageheap; // Stats from page heap -}; - -// Get stats into "r". Also, if class_count != NULL, class_count[k] -// will be set to the total number of objects of size class k in the -// central cache, transfer cache, and per-thread caches. If small_spans -// is non-NULL, it is filled. Same for large_spans. -static void ExtractStats(TCMallocStats* r, uint64_t* class_count, - PageHeap::SmallSpanStats* small_spans, - PageHeap::LargeSpanStats* large_spans) { - r->central_bytes = 0; - r->transfer_bytes = 0; - for (int cl = 0; cl < kNumClasses; ++cl) { - const int length = Static::central_cache()[cl].length(); - const int tc_length = Static::central_cache()[cl].tc_length(); - const size_t cache_overhead = Static::central_cache()[cl].OverheadBytes(); - const size_t size = static_cast( - Static::sizemap()->ByteSizeForClass(cl)); - r->central_bytes += (size * length) + cache_overhead; - r->transfer_bytes += (size * tc_length); - if (class_count) { - // Sum the lengths of all per-class freelists, except the per-thread - // freelists, which get counted when we call GetThreadStats(), below. 
- class_count[cl] = length + tc_length; - } - - } - - // Add stats from per-thread heaps - r->thread_bytes = 0; - { // scope - SpinLockHolder h(Static::pageheap_lock()); - ThreadCache::GetThreadStats(&r->thread_bytes, class_count); - r->metadata_bytes = tcmalloc::metadata_system_bytes(); - r->pageheap = Static::pageheap()->stats(); - if (small_spans != NULL) { - Static::pageheap()->GetSmallSpanStats(small_spans); - } - if (large_spans != NULL) { - Static::pageheap()->GetLargeSpanStats(large_spans); - } - } -} - -static double PagesToMiB(uint64_t pages) { - return (pages << kPageShift) / 1048576.0; -} - -// WRITE stats to "out" -static void DumpStats(TCMalloc_Printer* out, int level) { - TCMallocStats stats; - uint64_t class_count[kNumClasses]; - PageHeap::SmallSpanStats small; - PageHeap::LargeSpanStats large; - if (level >= 2) { - ExtractStats(&stats, class_count, &small, &large); - } else { - ExtractStats(&stats, NULL, NULL, NULL); - } - - static const double MiB = 1048576.0; - - const uint64_t virtual_memory_used = (stats.pageheap.system_bytes - + stats.metadata_bytes); - const uint64_t physical_memory_used = (virtual_memory_used - - stats.pageheap.unmapped_bytes); - const uint64_t bytes_in_use_by_app = (physical_memory_used - - stats.metadata_bytes - - stats.pageheap.free_bytes - - stats.central_bytes - - stats.transfer_bytes - - stats.thread_bytes); - -#ifdef TCMALLOC_SMALL_BUT_SLOW - out->printf( - "NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n"); -#endif - out->printf( - "------------------------------------------------\n" - "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n" - "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n" - "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n" - "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n" - "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n" - "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n" - "MALLOC: ------------\n" - "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n" - "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n" - "MALLOC: ------------\n" - "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n" - "MALLOC:\n" - "MALLOC: %12" PRIu64 " Spans in use\n" - "MALLOC: %12" PRIu64 " Thread heaps in use\n" - "MALLOC: %12" PRIu64 " Tcmalloc page size\n" - "------------------------------------------------\n" - "Call ReleaseFreeMemory() to release freelist memory to the OS" - " (via madvise()).\n" - "Bytes released to the OS take up virtual address space" - " but no physical memory.\n", - bytes_in_use_by_app, bytes_in_use_by_app / MiB, - stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB, - stats.central_bytes, stats.central_bytes / MiB, - stats.transfer_bytes, stats.transfer_bytes / MiB, - stats.thread_bytes, stats.thread_bytes / MiB, - stats.metadata_bytes, stats.metadata_bytes / MiB, - physical_memory_used, physical_memory_used / MiB, - stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB, - virtual_memory_used, virtual_memory_used / MiB, - uint64_t(Static::span_allocator()->inuse()), - uint64_t(ThreadCache::HeapsInUse()), - uint64_t(kPageSize)); - - if (level >= 2) { - out->printf("------------------------------------------------\n"); - out->printf("Total size of freelists for per-thread caches,\n"); - out->printf("transfer cache, and central cache, by size class\n"); - 
out->printf("------------------------------------------------\n"); - uint64_t cumulative = 0; - for (int cl = 0; cl < kNumClasses; ++cl) { - if (class_count[cl] > 0) { - uint64_t class_bytes = - class_count[cl] * Static::sizemap()->ByteSizeForClass(cl); - cumulative += class_bytes; - out->printf("class %3d [ %8" PRIuS " bytes ] : " - "%8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB\n", - cl, Static::sizemap()->ByteSizeForClass(cl), - class_count[cl], - class_bytes / MiB, - cumulative / MiB); - } - } - - // append page heap info - int nonempty_sizes = 0; - for (int s = 0; s < kMaxPages; s++) { - if (small.normal_length[s] + small.returned_length[s] > 0) { - nonempty_sizes++; - } - } - out->printf("------------------------------------------------\n"); - out->printf("PageHeap: %d sizes; %6.1f MiB free; %6.1f MiB unmapped\n", - nonempty_sizes, stats.pageheap.free_bytes / MiB, - stats.pageheap.unmapped_bytes / MiB); - out->printf("------------------------------------------------\n"); - uint64_t total_normal = 0; - uint64_t total_returned = 0; - for (int s = 0; s < kMaxPages; s++) { - const int n_length = small.normal_length[s]; - const int r_length = small.returned_length[s]; - if (n_length + r_length > 0) { - uint64_t n_pages = s * n_length; - uint64_t r_pages = s * r_length; - total_normal += n_pages; - total_returned += r_pages; - out->printf("%6u pages * %6u spans ~ %6.1f MiB; %6.1f MiB cum" - "; unmapped: %6.1f MiB; %6.1f MiB cum\n", - s, - (n_length + r_length), - PagesToMiB(n_pages + r_pages), - PagesToMiB(total_normal + total_returned), - PagesToMiB(r_pages), - PagesToMiB(total_returned)); - } - } - - total_normal += large.normal_pages; - total_returned += large.returned_pages; - out->printf(">255 large * %6u spans ~ %6.1f MiB; %6.1f MiB cum" - "; unmapped: %6.1f MiB; %6.1f MiB cum\n", - static_cast(large.spans), - PagesToMiB(large.normal_pages + large.returned_pages), - PagesToMiB(total_normal + total_returned), - PagesToMiB(large.returned_pages), - PagesToMiB(total_returned)); - } -} - -static void PrintStats(int level) { - const int kBufferSize = 16 << 10; - char* buffer = new char[kBufferSize]; - TCMalloc_Printer printer(buffer, kBufferSize); - DumpStats(&printer, level); - write(STDERR_FILENO, buffer, strlen(buffer)); - delete[] buffer; -} - -static void** DumpHeapGrowthStackTraces() { - // Count how much space we need - int needed_slots = 0; - { - SpinLockHolder h(Static::pageheap_lock()); - for (StackTrace* t = Static::growth_stacks(); - t != NULL; - t = reinterpret_cast( - t->stack[tcmalloc::kMaxStackDepth-1])) { - needed_slots += 3 + t->depth; - } - needed_slots += 100; // Slop in case list grows - needed_slots += needed_slots/8; // An extra 12.5% slop - } - - void** result = new void*[needed_slots]; - if (result == NULL) { - Log(kLog, __FILE__, __LINE__, - "tcmalloc: allocation failed for stack trace slots", - needed_slots * sizeof(*result)); - return NULL; - } - - SpinLockHolder h(Static::pageheap_lock()); - int used_slots = 0; - for (StackTrace* t = Static::growth_stacks(); - t != NULL; - t = reinterpret_cast( - t->stack[tcmalloc::kMaxStackDepth-1])) { - ASSERT(used_slots < needed_slots); // Need to leave room for terminator - if (used_slots + 3 + t->depth >= needed_slots) { - // No more room - break; - } - - result[used_slots+0] = reinterpret_cast(static_cast(1)); - result[used_slots+1] = reinterpret_cast(t->size); - result[used_slots+2] = reinterpret_cast(t->depth); - for (int d = 0; d < t->depth; d++) { - result[used_slots+3+d] = t->stack[d]; - } - used_slots += 3 + t->depth; 
- } - result[used_slots] = reinterpret_cast(static_cast(0)); - return result; -} - -static void IterateOverRanges(void* arg, MallocExtension::RangeFunction func) { - PageID page = 1; // Some code may assume that page==0 is never used - bool done = false; - while (!done) { - // Accumulate a small number of ranges in a local buffer - static const int kNumRanges = 16; - static base::MallocRange ranges[kNumRanges]; - int n = 0; - { - SpinLockHolder h(Static::pageheap_lock()); - while (n < kNumRanges) { - if (!Static::pageheap()->GetNextRange(page, &ranges[n])) { - done = true; - break; - } else { - uintptr_t limit = ranges[n].address + ranges[n].length; - page = (limit + kPageSize - 1) >> kPageShift; - n++; - } - } - } - - for (int i = 0; i < n; i++) { - (*func)(arg, &ranges[i]); - } - } -} - -// TCMalloc's support for extra malloc interfaces -class TCMallocImplementation : public MallocExtension { - private: - // ReleaseToSystem() might release more than the requested bytes because - // the page heap releases at the span granularity, and spans are of wildly - // different sizes. This member keeps track of the extra bytes bytes - // released so that the app can periodically call ReleaseToSystem() to - // release memory at a constant rate. - // NOTE: Protected by Static::pageheap_lock(). - size_t extra_bytes_released_; - - public: - TCMallocImplementation() - : extra_bytes_released_(0) { - } - - virtual void GetStats(char* buffer, int buffer_length) { - ASSERT(buffer_length > 0); - TCMalloc_Printer printer(buffer, buffer_length); - - // Print level one stats unless lots of space is available - if (buffer_length < 10000) { - DumpStats(&printer, 1); - } else { - DumpStats(&printer, 2); - } - } - - // We may print an extra, tcmalloc-specific warning message here. 
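// Aside: as the warning text below spells out, the sample is empty unless
// sampling was enabled (TCMALLOC_SAMPLE_PARAMETER > 0 in the environment).
// A caller-side sketch, assuming the public MallocExtension API, where
// MallocExtensionWriter is a typedef for std::string:
#include <gperftools/malloc_extension.h>
#include <string>

std::string CurrentHeapSample() {
  std::string profile;
  MallocExtension::instance()->GetHeapSample(&profile);
  return profile;  // pprof-compatible sample of live sampled allocations
}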
- virtual void GetHeapSample(MallocExtensionWriter* writer) { - if (FLAGS_tcmalloc_sample_parameter == 0) { - const char* const kWarningMsg = - "%warn\n" - "%warn This heap profile does not have any data in it, because\n" - "%warn the application was run with heap sampling turned off.\n" - "%warn To get useful data from GetHeapSample(), you must\n" - "%warn set the environment variable TCMALLOC_SAMPLE_PARAMETER to\n" - "%warn a positive sampling period, such as 524288.\n" - "%warn\n"; - writer->append(kWarningMsg, strlen(kWarningMsg)); - } - MallocExtension::GetHeapSample(writer); - } - - virtual void** ReadStackTraces(int* sample_period) { - tcmalloc::StackTraceTable table; - { - SpinLockHolder h(Static::pageheap_lock()); - Span* sampled = Static::sampled_objects(); - for (Span* s = sampled->next; s != sampled; s = s->next) { - table.AddTrace(*reinterpret_cast(s->objects)); - } - } - *sample_period = ThreadCache::GetCache()->GetSamplePeriod(); - return table.ReadStackTracesAndClear(); // grabs and releases pageheap_lock - } - - virtual void** ReadHeapGrowthStackTraces() { - return DumpHeapGrowthStackTraces(); - } - - virtual size_t GetThreadCacheSize() { - ThreadCache* tc = ThreadCache::GetCacheIfPresent(); - if (!tc) - return 0; - return tc->Size(); - } - - virtual void MarkThreadTemporarilyIdle() { - ThreadCache::BecomeTemporarilyIdle(); - } - - virtual void Ranges(void* arg, RangeFunction func) { - IterateOverRanges(arg, func); - } - - virtual bool GetNumericProperty(const char* name, size_t* value) { - ASSERT(name != NULL); - - if (strcmp(name, "generic.current_allocated_bytes") == 0) { - TCMallocStats stats; - ExtractStats(&stats, NULL, NULL, NULL); - *value = stats.pageheap.system_bytes - - stats.thread_bytes - - stats.central_bytes - - stats.transfer_bytes - - stats.pageheap.free_bytes - - stats.pageheap.unmapped_bytes; - return true; - } - - if (strcmp(name, "generic.heap_size") == 0) { - TCMallocStats stats; - ExtractStats(&stats, NULL, NULL, NULL); - *value = stats.pageheap.system_bytes; - return true; - } - - if (strcmp(name, "tcmalloc.slack_bytes") == 0) { - // Kept for backwards compatibility. Now defined externally as: - // pageheap_free_bytes + pageheap_unmapped_bytes. 
- SpinLockHolder l(Static::pageheap_lock()); - PageHeap::Stats stats = Static::pageheap()->stats(); - *value = stats.free_bytes + stats.unmapped_bytes; - return true; - } - - if (strcmp(name, "tcmalloc.central_cache_free_bytes") == 0) { - TCMallocStats stats; - ExtractStats(&stats, NULL, NULL, NULL); - *value = stats.central_bytes; - return true; - } - - if (strcmp(name, "tcmalloc.transfer_cache_free_bytes") == 0) { - TCMallocStats stats; - ExtractStats(&stats, NULL, NULL, NULL); - *value = stats.transfer_bytes; - return true; - } - - if (strcmp(name, "tcmalloc.thread_cache_free_bytes") == 0) { - TCMallocStats stats; - ExtractStats(&stats, NULL, NULL, NULL); - *value = stats.thread_bytes; - return true; - } - - if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) { - SpinLockHolder l(Static::pageheap_lock()); - *value = Static::pageheap()->stats().free_bytes; - return true; - } - - if (strcmp(name, "tcmalloc.pageheap_unmapped_bytes") == 0) { - SpinLockHolder l(Static::pageheap_lock()); - *value = Static::pageheap()->stats().unmapped_bytes; - return true; - } - - if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { - SpinLockHolder l(Static::pageheap_lock()); - *value = ThreadCache::overall_thread_cache_size(); - return true; - } - - if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) { - TCMallocStats stats; - ExtractStats(&stats, NULL, NULL, NULL); - *value = stats.thread_bytes; - return true; - } - - if (strcmp(name, "tcmalloc.aggressive_memory_decommit") == 0) { - *value = size_t(Static::pageheap()->GetAggressiveDecommit()); - return true; - } - - return false; - } - - virtual bool SetNumericProperty(const char* name, size_t value) { - ASSERT(name != NULL); - - if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { - SpinLockHolder l(Static::pageheap_lock()); - ThreadCache::set_overall_thread_cache_size(value); - return true; - } - - if (strcmp(name, "tcmalloc.aggressive_memory_decommit") == 0) { - Static::pageheap()->SetAggressiveDecommit(value != 0); - return true; - } - - return false; - } - - virtual void MarkThreadIdle() { - ThreadCache::BecomeIdle(); - } - - virtual void MarkThreadBusy(); // Implemented below - - virtual SysAllocator* GetSystemAllocator() { - SpinLockHolder h(Static::pageheap_lock()); - return sys_alloc; - } - - virtual void SetSystemAllocator(SysAllocator* alloc) { - SpinLockHolder h(Static::pageheap_lock()); - sys_alloc = alloc; - } - - virtual void ReleaseToSystem(size_t num_bytes) { - SpinLockHolder h(Static::pageheap_lock()); - if (num_bytes <= extra_bytes_released_) { - // We released too much on a prior call, so don't release any - // more this time. - extra_bytes_released_ = extra_bytes_released_ - num_bytes; - return; - } - num_bytes = num_bytes - extra_bytes_released_; - // num_bytes might be less than one page. If we pass zero to - // ReleaseAtLeastNPages, it won't do anything, so we release a whole - // page now and let extra_bytes_released_ smooth it out over time. - Length num_pages = max(num_bytes >> kPageShift, 1); - size_t bytes_released = Static::pageheap()->ReleaseAtLeastNPages( - num_pages) << kPageShift; - if (bytes_released > num_bytes) { - extra_bytes_released_ = bytes_released - num_bytes; - } else { - // The PageHeap wasn't able to release num_bytes. Don't try to - // compensate with a big release next time. Specifically, - // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX). 
- extra_bytes_released_ = 0; - } - } - - virtual void SetMemoryReleaseRate(double rate) { - FLAGS_tcmalloc_release_rate = rate; - } - - virtual double GetMemoryReleaseRate() { - return FLAGS_tcmalloc_release_rate; - } - virtual size_t GetEstimatedAllocatedSize(size_t size) { - if (size <= kMaxSize) { - const size_t cl = Static::sizemap()->SizeClass(size); - const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl); - return alloc_size; - } else { - return tcmalloc::pages(size) << kPageShift; - } - } - - // This just calls GetSizeWithCallback, but because that's in an - // unnamed namespace, we need to move the definition below it in the - // file. - virtual size_t GetAllocatedSize(const void* ptr); - - // This duplicates some of the logic in GetSizeWithCallback, but is - // faster. This is important on OS X, where this function is called - // on every allocation operation. - virtual Ownership GetOwnership(const void* ptr) { - const PageID p = reinterpret_cast(ptr) >> kPageShift; - // The rest of tcmalloc assumes that all allocated pointers use at - // most kAddressBits bits. If ptr doesn't, then it definitely - // wasn't alloacted by tcmalloc. - if ((p >> (kAddressBits - kPageShift)) > 0) { - return kNotOwned; - } - size_t cl = Static::pageheap()->GetSizeClassIfCached(p); - if (cl != 0) { - return kOwned; - } - const Span *span = Static::pageheap()->GetDescriptor(p); - return span ? kOwned : kNotOwned; - } - - virtual void GetFreeListSizes(vector* v) { - static const char* kCentralCacheType = "tcmalloc.central"; - static const char* kTransferCacheType = "tcmalloc.transfer"; - static const char* kThreadCacheType = "tcmalloc.thread"; - static const char* kPageHeapType = "tcmalloc.page"; - static const char* kPageHeapUnmappedType = "tcmalloc.page_unmapped"; - static const char* kLargeSpanType = "tcmalloc.large"; - static const char* kLargeUnmappedSpanType = "tcmalloc.large_unmapped"; - - v->clear(); - - // central class information - int64 prev_class_size = 0; - for (int cl = 1; cl < kNumClasses; ++cl) { - size_t class_size = Static::sizemap()->ByteSizeForClass(cl); - MallocExtension::FreeListInfo i; - i.min_object_size = prev_class_size + 1; - i.max_object_size = class_size; - i.total_bytes_free = - Static::central_cache()[cl].length() * class_size; - i.type = kCentralCacheType; - v->push_back(i); - - // transfer cache - i.total_bytes_free = - Static::central_cache()[cl].tc_length() * class_size; - i.type = kTransferCacheType; - v->push_back(i); - - prev_class_size = Static::sizemap()->ByteSizeForClass(cl); - } - - // Add stats from per-thread heaps - uint64_t class_count[kNumClasses]; - memset(class_count, 0, sizeof(class_count)); - { - SpinLockHolder h(Static::pageheap_lock()); - uint64_t thread_bytes = 0; - ThreadCache::GetThreadStats(&thread_bytes, class_count); - } - - prev_class_size = 0; - for (int cl = 1; cl < kNumClasses; ++cl) { - MallocExtension::FreeListInfo i; - i.min_object_size = prev_class_size + 1; - i.max_object_size = Static::sizemap()->ByteSizeForClass(cl); - i.total_bytes_free = - class_count[cl] * Static::sizemap()->ByteSizeForClass(cl); - i.type = kThreadCacheType; - v->push_back(i); - } - - // append page heap info - PageHeap::SmallSpanStats small; - PageHeap::LargeSpanStats large; - { - SpinLockHolder h(Static::pageheap_lock()); - Static::pageheap()->GetSmallSpanStats(&small); - Static::pageheap()->GetLargeSpanStats(&large); - } - - // large spans: mapped - MallocExtension::FreeListInfo span_info; - span_info.type = kLargeSpanType; - span_info.max_object_size 
= (numeric_limits<size_t>::max)();
- span_info.min_object_size = kMaxPages << kPageShift;
- span_info.total_bytes_free = large.normal_pages << kPageShift;
- v->push_back(span_info);
-
- // large spans: unmapped
- span_info.type = kLargeUnmappedSpanType;
- span_info.total_bytes_free = large.returned_pages << kPageShift;
- v->push_back(span_info);
-
- // small spans
- for (int s = 1; s < kMaxPages; s++) {
- MallocExtension::FreeListInfo i;
- i.max_object_size = (s << kPageShift);
- i.min_object_size = ((s - 1) << kPageShift);
-
- i.type = kPageHeapType;
- i.total_bytes_free = (s << kPageShift) * small.normal_length[s];
- v->push_back(i);
-
- i.type = kPageHeapUnmappedType;
- i.total_bytes_free = (s << kPageShift) * small.returned_length[s];
- v->push_back(i);
- }
- }
-};
-
-// The constructor allocates an object to ensure that initialization
-// runs before main(), and therefore we do not have a chance to become
-// multi-threaded before initialization. We also create the TSD key
-// here. Presumably by the time this constructor runs, glibc is in
-// good enough shape to handle pthread_key_create().
-//
-// The constructor also takes the opportunity to tell STL to use
-// tcmalloc. We want to do this early, before construct time, so
-// all user STL allocations go through tcmalloc (which works really
-// well for STL).
-//
-// The destructor prints stats when the program exits.
-static int tcmallocguard_refcount = 0; // no lock needed: runs before main()
-TCMallocGuard::TCMallocGuard() {
- if (tcmallocguard_refcount++ == 0) {
- ReplaceSystemAlloc(); // defined in libc_override_*.h
- tc_free(tc_malloc(1));
- ThreadCache::InitTSD();
- tc_free(tc_malloc(1));
- // Either we, or debugallocation.cc, or valgrind will control memory
- // management. We register our extension if we're the winner.
-#ifdef TCMALLOC_USING_DEBUGALLOCATION
- // Let debugallocation register its extension.
-#else
- if (RunningOnValgrind()) {
- // Let Valgrind uses its own malloc (so don't register our extension).
- } else { - MallocExtension::Register(new TCMallocImplementation); - } -#endif - } -} - -TCMallocGuard::~TCMallocGuard() { - if (--tcmallocguard_refcount == 0) { - const char* env = NULL; - if (!RunningOnValgrind()) { - // Valgrind uses it's own malloc so we cannot do MALLOCSTATS - env = getenv("MALLOCSTATS"); - } - if (env != NULL) { - int level = atoi(env); - if (level < 1) level = 1; - PrintStats(level); - } - } -} -#ifndef WIN32_OVERRIDE_ALLOCATORS -static TCMallocGuard module_enter_exit_hook; -#endif - -//------------------------------------------------------------------- -// Helpers for the exported routines below -//------------------------------------------------------------------- - -static inline bool CheckCachedSizeClass(void *ptr) { - PageID p = reinterpret_cast(ptr) >> kPageShift; - size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p); - return cached_value == 0 || - cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass; -} - -static inline void* CheckedMallocResult(void *result) { - ASSERT(result == NULL || CheckCachedSizeClass(result)); - return result; -} - -static inline void* SpanToMallocResult(Span *span) { - Static::pageheap()->CacheSizeClass(span->start, 0); - return - CheckedMallocResult(reinterpret_cast(span->start << kPageShift)); -} - -static void* DoSampledAllocation(size_t size) { -#ifndef NO_TCMALLOC_SAMPLES - // Grab the stack trace outside the heap lock - StackTrace tmp; - tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1); - tmp.size = size; - - SpinLockHolder h(Static::pageheap_lock()); - // Allocate span - Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size)); - if (UNLIKELY(span == NULL)) { - return NULL; - } - - // Allocate stack trace - StackTrace *stack = Static::stacktrace_allocator()->New(); - if (UNLIKELY(stack == NULL)) { - // Sampling failed because of lack of memory - return span; - } - *stack = tmp; - span->sample = 1; - span->objects = stack; - tcmalloc::DLL_Prepend(Static::sampled_objects(), span); - - return SpanToMallocResult(span); -#else - abort(); -#endif -} - -namespace { - -typedef void* (*malloc_fn)(void *arg); - -SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED); - -void* handle_oom(malloc_fn retry_fn, - void* retry_arg, - bool from_operator, - bool nothrow) { - if (!from_operator && !tc_new_mode) { - // we're out of memory in C library function (malloc etc) and no - // "new mode" forced on us. Just return NULL - return NULL; - } - // we're OOM in operator new or "new mode" is set. We might have to - // call new_handle and maybe retry allocation. - - for (;;) { - // Get the current new handler. NB: this function is not - // thread-safe. We make a feeble stab at making it so here, but - // this lock only protects against tcmalloc interfering with - // itself, not with other libraries calling set_new_handler. - std::new_handler nh; - { - SpinLockHolder h(&set_new_handler_lock); - nh = std::set_new_handler(0); - (void) std::set_new_handler(nh); - } -#if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) - if (!nh) { - return NULL; - } - // Since exceptions are disabled, we don't really know if new_handler - // failed. Assume it will abort if it fails. - (*nh)(); -#else - // If no new_handler is established, the allocation failed. - if (!nh) { - if (nothrow) { - return NULL; - } - throw std::bad_alloc(); - } - // Otherwise, try the new_handler. If it returns, retry the - // allocation. 
If it throws std::bad_alloc, fail the allocation. - // if it throws something else, don't interfere. - try { - (*nh)(); - } catch (const std::bad_alloc&) { - if (!nothrow) throw; - return NULL; - } -#endif // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) - - // we get here if new_handler returns successfully. So we retry - // allocation. - void* rv = retry_fn(retry_arg); - if (rv != NULL) { - return rv; - } - - // if allocation failed again we go to next loop iteration - } -} - -// Copy of FLAGS_tcmalloc_large_alloc_report_threshold with -// automatic increases factored in. -static int64_t large_alloc_threshold = - (kPageSize > FLAGS_tcmalloc_large_alloc_report_threshold - ? kPageSize : FLAGS_tcmalloc_large_alloc_report_threshold); - -static void ReportLargeAlloc(Length num_pages, void* result) { - StackTrace stack; - stack.depth = GetStackTrace(stack.stack, tcmalloc::kMaxStackDepth, 1); - - static const int N = 1000; - char buffer[N]; - TCMalloc_Printer printer(buffer, N); - printer.printf("tcmalloc: large alloc %" PRIu64 " bytes == %p @ ", - static_cast(num_pages) << kPageShift, - result); - for (int i = 0; i < stack.depth; i++) { - printer.printf(" %p", stack.stack[i]); - } - printer.printf("\n"); - write(STDERR_FILENO, buffer, strlen(buffer)); -} - -void* do_memalign(size_t align, size_t size); - -struct retry_memaligh_data { - size_t align; - size_t size; -}; - -static void *retry_do_memalign(void *arg) { - retry_memaligh_data *data = static_cast(arg); - return do_memalign(data->align, data->size); -} - -static void *maybe_do_cpp_memalign_slow(size_t align, size_t size) { - retry_memaligh_data data; - data.align = align; - data.size = size; - return handle_oom(retry_do_memalign, &data, - false, true); -} - -inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) { - void *rv = do_memalign(align, size); - if (LIKELY(rv != NULL)) { - return rv; - } - return maybe_do_cpp_memalign_slow(align, size); -} - -// Must be called with the page lock held. -inline bool should_report_large(Length num_pages) { - const int64 threshold = large_alloc_threshold; - if (threshold > 0 && num_pages >= (threshold >> kPageShift)) { - // Increase the threshold by 1/8 every time we generate a report. - // We cap the threshold at 8GiB to avoid overflow problems. - large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 - ? threshold + threshold/8 : 8ll<<30); - return true; - } - return false; -} - -// Helper for do_malloc(). -inline void* do_malloc_pages(ThreadCache* heap, size_t size) { - void* result; - bool report_large; - - Length num_pages = tcmalloc::pages(size); - - // NOTE: we're passing original size here as opposed to rounded-up - // size as we do in do_malloc_small. The difference is small here - // (at most 4k out of at least 256k). And not rounding up saves us - // from possibility of overflow, which rounding up could produce. - // - // See https://github.com/gperftools/gperftools/issues/723 - if (heap->SampleAllocation(size)) { - result = DoSampledAllocation(size); - - SpinLockHolder h(Static::pageheap_lock()); - report_large = should_report_large(num_pages); - } else { - SpinLockHolder h(Static::pageheap_lock()); - Span* span = Static::pageheap()->New(num_pages); - result = (UNLIKELY(span == NULL) ? 
NULL : SpanToMallocResult(span)); - report_large = should_report_large(num_pages); - } - - if (report_large) { - ReportLargeAlloc(num_pages, result); - } - return result; -} - -ALWAYS_INLINE void* do_malloc_small(ThreadCache* heap, size_t size) { - ASSERT(Static::IsInited()); - ASSERT(heap != NULL); - size_t cl = Static::sizemap()->SizeClass(size); - size = Static::sizemap()->class_to_size(cl); - - if (UNLIKELY(heap->SampleAllocation(size))) { - return DoSampledAllocation(size); - } else { - // The common case, and also the simplest. This just pops the - // size-appropriate freelist, after replenishing it if it's empty. - return CheckedMallocResult(heap->Allocate(size, cl)); - } -} - -ALWAYS_INLINE void* do_malloc(size_t size) { - if (ThreadCache::have_tls) { - if (LIKELY(size < ThreadCache::MinSizeForSlowPath())) { - return do_malloc_small(ThreadCache::GetCacheWhichMustBePresent(), size); - } - if (UNLIKELY(ThreadCache::IsUseEmergencyMalloc())) { - return tcmalloc::EmergencyMalloc(size); - } - } - - if (size <= kMaxSize) { - return do_malloc_small(ThreadCache::GetCache(), size); - } else { - return do_malloc_pages(ThreadCache::GetCache(), size); - } -} - -static void *retry_malloc(void* size) { - return do_malloc(reinterpret_cast(size)); -} - -ALWAYS_INLINE void* do_malloc_or_cpp_alloc(size_t size) { - void *rv = do_malloc(size); - if (LIKELY(rv != NULL)) { - return rv; - } - return handle_oom(retry_malloc, reinterpret_cast(size), - false, true); -} - -ALWAYS_INLINE void* do_calloc(size_t n, size_t elem_size) { - // Overflow check - const size_t size = n * elem_size; - if (elem_size != 0 && size / elem_size != n) return NULL; - - void* result = do_malloc_or_cpp_alloc(size); - if (result != NULL) { - memset(result, 0, size); - } - return result; -} - -// If ptr is NULL, do nothing. Otherwise invoke the given function. -inline void free_null_or_invalid(void* ptr, void (*invalid_free_fn)(void*)) { - if (ptr != NULL) { - (*invalid_free_fn)(ptr); - } -} - -// Helper for do_free_with_callback(), below. Inputs: -// ptr is object to be freed -// invalid_free_fn is a function that gets invoked on certain "bad frees" -// heap is the ThreadCache for this thread, or NULL if it isn't known -// heap_must_be_valid is whether heap is known to be non-NULL -// -// This function may only be used after Static::IsInited() is true. -// -// We can usually detect the case where ptr is not pointing to a page that -// tcmalloc is using, and in those cases we invoke invalid_free_fn. -// -// To maximize speed in the common case, we usually get here with -// heap_must_be_valid being a manifest constant equal to true. -ALWAYS_INLINE void do_free_helper(void* ptr, - void (*invalid_free_fn)(void*), - ThreadCache* heap, - bool heap_must_be_valid, - bool use_hint, - size_t size_hint) { - ASSERT((Static::IsInited() && heap != NULL) || !heap_must_be_valid); - if (!heap_must_be_valid && !Static::IsInited()) { - // We called free() before malloc(). This can occur if the - // (system) malloc() is called before tcmalloc is loaded, and then - // free() is called after tcmalloc is loaded (and tc_free has - // replaced free), but before the global constructor has run that - // sets up the tcmalloc data structures. 
- free_null_or_invalid(ptr, invalid_free_fn); - return; - } - Span* span = NULL; - const PageID p = reinterpret_cast(ptr) >> kPageShift; - size_t cl; - if (use_hint && Static::sizemap()->MaybeSizeClass(size_hint, &cl)) { - goto non_zero; - } - - cl = Static::pageheap()->GetSizeClassIfCached(p); - if (UNLIKELY(cl == 0)) { - span = Static::pageheap()->GetDescriptor(p); - if (UNLIKELY(!span)) { - // span can be NULL because the pointer passed in is NULL or invalid - // (not something returned by malloc or friends), or because the - // pointer was allocated with some other allocator besides - // tcmalloc. The latter can happen if tcmalloc is linked in via - // a dynamic library, but is not listed last on the link line. - // In that case, libraries after it on the link line will - // allocate with libc malloc, but free with tcmalloc's free. - free_null_or_invalid(ptr, invalid_free_fn); - return; - } - cl = span->sizeclass; - Static::pageheap()->CacheSizeClass(p, cl); - } - - ASSERT(ptr != NULL); - if (LIKELY(cl != 0)) { - non_zero: - ASSERT(!Static::pageheap()->GetDescriptor(p)->sample); - if (heap_must_be_valid || heap != NULL) { - heap->Deallocate(ptr, cl); - } else { - // Delete directly into central cache - tcmalloc::SLL_SetNext(ptr, NULL); - Static::central_cache()[cl].InsertRange(ptr, ptr, 1); - } - } else { - SpinLockHolder h(Static::pageheap_lock()); - ASSERT(reinterpret_cast(ptr) % kPageSize == 0); - ASSERT(span != NULL && span->start == p); - if (span->sample) { - StackTrace* st = reinterpret_cast(span->objects); - tcmalloc::DLL_Remove(span); - Static::stacktrace_allocator()->Delete(st); - span->objects = NULL; - } - Static::pageheap()->Delete(span); - } -} - -// Helper for the object deletion (free, delete, etc.). Inputs: -// ptr is object to be freed -// invalid_free_fn is a function that gets invoked on certain "bad frees" -// -// We can usually detect the case where ptr is not pointing to a page that -// tcmalloc is using, and in those cases we invoke invalid_free_fn. -ALWAYS_INLINE void do_free_with_callback(void* ptr, - void (*invalid_free_fn)(void*), - bool use_hint, size_t size_hint) { - ThreadCache* heap = NULL; - heap = ThreadCache::GetCacheIfPresent(); - if (LIKELY(heap)) { - do_free_helper(ptr, invalid_free_fn, heap, true, use_hint, size_hint); - } else { - do_free_helper(ptr, invalid_free_fn, heap, false, use_hint, size_hint); - } -} - -// The default "do_free" that uses the default callback. -ALWAYS_INLINE void do_free(void* ptr) { - return do_free_with_callback(ptr, &InvalidFree, false, 0); -} - -// NOTE: some logic here is duplicated in GetOwnership (above), for -// speed. If you change this function, look at that one too. -inline size_t GetSizeWithCallback(const void* ptr, - size_t (*invalid_getsize_fn)(const void*)) { - if (ptr == NULL) - return 0; - const PageID p = reinterpret_cast(ptr) >> kPageShift; - size_t cl = Static::pageheap()->GetSizeClassIfCached(p); - if (cl != 0) { - return Static::sizemap()->ByteSizeForClass(cl); - } else { - const Span *span = Static::pageheap()->GetDescriptor(p); - if (UNLIKELY(span == NULL)) { // means we do not own this memory - return (*invalid_getsize_fn)(ptr); - } else if (span->sizeclass != 0) { - Static::pageheap()->CacheSizeClass(p, span->sizeclass); - return Static::sizemap()->ByteSizeForClass(span->sizeclass); - } else { - return span->length << kPageShift; - } - } -} - -// This lets you call back to a given function pointer if ptr is invalid. -// It is used primarily by windows code which wants a specialized callback. 
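// Aside: GetSizeWithCallback above is what ultimately backs
// MallocExtension::GetAllocatedSize. Because small requests are rounded up
// to a size class, the reported size can exceed the requested one; a usage
// sketch (the printed value, e.g. 128, depends on the size-class table):
#include <gperftools/malloc_extension.h>
#include <cstdio>
#include <cstdlib>

void ShowSizeClassRounding() {
  void* p = malloc(100);
  size_t usable = MallocExtension::instance()->GetAllocatedSize(p);
  printf("requested 100 bytes, usable %zu bytes\n", usable);
  free(p);
}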
-ALWAYS_INLINE void* do_realloc_with_callback( - void* old_ptr, size_t new_size, - void (*invalid_free_fn)(void*), - size_t (*invalid_get_size_fn)(const void*)) { - // Get the size of the old entry - const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn); - - // Reallocate if the new size is larger than the old size, - // or if the new size is significantly smaller than the old size. - // We do hysteresis to avoid resizing ping-pongs: - // . If we need to grow, grow to max(new_size, old_size * 1.X) - // . Don't shrink unless new_size < old_size * 0.Y - // X and Y trade-off time for wasted space. For now we do 1.25 and 0.5. - const size_t lower_bound_to_grow = old_size + old_size / 4ul; - const size_t upper_bound_to_shrink = old_size / 2ul; - if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) { - // Need to reallocate. - void* new_ptr = NULL; - - if (new_size > old_size && new_size < lower_bound_to_grow) { - new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow); - } - if (new_ptr == NULL) { - // Either new_size is not a tiny increment, or last do_malloc failed. - new_ptr = do_malloc_or_cpp_alloc(new_size); - } - if (UNLIKELY(new_ptr == NULL)) { - return NULL; - } - MallocHook::InvokeNewHook(new_ptr, new_size); - memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); - MallocHook::InvokeDeleteHook(old_ptr); - // We could use a variant of do_free() that leverages the fact - // that we already know the sizeclass of old_ptr. The benefit - // would be small, so don't bother. - do_free_with_callback(old_ptr, invalid_free_fn, false, 0); - return new_ptr; - } else { - // We still need to call hooks to report the updated size: - MallocHook::InvokeDeleteHook(old_ptr); - MallocHook::InvokeNewHook(old_ptr, new_size); - return old_ptr; - } -} - -ALWAYS_INLINE void* do_realloc(void* old_ptr, size_t new_size) { - return do_realloc_with_callback(old_ptr, new_size, - &InvalidFree, &InvalidGetSizeForRealloc); -} - -// For use by exported routines below that want specific alignments -// -// Note: this code can be slow for alignments > 16, and can -// significantly fragment memory. The expectation is that -// memalign/posix_memalign/valloc/pvalloc will not be invoked very -// often. This requirement simplifies our implementation and allows -// us to tune for expected allocation patterns. -void* do_memalign(size_t align, size_t size) { - ASSERT((align & (align - 1)) == 0); - ASSERT(align > 0); - if (size + align < size) return NULL; // Overflow - - // Fall back to malloc if we would already align this memory access properly. - if (align <= AlignmentForSize(size)) { - void* p = do_malloc(size); - ASSERT((reinterpret_cast(p) % align) == 0); - return p; - } - - if (UNLIKELY(Static::pageheap() == NULL)) ThreadCache::InitModule(); - - // Allocate at least one byte to avoid boundary conditions below - if (size == 0) size = 1; - - if (size <= kMaxSize && align < kPageSize) { - // Search through acceptable size classes looking for one with - // enough alignment. This depends on the fact that - // InitSizeClasses() currently produces several size classes that - // are aligned at powers of two. We will waste time and space if - // we miss in the size class array, but that is deemed acceptable - // since memalign() should be used rarely. 
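// Aside: seen from the outside, the alignment machinery below is what makes
// the standard entry point succeed; a caller-side sketch using POSIX
// posix_memalign (implemented by tc_posix_memalign further down):
#include <cstdint>
#include <cstdio>
#include <cstdlib>

void ShowAlignment() {
  void* p = NULL;
  // align must be a power of two and a multiple of sizeof(void*).
  if (posix_memalign(&p, 64, 100) == 0) {
    printf("64-byte aligned: %d\n",
           static_cast<int>(reinterpret_cast<uintptr_t>(p) % 64 == 0));
    free(p);
  }
}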
- int cl = Static::sizemap()->SizeClass(size); - while (cl < kNumClasses && - ((Static::sizemap()->class_to_size(cl) & (align - 1)) != 0)) { - cl++; - } - if (cl < kNumClasses) { - ThreadCache* heap = ThreadCache::GetCache(); - size = Static::sizemap()->class_to_size(cl); - return CheckedMallocResult(heap->Allocate(size, cl)); - } - } - - // We will allocate directly from the page heap - SpinLockHolder h(Static::pageheap_lock()); - - if (align <= kPageSize) { - // Any page-level allocation will be fine - // TODO: We could put the rest of this page in the appropriate - // TODO: cache but it does not seem worth it. - Span* span = Static::pageheap()->New(tcmalloc::pages(size)); - return UNLIKELY(span == NULL) ? NULL : SpanToMallocResult(span); - } - - // Allocate extra pages and carve off an aligned portion - const Length alloc = tcmalloc::pages(size + align); - Span* span = Static::pageheap()->New(alloc); - if (UNLIKELY(span == NULL)) return NULL; - - // Skip starting portion so that we end up aligned - Length skip = 0; - while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) { - skip++; - } - ASSERT(skip < alloc); - if (skip > 0) { - Span* rest = Static::pageheap()->Split(span, skip); - Static::pageheap()->Delete(span); - span = rest; - } - - // Skip trailing portion that we do not need to return - const Length needed = tcmalloc::pages(size); - ASSERT(span->length >= needed); - if (span->length > needed) { - Span* trailer = Static::pageheap()->Split(span, needed); - Static::pageheap()->Delete(trailer); - } - return SpanToMallocResult(span); -} - -// Helpers for use by exported routines below: - -inline void do_malloc_stats() { - PrintStats(1); -} - -inline int do_mallopt(int cmd, int value) { - return 1; // Indicates error -} - -#ifdef HAVE_STRUCT_MALLINFO -inline struct mallinfo do_mallinfo() { - TCMallocStats stats; - ExtractStats(&stats, NULL, NULL, NULL); - - // Just some of the fields are filled in. - struct mallinfo info; - memset(&info, 0, sizeof(info)); - - // Unfortunately, the struct contains "int" field, so some of the - // size values will be truncated. - info.arena = static_cast(stats.pageheap.system_bytes); - info.fsmblks = static_cast(stats.thread_bytes - + stats.central_bytes - + stats.transfer_bytes); - info.fordblks = static_cast(stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes); - info.uordblks = static_cast(stats.pageheap.system_bytes - - stats.thread_bytes - - stats.central_bytes - - stats.transfer_bytes - - stats.pageheap.free_bytes - - stats.pageheap.unmapped_bytes); - - return info; -} -#endif // HAVE_STRUCT_MALLINFO - -inline void* cpp_alloc(size_t size, bool nothrow) { - void* p = do_malloc(size); - if (LIKELY(p)) { - return p; - } - return handle_oom(retry_malloc, reinterpret_cast(size), - true, nothrow); -} - -} // end unnamed namespace - -// As promised, the definition of this function, declared above. -size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) { - if (ptr == NULL) - return 0; - ASSERT(TCMallocImplementation::GetOwnership(ptr) - != TCMallocImplementation::kNotOwned); - return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize); -} - -void TCMallocImplementation::MarkThreadBusy() { - // Allocate to force the creation of a thread cache, but avoid - // invoking any hooks. 
-
-void TCMallocImplementation::MarkThreadBusy() {
-  // Allocate to force the creation of a thread cache, but avoid
-  // invoking any hooks.
-  do_free(do_malloc(0));
-}
-
-//-------------------------------------------------------------------
-// Exported routines
-//-------------------------------------------------------------------
-
-extern "C" PERFTOOLS_DLL_DECL const char* tc_version(
-    int* major, int* minor, const char** patch) PERFTOOLS_THROW {
-  if (major) *major = TC_VERSION_MAJOR;
-  if (minor) *minor = TC_VERSION_MINOR;
-  if (patch) *patch = TC_VERSION_PATCH;
-  return TC_VERSION_STRING;
-}
-
-// This function behaves similarly to MSVC's _set_new_mode.
-// If flag is 0 (default), calls to malloc will behave normally.
-// If flag is 1, calls to malloc will behave like calls to new,
-// and the std_new_handler will be invoked on failure.
-// Returns the previous mode.
-extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_THROW {
-  int old_mode = tc_new_mode;
-  tc_new_mode = flag;
-  return old_mode;
-}
-
-#ifndef TCMALLOC_USING_DEBUGALLOCATION  // debugallocation.cc defines its own
-
-#if defined(__GNUC__) && defined(__ELF__) && !defined(TCMALLOC_NO_ALIASES)
-#define TC_ALIAS(name) __attribute__((alias(#name)))
-#endif
-
-// CAVEAT: The code structure below ensures that MallocHook methods are always
-//         called from the stack frame of the invoked allocation function.
-//         heap-checker.cc depends on this to start a stack trace from
-//         the call to the (de)allocation function.
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW {
-  void* result = do_malloc_or_cpp_alloc(size);
-  MallocHook::InvokeNewHook(result, size);
-  return result;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(ptr);
-  do_free(ptr);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW {
-  if ((reinterpret_cast<uintptr_t>(ptr) & (kPageSize-1)) == 0) {
-    tc_free(ptr);
-    return;
-  }
-  MallocHook::InvokeDeleteHook(ptr);
-  do_free_with_callback(ptr, &InvalidFree, true, size);
-}
-
-#ifdef TC_ALIAS
-
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void *p, size_t size) throw()
-  TC_ALIAS(tc_free_sized);
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void *p, size_t size) throw()
-  TC_ALIAS(tc_free_sized);
-
-#else
-
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void *p, size_t size) throw() {
-  tc_free_sized(p, size);
-}
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void *p, size_t size) throw() {
-  tc_free_sized(p, size);
-}
-
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t n,
-                                              size_t elem_size) PERFTOOLS_THROW {
-  if (ThreadCache::IsUseEmergencyMalloc()) {
-    return tcmalloc::EmergencyCalloc(n, elem_size);
-  }
-  void* result = do_calloc(n, elem_size);
-  MallocHook::InvokeNewHook(result, n * elem_size);
-  return result;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_free);
-#else
-{
-  MallocHook::InvokeDeleteHook(ptr);
-  do_free(ptr);
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* old_ptr,
-                                               size_t new_size) PERFTOOLS_THROW {
-  if (old_ptr == NULL) {
-    void* result = do_malloc_or_cpp_alloc(new_size);
-    MallocHook::InvokeNewHook(result, new_size);
-    return result;
-  }
-  if (new_size == 0) {
-    MallocHook::InvokeDeleteHook(old_ptr);
-    do_free(old_ptr);
-    return NULL;
-  }
-  if (UNLIKELY(tcmalloc::IsEmergencyPtr(old_ptr))) {
-    return tcmalloc::EmergencyRealloc(old_ptr, new_size);
-  }
-  return do_realloc(old_ptr, new_size);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) {
-  void* p = cpp_alloc(size, false);
-  // We keep this next instruction out of cpp_alloc for a reason: when
-  // it's in, and new just calls cpp_alloc, the optimizer may fold the
-  // new call into cpp_alloc, which messes up our whole section-based
-  // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
-  // isn't the last thing this fn calls, and prevents the folding.
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW {
-  void* p = cpp_alloc(size, true);
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_free);
-#else
-{
-  MallocHook::InvokeDeleteHook(p);
-  do_free(p);
-}
-#endif
-
-// Standard C++ library implementations define and use this
-// (via ::operator delete(ptr, nothrow)).
-// But it's really the same as normal delete, so we just do the same thing.
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_free);
-#else
-{
-  MallocHook::InvokeDeleteHook(p);
-  do_free(p);
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size)
-#ifdef TC_ALIAS
-TC_ALIAS(tc_new);
-#else
-{
-  void* p = cpp_alloc(size, false);
-  // We keep this next instruction out of cpp_alloc for a reason: when
-  // it's in, and new just calls cpp_alloc, the optimizer may fold the
-  // new call into cpp_alloc, which messes up our whole section-based
-  // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
-  // isn't the last thing this fn calls, and prevents the folding.
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&)
-    PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_new_nothrow);
-#else
-{
-  void* p = cpp_alloc(size, true);
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_free);
-#else
-{
-  MallocHook::InvokeDeleteHook(p);
-  do_free(p);
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_free);
-#else
-{
-  MallocHook::InvokeDeleteHook(p);
-  do_free(p);
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align,
-                                                size_t size) PERFTOOLS_THROW {
-  void* result = do_memalign_or_cpp_memalign(align, size);
-  MallocHook::InvokeNewHook(result, size);
-  return result;
-}
-
-extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign(
-    void** result_ptr, size_t align, size_t size) PERFTOOLS_THROW {
-  if (((align % sizeof(void*)) != 0) ||
-      ((align & (align - 1)) != 0) ||
-      (align == 0)) {
-    return EINVAL;
-  }
-
-  void* result = do_memalign_or_cpp_memalign(align, size);
-  MallocHook::InvokeNewHook(result, size);
-  if (UNLIKELY(result == NULL)) {
-    return ENOMEM;
-  } else {
-    *result_ptr = result;
-    return 0;
-  }
-}
-
-static size_t pagesize = 0;
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) PERFTOOLS_THROW {
-  // Allocate page-aligned object of length >= size bytes
-  if (pagesize == 0) pagesize = getpagesize();
-  void* result = do_memalign_or_cpp_memalign(pagesize, size);
-  MallocHook::InvokeNewHook(result, size);
-  return result;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) PERFTOOLS_THROW {
-  // Round up size to a multiple of pagesize
-  if (pagesize == 0) pagesize = getpagesize();
-  if (size == 0) {    // pvalloc(0) should allocate one page, according to
-    size = pagesize;  // http://man.free4web.biz/man3/libmpatrol.3.html
-  }
-  size = (size + pagesize - 1) & ~(pagesize - 1);
-  void* result = do_memalign_or_cpp_memalign(pagesize, size);
-  MallocHook::InvokeNewHook(result, size);
-  return result;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW {
-  do_malloc_stats();
-}
-
-extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW {
-  return do_mallopt(cmd, value);
-}
-
-#ifdef HAVE_STRUCT_MALLINFO
-extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW {
-  return do_mallinfo();
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW {
-  return MallocExtension::instance()->GetAllocatedSize(ptr);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW {
-  void* result = do_malloc(size);
-  MallocHook::InvokeNewHook(result, size);
-  return result;
-}
-
-#pragma GCC diagnostic pop
-
-#endif  // TCMALLOC_USING_DEBUGALLOCATION
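Before the next deleted file, one note on the exported alignment entry points
above: tc_posix_memalign validates the alignment exactly as POSIX requires,
and tc_pvalloc rounds the request up to whole pages. Both reduce to a few
lines of arithmetic; a minimal sketch, assuming pagesize is a power of two
(illustrative names, not the tcmalloc entry points themselves):

    #include <cerrno>
    #include <cstddef>

    // posix_memalign precondition: align is a nonzero power of two and a
    // multiple of sizeof(void*); otherwise the call fails with EINVAL.
    int check_align(size_t align) {
      if (align == 0 || (align & (align - 1)) != 0 ||
          (align % sizeof(void*)) != 0)
        return EINVAL;
      return 0;
    }

    // pvalloc rounding: size 0 becomes one page, anything else is rounded
    // up to the next multiple of pagesize.
    size_t round_to_pages(size_t size, size_t pagesize) {
      if (size == 0) size = pagesize;
      return (size + pagesize - 1) & ~(pagesize - 1);
    }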
- -#include "config.h" - -#ifndef _XOPEN_SOURCE -# define _XOPEN_SOURCE 600 // for posix_memalign -#endif -#include // for posix_memalign -// FreeBSD has malloc.h, but complains if you use it -#if defined(HAVE_MALLOC_H) && !defined(__FreeBSD__) -#include // for memalign, valloc, pvalloc -#endif - -// __THROW is defined in glibc systems. It means, counter-intuitively, -// "This function will never throw an exception." It's an optional -// optimization tool, but we may need to use it to match glibc prototypes. -#ifndef __THROW // I guess we're not on a glibc system -# define __THROW // __THROW is just an optimization, so ok to make it "" -#endif - -#if !HAVE_CFREE_SYMBOL -extern "C" void cfree(void* ptr) __THROW; -#endif -#if !HAVE_POSIX_MEMALIGN_SYMBOL -extern "C" int posix_memalign(void** ptr, size_t align, size_t size) __THROW; -#endif -#if !HAVE_MEMALIGN_SYMBOL -extern "C" void* memalign(size_t __alignment, size_t __size) __THROW; -#endif -#if !HAVE_VALLOC_SYMBOL -extern "C" void* valloc(size_t __size) __THROW; -#endif -#if !HAVE_PVALLOC_SYMBOL -extern "C" void* pvalloc(size_t __size) __THROW; -#endif diff --git a/contrib/libtcmalloc/src/tcmalloc_guard.h b/contrib/libtcmalloc/src/tcmalloc_guard.h deleted file mode 100644 index 84952bac2ea..00000000000 --- a/contrib/libtcmalloc/src/tcmalloc_guard.h +++ /dev/null @@ -1,49 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// We expose the TCMallocGuard class -- which initializes the tcmalloc -// allocator -- so classes that need to be sure tcmalloc is loaded -// before they do stuff -- notably heap-profiler -- can. To use this -// create a static TCMallocGuard instance at the top of a file where -// you need tcmalloc to be initialized before global constructors run. 
- -#ifndef TCMALLOC_TCMALLOC_GUARD_H_ -#define TCMALLOC_TCMALLOC_GUARD_H_ - -class TCMallocGuard { - public: - TCMallocGuard(); - ~TCMallocGuard(); -}; - -#endif // TCMALLOC_TCMALLOC_GUARD_H_ diff --git a/contrib/libtcmalloc/src/third_party/valgrind.h b/contrib/libtcmalloc/src/third_party/valgrind.h deleted file mode 100644 index 577c59ab0cd..00000000000 --- a/contrib/libtcmalloc/src/third_party/valgrind.h +++ /dev/null @@ -1,3924 +0,0 @@ -/* -*- c -*- - ---------------------------------------------------------------- - - Notice that the following BSD-style license applies to this one - file (valgrind.h) only. The rest of Valgrind is licensed under the - terms of the GNU General Public License, version 2, unless - otherwise indicated. See the COPYING file in the source - distribution for details. - - ---------------------------------------------------------------- - - This file is part of Valgrind, a dynamic binary instrumentation - framework. - - Copyright (C) 2000-2008 Julian Seward. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment in the product - documentation would be appreciated but is not required. - - 3. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 4. The name of the author may not be used to endorse or promote - products derived from this software without specific prior written - permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS - OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE - GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ---------------------------------------------------------------- - - Notice that the above BSD-style license applies to this one file - (valgrind.h) only. The entire rest of Valgrind is licensed under - the terms of the GNU General Public License, version 2. See the - COPYING file in the source distribution for details. - - ---------------------------------------------------------------- -*/ - - -/* This file is for inclusion into client (your!) code. - - You can use these macros to manipulate and query Valgrind's - execution inside your own programs. - - The resulting executables will still run without Valgrind, just a - little bit more slowly than they otherwise would, but otherwise - unchanged. When not running on valgrind, each client request - consumes very few (eg. 7) instructions, so the resulting performance - loss is negligible unless you plan to execute client requests - millions of times per second. 
Nevertheless, if that is still a - problem, you can compile with the NVALGRIND symbol defined (gcc - -DNVALGRIND) so that client requests are not even compiled in. */ - -#ifndef __VALGRIND_H -#define __VALGRIND_H - -#include - -/* Nb: this file might be included in a file compiled with -ansi. So - we can't use C++ style "//" comments nor the "asm" keyword (instead - use "__asm__"). */ - -/* Derive some tags indicating what the target platform is. Note - that in this file we're using the compiler's CPP symbols for - identifying architectures, which are different to the ones we use - within the rest of Valgrind. Note, __powerpc__ is active for both - 32 and 64-bit PPC, whereas __powerpc64__ is only active for the - latter (on Linux, that is). */ -#undef PLAT_x86_linux -#undef PLAT_amd64_linux -#undef PLAT_ppc32_linux -#undef PLAT_ppc64_linux -#undef PLAT_ppc32_aix5 -#undef PLAT_ppc64_aix5 - -#if !defined(_AIX) && defined(__i386__) -# define PLAT_x86_linux 1 -#elif !defined(_AIX) && defined(__x86_64__) -# define PLAT_amd64_linux 1 -#elif !defined(_AIX) && defined(__powerpc__) && !defined(__powerpc64__) -# define PLAT_ppc32_linux 1 -#elif !defined(_AIX) && defined(__powerpc__) && defined(__powerpc64__) -# define PLAT_ppc64_linux 1 -#elif defined(_AIX) && defined(__64BIT__) -# define PLAT_ppc64_aix5 1 -#elif defined(_AIX) && !defined(__64BIT__) -# define PLAT_ppc32_aix5 1 -#endif - - -/* If we're not compiling for our target platform, don't generate - any inline asms. */ -#if !defined(PLAT_x86_linux) && !defined(PLAT_amd64_linux) \ - && !defined(PLAT_ppc32_linux) && !defined(PLAT_ppc64_linux) \ - && !defined(PLAT_ppc32_aix5) && !defined(PLAT_ppc64_aix5) -# if !defined(NVALGRIND) -# define NVALGRIND 1 -# endif -#endif - - -/* ------------------------------------------------------------------ */ -/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing */ -/* in here of use to end-users -- skip to the next section. */ -/* ------------------------------------------------------------------ */ - -#if defined(NVALGRIND) - -/* Define NVALGRIND to completely remove the Valgrind magic sequence - from the compiled code (analogous to NDEBUG's effects on - assert()) */ -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - { \ - (_zzq_rlval) = (_zzq_default); \ - } - -#else /* ! NVALGRIND */ - -/* The following defines the magic code sequences which the JITter - spots and handles magically. Don't look too closely at them as - they will rot your brain. - - The assembly code sequences for all architectures is in this one - file. This is because this file must be stand-alone, and we don't - want to have multiple files. - - For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default - value gets put in the return slot, so that everything works when - this is executed not under Valgrind. Args are passed in a memory - block, and so there's no intrinsic limit to the number that could - be passed, but it's currently five. - - The macro args are: - _zzq_rlval result lvalue - _zzq_default default value (result returned when running on real CPU) - _zzq_request request code - _zzq_arg1..5 request params - - The other two macros are used to support function wrapping, and are - a lot simpler. 
VALGRIND_GET_NR_CONTEXT returns the value of the - guest's NRADDR pseudo-register and whatever other information is - needed to safely run the call original from the wrapper: on - ppc64-linux, the R2 value at the divert point is also needed. This - information is abstracted into a user-visible type, OrigFn. - - VALGRIND_CALL_NOREDIR_* behaves the same as the following on the - guest, but guarantees that the branch instruction will not be - redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64: - branch-and-link-to-r11. VALGRIND_CALL_NOREDIR is just text, not a - complete inline asm, since it needs to be combined with more magic - inline asm stuff to be useful. -*/ - -/* ------------------------- x86-linux ------------------------- */ - -#if defined(PLAT_x86_linux) - -typedef - struct { - unsigned int nraddr; /* where's the code? */ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "roll $3, %%edi ; roll $13, %%edi\n\t" \ - "roll $29, %%edi ; roll $19, %%edi\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - { volatile unsigned int _zzq_args[6]; \ - volatile unsigned int _zzq_result; \ - _zzq_args[0] = (unsigned int)(_zzq_request); \ - _zzq_args[1] = (unsigned int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int)(_zzq_arg5); \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %EDX = client_request ( %EAX ) */ \ - "xchgl %%ebx,%%ebx" \ - : "=d" (_zzq_result) \ - : "a" (&_zzq_args[0]), "0" (_zzq_default) \ - : "cc", "memory" \ - ); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - volatile unsigned int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %EAX = guest_NRADDR */ \ - "xchgl %%ecx,%%ecx" \ - : "=a" (__addr) \ - : \ - : "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - } - -#define VALGRIND_CALL_NOREDIR_EAX \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* call-noredir *%EAX */ \ - "xchgl %%edx,%%edx\n\t" -#endif /* PLAT_x86_linux */ - -/* ------------------------ amd64-linux ------------------------ */ - -#if defined(PLAT_amd64_linux) - -typedef - struct { - unsigned long long int nraddr; /* where's the code? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ - "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - { volatile unsigned long long int _zzq_args[6]; \ - volatile unsigned long long int _zzq_result; \ - _zzq_args[0] = (unsigned long long int)(_zzq_request); \ - _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %RDX = client_request ( %RAX ) */ \ - "xchgq %%rbx,%%rbx" \ - : "=d" (_zzq_result) \ - : "a" (&_zzq_args[0]), "0" (_zzq_default) \ - : "cc", "memory" \ - ); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - volatile unsigned long long int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %RAX = guest_NRADDR */ \ - "xchgq %%rcx,%%rcx" \ - : "=a" (__addr) \ - : \ - : "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - } - -#define VALGRIND_CALL_NOREDIR_RAX \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* call-noredir *%RAX */ \ - "xchgq %%rdx,%%rdx\n\t" -#endif /* PLAT_amd64_linux */ - -/* ------------------------ ppc32-linux ------------------------ */ - -#if defined(PLAT_ppc32_linux) - -typedef - struct { - unsigned int nraddr; /* where's the code? */ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ - "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned int _zzq_args[6]; \ - unsigned int _zzq_result; \ - unsigned int* _zzq_ptr; \ - _zzq_args[0] = (unsigned int)(_zzq_request); \ - _zzq_args[1] = (unsigned int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int)(_zzq_arg5); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile("mr 3,%1\n\t" /*default*/ \ - "mr 4,%2\n\t" /*ptr*/ \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1\n\t" \ - "mr %0,3" /*result*/ \ - : "=b" (_zzq_result) \ - : "b" (_zzq_default), "b" (_zzq_ptr) \ - : "cc", "memory", "r3", "r4"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - unsigned int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "cc", "memory", "r3" \ - ); \ - _zzq_orig->nraddr = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" -#endif /* PLAT_ppc32_linux */ - -/* ------------------------ ppc64-linux ------------------------ */ - -#if defined(PLAT_ppc64_linux) - -typedef - struct { - unsigned long long int nraddr; /* where's the code? */ - unsigned long long int r2; /* what tocptr do we need? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ - "rotldi 0,0,61 ; rotldi 0,0,51\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned long long int _zzq_args[6]; \ - register unsigned long long int _zzq_result __asm__("r3"); \ - register unsigned long long int* _zzq_ptr __asm__("r4"); \ - _zzq_args[0] = (unsigned long long int)(_zzq_request); \ - _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1" \ - : "=r" (_zzq_result) \ - : "0" (_zzq_default), "r" (_zzq_ptr) \ - : "cc", "memory"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - register unsigned long long int __addr __asm__("r3"); \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2" \ - : "=r" (__addr) \ - : \ - : "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR_GPR2 */ \ - "or 4,4,4" \ - : "=r" (__addr) \ - : \ - : "cc", "memory" \ - ); \ - _zzq_orig->r2 = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" - -#endif /* PLAT_ppc64_linux */ - -/* ------------------------ ppc32-aix5 ------------------------- */ - -#if defined(PLAT_ppc32_aix5) - -typedef - struct { - unsigned int nraddr; /* where's the code? */ - unsigned int r2; /* what tocptr do we need? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ - "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned int _zzq_args[7]; \ - register unsigned int _zzq_result; \ - register unsigned int* _zzq_ptr; \ - _zzq_args[0] = (unsigned int)(_zzq_request); \ - _zzq_args[1] = (unsigned int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int)(_zzq_arg5); \ - _zzq_args[6] = (unsigned int)(_zzq_default); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile("mr 4,%1\n\t" \ - "lwz 3, 24(4)\n\t" \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1\n\t" \ - "mr %0,3" \ - : "=b" (_zzq_result) \ - : "b" (_zzq_ptr) \ - : "r3", "r4", "cc", "memory"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - register unsigned int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR_GPR2 */ \ - "or 4,4,4\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->r2 = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" - -#endif /* PLAT_ppc32_aix5 */ - -/* ------------------------ ppc64-aix5 ------------------------- */ - -#if defined(PLAT_ppc64_aix5) - -typedef - struct { - unsigned long long int nraddr; /* where's the code? */ - unsigned long long int r2; /* what tocptr do we need? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ - "rotldi 0,0,61 ; rotldi 0,0,51\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned long long int _zzq_args[7]; \ - register unsigned long long int _zzq_result; \ - register unsigned long long int* _zzq_ptr; \ - _zzq_args[0] = (unsigned int long long)(_zzq_request); \ - _zzq_args[1] = (unsigned int long long)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int long long)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int long long)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int long long)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int long long)(_zzq_arg5); \ - _zzq_args[6] = (unsigned int long long)(_zzq_default); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile("mr 4,%1\n\t" \ - "ld 3, 48(4)\n\t" \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1\n\t" \ - "mr %0,3" \ - : "=b" (_zzq_result) \ - : "b" (_zzq_ptr) \ - : "r3", "r4", "cc", "memory"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - register unsigned long long int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR_GPR2 */ \ - "or 4,4,4\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->r2 = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" - -#endif /* PLAT_ppc64_aix5 */ - -/* Insert assembly code for other platforms here... */ - -#endif /* NVALGRIND */ - - -/* ------------------------------------------------------------------ */ -/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */ -/* ugly. It's the least-worst tradeoff I can think of. */ -/* ------------------------------------------------------------------ */ - -/* This section defines magic (a.k.a appalling-hack) macros for doing - guaranteed-no-redirection macros, so as to get from function - wrappers to the functions they are wrapping. The whole point is to - construct standard call sequences, but to do the call itself with a - special no-redirect call pseudo-instruction that the JIT - understands and handles specially. This section is long and - repetitious, and I can't see a way to make it shorter. - - The naming scheme is as follows: - - CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc} - - 'W' stands for "word" and 'v' for "void". Hence there are - different macros for calling arity 0, 1, 2, 3, 4, etc, functions, - and for each, the possibility of returning a word-typed result, or - no result. -*/ - -/* Use these to write the name of your wrapper. NOTE: duplicates - VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */ - -#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \ - _vgwZU_##soname##_##fnname - -#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \ - _vgwZZ_##soname##_##fnname - -/* Use this macro from within a wrapper function to collect the - context (address and possibly other info) of the original function. - Once you have that you can then use it in one of the CALL_FN_ - macros. The type of the argument _lval is OrigFn. 
*/ -#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) - -/* Derivatives of the main macros below, for calling functions - returning void. */ - -#define CALL_FN_v_v(fnptr) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_v(_junk,fnptr); } while (0) - -#define CALL_FN_v_W(fnptr, arg1) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_W(_junk,fnptr,arg1); } while (0) - -#define CALL_FN_v_WW(fnptr, arg1,arg2) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0) - -#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0) - -/* ------------------------- x86-linux ------------------------- */ - -#if defined(PLAT_x86_linux) - -/* These regs are trashed by the hidden call. No need to mention eax - as gcc can already see that, plus causes gcc to bomb. */ -#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx" - -/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned - long) == 4. */ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[1]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[2]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - __asm__ volatile( \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $4, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - __asm__ volatile( \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $8, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[4]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - __asm__ volatile( \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $12, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[5]; \ - 
volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - __asm__ volatile( \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $16, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[6]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - __asm__ volatile( \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $20, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[7]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - __asm__ volatile( \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $24, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[8]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - __asm__ volatile( \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $28, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[9]; \ 
- volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - __asm__ volatile( \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $32, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[10]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - __asm__ volatile( \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $36, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[11]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - __asm__ volatile( \ - "pushl 40(%%eax)\n\t" \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $40, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ - arg6,arg7,arg8,arg9,arg10, \ - arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[12]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ 
- _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - __asm__ volatile( \ - "pushl 44(%%eax)\n\t" \ - "pushl 40(%%eax)\n\t" \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $44, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ - arg6,arg7,arg8,arg9,arg10, \ - arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[13]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - _argvec[12] = (unsigned long)(arg12); \ - __asm__ volatile( \ - "pushl 48(%%eax)\n\t" \ - "pushl 44(%%eax)\n\t" \ - "pushl 40(%%eax)\n\t" \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $48, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_x86_linux */ - -/* ------------------------ amd64-linux ------------------------ */ - -#if defined(PLAT_amd64_linux) - -/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */ - -/* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \ - "rdi", "r8", "r9", "r10", "r11" - -/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned - long) == 8. */ - -/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_ - macros. In order not to trash the stack redzone, we need to drop - %rsp by 128 before the hidden call, and restore afterwards. The - nastyness is that it is only by luck that the stack still appears - to be unwindable during the hidden call - since then the behaviour - of any routine using this macro does not match what the CFI data - says. Sigh. - - Why is this important? Imagine that a wrapper has a stack - allocated local, and passes to the hidden call, a pointer to it. - Because gcc does not know about the hidden call, it may allocate - that local in the redzone. Unfortunately the hidden call may then - trash it before it comes to use it. 
So we must step clear of the - redzone, for the duration of the hidden call, to make it safe. - - Probably the same problem afflicts the other redzone-style ABIs too - (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is - self describing (none of this CFI nonsense) so at least messing - with the stack pointer doesn't give a danger of non-unwindable - stack. */ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[1]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[2]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[4]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[5]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX 
\ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[6]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[7]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - "addq $128,%%rsp\n\t" \ - VALGRIND_CALL_NOREDIR_RAX \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[8]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $8, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[9]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned 
long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $16, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[10]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $24, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[11]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 80(%%rax)\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $32, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[12]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - 
_argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 88(%%rax)\n\t" \ - "pushq 80(%%rax)\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $40, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[13]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - _argvec[12] = (unsigned long)(arg12); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 96(%%rax)\n\t" \ - "pushq 88(%%rax)\n\t" \ - "pushq 80(%%rax)\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $48, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_amd64_linux */ - -/* ------------------------ ppc32-linux ------------------------ */ - -#if defined(PLAT_ppc32_linux) - -/* This is useful for finding out about the on-stack stuff: - - extern int f9 ( int,int,int,int,int,int,int,int,int ); - extern int f10 ( int,int,int,int,int,int,int,int,int,int ); - extern int f11 ( int,int,int,int,int,int,int,int,int,int,int ); - extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int ); - - int g9 ( void ) { - return f9(11,22,33,44,55,66,77,88,99); - } - int g10 ( void ) { - return f10(11,22,33,44,55,66,77,88,99,110); - } - int g11 ( void ) { - return f11(11,22,33,44,55,66,77,88,99,110,121); - } - int g12 ( void ) { - return f12(11,22,33,44,55,66,77,88,99,110,121,132); - } -*/ - -/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ - -/* These regs are trashed by the hidden call. 
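
As a usage aside, since nothing in this header states it directly: the CALL_FN_ families are the back end of Valgrind's function-wrapping scheme, not something client code invokes in isolation. A minimal sketch of the intended consumption, with the wrapped function foo and the Z-encoded soname libfooZdsoZa (libfoo.so.*) purely illustrative:

#include "valgrind.h"

/* Wrapper for int foo(int, int) exported by libfoo.so.*.  The OrigFn
   filled in by VALGRIND_GET_ORIG_FN carries the non-redirected
   address (nraddr) that the CALL_FN_ asm loads into the target
   register before making the hidden call. */
int I_WRAP_SONAME_FNNAME_ZU(libfooZdsoZa, foo)(int a, int b)
{
    int    r;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);   /* capture the real foo           */
    CALL_FN_W_WW(r, fn, a, b);  /* re-invoke it through _argvec[] */
    return r;
}
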
*/ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" - -/* These CALL_FN_ macros assume that on ppc32-linux, - sizeof(unsigned long) == 4. */ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[1]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[2]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[4]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[5]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } 
while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[6]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[7]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[8]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[9]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 
8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[10]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - _argvec[9] = (unsigned long)arg9; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-16\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,16\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[11]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - _argvec[9] = (unsigned long)arg9; \ - _argvec[10] = (unsigned long)arg10; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-16\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,12(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,16\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[12]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - _argvec[9] = (unsigned 
long)arg9; \ - _argvec[10] = (unsigned long)arg10; \ - _argvec[11] = (unsigned long)arg11; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-32\n\t" \ - /* arg11 */ \ - "lwz 3,44(11)\n\t" \ - "stw 3,16(1)\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,12(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,32\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[13]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - _argvec[9] = (unsigned long)arg9; \ - _argvec[10] = (unsigned long)arg10; \ - _argvec[11] = (unsigned long)arg11; \ - _argvec[12] = (unsigned long)arg12; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-32\n\t" \ - /* arg12 */ \ - "lwz 3,48(11)\n\t" \ - "stw 3,20(1)\n\t" \ - /* arg11 */ \ - "lwz 3,44(11)\n\t" \ - "stw 3,16(1)\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,12(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,32\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_ppc32_linux */ - -/* ------------------------ ppc64-linux ------------------------ */ - -#if defined(PLAT_ppc64_linux) - -/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ - -/* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" - -/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned - long) == 8. 
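
It may help to spell out the _argvec layout the ppc64 variants rely on before the per-arity definitions. A sketch under that sizeof assumption, with the function name hypothetical and orig, arg1, arg2 standing for the macro parameters:

/* r11 is loaded with &_argvec[2], so every fixed offset in the asm
   lands inside this one array:
     std 2,-16(11)   saves the caller's r2 into _argvec[0]
     ld  2, -8(11)   loads the callee's TOC from _argvec[1] (OrigFn.r2)
     ld 11,  0(11)   loads the target from _argvec[2] (nraddr)
     ld  3,  8(11)   loads arg1 from _argvec[2+1], and so on upward. */
static void show_argvec_layout(OrigFn orig, unsigned long arg1,
                               unsigned long arg2)
{
    volatile unsigned long argvec[3 + 2];
    argvec[1]     = (unsigned long)orig.r2;     /* callee TOC pointer */
    argvec[2]     = (unsigned long)orig.nraddr; /* hidden-call target */
    argvec[2 + 1] = arg1;                       /* -> r3 in the asm   */
    argvec[2 + 2] = arg2;                       /* -> r4              */
    (void)argvec;
}
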
*/ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+0]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+1]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+2]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+3]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define 
CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+4]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+5]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+6]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, 
arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+7]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+8]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+9]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* 
use nraddr's tocptr */ \ - "addi 1,1,-128\n\t" /* expand stack frame */ \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,128" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+10]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "addi 1,1,-128\n\t" /* expand stack frame */ \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,128" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+11]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "addi 1,1,-144\n\t" /* expand 
stack frame */ \ - /* arg11 */ \ - "ld 3,88(11)\n\t" \ - "std 3,128(1)\n\t" \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,144" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+12]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - _argvec[2+12] = (unsigned long)arg12; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "addi 1,1,-144\n\t" /* expand stack frame */ \ - /* arg12 */ \ - "ld 3,96(11)\n\t" \ - "std 3,136(1)\n\t" \ - /* arg11 */ \ - "ld 3,88(11)\n\t" \ - "std 3,128(1)\n\t" \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,144" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_ppc64_linux */ - -/* ------------------------ ppc32-aix5 ------------------------- */ - -#if defined(PLAT_ppc32_aix5) - -/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ - -/* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" - -/* Expand the stack frame, copying enough info that unwinding - still works. Trashes r3. 
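
A C rendering can make the unwinding remark concrete. A sketch only, with sp standing in for r1 and n for the literal frame size the macro stringizes:

/* Grow the frame by n bytes, then replicate the back-chain word
   (kept at 0(r1) by the AIX ABI) at the new stack pointer, so a
   stack walker still finds a well-formed chain.  The real macro
   uses r3 as its scratch register, hence "trashes r3". */
static unsigned char *expand_frame(unsigned char *sp, unsigned long n)
{
    unsigned char *new_sp = sp - n;               /* addi 1,1,-n */
    *(unsigned long *)new_sp =                    /* stw 3,0(1)  */
        *(unsigned long *)(new_sp + n);           /* lwz 3,n(1)  */
    return new_sp;
}
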
*/ - -#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ - "addi 1,1,-" #_n_fr "\n\t" \ - "lwz 3," #_n_fr "(1)\n\t" \ - "stw 3,0(1)\n\t" - -#define VG_CONTRACT_FRAME_BY(_n_fr) \ - "addi 1,1," #_n_fr "\n\t" - -/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned - long) == 4. */ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+0]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+1]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+2]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+3]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - 
"stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+4]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+5]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+6]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; 
\ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+7]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+8]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" 
(_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+9]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(64) \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,56(1)\n\t" \ - /* args1-8 */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(64) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+10]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(64) \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,60(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,56(1)\n\t" \ - /* args1-8 */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(64) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : 
/*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+11]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(72) \ - /* arg11 */ \ - "lwz 3,44(11)\n\t" \ - "stw 3,64(1)\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,60(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,56(1)\n\t" \ - /* args1-8 */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(72) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+12]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - _argvec[2+12] = (unsigned long)arg12; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "stw 2,-8(11)\n\t" /* save tocptr */ \ - "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(72) \ - /* arg12 */ \ - "lwz 3,48(11)\n\t" \ - "stw 3,68(1)\n\t" \ - /* arg11 */ \ - "lwz 3,44(11)\n\t" \ - "stw 3,64(1)\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,60(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,56(1)\n\t" \ - /* args1-8 */ \ - "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ - "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ - "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ - "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ - "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ - "lwz 8, 
24(11)\n\t" /* arg6->r8 */ \ - "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ - "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ - "lwz 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "lwz 2,-8(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(72) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_ppc32_aix5 */ - -/* ------------------------ ppc64-aix5 ------------------------- */ - -#if defined(PLAT_ppc64_aix5) - -/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ - -/* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" - -/* Expand the stack frame, copying enough info that unwinding - still works. Trashes r3. */ - -#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ - "addi 1,1,-" #_n_fr "\n\t" \ - "ld 3," #_n_fr "(1)\n\t" \ - "std 3,0(1)\n\t" - -#define VG_CONTRACT_FRAME_BY(_n_fr) \ - "addi 1,1," #_n_fr "\n\t" - -/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned - long) == 8. */ - -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+0]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+1]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+2]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) 
\ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+3]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+4]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+5]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 
24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+6]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+7]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+8]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned 
long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+9]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(128) \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(128) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+10]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - 
_argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(128) \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(128) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+11]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(144) \ - /* arg11 */ \ - "ld 3,88(11)\n\t" \ - "std 3,128(1)\n\t" \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(144) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+12]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = 
(unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - _argvec[2+12] = (unsigned long)arg12; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - VG_EXPAND_FRAME_BY_trashes_r3(512) \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - VG_EXPAND_FRAME_BY_trashes_r3(144) \ - /* arg12 */ \ - "ld 3,96(11)\n\t" \ - "std 3,136(1)\n\t" \ - /* arg11 */ \ - "ld 3,88(11)\n\t" \ - "std 3,128(1)\n\t" \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - VG_CONTRACT_FRAME_BY(144) \ - VG_CONTRACT_FRAME_BY(512) \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#endif /* PLAT_ppc64_aix5 */ - - -/* ------------------------------------------------------------------ */ -/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ -/* */ -/* ------------------------------------------------------------------ */ - -/* Some request codes. There are many more of these, but most are not - exposed to end-user view. These are the public ones, all of the - form 0x1000 + small_number. - - Core ones are in the range 0x00000000--0x0000ffff. The non-public - ones start at 0x2000. -*/ - -/* These macros are used by tools -- they must be public, but don't - embed them into other programs. */ -#define VG_USERREQ_TOOL_BASE(a,b) \ - ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) -#define VG_IS_TOOL_USERREQ(a, b, v) \ - (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000)) - -/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! - This enum comprises an ABI exported by Valgrind to programs - which use client requests. DO NOT CHANGE THE ORDER OF THESE - ENTRIES, NOR DELETE ANY -- add new ones at the end. */ -typedef - enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, - VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, - - /* These allow any function to be called from the simulated - CPU but run on the real CPU. Nb: the first arg passed to - the function is always the ThreadId of the running - thread! So CLIENT_CALL0 actually requires a 1 arg - function, etc. */ - VG_USERREQ__CLIENT_CALL0 = 0x1101, - VG_USERREQ__CLIENT_CALL1 = 0x1102, - VG_USERREQ__CLIENT_CALL2 = 0x1103, - VG_USERREQ__CLIENT_CALL3 = 0x1104, - - /* Can be useful in regression testing suites -- eg. can - send Valgrind's output to /dev/null and still count - errors. */ - VG_USERREQ__COUNT_ERRORS = 0x1201, - - /* These are useful and can be interpreted by any tool that - tracks malloc() et al, by using vg_replace_malloc.c. */ - VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301, - VG_USERREQ__FREELIKE_BLOCK = 0x1302, - /* Memory pool support. 
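For orientation, the CALL_FN_W_* family that ends here is consumed by function-wrapping code: a wrapper obtains an OrigFn descriptor and dispatches through the macro whose suffix matches the argument count. A minimal sketch (foo is a hypothetical wrapped function; the wrapping macros are part of this same header):

    #include "valgrind.h"

    /* wraps int foo(int, int) exported from an object with no soname ("NONE") */
    int I_WRAP_SONAME_FNNAME_ZU(NONE, foo)(int x, int y)
    {
        int    result;
        OrigFn fn;
        VALGRIND_GET_ORIG_FN(fn);        /* captures nraddr (and r2/TOC on ppc64-aix5) */
        CALL_FN_W_WW(result, fn, x, y);  /* word result, two word args -> CALL_FN_W_WW */
        return result;
    }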
*/ - VG_USERREQ__CREATE_MEMPOOL = 0x1303, - VG_USERREQ__DESTROY_MEMPOOL = 0x1304, - VG_USERREQ__MEMPOOL_ALLOC = 0x1305, - VG_USERREQ__MEMPOOL_FREE = 0x1306, - VG_USERREQ__MEMPOOL_TRIM = 0x1307, - VG_USERREQ__MOVE_MEMPOOL = 0x1308, - VG_USERREQ__MEMPOOL_CHANGE = 0x1309, - VG_USERREQ__MEMPOOL_EXISTS = 0x130a, - - /* Allow printfs to valgrind log. */ - VG_USERREQ__PRINTF = 0x1401, - VG_USERREQ__PRINTF_BACKTRACE = 0x1402, - - /* Stack support. */ - VG_USERREQ__STACK_REGISTER = 0x1501, - VG_USERREQ__STACK_DEREGISTER = 0x1502, - VG_USERREQ__STACK_CHANGE = 0x1503 - } Vg_ClientRequest; - -#if !defined(__GNUC__) -# define __extension__ /* */ -#endif - -/* Returns the number of Valgrinds this code is running under. That - is, 0 if running natively, 1 if running under Valgrind, 2 if - running under Valgrind which is running under another Valgrind, - etc. */ -#define RUNNING_ON_VALGRIND __extension__ \ - ({unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */, \ - VG_USERREQ__RUNNING_ON_VALGRIND, \ - 0, 0, 0, 0, 0); \ - _qzz_res; \ - }) - - -/* Discard translation of code in the range [_qzz_addr .. _qzz_addr + - _qzz_len - 1]. Useful if you are debugging a JITter or some such, - since it provides a way to make sure valgrind will retranslate the - invalidated area. Returns no value. */ -#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__DISCARD_TRANSLATIONS, \ - _qzz_addr, _qzz_len, 0, 0, 0); \ - } - - -/* These requests are for getting Valgrind itself to print something. - Possibly with a backtrace. This is a really ugly hack. */ - -#if defined(NVALGRIND) - -# define VALGRIND_PRINTF(...) -# define VALGRIND_PRINTF_BACKTRACE(...) - -#else /* NVALGRIND */ - -/* Modern GCC will optimize the static routine out if unused, - and unused attribute will shut down warnings about it. */ -static int VALGRIND_PRINTF(const char *format, ...) - __attribute__((format(__printf__, 1, 2), __unused__)); -static int -VALGRIND_PRINTF(const char *format, ...) -{ - unsigned long _qzz_res; - va_list vargs; - va_start(vargs, format); - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF, - (unsigned long)format, (unsigned long)vargs, - 0, 0, 0); - va_end(vargs); - return (int)_qzz_res; -} - -static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...) - __attribute__((format(__printf__, 1, 2), __unused__)); -static int -VALGRIND_PRINTF_BACKTRACE(const char *format, ...) -{ - unsigned long _qzz_res; - va_list vargs; - va_start(vargs, format); - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE, - (unsigned long)format, (unsigned long)vargs, - 0, 0, 0); - va_end(vargs); - return (int)_qzz_res; -} - -#endif /* NVALGRIND */ - - -/* These requests allow control to move from the simulated CPU to the - real CPU, calling an arbitary function. - - Note that the current ThreadId is inserted as the first argument. - So this call: - - VALGRIND_NON_SIMD_CALL2(f, arg1, arg2) - - requires f to have this signature: - - Word f(Word tid, Word arg1, Word arg2) - - where "Word" is a word-sized type. - - Note that these client requests are not entirely reliable. For example, - if you call a function with them that subsequently calls printf(), - there's a high chance Valgrind will crash. Generally, your prospects of - these working are made higher if the called function does not refer to - any global variables, and does not refer to any libc or other functions - (printf et al). 
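A typical use of the requests just defined: gate extra work on RUNNING_ON_VALGRIND and send diagnostics to the Valgrind log with VALGRIND_PRINTF. A small sketch using only the macros above (nothing else is assumed):

    #include "valgrind.h"

    static void report_instrumentation(void)
    {
        unsigned depth = RUNNING_ON_VALGRIND;   /* 0 when running natively */
        if (depth)
            VALGRIND_PRINTF("running under %u level(s) of Valgrind\n", depth);
    }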
Any kind of entanglement with libc or dynamic linking is - likely to have a bad outcome, for tricky reasons which we've grappled - with a lot in the past. -*/ -#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL0, \ - _qyy_fn, \ - 0, 0, 0, 0); \ - _qyy_res; \ - }) - -#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL1, \ - _qyy_fn, \ - _qyy_arg1, 0, 0, 0); \ - _qyy_res; \ - }) - -#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL2, \ - _qyy_fn, \ - _qyy_arg1, _qyy_arg2, 0, 0); \ - _qyy_res; \ - }) - -#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL3, \ - _qyy_fn, \ - _qyy_arg1, _qyy_arg2, \ - _qyy_arg3, 0); \ - _qyy_res; \ - }) - - -/* Counts the number of errors that have been recorded by a tool. Nb: - the tool must record the errors with VG_(maybe_record_error)() or - VG_(unique_error)() for them to be counted. */ -#define VALGRIND_COUNT_ERRORS \ - __extension__ \ - ({unsigned int _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__COUNT_ERRORS, \ - 0, 0, 0, 0, 0); \ - _qyy_res; \ - }) - -/* Mark a block of memory as having been allocated by a malloc()-like - function. `addr' is the start of the usable block (ie. after any - redzone) `rzB' is redzone size if the allocator can apply redzones; - use '0' if not. Adding redzones makes it more likely Valgrind will spot - block overruns. `is_zeroed' indicates if the memory is zeroed, as it is - for calloc(). Put it immediately after the point where a block is - allocated. - - If you're using Memcheck: If you're allocating memory via superblocks, - and then handing out small chunks of each superblock, if you don't have - redzones on your small blocks, it's worth marking the superblock with - VALGRIND_MAKE_MEM_NOACCESS when it's created, so that block overruns are - detected. But if you can put redzones on, it's probably better to not do - this, so that messages for small overruns are described in terms of the - small block rather than the superblock (but if you have a big overrun - that skips over a redzone, you could miss an error this way). See - memcheck/tests/custom_alloc.c for an example. - - WARNING: if your allocator uses malloc() or 'new' to allocate - superblocks, rather than mmap() or brk(), this will not work properly -- - you'll likely get assertion failures during leak detection. This is - because Valgrind doesn't like seeing overlapping heap blocks. Sorry. - - Nb: block must be freed via a free()-like function specified - with VALGRIND_FREELIKE_BLOCK or mismatch errors will occur. */ -#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MALLOCLIKE_BLOCK, \ - addr, sizeB, rzB, is_zeroed, 0); \ - } - -/* Mark a block of memory as having been freed by a free()-like function. - `rzB' is redzone size; it must match that given to - VALGRIND_MALLOCLIKE_BLOCK. Memory not freed will be detected by the leak - checker. 
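As the comment above spells out, the CLIENT_CALL requests insert the running ThreadId as a hidden first argument, so VALGRIND_NON_SIMD_CALL2 needs a three-parameter callee. A minimal conforming function (the function name is hypothetical; Word stands in for a word-sized integer as described):

    typedef unsigned long Word;

    static Word add_on_real_cpu(Word tid, Word a, Word b)
    {
        (void) tid;          /* ThreadId supplied by Valgrind, often unused */
        return a + b;
    }

    /* call site: Word r = VALGRIND_NON_SIMD_CALL2(add_on_real_cpu, 40, 2); */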
Put it immediately after the point where the block is freed. */ -#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__FREELIKE_BLOCK, \ - addr, rzB, 0, 0, 0); \ - } - -/* Create a memory pool. */ -#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__CREATE_MEMPOOL, \ - pool, rzB, is_zeroed, 0, 0); \ - } - -/* Destroy a memory pool. */ -#define VALGRIND_DESTROY_MEMPOOL(pool) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__DESTROY_MEMPOOL, \ - pool, 0, 0, 0, 0); \ - } - -/* Associate a piece of memory with a memory pool. */ -#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_ALLOC, \ - pool, addr, size, 0, 0); \ - } - -/* Disassociate a piece of memory from a memory pool. */ -#define VALGRIND_MEMPOOL_FREE(pool, addr) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_FREE, \ - pool, addr, 0, 0, 0); \ - } - -/* Disassociate any pieces outside a particular range. */ -#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_TRIM, \ - pool, addr, size, 0, 0); \ - } - -/* Resize and/or move a piece associated with a memory pool. */ -#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MOVE_MEMPOOL, \ - poolA, poolB, 0, 0, 0); \ - } - -/* Resize and/or move a piece associated with a memory pool. */ -#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_CHANGE, \ - pool, addrA, addrB, size, 0); \ - } - -/* Return 1 if a mempool exists, else 0. */ -#define VALGRIND_MEMPOOL_EXISTS(pool) \ - ({unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_EXISTS, \ - pool, 0, 0, 0, 0); \ - _qzz_res; \ - }) - -/* Mark a piece of memory as being a stack. Returns a stack id. */ -#define VALGRIND_STACK_REGISTER(start, end) \ - ({unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__STACK_REGISTER, \ - start, end, 0, 0, 0); \ - _qzz_res; \ - }) - -/* Unmark the piece of memory associated with a stack id as being a - stack. */ -#define VALGRIND_STACK_DEREGISTER(id) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__STACK_DEREGISTER, \ - id, 0, 0, 0, 0); \ - } - -/* Change the start and end address of the stack id. */ -#define VALGRIND_STACK_CHANGE(id, start, end) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__STACK_CHANGE, \ - id, start, end, 0, 0); \ - } - - -#undef PLAT_x86_linux -#undef PLAT_amd64_linux -#undef PLAT_ppc32_linux -#undef PLAT_ppc64_linux -#undef PLAT_ppc32_aix5 -#undef PLAT_ppc64_aix5 - -#endif /* __VALGRIND_H */ diff --git a/contrib/libtcmalloc/src/thread_cache.cc b/contrib/libtcmalloc/src/thread_cache.cc deleted file mode 100644 index 81b3694d563..00000000000 --- a/contrib/libtcmalloc/src/thread_cache.cc +++ /dev/null @@ -1,479 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. 
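Taken together, VALGRIND_MALLOCLIKE_BLOCK and the VALGRIND_FREELIKE_BLOCK request above let a custom allocator get malloc-grade checking. A hedged sketch (REDZONE, arena_carve and arena_release are hypothetical helpers; only the two client requests come from this header):

    #include <stddef.h>
    #include "valgrind.h"

    #define REDZONE 16   /* bytes of padding on each side, chosen arbitrarily */

    void * my_alloc(size_t n)
    {
        char * raw = arena_carve(n + 2 * REDZONE);   /* assumed arena helper */
        char * usable = raw + REDZONE;
        VALGRIND_MALLOCLIKE_BLOCK(usable, n, REDZONE, /*is_zeroed=*/0);
        return usable;
    }

    void my_free(void * p)
    {
        VALGRIND_FREELIKE_BLOCK(p, REDZONE);         /* rzB must match the alloc */
        arena_release((char *) p - REDZONE);
    }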
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Ken Ashcraft - -#include "config.h" -#include "thread_cache.h" -#include <errno.h> -#include <string.h> // for memcpy -#include <algorithm> // for max, min -#include "base/commandlineflags.h" // for SpinLockHolder -#include "base/spinlock.h" // for SpinLockHolder -#include "getenv_safe.h" // for TCMallocGetenvSafe -#include "central_freelist.h" // for CentralFreeListPadded -#include "maybe_threads.h" - -using std::min; -using std::max; - -// Note: this is initialized manually in InitModule to ensure that -// it's configured at right time -// -// DEFINE_int64(tcmalloc_max_total_thread_cache_bytes, -// EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", -// kDefaultOverallThreadCacheSize), -// "Bound on the total amount of bytes allocated to " -// "thread caches. This bound is not strict, so it is possible " -// "for the cache to go over this bound in certain circumstances. " -// "Maximum value of this flag is capped to 1 GB."); - - -namespace tcmalloc { - -static bool phinited = false; - -volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize; -size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize; -ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize; -PageHeapAllocator<ThreadCache> threadcache_allocator; -ThreadCache* ThreadCache::thread_heaps_ = NULL; -int ThreadCache::thread_heap_count_ = 0; -ThreadCache* ThreadCache::next_memory_steal_ = NULL; -#ifdef HAVE_TLS -__thread ThreadCache::ThreadLocalData ThreadCache::threadlocal_data_ - ATTR_INITIAL_EXEC - = {0, 0}; -#endif -bool ThreadCache::tsd_inited_ = false; -pthread_key_t ThreadCache::heap_key_; - -void ThreadCache::Init(pthread_t tid) { - size_ = 0; - - max_size_ = 0; - IncreaseCacheLimitLocked(); - if (max_size_ == 0) { - // There isn't enough memory to go around. Just give the minimum to - // this thread. - max_size_ = kMinThreadCacheSize; - - // Take unclaimed_cache_space_ negative.
- unclaimed_cache_space_ -= kMinThreadCacheSize; - ASSERT(unclaimed_cache_space_ < 0); - } - - next_ = NULL; - prev_ = NULL; - tid_ = tid; - in_setspecific_ = false; - for (size_t cl = 0; cl < kNumClasses; ++cl) { - list_[cl].Init(); - } - - uint32_t sampler_seed; - memcpy(&sampler_seed, &tid, sizeof(sampler_seed)); - sampler_.Init(sampler_seed); -} - -void ThreadCache::Cleanup() { - // Put unused memory back into central cache - for (int cl = 0; cl < kNumClasses; ++cl) { - if (list_[cl].length() > 0) { - ReleaseToCentralCache(&list_[cl], cl, list_[cl].length()); - } - } -} - -// Remove some objects of class "cl" from central cache and add to thread heap. -// On success, return the first object for immediate use; otherwise return NULL. -void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) { - FreeList* list = &list_[cl]; - ASSERT(list->empty()); - const int batch_size = Static::sizemap()->num_objects_to_move(cl); - - const int num_to_move = min(list->max_length(), batch_size); - void *start, *end; - int fetch_count = Static::central_cache()[cl].RemoveRange( - &start, &end, num_to_move); - - ASSERT((start == NULL) == (fetch_count == 0)); - if (--fetch_count >= 0) { - size_ += byte_size * fetch_count; - list->PushRange(fetch_count, SLL_Next(start), end); - } - - // Increase max length slowly up to batch_size. After that, - // increase by batch_size in one shot so that the length is a - // multiple of batch_size. - if (list->max_length() < batch_size) { - list->set_max_length(list->max_length() + 1); - } else { - // Don't let the list get too long. In 32 bit builds, the length - // is represented by a 16 bit int, so we need to watch out for - // integer overflow. - int new_length = min(list->max_length() + batch_size, - kMaxDynamicFreeListLength); - // The list's max_length must always be a multiple of batch_size, - // and kMaxDynamicFreeListLength is not necessarily a multiple - // of batch_size. - new_length -= new_length % batch_size; - ASSERT(new_length % batch_size == 0); - list->set_max_length(new_length); - } - return start; -} - -void ThreadCache::ListTooLong(FreeList* list, size_t cl) { - const int batch_size = Static::sizemap()->num_objects_to_move(cl); - ReleaseToCentralCache(list, cl, batch_size); - - // If the list is too long, we need to transfer some number of - // objects to the central cache. Ideally, we would transfer - // num_objects_to_move, so the code below tries to make max_length - // converge on num_objects_to_move. - - if (list->max_length() < batch_size) { - // Slow start the max_length so we don't overreserve. - list->set_max_length(list->max_length() + 1); - } else if (list->max_length() > batch_size) { - // If we consistently go over max_length, shrink max_length. If we don't - // shrink it, some amount of memory will always stay in this freelist. - list->set_length_overages(list->length_overages() + 1); - if (list->length_overages() > kMaxOverages) { - ASSERT(list->max_length() > batch_size); - list->set_max_length(list->max_length() - batch_size); - list->set_length_overages(0); - } - } -} - -// Remove some objects of class "cl" from thread heap and add to central cache -void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) { - ASSERT(src == &list_[cl]); - if (N > src->length()) N = src->length(); - size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl); - - // We return prepackaged chains of the correct size to the central cache. - // TODO: Use the same format internally in the thread caches? 
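The growth policy in FetchFromCentralCache above, stated on its own: max_length ramps by one per refill until it reaches batch_size, then jumps in batch_size steps, capped at kMaxDynamicFreeListLength and rounded down to a batch_size multiple. A sketch (names mirror the deleted code):

    static int NextMaxLength(int max_length, int batch_size, int cap)
    {
        if (max_length < batch_size)
            return max_length + 1;           // slow start, one object at a time
        int n = max_length + batch_size;     // then grow a whole batch
        if (n > cap)
            n = cap;
        return n - n % batch_size;           // keep it a multiple of batch_size
    }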
- int batch_size = Static::sizemap()->num_objects_to_move(cl); - while (N > batch_size) { - void *tail, *head; - src->PopRange(batch_size, &head, &tail); - Static::central_cache()[cl].InsertRange(head, tail, batch_size); - N -= batch_size; - } - void *tail, *head; - src->PopRange(N, &head, &tail); - Static::central_cache()[cl].InsertRange(head, tail, N); - size_ -= delta_bytes; -} - -// Release idle memory to the central cache -void ThreadCache::Scavenge() { - // If the low-water mark for the free list is L, it means we would - // not have had to allocate anything from the central cache even if - // we had reduced the free list size by L. We aim to get closer to - // that situation by dropping L/2 nodes from the free list. This - // may not release much memory, but if so we will call scavenge again - // pretty soon and the low-water marks will be high on that call. - for (int cl = 0; cl < kNumClasses; cl++) { - FreeList* list = &list_[cl]; - const int lowmark = list->lowwatermark(); - if (lowmark > 0) { - const int drop = (lowmark > 1) ? lowmark/2 : 1; - ReleaseToCentralCache(list, cl, drop); - - // Shrink the max length if it isn't used. Only shrink down to - // batch_size -- if the thread was active enough to get the max_length - // above batch_size, it will likely be that active again. If - // max_length shinks below batch_size, the thread will have to - // go through the slow-start behavior again. The slow-start is useful - // mainly for threads that stay relatively idle for their entire - // lifetime. - const int batch_size = Static::sizemap()->num_objects_to_move(cl); - if (list->max_length() > batch_size) { - list->set_max_length( - max(list->max_length() - batch_size, batch_size)); - } - } - list->clear_lowwatermark(); - } - - IncreaseCacheLimit(); -} - -void ThreadCache::IncreaseCacheLimit() { - SpinLockHolder h(Static::pageheap_lock()); - IncreaseCacheLimitLocked(); -} - -void ThreadCache::IncreaseCacheLimitLocked() { - if (unclaimed_cache_space_ > 0) { - // Possibly make unclaimed_cache_space_ negative. - unclaimed_cache_space_ -= kStealAmount; - max_size_ += kStealAmount; - return; - } - // Don't hold pageheap_lock too long. Try to steal from 10 other - // threads before giving up. The i < 10 condition also prevents an - // infinite loop in case none of the existing thread heaps are - // suitable places to steal from. - for (int i = 0; i < 10; - ++i, next_memory_steal_ = next_memory_steal_->next_) { - // Reached the end of the linked list. Start at the beginning. - if (next_memory_steal_ == NULL) { - ASSERT(thread_heaps_ != NULL); - next_memory_steal_ = thread_heaps_; - } - if (next_memory_steal_ == this || - next_memory_steal_->max_size_ <= kMinThreadCacheSize) { - continue; - } - next_memory_steal_->max_size_ -= kStealAmount; - max_size_ += kStealAmount; - - next_memory_steal_ = next_memory_steal_->next_; - return; - } -} - -int ThreadCache::GetSamplePeriod() { - return sampler_.GetSamplePeriod(); -} - -void ThreadCache::InitModule() { - SpinLockHolder h(Static::pageheap_lock()); - if (!phinited) { - const char *tcb = TCMallocGetenvSafe("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES"); - if (tcb) { - set_overall_thread_cache_size(strtoll(tcb, NULL, 10)); - } - Static::InitStaticVars(); - threadcache_allocator.Init(); - phinited = 1; - } -} - -void ThreadCache::InitTSD() { - ASSERT(!tsd_inited_); - perftools_pthread_key_create(&heap_key_, DestroyThreadCache); - tsd_inited_ = true; - -#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY - // We may have used a fake pthread_t for the main thread. 
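The scavenge policy above keys off the free list's low-water mark: if the list never dipped below L objects since the last scavenge, the same traffic could have been served with L fewer objects, so roughly half of that slack is returned. The rule in isolation (a sketch; names mirror the deleted code):

    // objects to return to the central cache for one free list
    static int ScavengeDropCount(int lowwatermark)
    {
        if (lowwatermark <= 0)
            return 0;                               // list was fully drained; keep it
        return lowwatermark > 1 ? lowwatermark / 2  // drop half the slack
                                : 1;
    }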
Fix it. - pthread_t zero; - memset(&zero, 0, sizeof(zero)); - SpinLockHolder h(Static::pageheap_lock()); - for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { - if (h->tid_ == zero) { - h->tid_ = pthread_self(); - } - } -#endif -} - -ThreadCache* ThreadCache::CreateCacheIfNecessary() { - // Initialize per-thread data if necessary - ThreadCache* heap = NULL; - { - SpinLockHolder h(Static::pageheap_lock()); - // On some old glibc's, and on freebsd's libc (as of freebsd 8.1), - // calling pthread routines (even pthread_self) too early could - // cause a segfault. Since we can call pthreads quite early, we - // have to protect against that in such situations by making a - // 'fake' pthread. This is not ideal since it doesn't work well - // when linking tcmalloc statically with apps that create threads - // before main, so we only do it if we have to. -#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY - pthread_t me; - if (!tsd_inited_) { - memset(&me, 0, sizeof(me)); - } else { - me = pthread_self(); - } -#else - const pthread_t me = pthread_self(); -#endif - - // This may be a recursive malloc call from pthread_setspecific() - // In that case, the heap for this thread has already been created - // and added to the linked list. So we search for that first. - for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { - if (h->tid_ == me) { - heap = h; - break; - } - } - - if (heap == NULL) heap = NewHeap(me); - } - - // We call pthread_setspecific() outside the lock because it may - // call malloc() recursively. We check for the recursive call using - // the "in_setspecific_" flag so that we can avoid calling - // pthread_setspecific() if we are already inside pthread_setspecific(). - if (!heap->in_setspecific_ && tsd_inited_) { - heap->in_setspecific_ = true; - perftools_pthread_setspecific(heap_key_, heap); -#ifdef HAVE_TLS - // Also keep a copy in __thread for faster retrieval - threadlocal_data_.heap = heap; - SetMinSizeForSlowPath(kMaxSize + 1); -#endif - heap->in_setspecific_ = false; - } - return heap; -} - -ThreadCache* ThreadCache::NewHeap(pthread_t tid) { - // Create the heap and add it to the linked list - ThreadCache *heap = threadcache_allocator.New(); - heap->Init(tid); - heap->next_ = thread_heaps_; - heap->prev_ = NULL; - if (thread_heaps_ != NULL) { - thread_heaps_->prev_ = heap; - } else { - // This is the only thread heap at the momment. - ASSERT(next_memory_steal_ == NULL); - next_memory_steal_ = heap; - } - thread_heaps_ = heap; - thread_heap_count_++; - return heap; -} - -void ThreadCache::BecomeIdle() { - if (!tsd_inited_) return; // No caches yet - ThreadCache* heap = GetThreadHeap(); - if (heap == NULL) return; // No thread cache to remove - if (heap->in_setspecific_) return; // Do not disturb the active caller - - heap->in_setspecific_ = true; - perftools_pthread_setspecific(heap_key_, NULL); -#ifdef HAVE_TLS - // Also update the copy in __thread - threadlocal_data_.heap = NULL; - SetMinSizeForSlowPath(0); -#endif - heap->in_setspecific_ = false; - if (GetThreadHeap() == heap) { - // Somehow heap got reinstated by a recursive call to malloc - // from pthread_setspecific. We give up in this case. 
- return; - } - - // We can now get rid of the heap - DeleteCache(heap); -} - -void ThreadCache::BecomeTemporarilyIdle() { - ThreadCache* heap = GetCacheIfPresent(); - if (heap) - heap->Cleanup(); -} - -void ThreadCache::DestroyThreadCache(void* ptr) { - // Note that "ptr" cannot be NULL since pthread promises not - // to invoke the destructor on NULL values, but for safety, - // we check anyway. - if (ptr == NULL) return; -#ifdef HAVE_TLS - // Prevent fast path of GetThreadHeap() from returning heap. - threadlocal_data_.heap = NULL; - SetMinSizeForSlowPath(0); -#endif - DeleteCache(reinterpret_cast<ThreadCache*>(ptr)); -} - -void ThreadCache::DeleteCache(ThreadCache* heap) { - // Remove all memory from heap - heap->Cleanup(); - - // Remove from linked list - SpinLockHolder h(Static::pageheap_lock()); - if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_; - if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_; - if (thread_heaps_ == heap) thread_heaps_ = heap->next_; - thread_heap_count_--; - - if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_; - if (next_memory_steal_ == NULL) next_memory_steal_ = thread_heaps_; - unclaimed_cache_space_ += heap->max_size_; - - threadcache_allocator.Delete(heap); -} - -void ThreadCache::RecomputePerThreadCacheSize() { - // Divide available space across threads - int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1; - size_t space = overall_thread_cache_size_ / n; - - // Limit to allowed range - if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; - if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; - - double ratio = space / max<double>(1, per_thread_cache_size_); - size_t claimed = 0; - for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { - // Increasing the total cache size should not circumvent the - // slow-start growth of max_size_. - if (ratio < 1.0) { - h->max_size_ = static_cast<size_t>(h->max_size_ * ratio); - } - claimed += h->max_size_; - } - unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; - per_thread_cache_size_ = space; -} - -void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) { - for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { - *total_bytes += h->Size(); - if (class_count) { - for (int cl = 0; cl < kNumClasses; ++cl) { - class_count[cl] += h->freelist_length(cl); - } - } - } -} - -void ThreadCache::set_overall_thread_cache_size(size_t new_size) { - // Clip the value to a reasonable range - if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize; - if (new_size > (1<<30)) new_size = (1<<30); // Limit to 1GB - overall_thread_cache_size_ = new_size; - - RecomputePerThreadCacheSize(); -} - -} // namespace tcmalloc diff --git a/contrib/libtcmalloc/src/thread_cache.h b/contrib/libtcmalloc/src/thread_cache.h deleted file mode 100644 index ff7ab1ae77d..00000000000 --- a/contrib/libtcmalloc/src/thread_cache.h +++ /dev/null @@ -1,474 +0,0 @@ -// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer.
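RecomputePerThreadCacheSize above is plain budget division with clamping; for instance, a 32 MiB overall budget across 16 threads yields a 2 MiB per-thread target. A sketch of just that arithmetic (parameter names are placeholders, not the deleted code's constants):

    #include <cstddef>

    size_t PerThreadTarget(size_t overall_bytes, int thread_count,
                           size_t min_size, size_t max_size)
    {
        int n = thread_count > 0 ? thread_count : 1;
        size_t space = overall_bytes / n;        // even split across threads
        if (space < min_size) space = min_size;  // clamp to allowed range
        if (space > max_size) space = max_size;
        return space;
    }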
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Sanjay Ghemawat - -#ifndef TCMALLOC_THREAD_CACHE_H_ -#define TCMALLOC_THREAD_CACHE_H_ - -#include "config.h" -#ifdef HAVE_PTHREAD -#include <pthread.h> // for pthread_t, pthread_key_t -#endif -#include <stddef.h> // for size_t, NULL -#ifdef HAVE_STDINT_H -#include <stdint.h> // for uint32_t, uint64_t -#endif -#include <sys/types.h> // for ssize_t -#include "base/commandlineflags.h" -#include "common.h" -#include "linked_list.h" -#include "maybe_threads.h" -#include "page_heap_allocator.h" -#include "sampler.h" -#include "static_vars.h" - -#include "common.h" // for SizeMap, kMaxSize, etc -#include "internal_logging.h" // for ASSERT, etc -#include "linked_list.h" // for SLL_Pop, SLL_PopRange, etc -#include "page_heap_allocator.h" // for PageHeapAllocator -#include "sampler.h" // for Sampler -#include "static_vars.h" // for Static - -DECLARE_int64(tcmalloc_sample_parameter); - -namespace tcmalloc { - -//------------------------------------------------------------------- -// Data kept per thread -//------------------------------------------------------------------- - -class ThreadCache { - public: -#ifdef HAVE_TLS - enum { have_tls = true }; -#else - enum { have_tls = false }; -#endif - - // All ThreadCache objects are kept in a linked list (for stats collection) - ThreadCache* next_; - ThreadCache* prev_; - - void Init(pthread_t tid); - void Cleanup(); - - // Accessors (mostly just for printing stats) - int freelist_length(size_t cl) const { return list_[cl].length(); } - - // Total byte size in cache - size_t Size() const { return size_; } - - // Allocate an object of the given size and class. The size given - // must be the same as the size of the class in the size map. - void* Allocate(size_t size, size_t cl); - void Deallocate(void* ptr, size_t size_class); - - void Scavenge(); - - int GetSamplePeriod(); - - // Record allocation of "k" bytes.
Return true iff allocation - // should be sampled - bool SampleAllocation(size_t k); - - static void InitModule(); - static void InitTSD(); - static ThreadCache* GetThreadHeap(); - static ThreadCache* GetCache(); - static ThreadCache* GetCacheIfPresent(); - static ThreadCache* GetCacheWhichMustBePresent(); - static ThreadCache* CreateCacheIfNecessary(); - static void BecomeIdle(); - static void BecomeTemporarilyIdle(); - static size_t MinSizeForSlowPath(); - static void SetMinSizeForSlowPath(size_t size); - static void SetUseEmergencyMalloc(); - static void ResetUseEmergencyMalloc(); - static bool IsUseEmergencyMalloc(); - - static bool IsFastPathAllowed() { return MinSizeForSlowPath() != 0; } - - // Return the number of thread heaps in use. - static inline int HeapsInUse(); - - // Adds to *total_bytes the total number of bytes used by all thread heaps. - // Also, if class_count is not NULL, it must be an array of size kNumClasses, - // and this function will increment each element of class_count by the number - // of items in all thread-local freelists of the corresponding size class. - // REQUIRES: Static::pageheap_lock is held. - static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count); - - // Sets the total thread cache size to new_size, recomputing the - // individual thread cache sizes as necessary. - // REQUIRES: Static::pageheap lock is held. - static void set_overall_thread_cache_size(size_t new_size); - static size_t overall_thread_cache_size() { - return overall_thread_cache_size_; - } - - private: - class FreeList { - private: - void* list_; // Linked list of nodes - -#ifdef _LP64 - // On 64-bit hardware, manipulating 16-bit values may be slightly slow. - uint32_t length_; // Current length. - uint32_t lowater_; // Low water mark for list length. - uint32_t max_length_; // Dynamic max list length based on usage. - // Tracks the number of times a deallocation has caused - // length_ > max_length_. After the kMaxOverages'th time, max_length_ - // shrinks and length_overages_ is reset to zero. - uint32_t length_overages_; -#else - // If we aren't using 64-bit pointers then pack these into less space. - uint16_t length_; - uint16_t lowater_; - uint16_t max_length_; - uint16_t length_overages_; -#endif - - public: - void Init() { - list_ = NULL; - length_ = 0; - lowater_ = 0; - max_length_ = 1; - length_overages_ = 0; - } - - // Return current length of list - size_t length() const { - return length_; - } - - // Return the maximum length of the list. - size_t max_length() const { - return max_length_; - } - - // Set the maximum length of the list. If 'new_max' > length(), the - // client is responsible for removing objects from the list. - void set_max_length(size_t new_max) { - max_length_ = new_max; - } - - // Return the number of times that length() has gone over max_length(). - size_t length_overages() const { - return length_overages_; - } - - void set_length_overages(size_t new_count) { - length_overages_ = new_count; - } - - // Is list empty? 
- bool empty() const { - return list_ == NULL; - } - - // Low-water mark management - int lowwatermark() const { return lowater_; } - void clear_lowwatermark() { lowater_ = length_; } - - void Push(void* ptr) { - SLL_Push(&list_, ptr); - length_++; - } - - void* Pop() { - ASSERT(list_ != NULL); - length_--; - if (length_ < lowater_) lowater_ = length_; - return SLL_Pop(&list_); - } - - void* Next() { - return SLL_Next(&list_); - } - - void PushRange(int N, void *start, void *end) { - SLL_PushRange(&list_, start, end); - length_ += N; - } - - void PopRange(int N, void **start, void **end) { - SLL_PopRange(&list_, N, start, end); - ASSERT(length_ >= N); - length_ -= N; - if (length_ < lowater_) lowater_ = length_; - } - }; - - // Gets and returns an object from the central cache, and, if possible, - // also adds some objects of that size class to this thread cache. - void* FetchFromCentralCache(size_t cl, size_t byte_size); - - // Releases some number of items from src. Adjusts the list's max_length - // to eventually converge on num_objects_to_move(cl). - void ListTooLong(FreeList* src, size_t cl); - - // Releases N items from this thread cache. - void ReleaseToCentralCache(FreeList* src, size_t cl, int N); - - // Increase max_size_ by reducing unclaimed_cache_space_ or by - // reducing the max_size_ of some other thread. In both cases, - // the delta is kStealAmount. - void IncreaseCacheLimit(); - // Same as above but requires Static::pageheap_lock() is held. - void IncreaseCacheLimitLocked(); - - // If TLS is available, we also store a copy of the per-thread object - // in a __thread variable since __thread variables are faster to read - // than pthread_getspecific(). We still need pthread_setspecific() - // because __thread variables provide no way to run cleanup code when - // a thread is destroyed. - // We also give a hint to the compiler to use the "initial exec" TLS - // model. This is faster than the default TLS model, at the cost that - // you cannot dlopen this library. (To see the difference, look at - // the CPU use of __tls_get_addr with and without this attribute.) - // Since we don't really use dlopen in google code -- and using dlopen - // on a malloc replacement is asking for trouble in any case -- that's - // a good tradeoff for us. -#ifdef HAVE_TLS - struct ThreadLocalData { - ThreadCache* heap; - // min_size_for_slow_path is 0 if heap is NULL or kMaxSize + 1 otherwise. - // The latter is the common case and allows allocation to be faster - // than it would be otherwise: typically a single branch will - // determine that the requested allocation is no more than kMaxSize - // and we can then proceed, knowing that global and thread-local tcmalloc - // state is initialized. - size_t min_size_for_slow_path; - - bool use_emergency_malloc; - size_t old_min_size_for_slow_path; - }; - static __thread ThreadLocalData threadlocal_data_ ATTR_INITIAL_EXEC; -#endif - - // Thread-specific key. Initialization here is somewhat tricky - // because some Linux startup code invokes malloc() before it - // is in a good enough state to handle pthread_keycreate(). - // Therefore, we use TSD keys only after tsd_inited is set to true. - // Until then, we use a slow path to get the heap object. - static bool tsd_inited_; - static pthread_key_t heap_key_; - - // Linked list of heap objects. Protected by Static::pageheap_lock. - static ThreadCache* thread_heaps_; - static int thread_heap_count_; - - // A pointer to one of the objects in thread_heaps_. 
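The ATTR_INITIAL_EXEC hint discussed above corresponds to GCC's tls_model attribute: initial-exec TLS resolves the variable's offset at load time, skipping __tls_get_addr on every access, at the price that the containing library can no longer be dlopen()ed. In isolation (attribute spelling per the comment above; the variable is illustrative):

    // "initial exec" TLS model: fast reads, no dlopen of this object
    #define ATTR_INITIAL_EXEC __attribute__((tls_model("initial-exec")))

    static __thread int tls_fast_counter ATTR_INITIAL_EXEC;  // one per thread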
Represents - // the next ThreadCache from which a thread over its max_size_ should - // steal memory limit. Round-robin through all of the objects in - // thread_heaps_. Protected by Static::pageheap_lock. - static ThreadCache* next_memory_steal_; - - // Overall thread cache size. Protected by Static::pageheap_lock. - static size_t overall_thread_cache_size_; - - // Global per-thread cache size. Writes are protected by - // Static::pageheap_lock. Reads are done without any locking, which should be - // fine as long as size_t can be written atomically and we don't place - // invariants between this variable and other pieces of state. - static volatile size_t per_thread_cache_size_; - - // Represents overall_thread_cache_size_ minus the sum of max_size_ - // across all ThreadCaches. Protected by Static::pageheap_lock. - static ssize_t unclaimed_cache_space_; - - // This class is laid out with the most frequently used fields - // first so that hot elements are placed on the same cache line. - - size_t size_; // Combined size of data - size_t max_size_; // size_ > max_size_ --> Scavenge() - - // We sample allocations, biased by the size of the allocation - Sampler sampler_; // A sampler - - FreeList list_[kNumClasses]; // Array indexed by size-class - - pthread_t tid_; // Which thread owns it - bool in_setspecific_; // In call to pthread_setspecific? - - // Allocate a new heap. REQUIRES: Static::pageheap_lock is held. - static ThreadCache* NewHeap(pthread_t tid); - - // Use only as pthread thread-specific destructor function. - static void DestroyThreadCache(void* ptr); - - static void DeleteCache(ThreadCache* heap); - static void RecomputePerThreadCacheSize(); - - // Ensure that this class is cacheline-aligned. This is critical for - // performance, as false sharing would negate many of the benefits - // of a per-thread cache. -} CACHELINE_ALIGNED; - -// Allocator for thread heaps -// This is logically part of the ThreadCache class, but MSVC, at -// least, does not like using ThreadCache as a template argument -// before the class is fully defined. So we put it outside the class. -extern PageHeapAllocator<ThreadCache> threadcache_allocator; - -inline int ThreadCache::HeapsInUse() { - return threadcache_allocator.inuse(); -} - -inline bool ThreadCache::SampleAllocation(size_t k) { -#ifndef NO_TCMALLOC_SAMPLES - return UNLIKELY(FLAGS_tcmalloc_sample_parameter > 0) && sampler_.SampleAllocation(k); -#else - return false; -#endif -} - -inline void* ThreadCache::Allocate(size_t size, size_t cl) { - ASSERT(size <= kMaxSize); - ASSERT(size == Static::sizemap()->ByteSizeForClass(cl)); - - FreeList* list = &list_[cl]; - if (UNLIKELY(list->empty())) { - return FetchFromCentralCache(cl, size); - } - size_ -= size; - return list->Pop(); -} - -inline void ThreadCache::Deallocate(void* ptr, size_t cl) { - FreeList* list = &list_[cl]; - size_ += Static::sizemap()->ByteSizeForClass(cl); - ssize_t size_headroom = max_size_ - size_ - 1; - - // This catches back-to-back frees of allocs in the same size - // class. A more comprehensive (and expensive) test would be to walk - // the entire freelist. But this might be enough to find some bugs. - ASSERT(ptr != list->Next()); - - list->Push(ptr); - ssize_t list_headroom = - static_cast<ssize_t>(list->max_length()) - list->length(); - - // There are two relatively uncommon things that require further work. - // In the common case we're done, and in that case we need a single branch - // because of the bitwise-or trick that follows.
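The "bitwise-or trick" referenced just above: for two signed words, the sign bit of (a | b) is set exactly when at least one of a, b is negative, so both headroom tests collapse into a single compare-and-branch on the hot path. Demonstrated in isolation:

    #include <cassert>

    int main()
    {
        long list_headroom = 5, size_headroom = -3;
        assert((list_headroom | size_headroom) < 0);   // one negative -> slow path taken
        size_headroom = 7;
        assert((list_headroom | size_headroom) >= 0);  // both non-negative -> fast path
    }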
- if (UNLIKELY((list_headroom | size_headroom) < 0)) { - if (list_headroom < 0) { - ListTooLong(list, cl); - } - if (size_ >= max_size_) Scavenge(); - } -} - -inline ThreadCache* ThreadCache::GetThreadHeap() { -#ifdef HAVE_TLS - return threadlocal_data_.heap; -#else - return reinterpret_cast<ThreadCache*>( - perftools_pthread_getspecific(heap_key_)); -#endif -} - -inline ThreadCache* ThreadCache::GetCacheWhichMustBePresent() { -#ifdef HAVE_TLS - ASSERT(threadlocal_data_.heap); - return threadlocal_data_.heap; -#else - ASSERT(perftools_pthread_getspecific(heap_key_)); - return reinterpret_cast<ThreadCache*>( - perftools_pthread_getspecific(heap_key_)); -#endif -} - -inline ThreadCache* ThreadCache::GetCache() { - ThreadCache* ptr = NULL; - if (!tsd_inited_) { - InitModule(); - } else { - ptr = GetThreadHeap(); - } - if (ptr == NULL) ptr = CreateCacheIfNecessary(); - return ptr; -} - -// In deletion paths, we do not try to create a thread-cache. This is -// because we may be in the thread destruction code and may have -// already cleaned up the cache for this thread. -inline ThreadCache* ThreadCache::GetCacheIfPresent() { -#ifndef HAVE_TLS - if (!tsd_inited_) return NULL; -#endif - return GetThreadHeap(); -} - -inline size_t ThreadCache::MinSizeForSlowPath() { -#ifdef HAVE_TLS - return threadlocal_data_.min_size_for_slow_path; -#else - return 0; -#endif -} - -inline void ThreadCache::SetMinSizeForSlowPath(size_t size) { -#ifdef HAVE_TLS - threadlocal_data_.min_size_for_slow_path = size; -#endif -} - -inline void ThreadCache::SetUseEmergencyMalloc() { -#ifdef HAVE_TLS - threadlocal_data_.old_min_size_for_slow_path = threadlocal_data_.min_size_for_slow_path; - threadlocal_data_.min_size_for_slow_path = 0; - threadlocal_data_.use_emergency_malloc = true; -#endif -} - -inline void ThreadCache::ResetUseEmergencyMalloc() { -#ifdef HAVE_TLS - threadlocal_data_.min_size_for_slow_path = threadlocal_data_.old_min_size_for_slow_path; - threadlocal_data_.use_emergency_malloc = false; -#endif -} - -inline bool ThreadCache::IsUseEmergencyMalloc() { -#if defined(HAVE_TLS) && defined(ENABLE_EMERGENCY_MALLOC) - return UNLIKELY(threadlocal_data_.use_emergency_malloc); -#else - return false; -#endif -} - - -} // namespace tcmalloc - -#endif // TCMALLOC_THREAD_CACHE_H_ diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy index fbd8e6f2099..3bd934fa8e1 160000 --- a/contrib/tiflash-proxy +++ b/contrib/tiflash-proxy @@ -1 +1 @@ -Subproject commit fbd8e6f209952d5bb79897d8bce17230acf23495 +Subproject commit 3bd934fa8e1868ffbc29e1c92cda37e0c2049bf7 diff --git a/contrib/tiflash-proxy-cmake/CMakeLists.txt b/contrib/tiflash-proxy-cmake/CMakeLists.txt index e3e2df379a1..cdbeaa6f11d 100644 --- a/contrib/tiflash-proxy-cmake/CMakeLists.txt +++ b/contrib/tiflash-proxy-cmake/CMakeLists.txt @@ -27,6 +27,8 @@ if(TIFLASH_LLVM_TOOLCHAIN AND USE_LIBCXX) endif() set(TIFLASH_RUST_ENV CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} CXXSTDLIB=c++ ${TIFLASH_RUST_ENV} RUSTFLAGS=${TIFLASH_RUSTFLAGS} PROTOC=${Protobuf_PROTOC_EXECUTABLE} PROTOC_INCLUDE=${Protobuf_INCLUDE_DIR}) message(STATUS "Enforce LLVM toolchain for rust") +else() + set(TIFLASH_RUST_ENV PROTOC=${Protobuf_PROTOC_EXECUTABLE} PROTOC_INCLUDE=${Protobuf_INCLUDE_DIR}) endif() message(STATUS "Using rust env for tiflash-proxy: ${TIFLASH_RUST_ENV}") diff --git a/contrib/tipb b/contrib/tipb index 0f4f873beca..3e2483c20a9 160000 --- a/contrib/tipb +++ b/contrib/tipb @@ -1 +1 @@ -Subproject commit 0f4f873beca8d5078dde0a23d15ad5ce3188ed0d +Subproject commit 3e2483c20a9ec3f9ea587c49808c4b66c9716a2c
--git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 0df79f89a84..3e38c2a9fdb 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -80,6 +80,7 @@ add_headers_and_sources(dbms src/Storages/DeltaMerge/FilterParser) add_headers_and_sources(dbms src/Storages/DeltaMerge/File) add_headers_and_sources(dbms src/Storages/DeltaMerge/ColumnFile) add_headers_and_sources(dbms src/Storages/DeltaMerge/Delta) +add_headers_and_sources(dbms src/Storages/DeltaMerge/ReadThread) add_headers_and_sources(dbms src/Storages/Distributed) add_headers_and_sources(dbms src/Storages/Transaction) add_headers_and_sources(dbms src/Storages/Page/V1) diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 54d4238616f..424f7e72052 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -17,8 +17,10 @@ #include #include #include +#include #include + /// Used in the `reserve` method, when the number of rows is known, but sizes of elements are not. #define APPROX_STRING_SIZE 64 @@ -315,58 +317,100 @@ int ColumnString::compareAtWithCollationImpl(size_t n, size_t m, const IColumn & return collator.compare( reinterpret_cast(&chars[offsetAt(n)]), - sizeAt(n), + sizeAt(n) - 1, // Skip last zero byte. reinterpret_cast(&rhs.chars[rhs.offsetAt(m)]), - rhs.sizeAt(m)); + rhs.sizeAt(m) - 1 // Skip last zero byte. + ); } - -template -struct ColumnString::lessWithCollation +// Derived must implement function `int compare(const char *, size_t, const char *, size_t)`. +template +struct ColumnString::LessWithCollation { const ColumnString & parent; - const ICollator & collator; + const Derived & inner; - lessWithCollation(const ColumnString & parent_, const ICollator & collator_) + LessWithCollation(const ColumnString & parent_, const Derived & inner_) : parent(parent_) - , collator(collator_) + , inner(inner_) {} - bool operator()(size_t lhs, size_t rhs) const + FLATTEN_INLINE_PURE inline bool operator()(size_t lhs, size_t rhs) const { - int res = collator.compare( + int res = inner.compare( reinterpret_cast(&parent.chars[parent.offsetAt(lhs)]), - parent.sizeAt(lhs), + parent.sizeAt(lhs) - 1, // Skip last zero byte. reinterpret_cast(&parent.chars[parent.offsetAt(rhs)]), - parent.sizeAt(rhs)); + parent.sizeAt(rhs) - 1 // Skip last zero byte. + ); - return positive ? (res < 0) : (res > 0); + if constexpr (positive) + { + return (res < 0); + } + else + { + return (res > 0); + } } }; -void ColumnString::getPermutationWithCollationImpl(const ICollator & collator, bool reverse, size_t limit, Permutation & res) const +struct Utf8MB4BinCmp { - size_t s = offsets.size(); - res.resize(s); - for (size_t i = 0; i < s; ++i) - res[i] = i; - - if (limit >= s) - limit = 0; + static FLATTEN_INLINE_PURE inline int compare(const char * s1, size_t length1, const char * s2, size_t length2) + { + return DB::BinCollatorCompare(s1, length1, s2, length2); + } +}; - if (limit) +// common util functions +template <> +struct ColumnString::LessWithCollation +{ + // `CollationCmpImpl` must implement function `int compare(const char *, size_t, const char *, size_t)`. 
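Editor's note: the refactor above turns the old `lessWithCollation`'s runtime direction flag into a template parameter, so `if constexpr` resolves the ascending/descending branch at compile time, and it now passes `sizeAt(...) - 1` so the trailing zero byte every ColumnString value stores is excluded from collation. Below is a minimal self-contained sketch of that comparator shape; `binCompare` is a hypothetical byte-wise stand-in for `BinCollatorCompare`, not the TiFlash API.

```cpp
#include <algorithm>
#include <cstring>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

// Byte-wise comparison; stands in for BinCollatorCompare (hypothetical helper).
static int binCompare(const char * s1, size_t n1, const char * s2, size_t n2)
{
    if (int res = std::memcmp(s1, s2, std::min(n1, n2)); res != 0)
        return res < 0 ? -1 : 1;
    return n1 == n2 ? 0 : (n1 < n2 ? -1 : 1);
}

// Same shape as LessWithCollation: the sort direction is a template
// parameter, so `if constexpr` drops the branch at compile time.
template <bool positive>
struct Less
{
    const std::vector<std::string> & data;

    bool operator()(size_t lhs, size_t rhs) const
    {
        int res = binCompare(data[lhs].data(), data[lhs].size(), data[rhs].data(), data[rhs].size());
        if constexpr (positive)
            return res < 0;
        else
            return res > 0;
    }
};

int main()
{
    std::vector<std::string> col{"banana", "apple", "cherry"};
    std::vector<size_t> perm(col.size());
    std::iota(perm.begin(), perm.end(), 0);
    std::sort(perm.begin(), perm.end(), Less<true>{col});
    for (size_t i : perm)
        std::cout << col[i] << '\n'; // apple, banana, cherry
}
```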
+ template + static void getPermutationWithCollationImpl(const ColumnString & src, const CollationCmpImpl & collator_cmp_impl, bool reverse, size_t limit, Permutation & res) { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation(*this, collator)); + size_t s = src.offsets.size(); + res.resize(s); + for (size_t i = 0; i < s; ++i) + res[i] = i; + + if (limit >= s) + limit = 0; + + if (limit) + { + if (reverse) + std::partial_sort(res.begin(), res.begin() + limit, res.end(), LessWithCollation(src, collator_cmp_impl)); + else + std::partial_sort(res.begin(), res.begin() + limit, res.end(), LessWithCollation(src, collator_cmp_impl)); + } else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation(*this, collator)); + { + if (reverse) + std::sort(res.begin(), res.end(), LessWithCollation(src, collator_cmp_impl)); + else + std::sort(res.begin(), res.end(), LessWithCollation(src, collator_cmp_impl)); + } } - else +}; + +void ColumnString::getPermutationWithCollationImpl(const ICollator & collator, bool reverse, size_t limit, Permutation & res) const +{ + using PermutationWithCollationUtils = ColumnString::LessWithCollation; + + // optimize path for default collator `UTF8MB4_BIN` + if (TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_BIN) == &collator) { - if (reverse) - std::sort(res.begin(), res.end(), lessWithCollation(*this, collator)); - else - std::sort(res.begin(), res.end(), lessWithCollation(*this, collator)); + Utf8MB4BinCmp cmp_impl; + PermutationWithCollationUtils::getPermutationWithCollationImpl(*this, cmp_impl, reverse, limit, res); + /// + return; + } + + { + PermutationWithCollationUtils::getPermutationWithCollationImpl(*this, collator, reverse, limit, res); } } @@ -377,35 +421,73 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash, const TiDB::TiDBCollatorP if (hash.getData().size() != s) throw Exception(fmt::format("Size of WeakHash32 does not match size of column: column size is {}, hash size is {}", s, hash.getData().size()), ErrorCodes::LOGICAL_ERROR); - const UInt8 * pos = chars.data(); UInt32 * hash_data = hash.getData().data(); - Offset prev_offset = 0; if (collator != nullptr) { - for (const auto & offset : offsets) + if (collator->getCollatorId() == TiDB::ITiDBCollator::UTF8MB4_BIN) { - auto str_size = offset - prev_offset; - /// Skip last zero byte. - auto sort_key = collator->sortKey(reinterpret_cast(pos), str_size - 1, sort_key_container); - *hash_data = ::updateWeakHash32(reinterpret_cast(sort_key.data), sort_key.size, *hash_data); - - pos += str_size; - prev_offset = offset; + // Skip last zero byte. + LoopOneColumn(chars, offsets, offsets.size(), [&](const std::string_view & view, size_t) { + auto sort_key = BinCollatorSortKey(view.data(), view.size()); + *hash_data = ::updateWeakHash32(reinterpret_cast(sort_key.data), sort_key.size, *hash_data); + ++hash_data; + }); + } + else + { + // Skip last zero byte. + LoopOneColumn(chars, offsets, offsets.size(), [&](const std::string_view & view, size_t) { + auto sort_key = collator->sortKey(view.data(), view.size(), sort_key_container); + *hash_data = ::updateWeakHash32(reinterpret_cast(sort_key.data), sort_key.size, *hash_data); + ++hash_data; + }); + } + } + else + { + // Skip last zero byte. 
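Editor's note: `getPermutationWithCollationImpl` above keeps the original control flow: when a non-zero `limit` smaller than the column is requested, `std::partial_sort` orders only the first `limit` slots of the index permutation; otherwise the whole range is sorted. A standalone sketch of that pattern over plain ints, using the same `limit == 0` means "no limit" convention (helper name is illustrative):

```cpp
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

// Index permutation with the patch's limit convention: limit >= size is
// treated as "no limit" (reset to 0), and a non-zero limit lets
// std::partial_sort order only the first `limit` positions.
std::vector<size_t> sortedPermutation(const std::vector<int> & v, size_t limit)
{
    std::vector<size_t> res(v.size());
    std::iota(res.begin(), res.end(), 0);
    if (limit >= v.size())
        limit = 0;
    auto less = [&](size_t a, size_t b) { return v[a] < v[b]; };
    if (limit)
        std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
    else
        std::sort(res.begin(), res.end(), less);
    return res;
}

int main()
{
    auto perm = sortedPermutation({5, 1, 4, 2}, 2);
    // Only the first two slots are guaranteed ordered: the indices of 1 and 2.
    std::cout << perm[0] << ' ' << perm[1] << '\n'; // 1 3
}
```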
+ LoopOneColumn(chars, offsets, offsets.size(), [&](const std::string_view & view, size_t) { + *hash_data = ::updateWeakHash32(reinterpret_cast(view.data()), view.size(), *hash_data); ++hash_data; + }); + } +} + +void ColumnString::updateHashWithValues(IColumn::HashValues & hash_values, const TiDB::TiDBCollatorPtr & collator, String & sort_key_container) const +{ + if (collator != nullptr) + { + if (collator->getCollatorId() == TiDB::ITiDBCollator::UTF8MB4_BIN) + { + // Skip last zero byte. + LoopOneColumn(chars, offsets, offsets.size(), [&hash_values](const std::string_view & view, size_t i) { + auto sort_key = BinCollatorSortKey(view.data(), view.size()); + size_t string_size = sort_key.size; + hash_values[i].update(reinterpret_cast(&string_size), sizeof(string_size)); + hash_values[i].update(sort_key.data, sort_key.size); + }); + } + else + { + // Skip last zero byte. + LoopOneColumn(chars, offsets, offsets.size(), [&](const std::string_view & view, size_t i) { + auto sort_key = collator->sortKey(view.data(), view.size(), sort_key_container); + size_t string_size = sort_key.size; + hash_values[i].update(reinterpret_cast(&string_size), sizeof(string_size)); + hash_values[i].update(sort_key.data, sort_key.size); + }); } } else { - for (const auto & offset : offsets) + for (size_t i = 0; i < offsets.size(); ++i) { - auto str_size = offset - prev_offset; - /// Skip last zero byte. - *hash_data = ::updateWeakHash32(pos, str_size - 1, *hash_data); + size_t string_size = sizeAt(i); + size_t offset = offsetAt(i); - pos += str_size; - prev_offset = offset; - ++hash_data; + hash_values[i].update(reinterpret_cast(&string_size), sizeof(string_size)); + hash_values[i].update(reinterpret_cast(&chars[offset]), string_size); } } } diff --git a/dbms/src/Columns/ColumnString.h b/dbms/src/Columns/ColumnString.h index 48b02388a6c..2204319e090 100644 --- a/dbms/src/Columns/ColumnString.h +++ b/dbms/src/Columns/ColumnString.h @@ -52,8 +52,8 @@ class ColumnString final : public COWPtrHelper template struct less; - template - struct lessWithCollation; + template + struct LessWithCollation; ColumnString() = default; @@ -118,7 +118,7 @@ class ColumnString final : public COWPtrHelper void insert(const Field & x) override { - const String & s = DB::get(x); + const auto & s = DB::get(x); const size_t old_size = chars.size(); const size_t size_to_append = s.size() + 1; const size_t new_size = old_size + size_to_append; @@ -134,7 +134,7 @@ class ColumnString final : public COWPtrHelper void insertFrom(const IColumn & src_, size_t n) override { - const ColumnString & src = static_cast(src_); + const auto & src = static_cast(src_); if (n != 0) { @@ -213,7 +213,7 @@ class ColumnString final : public COWPtrHelper if (collator != nullptr) { - /// Skip last zero byte. + // Skip last zero byte. auto sort_key = collator->sortKey(reinterpret_cast(src), string_size - 1, sort_key_container); string_size = sort_key.size; src = sort_key.data; @@ -246,7 +246,8 @@ class ColumnString final : public COWPtrHelper size_t offset = offsetAt(n); if (collator != nullptr) { - auto sort_key = collator->sortKey(reinterpret_cast(&chars[offset]), string_size, sort_key_container); + // Skip last zero byte. 
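Editor's note: the relocated `updateHashWithValues` hashes each value as its length followed by its bytes, which keeps adjacent values from bleeding into one another across `update` calls. A sketch of why the length prefix matters, with a toy FNV-1a accumulator standing in for the real hasher (assumed names, not the TiFlash SipHash API):

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Toy FNV-1a accumulator; stands in for the real hasher's update() calls.
struct Hasher
{
    uint64_t h = 1469598103934665603ULL;
    void update(const void * data, size_t len)
    {
        const auto * p = static_cast<const unsigned char *>(data);
        for (size_t i = 0; i < len; ++i)
        {
            h ^= p[i];
            h *= 1099511628211ULL;
        }
    }
};

// Length first, then bytes, mirroring the hash_values[i].update pair:
// without the length prefix, {"ab","c"} and {"a","bc"} would hash alike.
uint64_t hashColumn(const std::vector<std::string> & values)
{
    Hasher hasher;
    for (const auto & s : values)
    {
        size_t string_size = s.size();
        hasher.update(&string_size, sizeof(string_size));
        hasher.update(s.data(), s.size());
    }
    return hasher.h;
}

int main()
{
    std::cout << (hashColumn({"ab", "c"}) != hashColumn({"a", "bc"})) << '\n'; // 1
}
```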
+ auto sort_key = collator->sortKey(reinterpret_cast(&chars[offset]), string_size - 1, sort_key_container); string_size = sort_key.size; hash.update(reinterpret_cast(&string_size), sizeof(string_size)); hash.update(sort_key.data, sort_key.size); @@ -258,33 +259,7 @@ class ColumnString final : public COWPtrHelper } } - void updateHashWithValues(IColumn::HashValues & hash_values, const TiDB::TiDBCollatorPtr & collator, String & sort_key_container) const override - { - if (collator != nullptr) - { - for (size_t i = 0; i < offsets.size(); ++i) - { - size_t string_size = sizeAt(i); - size_t offset = offsetAt(i); - - auto sort_key = collator->sortKey(reinterpret_cast(&chars[offset]), string_size, sort_key_container); - string_size = sort_key.size; - hash_values[i].update(reinterpret_cast(&string_size), sizeof(string_size)); - hash_values[i].update(sort_key.data, sort_key.size); - } - } - else - { - for (size_t i = 0; i < offsets.size(); ++i) - { - size_t string_size = sizeAt(i); - size_t offset = offsetAt(i); - - hash_values[i].update(reinterpret_cast(&string_size), sizeof(string_size)); - hash_values[i].update(reinterpret_cast(&chars[offset]), string_size); - } - } - } + void updateHashWithValues(IColumn::HashValues & hash_values, const TiDB::TiDBCollatorPtr & collator, String & sort_key_container) const override; void updateWeakHash32(WeakHash32 & hash, const TiDB::TiDBCollatorPtr &, String &) const override; @@ -302,7 +277,7 @@ class ColumnString final : public COWPtrHelper int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override { - const ColumnString & rhs = static_cast(rhs_); + const auto & rhs = static_cast(rhs_); const size_t size = sizeAt(n); const size_t rhs_size = rhs.sizeAt(m); diff --git a/dbms/src/Common/TiFlashBuildInfo.cpp b/dbms/src/Common/TiFlashBuildInfo.cpp index f2353256b9b..6841528c065 100644 --- a/dbms/src/Common/TiFlashBuildInfo.cpp +++ b/dbms/src/Common/TiFlashBuildInfo.cpp @@ -60,8 +60,6 @@ std::string getEnabledFeatures() "jemalloc", #elif USE_MIMALLOC "mimalloc", -#elif USE_TCMALLOC - "tcmalloc", #endif // mem-profiling diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index c0ce60af01e..5b627783544 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -205,8 +205,23 @@ namespace DB F(type_thread_hard_limit, {"type", "thread_hard_limit"}), \ F(type_hard_limit_exceeded_count, {"type", "hard_limit_exceeded_count"})) \ M(tiflash_task_scheduler_waiting_duration_seconds, "Bucketed histogram of task waiting for scheduling duration", Histogram, \ - F(type_task_scheduler_waiting_duration, {{"type", "task_waiting_duration"}}, ExpBuckets{0.001, 2, 20})) - + F(type_task_scheduler_waiting_duration, {{"type", "task_waiting_duration"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_storage_read_thread_counter, "The counter of storage read thread", Counter, \ + F(type_sche_no_pool, {"type", "sche_no_pool"}), \ + F(type_sche_no_slot, {"type", "sche_no_slot"}), \ + F(type_sche_no_segment, {"type", "sche_no_segment"}), \ + F(type_sche_from_cache, {"type", "sche_from_cache"}), \ + F(type_sche_new_task, {"type", "sche_new_task"}), \ + F(type_add_cache_succ, {"type", "add_cache_succ"}), \ + F(type_add_cache_stale, {"type", "add_cache_stale"}), \ + F(type_get_cache_miss, {"type", "get_cache_miss"}), \ + F(type_get_cache_part, {"type", "get_cache_part"}), \ + F(type_get_cache_hit, {"type", "get_cache_hit"}), \ + F(type_get_cache_copy, {"type", "add_cache_copy"})) \ + 
M(tiflash_storage_read_thread_gauge, "The gauge of storage read thread", Gauge, \ + F(type_merged_task, {"type", "merged_task"})) \ + M(tiflash_storage_read_thread_seconds, "Bucketed histogram of read thread", Histogram, \ + F(type_merged_task, {{"type", "merged_task"}}, ExpBuckets{0.001, 2, 20})) // clang-format on struct ExpBuckets diff --git a/dbms/src/Common/config_build.cpp.in b/dbms/src/Common/config_build.cpp.in index a9fddd4723a..0ceab9a6d71 100644 --- a/dbms/src/Common/config_build.cpp.in +++ b/dbms/src/Common/config_build.cpp.in @@ -47,8 +47,6 @@ const char * auto_config_build[]{ "@GLIBC_COMPATIBILITY@", "USE_JEMALLOC", "@USE_JEMALLOC@", - "USE_TCMALLOC", - "@USE_TCMALLOC@", "USE_MIMALLOC", "@USE_MIMALLOC@", "USE_UNWIND", diff --git a/dbms/src/Common/tests/allocator_perf.cpp b/dbms/src/Common/tests/allocator_perf.cpp index 2298cc63089..016967f6776 100644 --- a/dbms/src/Common/tests/allocator_perf.cpp +++ b/dbms/src/Common/tests/allocator_perf.cpp @@ -94,13 +94,11 @@ void dump_profile(int type) { fmt::print( R"raw(USE_JEMALLOC={};USE_JEMALLOC_PROF={} -USE_TCMALLOC={} USE_MIMALLOC={} type={} )raw", USE_JEMALLOC, USE_JEMALLOC_PROF, - USE_TCMALLOC, USE_MIMALLOC, type); } diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp index 22545327edd..134645e56e3 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -257,6 +257,8 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery) if (subquery.join) subquery.join->setTotals(profiling_in->getTotals()); } + if (subquery.join) + head_rows = subquery.join->getTotalBuildInputRows(); if (head_rows != 0) { diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp index cd9d6235f52..4d85abcb3b2 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp @@ -195,10 +195,9 @@ void ParallelAggregatingBlockInputStream::Handler::onException(std::exception_pt Int32 old_value = -1; parent.first_exception_index.compare_exchange_strong(old_value, static_cast(thread_num), std::memory_order_seq_cst, std::memory_order_relaxed); - /// can not cancel parent inputStream or the exception might be lost if (!parent.executed) /// use cancel instead of kill to avoid too many useless error message - parent.processor.cancel(false); + parent.cancel(false); } diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index 7bfc67afcad..1e9d2d3628e 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -52,6 +52,7 @@ struct JoinExecuteInfo String build_side_root_executor_id; JoinPtr join_ptr; BlockInputStreams non_joined_streams; + BlockInputStreams join_build_streams; }; using MPPTunnelSetPtr = std::shared_ptr; @@ -133,7 +134,6 @@ class DAGContext , collect_execution_summaries(dag_request->has_collect_execution_summaries() && dag_request->collect_execution_summaries()) , is_mpp_task(false) , is_root_mpp_task(false) - , tunnel_set(nullptr) , flags(dag_request->flags()) , sql_mode(dag_request->sql_mode()) , max_recorded_error_count(getMaxErrorCount(*dag_request)) @@ -153,7 +153,6 @@ class DAGContext , return_executor_id(true) , is_mpp_task(true) , is_root_mpp_task(is_root_mpp_task_) - , tunnel_set(nullptr) , flags(dag_request->flags()) , sql_mode(dag_request->sql_mode()) , 
mpp_task_meta(meta_) @@ -175,7 +174,6 @@ class DAGContext , collect_execution_summaries(false) , is_mpp_task(false) , is_root_mpp_task(false)
- , tunnel_set(nullptr)
, flags(0) , sql_mode(0) , max_recorded_error_count(max_error_count_) @@ -190,7 +188,6 @@ class DAGContext , initialize_concurrency(concurrency) , is_mpp_task(true) , is_root_mpp_task(false)
- , tunnel_set(nullptr)
, log(Logger::get(log_identifier)) , flags(dag_request->flags()) , sql_mode(dag_request->sql_mode()) @@ -343,6 +340,7 @@ class DAGContext bool is_mpp_task = false; bool is_root_mpp_task = false; bool is_batch_cop = false;
+ // `tunnel_set` is always set by `MPPTask` and is intended to be used for `DAGQueryBlockInterpreter`.
MPPTunnelSetPtr tunnel_set; TablesRegionsInfo tables_regions_info; // part of regions_for_local_read + regions_for_remote_read, only used for batch-cop
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index d67a8f6ec52..7fa32c316a1 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -264,6 +264,8 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & recordJoinExecuteInfo(tiflash_join.build_side_index, join_ptr);
+ auto & join_execute_info = dagContext().getJoinExecuteInfoMap()[query_block.source_name];
+
size_t join_build_concurrency = settings.join_concurrent_build ? std::min(max_streams, build_pipeline.streams.size()) : 1; /// build side streams @@ -274,6 +276,7 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & stream = std::make_shared<HashJoinBuildBlockInputStream>(stream, join_ptr, get_concurrency_build_index(), log->identifier()); stream->setExtraInfo( fmt::format("join build, build_side_root_executor_id = {}", dagContext().getJoinExecuteInfoMap()[query_block.source_name].build_side_root_executor_id));
+ join_execute_info.join_build_streams.push_back(stream);
}); // for test, the join executor needs the returned blocks to output.
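Editor's note: with every build-side stream now registered in `join_execute_info.join_build_streams`, the summary code in the next hunk charges the join build with the slowest stream rather than the sum, since the streams run in parallel and overlap in wall time. A trivial sketch of that aggregation:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Build streams run concurrently, so the slowest one bounds the wall time;
// summing their execution times would overstate the build cost.
uint64_t processTimeForBuild(const std::vector<uint64_t> & execution_time_ns)
{
    uint64_t result = 0;
    for (uint64_t t : execution_time_ns)
        result = std::max(result, t);
    return result;
}

int main()
{
    std::cout << processTimeForBuild({120, 450, 300}) << '\n'; // 450
}
```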
executeUnion(build_pipeline, max_streams, log, /*ignore_block=*/!dagContext().isTest(), "for join"); @@ -294,7 +297,6 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & /// add join input stream if (is_tiflash_right_join) {
- auto & join_execute_info = dagContext().getJoinExecuteInfoMap()[query_block.source_name];
size_t not_joined_concurrency = join_ptr->getNotJoinedStreamConcurrency(); for (size_t i = 0; i < not_joined_concurrency; ++i) {
diff --git a/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp b/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp index 33f6d99f9d8..fe4e9c23bca 100644 --- a/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp @@ -120,17 +120,13 @@ void DAGResponseWriter::addExecuteSummaries(tipb::SelectResponse & response, boo auto it = dag_context.getJoinExecuteInfoMap().find(join_executor_id); if (it != dag_context.getJoinExecuteInfoMap().end()) {
- auto build_side_it = dag_context.getProfileStreamsMap().find(it->second.build_side_root_executor_id);
- if (build_side_it != dag_context.getProfileStreamsMap().end())
+ UInt64 process_time_for_build = 0;
+ for (const auto & join_build_stream : it->second.join_build_streams)
{
- UInt64 process_time_for_build = 0;
- for (const auto & join_build_stream : build_side_it->second)
- {
- if (auto * p_stream = dynamic_cast<IProfilingBlockInputStream *>(join_build_stream.get()))
- process_time_for_build = std::max(process_time_for_build, p_stream->getProfileInfo().execution_time);
- }
- current.time_processed_ns += process_time_for_build;
+ if (auto * p_stream = dynamic_cast<IProfilingBlockInputStream *>(join_build_stream.get()); p_stream)
+ process_time_for_build = std::max(process_time_for_build, p_stream->getProfileInfo().execution_time);
}
+ current.time_processed_ns += process_time_for_build;
} } }
diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp index 390ce7b9948..fb4a03f0999 100644 --- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp @@ -599,6 +599,7 @@ std::unordered_map DAGStorageInterpreter::generateSele analyzer->getCurrentInputColumns(), context.getTimezoneInfo()); query_info.req_id = fmt::format("{} Table<{}>", log->identifier(), table_id);
+ query_info.keep_order = table_scan.keepOrder();
return query_info; }; if (table_scan.isPartitionTableScan())
diff --git a/dbms/src/Flash/Coprocessor/TiDBTableScan.cpp b/dbms/src/Flash/Coprocessor/TiDBTableScan.cpp index 7d7ad2f2b57..dca738a2e6c 100644 --- a/dbms/src/Flash/Coprocessor/TiDBTableScan.cpp +++ b/dbms/src/Flash/Coprocessor/TiDBTableScan.cpp @@ -24,6 +24,9 @@ TiDBTableScan::TiDBTableScan( , executor_id(executor_id_) , is_partition_table_scan(table_scan->tp() == tipb::TypePartitionTableScan) , columns(is_partition_table_scan ? table_scan->partition_table_scan().columns() : table_scan->tbl_scan().columns())
+ // Only a non-partitioned table needs to keep order, and only when the table scan executor requires it.
+ // If keep_order is not set, keep order for safety.
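Editor's note: the `keep_order` initializer that follows encodes the rule stated in the comment above: order is kept only for non-partitioned scans, and an absent `keep_order` field defaults to keeping order. A sketch of the truth table, with `std::optional` standing in for the protobuf `has_keep_order()`/`keep_order()` pair (illustrative only, not the tipb API):

```cpp
#include <iostream>
#include <optional>

// An unset field (nullopt) defaults to keeping order; a partitioned scan
// never keeps order regardless of the field.
bool resolveKeepOrder(bool is_partition_table_scan, std::optional<bool> keep_order)
{
    return !is_partition_table_scan && keep_order.value_or(true);
}

int main()
{
    std::cout << resolveKeepOrder(false, std::nullopt) << ' ' // 1: unset => keep order
              << resolveKeepOrder(false, false) << ' '        // 0: explicitly disabled
              << resolveKeepOrder(true, true) << '\n';        // 0: partitioned scan
}
```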
+ , keep_order(!is_partition_table_scan && (table_scan->tbl_scan().keep_order() || !table_scan->tbl_scan().has_keep_order()))
{ if (is_partition_table_scan) {
diff --git a/dbms/src/Flash/Coprocessor/TiDBTableScan.h b/dbms/src/Flash/Coprocessor/TiDBTableScan.h index 6ac07d326f6..a5a463a8ff2 100644 --- a/dbms/src/Flash/Coprocessor/TiDBTableScan.h +++ b/dbms/src/Flash/Coprocessor/TiDBTableScan.h @@ -51,6 +51,10 @@ class TiDBTableScan { return executor_id; }
+ bool keepOrder() const
+ {
+ return keep_order;
+ }
private: const tipb::Executor * table_scan; @@ -66,6 +70,7 @@ class TiDBTableScan /// physical_table_ids contains the table ids of its partitions std::vector<Int64> physical_table_ids; Int64 logical_table_id;
+ bool keep_order;
}; } // namespace DB
diff --git a/dbms/src/Flash/EstablishCall.cpp b/dbms/src/Flash/EstablishCall.cpp index 6dece74d007..32b2a803035 100644 --- a/dbms/src/Flash/EstablishCall.cpp +++ b/dbms/src/Flash/EstablishCall.cpp @@ -86,7 +86,7 @@ void EstablishCallData::initRpc() } if (eptr) {
- state = FINISH;
+ setFinishState("initRpc called");
grpc::Status status(static_cast<grpc::StatusCode>(GRPC_STATUS_UNKNOWN), getExceptionMessage(eptr, false)); responderFinish(status); } @@ -112,9 +112,20 @@ void EstablishCallData::writeErr(const mpp::MPPDataPacket & packet) err_status = grpc::Status(grpc::StatusCode::UNKNOWN, "Write error message failed for unknown reason."); }
-void EstablishCallData::writeDone(const ::grpc::Status & status)
+void EstablishCallData::setFinishState(const String & msg)
{ state = FINISH;
+ if (async_tunnel_sender && !async_tunnel_sender->isConsumerFinished())
+ {
+ async_tunnel_sender->consumerFinish(fmt::format("{}: {}",
+ async_tunnel_sender->getTunnelId(),
+ msg)); //trigger mpp tunnel finish work
+ }
+}
+
+void EstablishCallData::writeDone(const ::grpc::Status & status)
+{
+ setFinishState("writeDone called");
if (stopwatch) { LOG_FMT_INFO(async_tunnel_sender->getLogger(), "connection for {} cost {} ms.", async_tunnel_sender->getTunnelId(), stopwatch->elapsedMilliseconds()); @@ -140,12 +151,7 @@ void EstablishCallData::cancel() void EstablishCallData::finishTunnelAndResponder() {
- state = FINISH;
- if (async_tunnel_sender)
- {
- async_tunnel_sender->consumerFinish(fmt::format("{}: finishTunnelAndResponder called.",
- async_tunnel_sender->getTunnelId())); //trigger mpp tunnel finish work
- }
+ setFinishState("finishTunnelAndResponder called");
grpc::Status status(static_cast<grpc::StatusCode>(GRPC_STATUS_UNKNOWN), "Consumer exits unexpectedly, grpc writes failed."); responder.Finish(status, this); } @@ -174,7 +180,6 @@ void EstablishCallData::proceed() } else if (state == ERR_HANDLE) {
- state = FINISH;
writeDone(err_status); } else
diff --git a/dbms/src/Flash/EstablishCall.h b/dbms/src/Flash/EstablishCall.h index 4a62e5f0c6b..1faa988a49c 100644 --- a/dbms/src/Flash/EstablishCall.h +++ b/dbms/src/Flash/EstablishCall.h @@ -84,6 +84,9 @@ class EstablishCallData : public PacketWriter void finishTunnelAndResponder();
+ // will try to call async_sender's consumerFinish if needed
+ void setFinishState(const String & msg);
+
void responderFinish(const grpc::Status & status); std::mutex mu;
diff --git a/dbms/src/Flash/Mpp/MPPTunnel.cpp b/dbms/src/Flash/Mpp/MPPTunnel.cpp index 582fe3c2796..716d3c0207a 100644 --- a/dbms/src/Flash/Mpp/MPPTunnel.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnel.cpp @@ -349,11 +349,18 @@ void SyncTunnelSender::startSendThread() void AsyncTunnelSender::tryFlushOne() {
+ // When consumer finished, sending work is done already, just return
+ if (consumer_state.msgHasSet())
+ return;
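Editor's note: `tryFlushOne` (whose remaining body follows) and `sendOne` now bail out once the consumer has finished, so late flush or send calls cannot touch a writer whose tunnel is already torn down. A minimal sketch of the guard, assuming an atomic flag in place of `consumer_state.msgHasSet()` (names are illustrative):

```cpp
#include <atomic>
#include <iostream>

// Once the consumer has finished, send/flush calls become no-ops instead
// of touching a writer whose tunnel may already be gone.
struct Sender
{
    std::atomic<bool> msg_has_set{false};
    int writes = 0;

    void consumerFinish() { msg_has_set.store(true); }

    void sendOne()
    {
        if (msg_has_set.load())
            return; // sending work is done already
        ++writes;   // stands in for writer->write(...)
    }
};

int main()
{
    Sender s;
    s.sendOne();
    s.consumerFinish();
    s.sendOne(); // ignored after the consumer finished
    std::cout << s.writes << '\n'; // 1
}
```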
writer->tryFlushOne(); } void AsyncTunnelSender::sendOne() { + // When consumer finished, sending work is done already, just return + if (consumer_state.msgHasSet()) + return; + String err_msg; bool queue_empty_flag = false; try diff --git a/dbms/src/Flash/Statistics/ExecutorStatistics.h b/dbms/src/Flash/Statistics/ExecutorStatistics.h index 4ed5bb02aaa..6b5fb88a971 100644 --- a/dbms/src/Flash/Statistics/ExecutorStatistics.h +++ b/dbms/src/Flash/Statistics/ExecutorStatistics.h @@ -84,10 +84,11 @@ class ExecutorStatistics : public ExecutorStatisticsBase { for (const auto & input_stream : it->second) { - auto * p_stream = dynamic_cast(input_stream.get()); - assert(p_stream); - const auto & profile_info = p_stream->getProfileInfo(); - base.append(profile_info); + if (auto * p_stream = dynamic_cast(input_stream.get()); p_stream) + { + const auto & profile_info = p_stream->getProfileInfo(); + base.append(profile_info); + } } } if constexpr (ExecutorImpl::has_extra_info) diff --git a/dbms/src/Flash/Statistics/JoinImpl.cpp b/dbms/src/Flash/Statistics/JoinImpl.cpp index 35f8c5b40bd..9faf3da16eb 100644 --- a/dbms/src/Flash/Statistics/JoinImpl.cpp +++ b/dbms/src/Flash/Statistics/JoinImpl.cpp @@ -21,13 +21,18 @@ void JoinStatistics::appendExtraJson(FmtBuffer & fmt_buffer) const { fmt_buffer.fmtAppend( R"("hash_table_bytes":{},"build_side_child":"{}",)" - R"("non_joined_outbound_rows":{},"non_joined_outbound_blocks":{},"non_joined_outbound_bytes":{},"non_joined_execution_time_ns":{})", + R"("non_joined_outbound_rows":{},"non_joined_outbound_blocks":{},"non_joined_outbound_bytes":{},"non_joined_execution_time_ns":{},)" + R"("join_build_inbound_rows":{},"join_build_inbound_blocks":{},"join_build_inbound_bytes":{},"join_build_execution_time_ns":{})", hash_table_bytes, build_side_child, non_joined_base.rows, non_joined_base.blocks, non_joined_base.bytes, - non_joined_base.execution_time_ns); + non_joined_base.execution_time_ns, + join_build_base.rows, + join_build_base.blocks, + join_build_base.bytes, + join_build_base.execution_time_ns); } void JoinStatistics::collectExtraRuntimeDetail() @@ -41,10 +46,19 @@ void JoinStatistics::collectExtraRuntimeDetail() build_side_child = join_execute_info.build_side_root_executor_id; for (const auto & non_joined_stream : join_execute_info.non_joined_streams) { - auto * p_stream = dynamic_cast(non_joined_stream.get()); - assert(p_stream); - const auto & profile_info = p_stream->getProfileInfo(); - non_joined_base.append(profile_info); + if (auto * p_stream = dynamic_cast(non_joined_stream.get()); p_stream) + { + const auto & profile_info = p_stream->getProfileInfo(); + non_joined_base.append(profile_info); + } + } + for (const auto & join_build_stream : join_execute_info.join_build_streams) + { + if (auto * p_stream = dynamic_cast(join_build_stream.get()); p_stream) + { + const auto & profile_info = p_stream->getProfileInfo(); + join_build_base.append(profile_info); + } } } } diff --git a/dbms/src/Flash/Statistics/JoinImpl.h b/dbms/src/Flash/Statistics/JoinImpl.h index a9e0ef8418e..3fedb77f2ff 100644 --- a/dbms/src/Flash/Statistics/JoinImpl.h +++ b/dbms/src/Flash/Statistics/JoinImpl.h @@ -44,6 +44,8 @@ class JoinStatistics : public JoinStatisticsBase BaseRuntimeStatistics non_joined_base; + BaseRuntimeStatistics join_build_base; + protected: void appendExtraJson(FmtBuffer &) const override; void collectExtraRuntimeDetail() override; diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp new file mode 
100644 index 00000000000..0cf865b5d20 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -0,0 +1,347 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ + +#define DT DecimalField +#define COL_GROUP2(a, b) \ + { \ + col(types_col_name[a]), col(types_col_name[b]) \ + } +#define COL_PROJ2(a, b) \ + { \ + types_col_name[a], types_col_name[b] \ + } + +class ExecutorAggTestRunner : public DB::tests::ExecutorTest +{ +public: + using ColStringNullableType = std::optional::FieldType>; + using ColInt8NullableType = std::optional::FieldType>; + using ColInt16NullableType = std::optional::FieldType>; + using ColInt32NullableType = std::optional::FieldType>; + using ColInt64NullableType = std::optional::FieldType>; + using ColFloat32NullableType = std::optional::FieldType>; + using ColFloat64NullableType = std::optional::FieldType>; + using ColMyDateNullableType = std::optional::FieldType>; + using ColMyDateTimeNullableType = std::optional::FieldType>; + using ColDecimalNullableType = std::optional::FieldType>; + using ColUInt64Type = typename TypeTraits::FieldType; + + using ColumnWithNullableString = std::vector; + using ColumnWithNullableInt8 = std::vector; + using ColumnWithNullableInt16 = std::vector; + using ColumnWithNullableInt32 = std::vector; + using ColumnWithNullableInt64 = std::vector; + using ColumnWithNullableFloat32 = std::vector; + using ColumnWithNullableFloat64 = std::vector; + using ColumnWithNullableMyDate = std::vector; + using ColumnWithNullableMyDateTime = std::vector; + using ColumnWithNullableDecimal = std::vector; + using ColumnWithUInt64 = std::vector; + + void initializeContext() override + { + ExecutorTest::initializeContext(); + + /// Create table for tests of group by + context.addMockTable(/* name= */ {db_name, table_types}, + /* columnInfos= */ + {{types_col_name[0], TiDB::TP::TypeLong}, + {types_col_name[1], TiDB::TP::TypeDecimal}, + {types_col_name[2], TiDB::TP::TypeTiny}, + {types_col_name[3], TiDB::TP::TypeShort}, + {types_col_name[4], TiDB::TP::TypeLong}, + {types_col_name[5], TiDB::TP::TypeLongLong}, + {types_col_name[6], TiDB::TP::TypeFloat}, + {types_col_name[7], TiDB::TP::TypeDouble}, + {types_col_name[8], TiDB::TP::TypeDate}, + {types_col_name[9], TiDB::TP::TypeDatetime}, + {types_col_name[10], TiDB::TP::TypeString}}, + /* columns= */ + {toNullableVec(types_col_name[0], col_id), + toNullableVec(types_col_name[1], col_decimal), + toNullableVec(types_col_name[2], col_tinyint), + toNullableVec(types_col_name[3], col_smallint), + toNullableVec(types_col_name[4], col_int), + toNullableVec(types_col_name[5], col_bigint), + toNullableVec(types_col_name[6], col_float), + toNullableVec(types_col_name[7], col_double), + toNullableVec(types_col_name[8], col_mydate), + toNullableVec(types_col_name[9], col_mydatetime), + toNullableVec(types_col_name[10], col_string)}); + + /// Create table for tests of aggregation functions + 
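Editor's note: throughout these new tests, nullable columns are modeled as vectors of `std::optional`, where `{}` marks a NULL cell; the mock tables built above and below all follow that convention. A small sketch using the fixture's `col_int` data with plain `std::optional` instead of the `TypeTraits` aliases:

```cpp
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

int main()
{
    // Same convention as the fixture: {} is a NULL cell.
    std::vector<std::optional<int32_t>> col_int{4, {}, {}, 0, 123, -1, -1, 123, 4};
    size_t nulls = 0;
    for (const auto & v : col_int)
        nulls += !v.has_value();
    std::cout << nulls << '\n'; // 2 NULL rows in the sample column
}
```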
context.addMockTable(/* name= */ {db_name, table_name}, + /* columnInfos= */ + {{col_name[0], TiDB::TP::TypeLong}, + {col_name[1], TiDB::TP::TypeString}, + {col_name[2], TiDB::TP::TypeString}, + {col_name[3], TiDB::TP::TypeDouble}}, + /* columns= */ + {toNullableVec(col_name[0], col_age), + toNullableVec(col_name[1], col_gender), + toNullableVec(col_name[2], col_country), + toNullableVec(col_name[3], col_salary)}); + + context.addMockTable({"aggnull_test", "t1"}, + {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}}, + {toNullableVec("s1", {"banana", {}, "banana"}), + toNullableVec("s2", {"apple", {}, "banana"})}); + } + + std::shared_ptr buildDAGRequest(std::pair src, MockAstVec agg_funcs, MockAstVec group_by_exprs, MockColumnNameVec proj) + { + /// We can filter the group by column with project operator. + /// project is applied to get partial aggregation output, so that we can remove redundant outputs and compare results with less handwriting codes. + return context.scan(src.first, src.second).aggregation(agg_funcs, group_by_exprs).project(proj).build(context); + } + + void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) + { + for (size_t i = 1; i <= max_concurrency; i += step) + ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); + } + + static const size_t max_concurrency = 10; + static const size_t step = 2; + + const String db_name{"test_db"}; + + /// Prepare some data and names for tests of group by + const String table_types{"types"}; + const std::vector types_col_name{"id", "decimal_", "tinyint_", "smallint_", "int_", "bigint_", "float_", "double_", "date_", "datetime_", "string_"}; + ColumnWithNullableInt32 col_id{1, 2, 3, 4, 5, 6, 7, 8, 9}; + ColumnWithNullableDecimal col_decimal{DT(55, 1), {}, DT(-24, 1), DT(40, 1), DT(-40, 1), DT(40, 1), {}, DT(55, 1), DT(0, 1)}; + ColumnWithNullableInt8 col_tinyint{1, 2, 3, {}, {}, 0, 0, -1, -2}; + ColumnWithNullableInt16 col_smallint{2, 3, {}, {}, 0, -1, -2, 4, 0}; + ColumnWithNullableInt32 col_int{4, {}, {}, 0, 123, -1, -1, 123, 4}; + ColumnWithNullableInt64 col_bigint{2, 2, {}, 0, -1, {}, -1, 0, 123}; + ColumnWithNullableFloat32 col_float{3.3, {}, 0, 4.0, 3.3, 5.6, -0.1, -0.1, {}}; + ColumnWithNullableFloat64 col_double{0.1, 0, 1.1, 1.1, 1.2, {}, {}, -1.2, -1.2}; + ColumnWithNullableMyDate col_mydate{1000000, 2000000, {}, 300000, 1000000, {}, 0, 2000000, {}}; + ColumnWithNullableMyDateTime col_mydatetime{2000000, 0, {}, 3000000, 1000000, {}, 0, 2000000, 1000000}; + ColumnWithNullableString col_string{{}, "pingcap", "PingCAP", {}, "PINGCAP", "PingCAP", {}, "Shanghai", "Shanghai"}; + + /// Prepare some data and names for aggregation functions + const String table_name{"clerk"}; + const std::vector col_name{"age", "gender", "country", "salary"}; + ColumnWithNullableInt32 col_age{30, {}, 27, 32, 25, 36, {}, 22, 34}; + ColumnWithNullableString col_gender{ + "male", + "female", + "female", + "male", + "female", + "female", + "male", + "female", + "male", + }; + ColumnWithNullableString col_country{"russia", "korea", "usa", "usa", "usa", "china", "china", "china", "china"}; + ColumnWithNullableFloat64 col_salary{1000.1, 1300.2, 0.3, {}, -200.4, 900.5, -999.6, 2000.7, -300.8}; +}; + +/// Guarantee the correctness of group by +TEST_F(ExecutorAggTestRunner, GroupBy) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + std::shared_ptr request; + std::vector group_by_exprs; + std::vector projections; + std::vector expect_cols; + size_t test_num; + + { + /// group by single column 
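Editor's note: in the expected outputs below, NULL acts as a single group-by key, e.g. `select int_ from test_db.types group by int_` yields exactly one `{}` row. A sketch of that semantics over the fixture's `col_int` values; `std::map` is used only because `std::optional` is ordered (nullopt sorts first), whereas the real executor hashes:

```cpp
#include <iostream>
#include <map>
#include <optional>
#include <vector>

int main()
{
    // Mirrors "select int_ from test_db.types group by int_": NULL is one key.
    std::vector<std::optional<int>> col_int{4, {}, {}, 0, 123, -1, -1, 123, 4};
    std::map<std::optional<int>, size_t> groups;
    for (const auto & v : col_int)
        ++groups[v];
    std::cout << groups.size() << '\n'; // 5 groups: NULL, -1, 0, 4, 123
}
```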
+ group_by_exprs = {{col(types_col_name[2])}, {col(types_col_name[3])}, {col(types_col_name[4])}, {col(types_col_name[5])}, {col(types_col_name[6])}, {col(types_col_name[7])}, {col(types_col_name[8])}, {col(types_col_name[9])}, {col(types_col_name[10])}}; + projections = {{types_col_name[2]}, {types_col_name[3]}, {types_col_name[4]}, {types_col_name[5]}, {types_col_name[6]}, {types_col_name[7]}, {types_col_name[8]}, {types_col_name[9]}, {types_col_name[10]}}; + expect_cols = { + {toNullableVec(types_col_name[2], ColumnWithNullableInt8{-1, 2, {}, 0, 1, 3, -2})}, /// select tinyint_ from test_db.types group by tinyint_; + {toNullableVec(types_col_name[3], ColumnWithNullableInt16{-1, 2, -2, {}, 0, 4, 3})}, /// select smallint_ from test_db.types group by smallint_; + {toNullableVec(types_col_name[4], ColumnWithNullableInt32{-1, {}, 4, 0, 123})}, /// select int_ from test_db.types group by int_; + {toNullableVec(types_col_name[5], ColumnWithNullableInt64{2, -1, 0, 123, {}})}, /// select bigint_ from test_db.types group by bigint_; + {toNullableVec(types_col_name[6], ColumnWithNullableFloat32{0, 4, 3.3, {}, 5.6, -0.1})}, /// select float_ from test_db.types group by float_; + {toNullableVec(types_col_name[7], ColumnWithNullableFloat64{0, {}, -1.2, 1.1, 1.2, 0.1})}, /// select double_ from test_db.types group by double_; + {toNullableVec(types_col_name[8], ColumnWithNullableMyDate{{}, 0, 300000, 1000000, 2000000})}, /// select date_ from test_db.types group by date_; + {toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{{}, 0, 1000000, 2000000, 3000000})}, /// select datetime_ from test_db.types group by datetime_; + {toNullableVec(types_col_name[10], ColumnWithNullableString{{}, "pingcap", "PingCAP", "PINGCAP", "Shanghai"})}}; /// select string_ from test_db.types group by string_; + test_num = expect_cols.size(); + ASSERT_EQ(group_by_exprs.size(), test_num); + ASSERT_EQ(projections.size(), test_num); + + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest(std::make_pair(db_name, table_types), {}, group_by_exprs[i], projections[i]); + executeWithConcurrency(request, expect_cols[i]); + } + } + + { + /// group by two columns + group_by_exprs = {COL_GROUP2(2, 6), COL_GROUP2(3, 9), COL_GROUP2(4, 7), COL_GROUP2(5, 10), COL_GROUP2(8, 9), COL_GROUP2(9, 10)}; + projections = {COL_PROJ2(2, 6), COL_PROJ2(3, 9), COL_PROJ2(4, 7), COL_PROJ2(5, 10), COL_PROJ2(8, 9), COL_PROJ2(9, 10)}; + expect_cols = {/// select tinyint_, float_ from test_db.types group by tinyint_, float_; + {toNullableVec(types_col_name[2], ColumnWithNullableInt8{1, 2, {}, 3, 0, 0, -1, {}, -2}), + toNullableVec(types_col_name[6], ColumnWithNullableFloat32{3.3, {}, 4, 0, -0.1, 5.6, -0.1, 3.3, {}})}, + /// select smallint_, datetime_ from test_db.types group by smallint_, datetime_; + {toNullableVec(types_col_name[3], ColumnWithNullableInt16{2, 3, {}, {}, 0, -1, -2, 4}), + toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, {}, 0, 2000000})}, + /// select int_, double_ from test_db.types group by int_, double_; + {toNullableVec(types_col_name[4], ColumnWithNullableInt32{{}, 123, -1, 0, {}, 4, 4, 123}), + toNullableVec(types_col_name[7], ColumnWithNullableFloat64{0, -1.2, {}, 1.1, 1.1, -1.2, 0.1, 1.2})}, + /// select bigint_, string_ from test_db.types group by bigint_, string_; + {toNullableVec(types_col_name[5], ColumnWithNullableInt64{-1, 0, 0, 123, 2, {}, -1, 2}), + toNullableVec(types_col_name[10], ColumnWithNullableString{{}, {}, "Shanghai", "Shanghai", {}, 
"PingCAP", "PINGCAP", "pingcap"})}, + /// select date_, datetime_ from test_db.types group by date_, datetime_; + {toNullableVec(types_col_name[8], ColumnWithNullableMyDate{1000000, 2000000, {}, 300000, 1000000, 0, 2000000, {}}), + toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, 0, 2000000, 1000000})}, + /// select datetime_, string_ from test_db.types group by datetime_, string_; + {toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, 0, 2000000, 1000000}), + toNullableVec(types_col_name[10], ColumnWithNullableString{{}, "pingcap", "PingCAP", {}, "PINGCAP", {}, "Shanghai", "Shanghai"})}}; + test_num = expect_cols.size(); + ASSERT_EQ(group_by_exprs.size(), test_num); + ASSERT_EQ(projections.size(), test_num); + + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest(std::make_pair(db_name, table_types), {}, group_by_exprs[i], projections[i]); + executeWithConcurrency(request, expect_cols[i]); + } + } + + /// TODO type: decimal, enum and unsigned numbers + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(ExecutorAggTestRunner, AggregationMaxAndMin) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + std::shared_ptr request; + auto agg_func0 = Max(col(col_name[0])); /// select max(age) from clerk group by country; + auto agg_func1 = Max(col(col_name[3])); /// select max(salary) from clerk group by country, gender; + + auto group_by_expr0 = col(col_name[2]); + auto group_by_expr10 = col(col_name[2]); + auto group_by_expr11 = col(col_name[1]); + + /// Prepare some data for max function test + std::vector expect_cols{ + {toNullableVec("max(age)", ColumnWithNullableInt32{36, 32, 30, {}})}, + {toNullableVec("max(salary)", ColumnWithNullableFloat64{2000.7, 1300.2, 1000.1, 0.3, -300.8, {}})}}; + std::vector group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}}; + std::vector projections{{"max(age)"}, {"max(salary)"}}; + std::vector agg_funcs{{agg_func0}, {agg_func1}}; + size_t test_num = expect_cols.size(); + + /// Start to test max function + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest(std::make_pair(db_name, table_name), agg_funcs[i], group_by_exprs[i], projections[i]); + executeWithConcurrency(request, expect_cols[i]); + } + + /// Min function tests + + agg_func0 = Min(col(col_name[0])); /// select min(age) from clerk group by country; + agg_func1 = Min(col(col_name[3])); /// select min(salary) from clerk group by country, gender; + + expect_cols = { + {toNullableVec("min(age)", ColumnWithNullableInt32{30, 25, 22, {}})}, + {toNullableVec("min(salary)", ColumnWithNullableFloat64{1300.2, 1000.1, 900.5, -200.4, -999.6, {}})}}; + projections = {{"min(age)"}, {"min(salary)"}}; + agg_funcs = {{agg_func0}, {agg_func1}}; + test_num = expect_cols.size(); + + /// Start to test min function + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest(std::make_pair(db_name, table_name), agg_funcs[i], group_by_exprs[i], projections[i]); + executeWithConcurrency(request, expect_cols[i]); + } + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(ExecutorAggTestRunner, AggregationCount) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + /// Prepare some data + std::shared_ptr request; + auto agg_func0 = Count(col(col_name[0])); /// select count(age) from clerk group by country; + auto agg_func1 = Count(col(col_name[1])); /// select count(gender) from clerk group by country, gender; + std::vector agg_funcs = {{agg_func0}, {agg_func1}}; + + auto group_by_expr0 = 
col(col_name[2]);
+ auto group_by_expr10 = col(col_name[2]);
+ auto group_by_expr11 = col(col_name[1]);
+
+ std::vector expect_cols{
+ {toVec("count(age)", ColumnWithUInt64{3, 3, 1, 0})},
+ {toVec("count(gender)", ColumnWithUInt64{2, 2, 2, 1, 1, 1})}};
+ std::vector group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}};
+ std::vector projections{{"count(age)"}, {"count(gender)"}};
+ size_t test_num = expect_cols.size();
+
+ /// Start to test
+ for (size_t i = 0; i < test_num; ++i)
+ {
+ request = buildDAGRequest(std::make_pair(db_name, table_name), {agg_funcs[i]}, group_by_exprs[i], projections[i]);
+ executeWithConcurrency(request, expect_cols[i]);
+ }
+ WRAP_FOR_DIS_ENABLE_PLANNER_END
+}
+CATCH
+
+TEST_F(ExecutorAggTestRunner, AggNull)
+try
+{
+ WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN
+ auto request = context
+ .scan("aggnull_test", "t1")
+ .aggregation({Max(col("s1"))}, {})
+ .build(context);
+ {
+ ASSERT_COLUMNS_EQ_R(executeStreams(request),
+ createColumns({toNullableVec({"banana"})}));
+ }
+
+ request = context
+ .scan("aggnull_test", "t1")
+ .aggregation({}, {col("s1")})
+ .build(context);
+ {
+ ASSERT_COLUMNS_EQ_R(executeStreams(request),
+ createColumns({toNullableVec("s1", {{}, "banana"})}));
+ }
+ WRAP_FOR_DIS_ENABLE_PLANNER_END
+}
+CATCH
+
+
+// TODO: support more types for min, max, count.
+// support more aggregation functions: sum, first_row, group_concat
+
+} // namespace tests
+} // namespace DB
diff --git a/dbms/src/Flash/tests/gtest_executor.cpp b/dbms/src/Flash/tests/gtest_executor.cpp index ee35af0c03d..b8fac961832 100644 --- a/dbms/src/Flash/tests/gtest_executor.cpp +++ b/dbms/src/Flash/tests/gtest_executor.cpp @@ -33,196 +33,32 @@ class ExecutorTestRunner : public DB::tests::ExecutorTest {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}}, {toNullableVec("s1", {"banana", {}, "banana"}), toNullableVec("s2", {"apple", {}, "banana"})});
-
- context.addExchangeReceiver("exchange_r_table",
- {{"s1", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}},
- {toNullableVec("s", {"banana", "banana"}),
- toNullableVec("join_c", {"apple", "banana"})});
-
- context.addExchangeReceiver("exchange_l_table",
- {{"s1", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}},
- {toNullableVec("s", {"banana", "banana"}),
- toNullableVec("join_c", {"apple", "banana"})});
-
- context.addMockTable({"test_db", "r_table"},
- {{"s", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}},
- {toVec("s", {"banana", "banana"}),
- toVec("join_c", {"apple", "banana"})});
-
- context.addMockTable({"test_db", "r_table_2"},
- {{"s", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}},
- {toVec("s", {"banana", "banana", "banana"}),
- toVec("join_c", {"apple", "apple", "apple"})});
-
- context.addMockTable({"test_db", "l_table"},
- {{"s", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}},
- {toVec("s", {"banana", "banana"}),
- toVec("join_c", {"apple", "banana"})});
} }; TEST_F(ExecutorTestRunner, Filter) try {
- wrapForDisEnablePlanner([&]() {
- auto request = context
- .scan("test_db", "test_table")
- .filter(eq(col("s1"), col("s2")))
- .build(context);
- {
- ASSERT_COLUMNS_EQ_R(executeStreams(request),
- createColumns({toNullableVec({"banana"}),
- toNullableVec({"banana"})}));
- }
-
- request = context.receive("exchange1")
- .filter(eq(col("s1"), col("s2")))
- .build(context);
- {
- ASSERT_COLUMNS_EQ_R(executeStreams(request),
- createColumns({toNullableVec({"banana"}),
- toNullableVec({"banana"})}));
- }
- });
-}
-CATCH
-
-TEST_F(ExecutorTestRunner, 
JoinWithTableScan) -try -{ - wrapForDisEnablePlanner([&]() { - auto request = context - .scan("test_db", "l_table") - .join(context.scan("test_db", "r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) - .topN("join_c", false, 2) - .build(context); - { - String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 2\n" - " Join_2 | LeftOuterJoin, HashJoin. left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" - " table_scan_0 | {<0, String>, <1, String>}\n" - " table_scan_1 | {<0, String>, <1, String>}\n"; - ASSERT_DAGREQUEST_EQAUL(expected, request); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), - createColumns({toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})})); - - ASSERT_COLUMNS_EQ_R(executeStreams(request, 5), - createColumns({toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})})); - - ASSERT_COLUMNS_EQ_R(executeStreams(request), - createColumns({toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})})); - } - request = context - .scan("test_db", "l_table") - .join(context.scan("test_db", "r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) - .project({"s", "join_c"}) - .topN("join_c", false, 2) - .build(context); - { - String expected = "topn_4 | order_by: {(<1, String>, desc: false)}, limit: 2\n" - " project_3 | {<0, String>, <1, String>}\n" - " Join_2 | LeftOuterJoin, HashJoin. left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" - " table_scan_0 | {<0, String>, <1, String>}\n" - " table_scan_1 | {<0, String>, <1, String>}\n"; - ASSERT_DAGREQUEST_EQAUL(expected, request); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), - createColumns({toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})})); - } - - request = context - .scan("test_db", "l_table") - .join(context.scan("test_db", "r_table_2"), {col("join_c")}, ASTTableJoin::Kind::Left) - .topN("join_c", false, 4) - .build(context); - { - String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 4\n" - " Join_2 | LeftOuterJoin, HashJoin. left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" - " table_scan_0 | {<0, String>, <1, String>}\n" - " table_scan_1 | {<0, String>, <1, String>}\n"; - ASSERT_DAGREQUEST_EQAUL(expected, request); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), - createColumns({toNullableVec({"banana", "banana", "banana", "banana"}), - toNullableVec({"apple", "apple", "apple", "banana"}), - toNullableVec({"banana", "banana", "banana", {}}), - toNullableVec({"apple", "apple", "apple", {}})})); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 3), - createColumns({toNullableVec({"banana", "banana", "banana", "banana"}), - toNullableVec({"apple", "apple", "apple", "banana"}), - toNullableVec({"banana", "banana", "banana", {}}), - toNullableVec({"apple", "apple", "apple", {}})})); - } - }); -} -CATCH - -TEST_F(ExecutorTestRunner, JoinWithExchangeReceiver) -try -{ - wrapForDisEnablePlanner([&]() { - auto request = context - .receive("exchange_l_table") - .join(context.receive("exchange_r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) - .topN("join_c", false, 2) - .build(context); - { - String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 2\n" - " Join_2 | LeftOuterJoin, HashJoin. 
left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" - " exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>}\n" - " exchange_receiver_1 | type:PassThrough, {<0, String>, <1, String>}\n"; - ASSERT_DAGREQUEST_EQAUL(expected, request); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), - createColumns({toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})})); - - ASSERT_COLUMNS_EQ_R(executeStreams(request, 5), - createColumns({toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})})); - - ASSERT_COLUMNS_EQ_R(executeStreams(request), - createColumns({toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})})); - } - }); -} -CATCH + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto request = context + .scan("test_db", "test_table") + .filter(eq(col("s1"), col("s2"))) + .build(context); + { + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec({"banana"}), + toNullableVec({"banana"})})); + } -TEST_F(ExecutorTestRunner, JoinWithTableScanAndReceiver) -try -{ - wrapForDisEnablePlanner([&]() { - auto request = context - .scan("test_db", "l_table") - .join(context.receive("exchange_r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) - .topN("join_c", false, 2) - .build(context); - { - String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 2\n" - " Join_2 | LeftOuterJoin, HashJoin. left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" - " table_scan_0 | {<0, String>, <1, String>}\n" - " exchange_receiver_1 | type:PassThrough, {<0, String>, <1, String>}\n"; - ASSERT_DAGREQUEST_EQAUL(expected, request); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), - createColumns({toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})})); - } - }); + request = context.receive("exchange1") + .filter(eq(col("s1"), col("s2"))) + .build(context); + { + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec({"banana"}), + toNullableVec({"banana"})})); + } + WRAP_FOR_DIS_ENABLE_PLANNER_END } CATCH diff --git a/dbms/src/Flash/tests/gtest_join_executor.cpp b/dbms/src/Flash/tests/gtest_join_executor.cpp new file mode 100644 index 00000000000..736f33d15b2 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_join_executor.cpp @@ -0,0 +1,477 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
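Editor's note: the join tests that follow exercise NULL join keys: a NULL key matches nothing on the other side, so such rows survive only as NULL-padded output in outer joins. A toy inner hash join over `std::optional` keys, mirroring the `simple_test.t1`/`t2` data joined on column `a` (it yields the 4 matched rows that `SimpleInnerJoin` expects):

```cpp
#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <vector>

int main()
{
    // t2.a as the build side, t1.a as the probe side, as in SimpleInnerJoin.
    std::vector<std::optional<std::string>> build{"1", "3", {}, "1", {}};
    std::vector<std::optional<std::string>> probe{"1", "2", {}, "1", {}};

    std::multimap<std::string, size_t> hash_table;
    for (size_t i = 0; i < build.size(); ++i)
        if (build[i])
            hash_table.emplace(*build[i], i); // NULL keys never enter the table

    size_t matched_rows = 0;
    for (const auto & key : probe)
        if (key)
            matched_rows += hash_table.count(*key); // NULL probes match nothing
    std::cout << matched_rows << '\n'; // 4 matched rows
}
```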
+ +#include + +#include + +namespace DB +{ +namespace tests +{ +class JoinExecutorTestRunner : public DB::tests::ExecutorTest +{ + static const size_t max_concurrency_level = 10; + +public: + void initializeContext() override + { + ExecutorTest::initializeContext(); + context.addMockTable({"test_db", "test_table"}, + {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}}, + {toNullableVec("s1", {"banana", {}, "banana"}), + toNullableVec("s2", {"apple", {}, "banana"})}); + + context.addMockTable({"test_db", "r_table"}, + {{"s", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}, + {toVec("s", {"banana", "banana"}), + toVec("join_c", {"apple", "banana"})}); + + context.addMockTable({"test_db", "r_table_2"}, + {{"s", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}, + {toVec("s", {"banana", "banana", "banana"}), + toVec("join_c", {"apple", "apple", "apple"})}); + + context.addMockTable({"test_db", "l_table"}, + {{"s", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}, + {toVec("s", {"banana", "banana"}), + toVec("join_c", {"apple", "banana"})}); + + context.addMockTable("simple_test", "t1", {{"a", TiDB::TP::TypeString}, {"b", TiDB::TP::TypeString}}, {toNullableVec("a", {"1", "2", {}, "1", {}}), toNullableVec("b", {"3", "4", "3", {}, {}})}); + + context.addMockTable("simple_test", "t2", {{"a", TiDB::TP::TypeString}, {"b", TiDB::TP::TypeString}}, {toNullableVec("a", {"1", "3", {}, "1", {}}), toNullableVec("b", {"3", "4", "3", {}, {}})}); + + context.addMockTable("multi_test", "t1", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}, {"c", TiDB::TP::TypeLong}}, {toVec("a", {1, 3, 0}), toVec("b", {2, 2, 0}), toVec("c", {3, 2, 0})}); + + context.addMockTable("multi_test", "t2", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}, {"c", TiDB::TP::TypeLong}}, {toVec("a", {3, 3, 0}), toVec("b", {4, 2, 0}), toVec("c", {5, 3, 0})}); + + context.addMockTable("multi_test", "t3", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {1, 2, 0}), toVec("b", {2, 2, 0})}); + + context.addMockTable("multi_test", "t4", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {3, 2, 0}), toVec("b", {4, 2, 0})}); + + context.addMockTable("join_agg", "t1", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {1, 1, 3, 4}), toVec("b", {1, 1, 4, 1})}); + + context.addMockTable("join_agg", "t2", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {1, 4, 2}), toVec("b", {2, 6, 2})}); + + context.addExchangeReceiver("exchange_r_table", + {{"s1", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}, + {toNullableVec("s", {"banana", "banana"}), + toNullableVec("join_c", {"apple", "banana"})}); + + context.addExchangeReceiver("exchange_l_table", + {{"s1", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}, + {toNullableVec("s", {"banana", "banana"}), + toNullableVec("join_c", {"apple", "banana"})}); + } + + std::tuple multiTestScan() + { + return {context.scan("multi_test", "t1"), context.scan("multi_test", "t2"), context.scan("multi_test", "t3"), context.scan("multi_test", "t4")}; + } + + void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) + { + for (size_t i = 1; i <= max_concurrency_level; ++i) + { + ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); + } + } +}; + +TEST_F(JoinExecutorTestRunner, SimpleInnerJoin) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto request = context.scan("simple_test", "t1") + .join(context.scan("simple_test", "t2"), 
{col("a")}, ASTTableJoin::Kind::Inner) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"1", "1", "1", "1"}), toNullableVec({{}, "3", {}, "3"}), toNullableVec({"1", "1", "1", "1"}), toNullableVec({"3", "3", {}, {}})}); + } + + request = context.scan("simple_test", "t2") + .join(context.scan("simple_test", "t1"), {col("a")}, ASTTableJoin::Kind::Inner) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"1", "1", "1", "1"}), toNullableVec({{}, "3", {}, "3"}), toNullableVec({"1", "1", "1", "1"}), toNullableVec({"3", "3", {}, {}})}); + } + + request = context.scan("simple_test", "t1") + .join(context.scan("simple_test", "t2"), {col("b")}, ASTTableJoin::Kind::Inner) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({{}, "1", "2", {}, "1"}), toNullableVec({"3", "3", "4", "3", "3"}), toNullableVec({"1", "1", "3", {}, {}}), toNullableVec({"3", "3", "4", "3", "3"})}); + } + + request = context.scan("simple_test", "t2") + .join(context.scan("simple_test", "t1"), {col("b")}, ASTTableJoin::Kind::Inner) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({{}, "1", "3", {}, "1"}), toNullableVec({"3", "3", "4", "3", "3"}), toNullableVec({"1", "1", "2", {}, {}}), toNullableVec({"3", "3", "4", "3", "3"})}); + } + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, SimpleLeftJoin) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto request = context.scan("simple_test", "t1") + .join(context.scan("simple_test", "t2"), {col("a")}, ASTTableJoin::Kind::Left) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"1", "1", "2", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}}), toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}})}); + } + + request = context.scan("simple_test", "t2") + .join(context.scan("simple_test", "t1"), {col("a")}, ASTTableJoin::Kind::Left) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"1", "1", "3", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}}), toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}})}); + } + + request = context.scan("simple_test", "t1") + .join(context.scan("simple_test", "t2"), {col("b")}, ASTTableJoin::Kind::Left) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"1", "1", "2", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({{}, "1", "3", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}); + } + + request = context.scan("simple_test", "t2") + .join(context.scan("simple_test", "t1"), {col("b")}, ASTTableJoin::Kind::Left) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"1", "1", "3", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({{}, "1", "2", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}); + } + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, SimpleRightJoin) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto request = context + .scan("simple_test", "t1") + .join(context.scan("simple_test", "t2"), {col("a")}, ASTTableJoin::Kind::Right) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}}), toNullableVec({"1", "1", "3", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}})}); + } + + request = context 
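+                       // Repeat with the two tables swapped.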
+ .scan("simple_test", "t2") + .join(context.scan("simple_test", "t1"), {col("a")}, ASTTableJoin::Kind::Right) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}}), toNullableVec({"1", "1", "2", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}})}); + } + + request = context + .scan("simple_test", "t1") + .join(context.scan("simple_test", "t2"), {col("b")}, ASTTableJoin::Kind::Right) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({{}, "1", "2", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({"1", "1", "3", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}); + } + + request = context + .scan("simple_test", "t2") + .join(context.scan("simple_test", "t1"), {col("b")}, ASTTableJoin::Kind::Right) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({{}, "1", "3", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({"1", "1", "2", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}); + } + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, MultiInnerLeftJoin) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto [t1, t2, t3, t4] = multiTestScan(); + auto request = t1.join(t2, {col("a")}, ASTTableJoin::Kind::Inner) + .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Inner), + {col("b")}, + ASTTableJoin::Kind::Left) + .build(context); + + executeWithConcurrency(request, {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}); + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, MultiInnerRightJoin) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto [t1, t2, t3, t4] = multiTestScan(); + auto request = t1.join(t2, {col("a")}, ASTTableJoin::Kind::Inner) + .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Inner), + {col("b")}, + ASTTableJoin::Kind::Right) + .build(context); + + executeWithConcurrency(request, {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}); + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, MultiLeftInnerJoin) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto [t1, t2, t3, t4] = multiTestScan(); + auto request = t1.join(t2, {col("a")}, ASTTableJoin::Kind::Left) + .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Left), + {col("b")}, + ASTTableJoin::Kind::Inner) + .build(context); + + executeWithConcurrency(request, {toNullableVec({1, 1, 3, 3, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({3, 3, 2, 2, 2, 2, 0}), toNullableVec({{}, {}, 3, 3, 3, 3, 0}), toNullableVec({{}, {}, 4, 4, 2, 2, 0}), toNullableVec({{}, {}, 5, 5, 3, 3, 0}), toNullableVec({1, 2, 1, 2, 1, 2, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({{}, 2, {}, 2, {}, 2, 0}), toNullableVec({{}, 2, {}, 2, {}, 2, 0})}); + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, MultiLeftRightJoin) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto [t1, t2, t3, t4] = multiTestScan(); + auto request = t1.join(t2, {col("a")}, 
ASTTableJoin::Kind::Left) + .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Left), + {col("b")}, + ASTTableJoin::Kind::Right) + .build(context); + + executeWithConcurrency(request, {toNullableVec({1, 3, 3, 1, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({3, 2, 2, 3, 2, 2, 0}), toNullableVec({{}, 3, 3, {}, 3, 3, 0}), toNullableVec({{}, 4, 2, {}, 4, 2, 0}), toNullableVec({{}, 5, 3, {}, 5, 3, 0}), toNullableVec({1, 1, 1, 2, 2, 2, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({{}, {}, {}, 2, 2, 2, 0}), toNullableVec({{}, {}, {}, 2, 2, 2, 0})}); + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, MultiRightInnerJoin) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto [t1, t2, t3, t4] = multiTestScan(); + auto request = t1.join(t2, {col("a")}, ASTTableJoin::Kind::Right) + .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Right), + {col("b")}, + ASTTableJoin::Kind::Inner) + .build(context); + + executeWithConcurrency(request, {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}); + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, MultiRightLeftJoin) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto [t1, t2, t3, t4] = multiTestScan(); + auto request = t1.join(t2, {col("a")}, ASTTableJoin::Kind::Right) + .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Right), + {col("b")}, + ASTTableJoin::Kind::Left) + .build(context); + + executeWithConcurrency(request, {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}); + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, JoinCast) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto cast_request = [&]() { + return context.scan("cast", "t1") + .join(context.scan("cast", "t2"), {col("a")}, ASTTableJoin::Kind::Inner) + .build(context); + }; + + /// int(1) == float(1.0) + context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeLong}}, {toVec("a", {1})}); + + context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeFloat}}, {toVec("a", {1.0})}); + + executeWithConcurrency(cast_request(), {toNullableVec({1}), toNullableVec({1.0})}); + + /// int(1) == double(1.0) + context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeLong}}, {toVec("a", {1})}); + + context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeDouble}}, {toVec("a", {1.0})}); + + executeWithConcurrency(cast_request(), {toNullableVec({1}), toNullableVec({1.0})}); + + /// float(1) == double(1.0) + context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeFloat}}, {toVec("a", {1})}); + + context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeDouble}}, {toVec("a", {1})}); + + executeWithConcurrency(cast_request(), {toNullableVec({1}), toNullableVec({1})}); + + /// varchar('x') == char('x') + context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeString}}, {toVec("a", {"x"})}); + + context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeVarchar}}, {toVec("a", {"x"})}); + + executeWithConcurrency(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); + + /// tinyblob('x') == varchar('x') + context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeTinyBlob}}, {toVec("a", {"x"})}); + + 
context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeVarchar}}, {toVec("a", {"x"})}); + + executeWithConcurrency(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); + + /// mediumBlob('x') == varchar('x') + context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeMediumBlob}}, {toVec("a", {"x"})}); + + context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeVarchar}}, {toVec("a", {"x"})}); + + executeWithConcurrency(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); + + /// blob('x') == varchar('x') + context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeBlob}}, {toVec("a", {"x"})}); + + context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeVarchar}}, {toVec("a", {"x"})}); + + executeWithConcurrency(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); + + /// longBlob('x') == varchar('x') + context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeLongBlob}}, {toVec("a", {"x"})}); + + context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeVarchar}}, {toVec("a", {"x"})}); + + executeWithConcurrency(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); + + /// decimal with different scale + context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeNewDecimal}}, {createColumn(std::make_tuple(9, 4), {"0.12"}, "a")}); + + context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeNewDecimal}}, {createColumn(std::make_tuple(9, 3), {"0.12"}, "a")}); + + executeWithConcurrency(cast_request(), {createNullableColumn(std::make_tuple(65, 0), {"0.12"}, {0}), createNullableColumn(std::make_tuple(65, 0), {"0.12"}, {0})}); + + /// datetime(1970-01-01 00:00:01) == timestamp(1970-01-01 00:00:01) + context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeDatetime}}, {createDateTimeColumn({{{1970, 1, 1, 0, 0, 1, 0}}}, 6)}); + + context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeTimestamp}}, {createDateTimeColumn({{{1970, 1, 1, 0, 0, 1, 0}}}, 6)}); + + executeWithConcurrency(cast_request(), {createDateTimeColumn({{{1970, 1, 1, 0, 0, 1, 0}}}, 0), createDateTimeColumn({{{1970, 1, 1, 0, 0, 1, 0}}}, 0)}); + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, JoinAgg) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto request = context.scan("join_agg", "t1") + .join(context.scan("join_agg", "t2"), {col("a")}, ASTTableJoin::Kind::Inner) + .aggregation({Max(col("a")), Min(col("a")), Count(col("a"))}, {col("b")}) + .build(context); + + { + executeWithConcurrency(request, {toNullableVec({4}), toNullableVec({1}), toVec({3}), toNullableVec({1})}); + } + + request = context.scan("join_agg", "t1") + .join(context.scan("join_agg", "t2"), {col("a")}, ASTTableJoin::Kind::Left) + .aggregation({Max(col("a")), Min(col("a")), Count(col("a"))}, {col("b")}) + .build(context); + + { + executeWithConcurrency(request, {toNullableVec({4, 3}), toNullableVec({1, 3}), toVec({3, 1}), toNullableVec({1, 4})}); + } + + request = context.scan("join_agg", "t1") + .join(context.scan("join_agg", "t2"), {col("a")}, ASTTableJoin::Kind::Right) + .aggregation({Max(col("a")), Min(col("a")), Count(col("a"))}, {col("b")}) + .build(context); + + { + executeWithConcurrency(request, {toNullableVec({4, {}}), toNullableVec({1, {}}), toVec({3, 0}), toNullableVec({1, {}})}); + } + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, JoinWithTableScan) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto request = context + .scan("test_db", "l_table") + .join(context.scan("test_db", "r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .build(context); + { + 
executeWithConcurrency(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); + } + + request = context + .scan("test_db", "l_table") + .join(context.scan("test_db", "r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .project({"s", "join_c"}) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); + } + + request = context + .scan("test_db", "l_table") + .join(context.scan("test_db", "r_table_2"), {col("join_c")}, ASTTableJoin::Kind::Left) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"banana", "banana", "banana", "banana"}), toNullableVec({"apple", "apple", "apple", "banana"}), toNullableVec({"banana", "banana", "banana", {}}), toNullableVec({"apple", "apple", "apple", {}})}); + } + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, JoinWithExchangeReceiver) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto request = context + .receive("exchange_l_table") + .join(context.receive("exchange_r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); + } + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +TEST_F(JoinExecutorTestRunner, JoinWithTableScanAndReceiver) +try +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + auto request = context + .scan("test_db", "l_table") + .join(context.receive("exchange_r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .build(context); + { + executeWithConcurrency(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); + } + WRAP_FOR_DIS_ENABLE_PLANNER_END +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/gtest_limit_executor.cpp b/dbms/src/Flash/tests/gtest_limit_executor.cpp index a3ddf341525..b4363c2db2a 100644 --- a/dbms/src/Flash/tests/gtest_limit_executor.cpp +++ b/dbms/src/Flash/tests/gtest_limit_executor.cpp @@ -50,28 +50,28 @@ class ExecutorLimitTestRunner : public DB::tests::ExecutorTest TEST_F(ExecutorLimitTestRunner, Limit) try { - wrapForDisEnablePlanner([&]() { - std::shared_ptr request; - ColumnsWithTypeAndName expect_cols; + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + std::shared_ptr request; + ColumnsWithTypeAndName expect_cols; - /// Check limit result with various parameters - const size_t col_data_num = col0.size(); - for (size_t limit_num = 0; limit_num <= col_data_num + 3; ++limit_num) - { - if (limit_num == col_data_num + 3) - limit_num = INT_MAX; - request = buildDAGRequest(limit_num); + /// Check limit result with various parameters + const size_t col_data_num = col0.size(); + for (size_t limit_num = 0; limit_num <= col_data_num + 3; ++limit_num) + { + if (limit_num == col_data_num + 3) + limit_num = INT_MAX; + request = buildDAGRequest(limit_num); - if (limit_num == 0) - expect_cols = {}; - else if (limit_num > col_data_num) - expect_cols = {toNullableVec(col_name, ColumnWithData(col0.begin(), col0.end()))}; - else - expect_cols = {toNullableVec(col_name, ColumnWithData(col0.begin(), col0.begin() + limit_num))}; + if (limit_num == 0) + expect_cols = {}; + else if (limit_num > col_data_num) + expect_cols = {toNullableVec(col_name, ColumnWithData(col0.begin(), col0.end()))}; + else 
+ expect_cols = {toNullableVec(col_name, ColumnWithData(col0.begin(), col0.begin() + limit_num))}; - ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols); - } - }); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols); + } + WRAP_FOR_DIS_ENABLE_PLANNER_END } CATCH diff --git a/dbms/src/Flash/tests/gtest_projection_executor.cpp b/dbms/src/Flash/tests/gtest_projection_executor.cpp index 65b75bad9ab..9381a1e73dc 100644 --- a/dbms/src/Flash/tests/gtest_projection_executor.cpp +++ b/dbms/src/Flash/tests/gtest_projection_executor.cpp @@ -85,142 +85,142 @@ class ExecutorProjectionTestRunner : public DB::tests::ExecutorTest TEST_F(ExecutorProjectionTestRunner, Projection) try { - wrapForDisEnablePlanner([&]() { - /// Check single column - auto request = buildDAGRequest({col_names[4]}); - executeWithConcurrency(request, {toNullableVec(col_names[4], col4_sorted_asc)}); - - /// Check multi columns - request = buildDAGRequest({col_names[0], col_names[4]}); - executeWithConcurrency(request, - { - toNullableVec(col_names[0], col0_sorted_asc), - toNullableVec(col_names[4], col4_sorted_asc), - }); - - /// Check multi columns - request = buildDAGRequest({col_names[0], col_names[1], col_names[4]}); - executeWithConcurrency(request, - {toNullableVec(col_names[0], col0_sorted_asc), - toNullableVec(col_names[1], col1_sorted_asc), - toNullableVec(col_names[4], col4_sorted_asc)}); - - /// Check duplicate columns - request = buildDAGRequest({col_names[4], col_names[4], col_names[4]}); - executeWithConcurrency(request, - {toNullableVec(col_names[4], col4_sorted_asc), - toNullableVec(col_names[4], col4_sorted_asc), - toNullableVec(col_names[4], col4_sorted_asc)}); + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + /// Check single column + auto request = buildDAGRequest({col_names[4]}); + executeWithConcurrency(request, {toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Check multi columns + request = buildDAGRequest({col_names[0], col_names[4]}); + executeWithConcurrency(request, + { + toNullableVec(col_names[0], col0_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc), + }); + + /// Check multi columns + request = buildDAGRequest({col_names[0], col_names[1], col_names[4]}); + executeWithConcurrency(request, + {toNullableVec(col_names[0], col0_sorted_asc), + toNullableVec(col_names[1], col1_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Check duplicate columns + request = buildDAGRequest({col_names[4], col_names[4], col_names[4]}); + executeWithConcurrency(request, + {toNullableVec(col_names[4], col4_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc)}); + { + /// Check large number of columns + const size_t col_num = 100; + MockColumnNameVec projection_input; + ColumnsWithTypeAndName columns; + auto expect_column = toNullableVec(col_names[4], col4_sorted_asc); + + for (size_t i = 0; i < col_num; ++i) { - /// Check large number of columns - const size_t col_num = 100; - MockColumnNameVec projection_input; - ColumnsWithTypeAndName columns; - auto expect_column = toNullableVec(col_names[4], col4_sorted_asc); - - for (size_t i = 0; i < col_num; ++i) - { - projection_input.push_back(col_names[4]); - columns.push_back(expect_column); - } - - request = buildDAGRequest(projection_input); - executeWithConcurrency(request, columns); + projection_input.push_back(col_names[4]); + columns.push_back(expect_column); } - }); + + request = buildDAGRequest(projection_input); + executeWithConcurrency(request, columns); + } + 
WRAP_FOR_DIS_ENABLE_PLANNER_END } CATCH TEST_F(ExecutorProjectionTestRunner, ProjectionFunction) try { - wrapForDisEnablePlanner([&]() { - std::shared_ptr request; - - /// Test "equal" function - - /// Data type: TypeString - request = buildDAGRequest({eq(col(col_names[0]), col(col_names[0])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - request = buildDAGRequest({eq(col(col_names[0]), col(col_names[1])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 0, 1, 0, {}, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - /// Data type: TypeLong - request = buildDAGRequest({eq(col(col_names[3]), col(col_names[4])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 0, 0, 0, {}, 1, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - - /// Test "greater" function - - /// Data type: TypeString - request = buildDAGRequest({gt(col(col_names[0]), col(col_names[1])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 0, 0, 0, {}, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - request = buildDAGRequest({gt(col(col_names[1]), col(col_names[0])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 1, 0, 1, {}, 1, 1}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - /// Data type: TypeLong - request = buildDAGRequest({gt(col(col_names[3]), col(col_names[4])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 0, 1, 1, {}, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - request = buildDAGRequest({gt(col(col_names[4]), col(col_names[3])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 1, 0, 0, {}, 0, 1}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - - /// Test "and" function - - /// Data type: TypeString - request = buildDAGRequest({And(col(col_names[0]), col(col_names[0])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 0, 0, 0, 0, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - request = buildDAGRequest({And(col(col_names[0]), col(col_names[1])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({0, 0, 0, 0, 0, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - /// Data type: TypeLong - request = buildDAGRequest({And(col(col_names[3]), col(col_names[4])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 1, 0, 0, {}, 1, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - /// Test "not" function - - /// Data type: TypeString - request = buildDAGRequest({NOT(col(col_names[0])), NOT(col(col_names[1])), NOT(col(col_names[2])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), - toNullableVec({1, 1, 1, 1, {}, 1, 1}), - toNullableVec({1, {}, 1, 1, 1, 1, {}}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - /// Data type: TypeLong - request = buildDAGRequest({NOT(col(col_names[3])), NOT(col(col_names[4])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 0, 1, 0, {}, 0, 1}), - toNullableVec({{}, 0, 0, 1, 0, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); - - /// TODO more functions... 
- }); + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + std::shared_ptr request; + + /// Test "equal" function + + /// Data type: TypeString + request = buildDAGRequest({eq(col(col_names[0]), col(col_names[0])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({eq(col(col_names[0]), col(col_names[1])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 0, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({eq(col(col_names[3]), col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, {}, 1, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + + /// Test "greater" function + + /// Data type: TypeString + request = buildDAGRequest({gt(col(col_names[0]), col(col_names[1])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({gt(col(col_names[1]), col(col_names[0])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 1, {}, 1, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({gt(col(col_names[3]), col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 1, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({gt(col(col_names[4]), col(col_names[3])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 0, {}, 0, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + + /// Test "and" function + + /// Data type: TypeString + request = buildDAGRequest({And(col(col_names[0]), col(col_names[0])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({And(col(col_names[0]), col(col_names[1])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({0, 0, 0, 0, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({And(col(col_names[3]), col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 0, {}, 1, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Test "not" function + + /// Data type: TypeString + request = buildDAGRequest({NOT(col(col_names[0])), NOT(col(col_names[1])), NOT(col(col_names[2])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, {}, 1, 1}), + toNullableVec({1, {}, 1, 1, 1, 1, {}}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({NOT(col(col_names[3])), NOT(col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 0, {}, 0, 1}), + toNullableVec({{}, 0, 0, 1, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// TODO more functions... 
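+
+    /// Note: `{}` in the expected columns denotes NULL; the and/not expectations follow
+    /// MySQL three-valued logic, e.g. and(NULL, 0) = 0, and(NULL, non-zero) = NULL, not(NULL) = NULL.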
+ WRAP_FOR_DIS_ENABLE_PLANNER_END } CATCH diff --git a/dbms/src/Flash/tests/gtest_topn_executor.cpp b/dbms/src/Flash/tests/gtest_topn_executor.cpp index a6ba3183118..d2d138e01aa 100644 --- a/dbms/src/Flash/tests/gtest_topn_executor.cpp +++ b/dbms/src/Flash/tests/gtest_topn_executor.cpp @@ -44,7 +44,7 @@ class ExecutorTopNTestRunner : public DB::tests::ExecutorTest {toNullableVec(col_name[0], col_age), toNullableVec(col_name[1], col_gender), toNullableVec(col_name[2], col_country), - toNullableVec(col_name[3], c0l_salary)}); + toNullableVec(col_name[3], col_salary)}); } std::shared_ptr buildDAGRequest(const String & table_name, const String & col_name, bool is_desc, int limit_num) @@ -72,152 +72,152 @@ class ExecutorTopNTestRunner : public DB::tests::ExecutorTest ColumnWithInt32 col_age{{}, 27, 32, 36, {}, 34}; ColumnWithString col_gender{"female", "female", "male", "female", "male", "male"}; ColumnWithString col_country{"korea", "usa", "usa", "china", "china", "china"}; - ColumnWithInt32 c0l_salary{1300, 0, {}, 900, {}, -300}; + ColumnWithInt32 col_salary{1300, 0, {}, 900, {}, -300}; }; TEST_F(ExecutorTopNTestRunner, TopN) try { - wrapForDisEnablePlanner([&]() { - std::shared_ptr request; - std::vector expect_cols; + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + std::shared_ptr request; + std::vector expect_cols; + { + /// Test single column + size_t col_data_num = col0.size(); + for (size_t i = 1; i <= 1; ++i) { - /// Test single column - size_t col_data_num = col0.size(); - for (size_t i = 1; i <= 1; ++i) + bool is_desc; + is_desc = static_cast(i); /// Set descent or ascent + if (is_desc) + sort(col0.begin(), col0.end(), std::greater()); /// Sort col0 for the following comparison + else + sort(col0.begin(), col0.end()); + + for (size_t limit_num = 0; limit_num <= col_data_num + 5; ++limit_num) { - bool is_desc; - is_desc = static_cast(i); /// Set descent or ascent - if (is_desc) - sort(col0.begin(), col0.end(), std::greater()); /// Sort col0 for the following comparison + request = buildDAGRequest(table_single_name, single_col_name, is_desc, limit_num); + + expect_cols.clear(); + if (limit_num == 0 || limit_num > col_data_num) + expect_cols.push_back({toNullableVec(single_col_name, ColumnWithString(col0.begin(), col0.end()))}); else - sort(col0.begin(), col0.end()); - - for (size_t limit_num = 0; limit_num <= col_data_num + 5; ++limit_num) - { - request = buildDAGRequest(table_single_name, single_col_name, is_desc, limit_num); - - expect_cols.clear(); - if (limit_num == 0 || limit_num > col_data_num) - expect_cols.push_back({toNullableVec(single_col_name, ColumnWithString(col0.begin(), col0.end()))}); - else - expect_cols.push_back({toNullableVec(single_col_name, ColumnWithString(col0.begin(), col0.begin() + limit_num))}); - - ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), expect_cols[0]); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 4), expect_cols[0]); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 8), expect_cols[0]); - } + expect_cols.push_back({toNullableVec(single_col_name, ColumnWithString(col0.begin(), col0.begin() + limit_num))}); + + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), expect_cols[0]); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 4), expect_cols[0]); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 8), expect_cols[0]); } } + } + { + /// Test multi-columns + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{36, 34, 32, 27, {}, {}}), + 
toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "male", "female"}), + toNullableVec(col_name[2], ColumnWithString{"china", "china", "usa", "usa", "china", "korea"}), + toNullableVec(col_name[3], ColumnWithInt32{900, -300, {}, 0, {}, 1300})}, + {toNullableVec(col_name[0], ColumnWithInt32{32, {}, 34, 27, 36, {}}), + toNullableVec(col_name[1], ColumnWithString{"male", "male", "male", "female", "female", "female"}), + toNullableVec(col_name[2], ColumnWithString{"usa", "china", "china", "usa", "china", "korea"}), + toNullableVec(col_name[3], ColumnWithInt32{{}, {}, -300, 0, 900, 1300})}, + {toNullableVec(col_name[0], ColumnWithInt32{34, {}, 32, 36, {}, 27}), + toNullableVec(col_name[1], ColumnWithString{"male", "male", "male", "female", "female", "female"}), + toNullableVec(col_name[2], ColumnWithString{"china", "china", "usa", "china", "korea", "usa"}), + toNullableVec(col_name[3], ColumnWithInt32{-300, {}, {}, 900, 1300, 0})}}; + + std::vector order_by_items{ + /// select * from clerk order by age DESC, gender DESC; + {MockOrderByItem(col_name[0], true), MockOrderByItem(col_name[1], true)}, + /// select * from clerk order by gender DESC, salary ASC; + {MockOrderByItem(col_name[1], true), MockOrderByItem(col_name[3], false)}, + /// select * from clerk order by gender DESC, country ASC, salary DESC; + {MockOrderByItem(col_name[1], true), MockOrderByItem(col_name[2], false), MockOrderByItem(col_name[3], true)}}; + + size_t test_num = expect_cols.size(); + + for (size_t i = 0; i < test_num; ++i) { - /// Test multi-columns - expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{36, 34, 32, 27, {}, {}}), - toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "male", "female"}), - toNullableVec(col_name[2], ColumnWithString{"china", "china", "usa", "usa", "china", "korea"}), - toNullableVec(col_name[3], ColumnWithInt32{900, -300, {}, 0, {}, 1300})}, - {toNullableVec(col_name[0], ColumnWithInt32{32, {}, 34, 27, 36, {}}), - toNullableVec(col_name[1], ColumnWithString{"male", "male", "male", "female", "female", "female"}), - toNullableVec(col_name[2], ColumnWithString{"usa", "china", "china", "usa", "china", "korea"}), - toNullableVec(col_name[3], ColumnWithInt32{{}, {}, -300, 0, 900, 1300})}, - {toNullableVec(col_name[0], ColumnWithInt32{34, {}, 32, 36, {}, 27}), - toNullableVec(col_name[1], ColumnWithString{"male", "male", "male", "female", "female", "female"}), - toNullableVec(col_name[2], ColumnWithString{"china", "china", "usa", "china", "korea", "usa"}), - toNullableVec(col_name[3], ColumnWithInt32{-300, {}, {}, 900, 1300, 0})}}; - - std::vector order_by_items{ - /// select * from clerk order by age DESC, gender DESC; - {MockOrderByItem(col_name[0], true), MockOrderByItem(col_name[1], true)}, - /// select * from clerk order by gender DESC, salary ASC; - {MockOrderByItem(col_name[1], true), MockOrderByItem(col_name[3], false)}, - /// select * from clerk order by gender DESC, country ASC, salary DESC; - {MockOrderByItem(col_name[1], true), MockOrderByItem(col_name[2], false), MockOrderByItem(col_name[3], true)}}; - - size_t test_num = expect_cols.size(); - - for (size_t i = 0; i < test_num; ++i) - { - request = buildDAGRequest(table_name, order_by_items[i], 100); - ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[i]); - } + request = buildDAGRequest(table_name, order_by_items[i], 100); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[i]); } - }); + } + WRAP_FOR_DIS_ENABLE_PLANNER_END } CATCH 
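In the multi-column cases above, each MockOrderByItem is a (column, desc) pair, and an order-by vector reads left to right as an ORDER BY list, matching the SQL spelled out in the comments. An illustrative mapping (literal column names substituted for the col_name[] indices):

    // desc = true sorts descending, desc = false ascending:
    MockOrderByItemVec items{MockOrderByItem("gender", /*desc=*/true),
                             MockOrderByItem("salary", /*desc=*/false)};
    // models: SELECT * FROM clerk ORDER BY gender DESC, salary ASC LIMIT 100;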
TEST_F(ExecutorTopNTestRunner, TopNFunction) try { - wrapForDisEnablePlanner([&]() { - std::shared_ptr request; - std::vector expect_cols; - MockColumnNameVec output_projection{col_name[0], col_name[1], col_name[2], col_name[3]}; - MockAstVec func_projection; // Do function operation for topn - MockOrderByItemVec order_by_items; - ASTPtr col0_ast = col(col_name[0]); - ASTPtr col1_ast = col(col_name[1]); - ASTPtr col2_ast = col(col_name[2]); - ASTPtr col3_ast = col(col_name[3]); - ASTPtr func_ast; + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + std::shared_ptr request; + std::vector expect_cols; + MockColumnNameVec output_projection{col_name[0], col_name[1], col_name[2], col_name[3]}; + MockAstVec func_projection; // Do function operation for topn + MockOrderByItemVec order_by_items; + ASTPtr col0_ast = col(col_name[0]); + ASTPtr col1_ast = col(col_name[1]); + ASTPtr col2_ast = col(col_name[2]); + ASTPtr col3_ast = col(col_name[3]); + ASTPtr func_ast; - { - /// "and" function - expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{{}, {}, 32, 27, 36, 34}), - toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "female", "male"}), - toNullableVec(col_name[2], ColumnWithString{"korea", "china", "usa", "usa", "china", "china"}), - toNullableVec(col_name[3], ColumnWithInt32{1300, {}, {}, 0, 900, -300})}}; + { + /// "and" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{{}, {}, 32, 27, 36, 34}), + toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"korea", "china", "usa", "usa", "china", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{1300, {}, {}, 0, 900, -300})}}; - { - /// select * from clerk order by age and salary ASC limit 100; - order_by_items = {MockOrderByItem("and(age, salary)", false)}; - func_ast = And(col(col_name[0]), col(col_name[3])); - func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + { + /// select * from clerk order by age and salary ASC limit 100; + order_by_items = {MockOrderByItem("and(age, salary)", false)}; + func_ast = And(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; - request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); - ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); - } + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); } + } - { - /// "equal" function - expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{27, 36, 34, 32, {}, {}}), - toNullableVec(col_name[1], ColumnWithString{"female", "female", "male", "male", "female", "male"}), - toNullableVec(col_name[2], ColumnWithString{"usa", "china", "china", "usa", "korea", "china"}), - toNullableVec(col_name[3], ColumnWithInt32{0, 900, -300, {}, 1300, {}})}}; + { + /// "equal" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{27, 36, 34, 32, {}, {}}), + toNullableVec(col_name[1], ColumnWithString{"female", "female", "male", "male", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"usa", "china", "china", "usa", "korea", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{0, 900, -300, {}, 1300, {}})}}; - { - /// select age, salary from clerk order by age = salary DESC limit 100; - order_by_items = {MockOrderByItem("equals(age, salary)", true)}; - func_ast = eq(col(col_name[0]), 
col(col_name[3])); - func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + { + /// select age, salary from clerk order by age = salary DESC limit 100; + order_by_items = {MockOrderByItem("equals(age, salary)", true)}; + func_ast = eq(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; - request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); - ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); - } + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); } + } - { - /// "greater" function - expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{{}, 32, {}, 36, 27, 34}), - toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "female", "male"}), - toNullableVec(col_name[2], ColumnWithString{"korea", "usa", "china", "china", "usa", "china"}), - toNullableVec(col_name[3], ColumnWithInt32{1300, {}, {}, 900, 0, -300})}}; + { + /// "greater" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{{}, 32, {}, 36, 27, 34}), + toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"korea", "usa", "china", "china", "usa", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{1300, {}, {}, 900, 0, -300})}}; - { - /// select age, gender, country, salary from clerk order by age > salary ASC limit 100; - order_by_items = {MockOrderByItem("greater(age, salary)", false)}; - func_ast = gt(col(col_name[0]), col(col_name[3])); - func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + { + /// select age, gender, country, salary from clerk order by age > salary ASC limit 100; + order_by_items = {MockOrderByItem("greater(age, salary)", false)}; + func_ast = gt(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; - request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); - ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); - } + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); } + } - /// TODO more functions... - }); + /// TODO more functions... + WRAP_FOR_DIS_ENABLE_PLANNER_END } CATCH diff --git a/dbms/src/Functions/CollationOperatorOptimized.h b/dbms/src/Functions/CollationOperatorOptimized.h index 395ecc5b9eb..8276a41fa17 100644 --- a/dbms/src/Functions/CollationOperatorOptimized.h +++ b/dbms/src/Functions/CollationOperatorOptimized.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -27,42 +28,6 @@ namespace DB { -template -ALWAYS_INLINE inline int signum(T val) -{ - return (0 < val) - (val < 0); -} - -// Check equality is much faster than other comparison. -// - check size first -// - return 0 if equal else 1 -__attribute__((flatten, always_inline, pure)) inline uint8_t RawStrEqualCompare(const std::string_view & lhs, const std::string_view & rhs) -{ - return StringRef(lhs) == StringRef(rhs) ? 
0 : 1; -} - -// Compare str view by memcmp -__attribute__((flatten, always_inline, pure)) inline int RawStrCompare(const std::string_view & v1, const std::string_view & v2) -{ - return signum(v1.compare(v2)); -} - -constexpr char SPACE = ' '; - -// Remove tail space -__attribute__((flatten, always_inline, pure)) inline std::string_view RightTrim(const std::string_view & v) -{ - if (likely(v.empty() || v.back() != SPACE)) - return v; - size_t end = v.find_last_not_of(SPACE); - return end == std::string_view::npos ? std::string_view{} : std::string_view(v.data(), end + 1); -} - -__attribute__((flatten, always_inline, pure)) inline int RtrimStrCompare(const std::string_view & va, const std::string_view & vb) -{ - return RawStrCompare(RightTrim(va), RightTrim(vb)); -} - // If true, only need to check equal or not. template struct IsEqualRelated @@ -83,6 +48,7 @@ struct IsEqualRelated> }; // Loop columns and invoke callback for each pair. +// Remove last zero byte. template __attribute__((flatten, always_inline)) inline void LoopTwoColumns( const ColumnString::Chars_t & a_data, @@ -92,18 +58,29 @@ __attribute__((flatten, always_inline)) inline void LoopTwoColumns( size_t size, F && func) { + ColumnString::Offset a_prev_offset = 0; + ColumnString::Offset b_prev_offset = 0; + const auto * a_ptr = reinterpret_cast(a_data.data()); + const auto * b_ptr = reinterpret_cast(b_data.data()); + for (size_t i = 0; i < size; ++i) { - size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; - size_t b_size = StringUtil::sizeAt(b_offsets, i) - 1; - const auto * a_ptr = reinterpret_cast(&a_data[StringUtil::offsetAt(a_offsets, i)]); - const auto * b_ptr = reinterpret_cast(&b_data[StringUtil::offsetAt(b_offsets, i)]); + auto a_size = a_offsets[i] - a_prev_offset; + auto b_size = b_offsets[i] - b_prev_offset; + + // Remove last zero byte. + func({a_ptr, a_size - 1}, {b_ptr, b_size - 1}, i); + + a_ptr += a_size; + b_ptr += b_size; - func({a_ptr, a_size}, {b_ptr, b_size}, i); + a_prev_offset = a_offsets[i]; + b_prev_offset = b_offsets[i]; } } // Loop one column and invoke callback for each pair. +// Remove last zero byte. template __attribute__((flatten, always_inline)) inline void LoopOneColumn( const ColumnString::Chars_t & a_data, @@ -111,12 +88,18 @@ __attribute__((flatten, always_inline)) inline void LoopOneColumn( size_t size, F && func) { + ColumnString::Offset a_prev_offset = 0; + const auto * a_ptr = reinterpret_cast(a_data.data()); + for (size_t i = 0; i < size; ++i) { - size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; - const auto * a_ptr = reinterpret_cast(&a_data[StringUtil::offsetAt(a_offsets, i)]); + auto a_size = a_offsets[i] - a_prev_offset; + + // Remove last zero byte. + func({a_ptr, a_size - 1}, i); - func({a_ptr, a_size}, i); + a_ptr += a_size; + a_prev_offset = a_offsets[i]; } } diff --git a/dbms/src/Interpreters/AggregationCommon.h b/dbms/src/Interpreters/AggregationCommon.h index 9f390133088..a043760b9af 100644 --- a/dbms/src/Interpreters/AggregationCommon.h +++ b/dbms/src/Interpreters/AggregationCommon.h @@ -171,7 +171,7 @@ static inline T ALWAYS_INLINE packFixed( return key; } - +/* /// Hash a set of keys into a UInt128 value. static inline UInt128 ALWAYS_INLINE hash128( size_t i, @@ -202,7 +202,7 @@ static inline UInt128 ALWAYS_INLINE hash128( return key; } - +*/ /// Almost the same as above but it doesn't return any reference to key data. 
static inline UInt128 ALWAYS_INLINE hash128( @@ -212,7 +212,7 @@ static inline UInt128 ALWAYS_INLINE hash128( const TiDB::TiDBCollators & collators, std::vector & sort_key_containers) { - UInt128 key; + UInt128 key{}; SipHash hash; if (collators.empty()) @@ -252,7 +252,7 @@ static inline StringRef * ALWAYS_INLINE placeKeysInPool( return reinterpret_cast(res); } - +/* /// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first. static inline StringRef * ALWAYS_INLINE extractKeysAndPlaceInPool( size_t i, @@ -326,7 +326,7 @@ inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous( return {res, sum_keys_size}; } - +*/ /** Serialize keys into a continuous chunk of memory. */ diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index 8095fbb0e59..3f6ac940ab4 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -31,16 +31,6 @@ #include -#if USE_TCMALLOC -#include - -/// Initializing malloc extension in global constructor as required. -struct MallocExtensionInitializer -{ - MallocExtensionInitializer() { MallocExtension::Initialize(); } -} malloc_extension_initializer; -#endif - #if USE_JEMALLOC #include #endif @@ -209,32 +199,6 @@ void AsynchronousMetrics::update() set("BlobValidBytes", usage.total_valid_size); } -#if USE_TCMALLOC - { - /// tcmalloc related metrics. Remove if you switch to different allocator. - - MallocExtension & malloc_extension = *MallocExtension::instance(); - - auto malloc_metrics = { - "generic.current_allocated_bytes", - "generic.heap_size", - "tcmalloc.current_total_thread_cache_bytes", - "tcmalloc.central_cache_free_bytes", - "tcmalloc.transfer_cache_free_bytes", - "tcmalloc.thread_cache_free_bytes", - "tcmalloc.pageheap_free_bytes", - "tcmalloc.pageheap_unmapped_bytes", - }; - - for (auto malloc_metric : malloc_metrics) - { - size_t value = 0; - if (malloc_extension.GetNumericProperty(malloc_metric, &value)) - set(malloc_metric, value); - } - } -#endif - #if USE_MIMALLOC #define MI_STATS_SET(X) set("mimalloc." #X, X) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 181ebcaaa64..9c9c9eb92ab 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -788,6 +788,7 @@ bool Join::insertFromBlock(const Block & block) std::unique_lock lock(rwlock); if (unlikely(!initialized)) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); + total_input_build_rows += block.rows(); blocks.push_back(block); Block * stored_block = &blocks.back(); return insertFromBlockInternal(stored_block, 0); @@ -805,6 +806,7 @@ void Join::insertFromBlock(const Block & block, size_t stream_index) Block * stored_block = nullptr; { std::lock_guard lk(blocks_lock); + total_input_build_rows += block.rows(); blocks.push_back(block); stored_block = &blocks.back(); original_blocks.push_back(block); diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 01916aa1dcc..f30479b6f46 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -141,6 +141,8 @@ class Join /// Sum size in bytes of all buffers, used for JOIN maps and for all memory pools. 
size_t getTotalByteCount() const;
 
+    size_t getTotalBuildInputRows() const { return total_input_build_rows; }
+
     ASTTableJoin::Kind getKind() const { return kind; }
 
     bool useNulls() const { return use_nulls; }
@@ -337,6 +339,7 @@ class Join
     SizeLimits limits;
 
     Block totals;
+    std::atomic<size_t> total_input_build_rows{0};
     /** Protect state for concurrent use in insertFromBlock and joinBlock.
      * Note that these methods could be called simultaneously only while use of StorageJoin,
      * and StorageJoin only calls these two methods.
diff --git a/dbms/src/Interpreters/SetVariants.h b/dbms/src/Interpreters/SetVariants.h
index 08ebb5d5100..58f102555b6 100644
--- a/dbms/src/Interpreters/SetVariants.h
+++ b/dbms/src/Interpreters/SetVariants.h
@@ -21,7 +21,7 @@
 #include
 #include
 #include
-#include
+
 namespace DB
{
diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h
index 663a2957511..ecee2ab0671 100644
--- a/dbms/src/Interpreters/Settings.h
+++ b/dbms/src/Interpreters/Settings.h
@@ -317,6 +317,9 @@ struct Settings
     \
     M(SettingDouble, dt_storage_blob_heavy_gc_valid_rate, 0.2, "Max valid rate of deciding a blob can be compact") \
     M(SettingDouble, dt_storage_blob_block_alignment_bytes, 0, "Blob IO alignment size") \
+    M(SettingBool, dt_enable_read_thread, false, "Enable storage read thread or not") \
+    M(SettingDouble, dt_block_slots_scale, 1.0, "Block slots limit of a read request") \
+    M(SettingDouble, dt_active_segments_scale, 1.0, "Active segments limit of a read request") \
     \
     M(SettingChecksumAlgorithm, dt_checksum_algorithm, ChecksumAlgo::XXH3, "Checksum algorithm for delta tree stable storage") \
     M(SettingCompressionMethod, dt_compression_method, CompressionMethod::LZ4, "The method of data compression when writing.") \
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index aabca11cf9c..fa34400d5e3 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -64,6 +64,9 @@
 #include
 #include
 #include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -1335,6 +1338,12 @@ int Server::main(const std::vector<std::string> & /*args*/)
         global_context->getTMTContext().reloadConfig(config());
     }
 
+    // Initialize the thread pool of storage before the storage engine is initialized.
+    LOG_FMT_INFO(log, "dt_enable_read_thread {}", global_context->getSettingsRef().dt_enable_read_thread);
+    DM::SegmentReaderPoolManager::instance().init(server_info);
+    DM::SegmentReadTaskScheduler::instance();
+    DM::DMFileReaderPool::instance();
+
     {
         // Note that this must do before initialize schema sync service.
         do
diff --git a/dbms/src/Server/main.cpp b/dbms/src/Server/main.cpp
index dbcaa4f38fc..83629eda1ae 100644
--- a/dbms/src/Server/main.cpp
+++ b/dbms/src/Server/main.cpp
@@ -25,10 +25,6 @@
 #include <utility> /// pair
 #include
-#if USE_TCMALLOC
-#include <gperftools/malloc_extension.h>
-#endif
-
 #if ENABLE_CLICKHOUSE_SERVER
 #include "Server.h"
 #endif
@@ -154,14 +150,6 @@ bool isClickhouseApp(const std::string & app_suffix, std::vector<std::string> & argv)
 int main(int argc_, char ** argv_)
{
-#if USE_TCMALLOC
-    /** Without this option, tcmalloc returns memory to OS too frequently for medium-sized memory allocations
-     * (like IO buffers, column vectors, hash tables, etc.),
-     * that lead to page faults and significantly hurts performance.
- */ - MallocExtension::instance()->SetNumericProperty("tcmalloc.aggressive_memory_decommit", false); -#endif - std::vector argv(argv_, argv_ + argc_); /// Print a basic help if nothing was matched diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index 3328f60b15d..da4372670be 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -91,6 +91,7 @@ void ColumnFileBigReader::initStream() .setTracingID(context.tracing_id) .build(column_file.getFile(), *col_defs, RowKeyRanges{column_file.segment_range}); + header = file_stream->getHeader(); // If we only need to read pk and version columns, then cache columns data in memory. if (pk_ver_only) { @@ -223,7 +224,20 @@ Block ColumnFileBigReader::readNextBlock() { initStream(); - return file_stream->read(); + if (pk_ver_only) + { + if (next_block_index_in_cache >= cached_pk_ver_columns.size()) + { + return {}; + } + auto & columns = cached_pk_ver_columns[next_block_index_in_cache]; + next_block_index_in_cache += 1; + return header.cloneWithColumns(std::move(columns)); + } + else + { + return file_stream->read(); + } } ColumnFileReaderPtr ColumnFileBigReader::createNewReader(const ColumnDefinesPtr & new_col_defs) diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.h b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.h index dba7eca7247..44302399960 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.h +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.h @@ -104,6 +104,8 @@ class ColumnFileBigReader : public ColumnFileReader const ColumnFileBig & column_file; const ColumnDefinesPtr col_defs; + Block header; + bool pk_ver_only; DMFileBlockInputStreamPtr file_stream; @@ -112,6 +114,7 @@ class ColumnFileBigReader : public ColumnFileReader // we cache them to minimize the cost. std::vector cached_pk_ver_columns; std::vector cached_block_rows_end; + size_t next_block_index_in_cache = 0; // The data members for reading all columns, but can only read once. size_t rows_before_cur_block = 0; @@ -131,7 +134,24 @@ class ColumnFileBigReader : public ColumnFileReader , column_file(column_file_) , col_defs(col_defs_) { - pk_ver_only = col_defs->size() <= 2; + if (col_defs_->size() == 1) + { + if ((*col_defs)[0].id == EXTRA_HANDLE_COLUMN_ID) + { + pk_ver_only = true; + } + } + else if (col_defs_->size() == 2) + { + if ((*col_defs)[0].id == EXTRA_HANDLE_COLUMN_ID && (*col_defs)[1].id == VERSION_COLUMN_ID) + { + pk_ver_only = true; + } + } + else + { + pk_ver_only = false; + } } size_t readRows(MutableColumns & output_cols, size_t rows_offset, size_t rows_limit, const RowKeyRange * range) override; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index d34856ec32f..27062c2c108 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include #include @@ -39,10 +41,6 @@ #include #include -#if USE_TCMALLOC -#include -#endif - namespace ProfileEvents { extern const Event DMWriteBlock; @@ -469,11 +467,6 @@ void DeltaMergeStore::drop() // Drop data in storage path pool path_pool.drop(/*recursive=*/true, /*must_success=*/false); LOG_FMT_INFO(log, "Drop DeltaMerge done [{}.{}]", db_name, table_name); - -#if USE_TCMALLOC - // Reclaim memory. 
- MallocExtension::instance()->ReleaseFreeMemory(); -#endif } void DeltaMergeStore::shutdown() @@ -982,6 +975,8 @@ void DeltaMergeStore::deleteRange(const Context & db_context, const DB::Settings bool DeltaMergeStore::flushCache(const DMContextPtr & dm_context, const RowKeyRange & range, bool try_until_succeed) { + size_t sleep_ms = 5; + RowKeyRange cur_range = range; while (!cur_range.none()) { @@ -1005,7 +1000,6 @@ bool DeltaMergeStore::flushCache(const DMContextPtr & dm_context, const RowKeyRa } segment_range = segment->getRowKeyRange(); - // Flush could fail. if (segment->flushCache(*dm_context)) { break; @@ -1014,6 +1008,13 @@ bool DeltaMergeStore::flushCache(const DMContextPtr & dm_context, const RowKeyRa { return false; } + + // Flush could fail. Typical cases: + // #1. The segment is abandoned (due to an update is finished) + // #2. There is another flush in progress, for example, triggered in background + // Let's sleep 5ms ~ 100ms and then retry flush again. + std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms)); + sleep_ms = std::min(sleep_ms * 2, 100); } cur_range.setStart(segment_range.end); @@ -1047,6 +1048,8 @@ std::optional DeltaMergeStore::mergeDeltaBySegment(const Contex auto dm_context = newDMContext(context, context.getSettingsRef(), /*tracing_id*/ fmt::format("mergeDeltaBySegment_{}", latest_gc_safe_point.load(std::memory_order_relaxed))); + size_t sleep_ms = 50; + while (true) { SegmentPtr segment; @@ -1060,25 +1063,33 @@ std::optional DeltaMergeStore::mergeDeltaBySegment(const Contex segment = segment_it->second; } - if (!segment->flushCache(*dm_context)) - { - // If the flush failed, it means there are parallel updates to the segment in the background. - // In this case, we try again. - continue; - } - - const auto new_segment = segmentMergeDelta(*dm_context, segment, run_thread); - if (new_segment) + if (segment->flushCache(*dm_context)) { - const auto segment_end = new_segment->getRowKeyRange().end; - if (unlikely(*segment_end.value <= *start_key.value)) + const auto new_segment = segmentMergeDelta(*dm_context, segment, run_thread); + if (new_segment) { - // The next start key must be > current start key - LOG_FMT_ERROR(log, "Assert new_segment.end {} > start {} failed", segment_end.toDebugString(), start_key.toDebugString()); - throw Exception("Assert segment range failed", ErrorCodes::LOGICAL_ERROR); - } - return new_segment->getRowKeyRange(); - } + const auto segment_end = new_segment->getRowKeyRange().end; + if (unlikely(*segment_end.value <= *start_key.value)) + { + // The next start key must be > current start key + LOG_FMT_ERROR(log, "Assert new_segment.end {} > start {} failed", segment_end.toDebugString(), start_key.toDebugString()); + throw Exception("Assert segment range failed", ErrorCodes::LOGICAL_ERROR); + } + return new_segment->getRowKeyRange(); + } // else: sleep and retry + } // else: sleep and retry + + // Typical cases: + // #1. flushCache failed + // - The segment is abandoned (due to segment updated) + // - There is another flush in progress (e.g. triggered in background) + // #2. segmentMergeDelta failed + // - The segment is abandoned (due to segment updated) + // - The segment is updating (e.g. a split-preparation is working, which occupies a for-write snapshot). + // It could be possible to take seconds to finish the segment updating, so let's sleep for a short time + // (50ms ~ 1000ms) and then retry. 
+ std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms)); + sleep_ms = std::min(sleep_ms * 2, 1000); } } @@ -1126,13 +1137,15 @@ BlockInputStreams DeltaMergeStore::readRaw(const Context & db_context, const DB::Settings & db_settings, const ColumnDefines & columns_to_read, size_t num_streams, + bool keep_order, const SegmentIdSet & read_segments, size_t extra_table_id_index) { SegmentReadTasks tasks; auto dm_context = newDMContext(db_context, db_settings, fmt::format("read_raw_{}", db_context.getCurrentQueryId())); - + // If keep order is required, disable read threads. + auto enable_read_thread = db_context.getSettingsRef().dt_enable_read_thread && !keep_order; { std::shared_lock lock(read_write_mutex); @@ -1164,7 +1177,17 @@ BlockInputStreams DeltaMergeStore::readRaw(const Context & db_context, this->checkSegmentUpdate(dm_context_, segment_, ThreadType::Read); }; size_t final_num_stream = std::min(num_streams, tasks.size()); - auto read_task_pool = std::make_shared<SegmentReadTaskPool>(std::move(tasks)); + auto read_task_pool = std::make_shared<SegmentReadTaskPool>( + physical_table_id, + dm_context, + columns_to_read, + EMPTY_FILTER, + std::numeric_limits<UInt64>::max(), + DEFAULT_BLOCK_SIZE, + /* is_raw = */ true, + /* do_delete_mark_filter_for_raw = */ false, + std::move(tasks), + after_segment_read); String req_info; if (db_context.getDAGContext() != nullptr && db_context.getDAGContext()->isMPPTask()) @@ -1172,21 +1195,38 @@ BlockInputStreams DeltaMergeStore::readRaw(const Context & db_context, BlockInputStreams res; for (size_t i = 0; i < final_num_stream; ++i) { - BlockInputStreamPtr stream = std::make_shared<DMSegmentThreadInputStream>( - dm_context, - read_task_pool, - after_segment_read, - columns_to_read, - EMPTY_FILTER, - std::numeric_limits<UInt64>::max(), - DEFAULT_BLOCK_SIZE, - /* is_raw_ */ true, - /* do_delete_mark_filter_for_raw_ */ false, // don't do filter based on del_mark = 1 - extra_table_id_index, - physical_table_id, - req_info); + BlockInputStreamPtr stream; + if (enable_read_thread) + { + stream = std::make_shared<UnorderedInputStream>( + read_task_pool, + columns_to_read, + extra_table_id_index, + physical_table_id, + req_info); + } + else + { + stream = std::make_shared<DMSegmentThreadInputStream>( + dm_context, + read_task_pool, + after_segment_read, + columns_to_read, + EMPTY_FILTER, + std::numeric_limits<UInt64>::max(), + DEFAULT_BLOCK_SIZE, + /* is_raw_ */ true, + /* do_delete_mark_filter_for_raw_ */ false, // don't do filter based on del_mark = 1 + extra_table_id_index, + physical_table_id, + req_info); + } res.push_back(stream); } + if (enable_read_thread) + { + SegmentReadTaskScheduler::instance().add(read_task_pool); + } return res; } @@ -1198,6 +1238,7 @@ BlockInputStreams DeltaMergeStore::read(const Context & db_context, UInt64 max_version, const RSOperatorPtr & filter, const String & tracing_id, + bool keep_order, bool is_fast_mode, size_t expected_block_size, const SegmentIdSet & read_segments, @@ -1205,11 +1246,19 @@ BlockInputStreams DeltaMergeStore::read(const Context & db_context, { // Use the id from MPP/Coprocessor level as tracing_id auto dm_context = newDMContext(db_context, db_settings, tracing_id); - - SegmentReadTasks tasks = getReadTasksByRanges(*dm_context, sorted_ranges, num_streams, read_segments); + // If keep order is required, disable read threads. + auto enable_read_thread = db_context.getSettingsRef().dt_enable_read_thread && !keep_order; + // SegmentReadTaskScheduler and SegmentReadTaskPool use table_id + segment id as the unique ID when read threads are enabled. + // 'try_split_task' can result in several read tasks with the same id, which can cause trouble.
+ // Also, too many read tasks of a segment with different small ranges are not good for the data-sharing cache. + SegmentReadTasks tasks = getReadTasksByRanges(*dm_context, sorted_ranges, num_streams, read_segments, /*try_split_task =*/!enable_read_thread); auto tracing_logger = Logger::get(log->name(), dm_context->tracing_id); - LOG_FMT_DEBUG(tracing_logger, "Read create segment snapshot done"); + LOG_FMT_DEBUG(tracing_logger, + "Read create segment snapshot done keep_order {} dt_enable_read_thread {} => enable_read_thread {}", + keep_order, + db_context.getSettingsRef().dt_enable_read_thread, + enable_read_thread); auto after_segment_read = [&](const DMContextPtr & dm_context_, const SegmentPtr & segment_) { // TODO: Update the tracing_id before checkSegmentUpdate? this->checkSegmentUpdate(dm_context_, segment_, ThreadType::Read); }; GET_METRIC(tiflash_storage_read_tasks_count).Increment(tasks.size()); size_t final_num_stream = std::max<size_t>(1, std::min(num_streams, tasks.size())); - auto read_task_pool = std::make_shared<SegmentReadTaskPool>(std::move(tasks)); + auto read_task_pool = std::make_shared<SegmentReadTaskPool>( + physical_table_id, + dm_context, + columns_to_read, + filter, + max_version, + expected_block_size, + /* is_raw = */ is_fast_mode, + /* do_delete_mark_filter_for_raw = */ is_fast_mode, + std::move(tasks), + after_segment_read); String req_info; if (db_context.getDAGContext() != nullptr && db_context.getDAGContext()->isMPPTask()) @@ -1226,22 +1285,38 @@ BlockInputStreams res; for (size_t i = 0; i < final_num_stream; ++i) { - BlockInputStreamPtr stream = std::make_shared<DMSegmentThreadInputStream>( - dm_context, - read_task_pool, - after_segment_read, - columns_to_read, - filter, - max_version, - expected_block_size, - /* is_raw_ */ is_fast_mode, - /* do_delete_mark_filter_for_raw_ */ is_fast_mode, - extra_table_id_index, - physical_table_id, - req_info); + BlockInputStreamPtr stream; + if (enable_read_thread) + { + stream = std::make_shared<UnorderedInputStream>( + read_task_pool, + columns_to_read, + extra_table_id_index, + physical_table_id, + req_info); + } + else + { + stream = std::make_shared<DMSegmentThreadInputStream>( + dm_context, + read_task_pool, + after_segment_read, + columns_to_read, + filter, + max_version, + expected_block_size, + /* is_raw_ */ is_fast_mode, + /* do_delete_mark_filter_for_raw_ */ is_fast_mode, + extra_table_id_index, + physical_table_id, + req_info); + } res.push_back(stream); } - + if (enable_read_thread) + { + SegmentReadTaskScheduler::instance().add(read_task_pool); + } LOG_FMT_DEBUG(tracing_logger, "Read create stream done"); return res; @@ -2623,7 +2698,8 @@ SegmentReadTasks DeltaMergeStore::getReadTasksByRanges( DMContext & dm_context, const RowKeyRanges & sorted_ranges, size_t expected_tasks_count, - const SegmentIdSet & read_segments) + const SegmentIdSet & read_segments, + bool try_split_task) { SegmentReadTasks tasks; @@ -2678,12 +2754,15 @@ SegmentReadTasks DeltaMergeStore::getReadTasksByRanges( ++seg_it; } } - - /// Try to make task number larger or equal to expected_tasks_count. - auto result_tasks = SegmentReadTask::trySplitReadTasks(tasks, expected_tasks_count); + auto tasks_before_split = tasks.size(); + if (try_split_task) + { + /// Try to make the task number larger than or equal to expected_tasks_count. + tasks = SegmentReadTask::trySplitReadTasks(tasks, expected_tasks_count); + } size_t total_ranges = 0; - for (auto & task : result_tasks) + for (auto & task : tasks) { /// Merge continuously ranges.
task->mergeRanges(); @@ -2695,11 +2774,11 @@ SegmentReadTasks DeltaMergeStore::getReadTasksByRanges( tracing_logger, "[sorted_ranges: {}] [tasks before split: {}] [tasks final: {}] [ranges final: {}]", sorted_ranges.size(), + tasks_before_split, tasks.size(), - result_tasks.size(), total_ranges); - return result_tasks; + return tasks; } } // namespace DM diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index bd7a5bb542a..62d28e4743b 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -350,6 +350,7 @@ class DeltaMergeStore : private boost::noncopyable const DB::Settings & db_settings, const ColumnDefines & columns_to_read, size_t num_streams, + bool keep_order, const SegmentIdSet & read_segments = {}, size_t extra_table_id_index = InvalidColumnID); @@ -366,6 +367,7 @@ class DeltaMergeStore : private boost::noncopyable UInt64 max_version, const RSOperatorPtr & filter, const String & tracing_id, + bool keep_order, bool is_fast_mode = false, // set true when read in fast mode size_t expected_block_size = DEFAULT_BLOCK_SIZE, const SegmentIdSet & read_segments = {}, @@ -493,7 +495,8 @@ class DeltaMergeStore : private boost::noncopyable SegmentReadTasks getReadTasksByRanges(DMContext & dm_context, const RowKeyRanges & sorted_ranges, size_t expected_tasks_count = 1, - const SegmentIdSet & read_segments = {}); + const SegmentIdSet & read_segments = {}, + bool try_split_task = true); private: void dropAllSegments(bool keep_first_segment); diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp index 56b11b975a1..196a300688b 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp @@ -56,6 +56,8 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build(const DMFilePtr & read_limiter, tracing_id); + bool enable_read_thread = SegmentReaderPoolManager::instance().isSegmentReader(); + DMFileReader reader( dmfile, read_columns, @@ -73,8 +75,9 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build(const DMFilePtr & read_limiter, rows_threshold_per_read, read_one_pack_every_time, - tracing_id); + tracing_id, + enable_read_thread); - return std::make_shared(std::move(reader)); + return std::make_shared(std::move(reader), enable_read_thread); } } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h index a7f2fe9d556..e1b5fa835c3 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h @@ -16,9 +16,9 @@ #include #include +#include #include #include - namespace DB { class Context; @@ -29,11 +29,23 @@ namespace DM class DMFileBlockInputStream : public SkippableBlockInputStream { public: - explicit DMFileBlockInputStream(DMFileReader && reader_) + explicit DMFileBlockInputStream(DMFileReader && reader_, bool enable_read_thread_) : reader(std::move(reader_)) - {} + , enable_read_thread(enable_read_thread_) + { + if (enable_read_thread) + { + DMFileReaderPool::instance().add(reader); + } + } - ~DMFileBlockInputStream() = default; + ~DMFileBlockInputStream() + { + if (enable_read_thread) + { + DMFileReaderPool::instance().del(reader); + } + } String getName() const override { return "DMFile"; } @@ -45,6 +57,7 @@ class 
DMFileBlockInputStream : public SkippableBlockInputStream private: DMFileReader reader; + bool enable_read_thread; }; using DMFileBlockInputStreamPtr = std::shared_ptr; @@ -129,6 +142,7 @@ class DMFileBlockInputStreamBuilder enable_column_cache = settings.dt_enable_stable_column_cache; aio_threshold = settings.min_bytes_to_use_direct_io; max_read_buffer_size = settings.max_read_buffer_size; + enable_read_thread = settings.dt_enable_read_thread; return *this; } DMFileBlockInputStreamBuilder & setCaches(const MarkCachePtr & mark_cache_, const MinMaxIndexCachePtr & index_cache_) @@ -159,7 +173,7 @@ class DMFileBlockInputStreamBuilder size_t max_read_buffer_size{}; size_t rows_threshold_per_read = DMFILE_READ_ROWS_THRESHOLD; bool read_one_pack_every_time = false; - + bool enable_read_thread = false; String tracing_id; }; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 779de36da63..b00e3cc5c10 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -226,7 +226,8 @@ DMFileReader::DMFileReader( const ReadLimiterPtr & read_limiter, size_t rows_threshold_per_read_, bool read_one_pack_every_time_, - const String & tracing_id_) + const String & tracing_id_, + bool enable_col_sharing_cache) : dmfile(dmfile_) , read_columns(read_columns_) , is_common_handle(is_common_handle_) @@ -266,6 +267,14 @@ DMFileReader::DMFileReader( const auto data_type = dmfile->getColumnStat(cd.id).type; data_type->enumerateStreams(callback, {}); } + if (enable_col_sharing_cache) + { + col_data_cache = std::make_unique(path(), read_columns, log); + for (const auto & cd : read_columns) + { + last_read_from_cache[cd.id] = false; + } + } } bool DMFileReader::shouldSeek(size_t pack_id) @@ -305,7 +314,6 @@ Block DMFileReader::read() const auto & use_packs = pack_filter.getUsePacks(); if (next_pack_id >= use_packs.size()) return {}; - // Find max continuing rows we can read. size_t start_pack_id = next_pack_id; // When single_file_mode is true, or read_one_pack_every_time is true, we can just read one pack every time. 
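A minimal sketch of a call site for the builder changes above; the builder construction and the local variable names are assumptions for illustration, while setTracingID, the build() signature, and the isSegmentReader() check are taken from this diff:
DMFileBlockInputStreamBuilder builder(db_context); // assumed constructor argument
DMFileBlockInputStreamPtr stream = builder.setTracingID(tracing_id).build(dmfile, columns_to_read, RowKeyRanges{rowkey_range});
// Inside build(), the column-sharing cache is wired into DMFileReader only when the calling thread
// is one of the SegmentReader threads, i.e. when SegmentReaderPoolManager::instance().isSegmentReader() returns true.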
@@ -440,7 +448,9 @@ Block DMFileReader::read() { rows_count += pack_stats[cursor].rows; } - readFromDisk(cd, column, range.first, rows_count, skip_packs_by_column[i], single_file_mode); + ColumnPtr col; + readColumn(cd, col, range.first, range.second - range.first, rows_count, skip_packs_by_column[i], single_file_mode); + column->insertRangeFrom(*col, 0, col->size()); skip_packs_by_column[i] = 0; } else @@ -462,8 +472,8 @@ Block DMFileReader::read() else { auto data_type = dmfile->getColumnStat(cd.id).type; - auto column = data_type->createColumn(); - readFromDisk(cd, column, start_pack_id, read_rows, skip_packs_by_column[i], single_file_mode); + ColumnPtr column; + readColumn(cd, column, start_pack_id, read_packs, read_rows, skip_packs_by_column[i], single_file_mode); auto converted_column = convertColumnByColumnDefineIfNeed(data_type, std::move(column), cd); res.insert(ColumnWithTypeAndName{std::move(converted_column), cd.type, cd.name, cd.id}); @@ -534,5 +544,58 @@ void DMFileReader::readFromDisk( } } +void DMFileReader::readColumn(ColumnDefine & column_define, + ColumnPtr & column, + size_t start_pack_id, + size_t pack_count, + size_t read_rows, + size_t skip_packs, + bool force_seek) +{ + if (!getCachedPacks(column_define.id, start_pack_id, pack_count, read_rows, column)) + { + auto data_type = dmfile->getColumnStat(column_define.id).type; + auto col = data_type->createColumn(); + readFromDisk(column_define, col, start_pack_id, read_rows, skip_packs, force_seek || last_read_from_cache[column_define.id]); + column = std::move(col); + last_read_from_cache[column_define.id] = false; + } + else + { + last_read_from_cache[column_define.id] = true; + } + + if (col_data_cache != nullptr) + { + DMFileReaderPool::instance().set(*this, column_define.id, start_pack_id, pack_count, column); + } +} + +void DMFileReader::addCachedPacks(ColId col_id, size_t start_pack_id, size_t pack_count, ColumnPtr & col) +{ + if (col_data_cache == nullptr) + { + return; + } + if (next_pack_id >= start_pack_id + pack_count) + { + col_data_cache->addStale(); + } + else + { + col_data_cache->add(col_id, start_pack_id, pack_count, col); + } +} + +bool DMFileReader::getCachedPacks(ColId col_id, size_t start_pack_id, size_t pack_count, size_t read_rows, ColumnPtr & col) +{ + if (col_data_cache == nullptr) + { + return false; + } + auto found = col_data_cache->get(col_id, start_pack_id, pack_count, read_rows, col, dmfile->getColumnStat(col_id).type); + col_data_cache->del(col_id, next_pack_id); + return found; +} } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h index c04c93871a7..71f6df84bf9 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -91,7 +92,8 @@ class DMFileReader const ReadLimiterPtr & read_limiter, size_t rows_threshold_per_read_, bool read_one_pack_every_time_, - const String & tracing_id_); + const String & tracing_id_, + bool enable_col_sharing_cache); Block getHeader() const { return toEmptyBlock(read_columns); } @@ -99,6 +101,15 @@ class DMFileReader /// Return false if it is the end of stream. 
bool getSkippedRows(size_t & skip_rows); Block read(); + UInt64 fileId() const + { + return dmfile->fileId(); + } + std::string path() const + { + return dmfile->path(); + } + void addCachedPacks(ColId col_id, size_t start_pack_id, size_t pack_count, ColumnPtr & col); private: bool shouldSeek(size_t pack_id); @@ -109,6 +120,14 @@ size_t read_rows, size_t skip_packs, bool force_seek); + void readColumn(ColumnDefine & column_define, + ColumnPtr & column, + size_t start_pack_id, + size_t pack_count, + size_t read_rows, + size_t skip_packs, + bool force_seek); + bool getCachedPacks(ColId col_id, size_t start_pack_id, size_t pack_count, size_t read_rows, ColumnPtr & col); private: DMFilePtr dmfile; @@ -146,6 +165,9 @@ class DMFileReader FileProviderPtr file_provider; LoggerPtr log; + + std::unique_ptr<ColumnSharingCacheMap> col_data_cache; + std::unordered_map<ColId, bool> last_read_from_cache; }; } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/CPU.cpp b/dbms/src/Storages/DeltaMerge/ReadThread/CPU.cpp new file mode 100644 index 00000000000..fbecd79c6d8 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/CPU.cpp @@ -0,0 +1,125 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB::DM +{ +// In Linux a numa node is represented by a device directory, such as '/sys/devices/system/node/node0' or '/sys/devices/system/node/node1'. +static inline bool isNodeDir(const std::string & name) +{ + return name.size() > 4 && name.substr(0, 4) == "node" && std::all_of(name.begin() + 4, name.end(), [](unsigned char c) { return std::isdigit(c); }); +} + +// Under a numa node directory are CPU cores and memory, such as '/sys/devices/system/node/node0/cpu0' and '/sys/devices/system/node/node0/memory0'. +static inline bool isCPU(const std::string & name) +{ + return name.size() > 3 && name.substr(0, 3) == "cpu" && std::all_of(name.begin() + 3, name.end(), [](unsigned char c) { return std::isdigit(c); }); +} + +static inline int parseCPUNumber(const std::string & name) +{ + return std::stoi(name.substr(3)); +} + +// Scan the numa node directory and parse the CPU numbers. +static inline std::vector<int> getCPUs(const std::string & dir_name) +{ + std::vector<int> cpus; + Poco::File dir(dir_name); + Poco::DirectoryIterator end; + for (auto iter = Poco::DirectoryIterator(dir); iter != end; ++iter) + { + if (isCPU(iter.name())) + { + cpus.push_back(parseCPUNumber(iter.name())); + } + } + return cpus; +} + +// TODO: What if the process is running in a container and the usable CPUs are limited? + +// Scan the device directory and parse the CPU information.
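+// For illustration, on a hypothetical two-node machine the layout would look like: +// /sys/devices/system/node/node0/cpu0 and /sys/devices/system/node/node0/cpu1, +// /sys/devices/system/node/node1/cpu2 and /sys/devices/system/node/node1/cpu3, +// so the function below would return {{0, 1}, {2, 3}}.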
+std::vector<std::vector<int>> getLinuxNumaNodes() +{ + static const std::string nodes_dir_name{"/sys/devices/system/node"}; + static const std::string cpus_dir_name{"/sys/devices/system/cpu"}; + + std::vector<std::vector<int>> numa_nodes; + Poco::File nodes(nodes_dir_name); + if (!nodes.exists() || !nodes.isDirectory()) + { + auto cpus = getCPUs(cpus_dir_name); + if (cpus.empty()) + { + throw Exception("Cannot recognize CPUs: " + cpus_dir_name); + } + numa_nodes.push_back(std::move(cpus)); + } + else + { + Poco::DirectoryIterator end; + for (Poco::DirectoryIterator iter(nodes); iter != end; ++iter) + { + if (!isNodeDir(iter.name())) + { + continue; + } + auto dir_name = nodes_dir_name + "/" + iter.name(); + auto cpus = getCPUs(dir_name); + if (cpus.empty()) + { + throw Exception("Cannot recognize CPUs: " + nodes_dir_name); + } + numa_nodes.push_back(std::move(cpus)); + } + } + if (numa_nodes.empty()) + { + throw Exception("Cannot recognize CPUs"); + } + return numa_nodes; +} + +std::vector<std::vector<int>> getNumaNodes(Poco::Logger * log) +{ + try + { + return getLinuxNumaNodes(); + } + catch (Exception & e) + { + LOG_FMT_WARNING(log, "{}", e.message()); + } + catch (std::exception & e) + { + LOG_FMT_WARNING(log, "{}", e.what()); + } + catch (...) + { + LOG_FMT_WARNING(log, "Unknown error"); + } + LOG_FMT_WARNING(log, "Cannot recognize the CPU NUMA information, treating all CPUs as one NUMA node"); + std::vector<std::vector<int>> numa_nodes(1); // "One numa node" + return numa_nodes; +} +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/CPU.h b/dbms/src/Storages/DeltaMerge/ReadThread/CPU.h new file mode 100644 index 00000000000..2d1f564a43a --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/CPU.h @@ -0,0 +1,24 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include + +#include + +namespace DB::DM +{ +// `getNumaNodes` returns the CPUs of each NUMA node. +std::vector<std::vector<int>> getNumaNodes(Poco::Logger * log); +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/CircularScanList.h b/dbms/src/Storages/DeltaMerge/ReadThread/CircularScanList.h new file mode 100644 index 00000000000..a9a976f2f30 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/CircularScanList.h @@ -0,0 +1,104 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include + +namespace DB::DM +{ +// CircularScanList is a special circular list.
+// It remembers the location of the last iteration and checks whether objects have expired. +template <typename T> +class CircularScanList +{ +public: + using Value = std::shared_ptr<T>; + + CircularScanList() + : last_itr(l.end()) + {} + + void add(const Value & ptr) + { + l.push_back(ptr); + } + + Value next() + { + last_itr = nextItr(last_itr); + while (!l.empty()) + { + auto ptr = *last_itr; + if (ptr->valid()) + { + return ptr; + } + else + { + last_itr = l.erase(last_itr); + if (last_itr == l.end()) + { + last_itr = l.begin(); + } + } + } + return nullptr; + } + + std::pair<int64_t, int64_t> count(int64_t table_id) const + { + int64_t valid = 0; + int64_t invalid = 0; + for (const auto & p : l) + { + if (table_id == 0 || p->tableId() == table_id) + { + p->valid() ? valid++ : invalid++; + } + } + return {valid, invalid}; + } + + Value get(uint64_t pool_id) const + { + for (const auto & p : l) + { + if (p->poolId() == pool_id) + { + return p; + } + } + return nullptr; + } + +private: + using Iter = typename std::list<Value>::iterator; + Iter nextItr(Iter itr) + { + if (itr == l.end() || std::next(itr) == l.end()) + { + return l.begin(); + } + else + { + return std::next(itr); + } + } + + std::list<Value> l; + Iter last_itr; +}; + +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/ColumnSharingCache.cpp b/dbms/src/Storages/DeltaMerge/ReadThread/ColumnSharingCache.cpp new file mode 100644 index 00000000000..13b187167b6 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/ColumnSharingCache.cpp @@ -0,0 +1,63 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include + +namespace DB::DM +{ +DMFileReaderPool & DMFileReaderPool::instance() +{ + static DMFileReaderPool reader_pool; + return reader_pool; +} + +void DMFileReaderPool::add(DMFileReader & reader) +{ + std::lock_guard lock(mtx); + readers[reader.path()].insert(&reader); +} + +void DMFileReaderPool::del(DMFileReader & reader) +{ + std::lock_guard lock(mtx); + auto itr = readers.find(reader.path()); + if (itr == readers.end()) + { + return; + } + itr->second.erase(&reader); + if (itr->second.empty()) + { + readers.erase(itr); + } +} + +void DMFileReaderPool::set(DMFileReader & from_reader, int64_t col_id, size_t start, size_t count, ColumnPtr & col) +{ + std::lock_guard lock(mtx); + auto itr = readers.find(from_reader.path()); + if (itr == readers.end()) + { + return; + } + for (auto * r : itr->second) + { + if (&from_reader == r) + { + continue; + } + r->addCachedPacks(col_id, start, count, col); + } +} +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/ColumnSharingCache.h b/dbms/src/Storages/DeltaMerge/ReadThread/ColumnSharingCache.h new file mode 100644 index 00000000000..446e6777338 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/ColumnSharingCache.h @@ -0,0 +1,234 @@ +// Copyright 2022 PingCAP, Ltd.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace DB::DM +{ +using PackId = size_t; + +enum class ColumnCacheStatus +{ + ADD_COUNT = 0, + ADD_STALE, + GET_MISS, + GET_PART, + GET_HIT, + GET_COPY, + + _TOTAL_COUNT, +}; + +class ColumnSharingCache +{ +public: + struct ColumnData + { + size_t pack_count{0}; + ColumnPtr col_data; + }; + + void add(size_t start_pack_id, size_t pack_count, ColumnPtr & col_data) + { + GET_METRIC(tiflash_storage_read_thread_counter, type_add_cache_succ).Increment(); + std::lock_guard lock(mtx); + auto & value = packs[start_pack_id]; + if (value.pack_count < pack_count) + { + value.pack_count = pack_count; + value.col_data = col_data; + } + } + + ColumnCacheStatus get(size_t start_pack_id, size_t pack_count, size_t read_rows, ColumnPtr & col_data, DataTypePtr data_type) + { + ColumnCacheStatus status; + std::lock_guard lock(mtx); + auto target = packs.find(start_pack_id); + if (target == packs.end()) + { + GET_METRIC(tiflash_storage_read_thread_counter, type_get_cache_miss).Increment(); + status = ColumnCacheStatus::GET_MISS; + } + else if (target->second.pack_count < pack_count) + { + GET_METRIC(tiflash_storage_read_thread_counter, type_get_cache_part).Increment(); + status = ColumnCacheStatus::GET_PART; + } + else if (target->second.pack_count == pack_count) + { + GET_METRIC(tiflash_storage_read_thread_counter, type_get_cache_hit).Increment(); + status = ColumnCacheStatus::GET_HIT; + col_data = target->second.col_data; + } + else + { + GET_METRIC(tiflash_storage_read_thread_counter, type_get_cache_copy).Increment(); + status = ColumnCacheStatus::GET_COPY; + auto column = data_type->createColumn(); + column->insertRangeFrom(*(target->second.col_data), 0, read_rows); + col_data = std::move(column); + } + return status; + } + + void del(size_t upper_start_pack_id) + { + std::lock_guard lock(mtx); + for (auto itr = packs.begin(); itr != packs.end();) + { + if (itr->first + itr->second.pack_count <= upper_start_pack_id) + { + itr = packs.erase(itr); + } + else + { + break; + } + } + } + +private: + std::mutex mtx; + // start_pack_id -> ColumnData + std::map<PackId, ColumnData> packs; +}; + +// `ColumnSharingCacheMap` is a per-DMFileReader cache. +// It stores a ColumnSharingCache(std::map) for each column. +// When read threads are enabled, each DMFileReader will be added to the DMFileReaderPool, +// so we can find DMFileReaders of the same DMFile easily. +// Each DMFileReader will add the blocks that it reads to the other DMFileReaders' caches if the start_pack_id of the block +// is greater than or equal to the next_pack_id of that DMFileReader, which means the DMFileReader may read the block later.
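+// Illustrative flow with hypothetical pack ids: reader A reads packs [4, 6) of column 1 and publishes them via +// DMFileReaderPool::instance().set(A, 1, 4, 2, col); every other reader B on the same DMFile stores them through +// B.addCachedPacks(1, 4, 2, col) as long as B has not already read past those packs; when B later reaches pack 4, +// B.getCachedPacks(1, 4, 2, read_rows, col) returns the shared column without touching the disk.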
+class ColumnSharingCacheMap +{ +public: + ColumnSharingCacheMap(const std::string & dmfile_name_, const ColumnDefines & cds, LoggerPtr & log_) + : dmfile_name(dmfile_name_) + , stats(static_cast<size_t>(ColumnCacheStatus::_TOTAL_COUNT)) + , log(log_) + { + for (const auto & cd : cds) + { + addColumn(cd.id); + } + } + + ~ColumnSharingCacheMap() + { + LOG_FMT_DEBUG(log, "dmfile {} stat {}", dmfile_name, statString()); + } + + // `addStale` just does some statistics. + void addStale() + { + GET_METRIC(tiflash_storage_read_thread_counter, type_add_cache_stale).Increment(); + stats[static_cast<size_t>(ColumnCacheStatus::ADD_STALE)].fetch_add(1, std::memory_order_relaxed); + } + + void add(int64_t col_id, size_t start_pack_id, size_t pack_count, ColumnPtr & col_data) + { + stats[static_cast<size_t>(ColumnCacheStatus::ADD_COUNT)].fetch_add(1, std::memory_order_relaxed); + auto itr = cols.find(col_id); + if (itr == cols.end()) + { + return; + } + itr->second.add(start_pack_id, pack_count, col_data); + } + + bool get(int64_t col_id, size_t start_pack_id, size_t pack_count, size_t read_rows, ColumnPtr & col_data, DataTypePtr data_type) + { + auto status = ColumnCacheStatus::GET_MISS; + auto itr = cols.find(col_id); + if (itr != cols.end()) + { + status = itr->second.get(start_pack_id, pack_count, read_rows, col_data, data_type); + } + stats[static_cast<size_t>(status)].fetch_add(1, std::memory_order_relaxed); + return status == ColumnCacheStatus::GET_HIT || status == ColumnCacheStatus::GET_COPY; + } + + // Each read operation of DMFileReader will advance next_pack_id. + // So we can delete packs whose pack_id is less than next_pack_id to save memory. + void del(int64_t col_id, size_t upper_start_pack_id) + { + auto itr = cols.find(col_id); + if (itr != cols.end()) + { + itr->second.del(upper_start_pack_id); + } + } + +private: + void addColumn(int64_t col_id) + { + cols[col_id]; + } + std::string statString() const + { + auto add_count = stats[static_cast<size_t>(ColumnCacheStatus::ADD_COUNT)].load(std::memory_order_relaxed); + auto add_stale = stats[static_cast<size_t>(ColumnCacheStatus::ADD_STALE)].load(std::memory_order_relaxed); + auto get_miss = stats[static_cast<size_t>(ColumnCacheStatus::GET_MISS)].load(std::memory_order_relaxed); + auto get_part = stats[static_cast<size_t>(ColumnCacheStatus::GET_PART)].load(std::memory_order_relaxed); + auto get_hit = stats[static_cast<size_t>(ColumnCacheStatus::GET_HIT)].load(std::memory_order_relaxed); + auto get_copy = stats[static_cast<size_t>(ColumnCacheStatus::GET_COPY)].load(std::memory_order_relaxed); + auto add_total = add_count + add_stale; + auto get_cached = get_hit + get_copy; + auto get_total = get_miss + get_part + get_hit + get_copy; + return fmt::format("add_count {} add_stale {} add_ratio {} get_miss {} get_part {} get_hit {} get_copy {} cached_ratio {}", + add_count, + add_stale, + add_total > 0 ? add_count * 1.0 / add_total : 0, + get_miss, + get_part, + get_hit, + get_copy, + get_total > 0 ? get_cached * 1.0 / get_total : 0); + } + std::string dmfile_name; + std::unordered_map<int64_t, ColumnSharingCache> cols; + std::vector<std::atomic<int64_t>> stats; + LoggerPtr log; +}; + +class DMFileReader; + +// DMFileReaderPool holds all the DMFileReader objects, so we can easily find DMFileReader objects with the same DMFile ID. +// When a DMFileReader object successfully reads a column's packs, it will try to put these packs into the other DMFileReader objects' caches.
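+// Lifecycle sketch (wired up elsewhere in this diff): DMFileBlockInputStream's constructor calls +// DMFileReaderPool::instance().add(reader) and its destructor calls del(reader), so a reader only participates +// in sharing while its stream is alive; set() then fans a freshly read column out to all the other readers +// registered under the same DMFile path.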
+class DMFileReaderPool +{ +public: + static DMFileReaderPool & instance(); + DMFileReaderPool() = default; + ~DMFileReaderPool() = default; + DISALLOW_COPY_AND_MOVE(DMFileReaderPool); + + void add(DMFileReader & reader); + void del(DMFileReader & reader); + void set(DMFileReader & from_reader, int64_t col_id, size_t start, size_t count, ColumnPtr & col); + +private: + std::mutex mtx; + std::unordered_map> readers; +}; + +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/MergedTask.cpp b/dbms/src/Storages/DeltaMerge/ReadThread/MergedTask.cpp new file mode 100644 index 00000000000..c81fcfe8ccd --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/MergedTask.cpp @@ -0,0 +1,123 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include + +namespace DB::DM +{ +int MergedTask::readBlock() +{ + initOnce(); + return readOneBlock(); +} + +void MergedTask::initOnce() +{ + if (inited) + { + return; + } + + for (cur_idx = 0; cur_idx < static_cast(units.size()); cur_idx++) + { + auto & [pool, task, stream] = units[cur_idx]; + if (!pool->valid()) + { + setStreamFinished(cur_idx); + continue; + } + stream = pool->buildInputStream(task); + } + + inited = true; +} + +int MergedTask::readOneBlock() +{ + int read_block_count = 0; + for (cur_idx = 0; cur_idx < static_cast(units.size()); cur_idx++) + { + if (isStreamFinished(cur_idx)) + { + continue; + } + + auto & [pool, task, stream] = units[cur_idx]; + + if (!pool->valid()) + { + setStreamFinished(cur_idx); + continue; + } + + if (pool->getFreeBlockSlots() <= 0) + { + continue; + } + + if (pool->readOneBlock(stream, task->segment)) + { + read_block_count++; + } + else + { + setStreamFinished(cur_idx); + } + } + return read_block_count; +} + +void MergedTask::setException(const DB::Exception & e) +{ + if (cur_idx >= 0 && cur_idx < static_cast(units.size())) + { + auto & pool = units[cur_idx].pool; + if (pool != nullptr) + { + pool->setException(e); + } + } + else + { + for (auto & unit : units) + { + if (unit.pool != nullptr) + { + unit.pool->setException(e); + } + } + } +} + +MergedTaskPtr MergedTaskPool::pop(uint64_t pool_id) +{ + std::lock_guard lock(mtx); + MergedTaskPtr target; + for (auto itr = merged_task_pool.begin(); itr != merged_task_pool.end(); ++itr) + { + if ((*itr)->containPool(pool_id)) + { + target = *itr; + merged_task_pool.erase(itr); + break; + } + } + return target; +} + +void MergedTaskPool::push(const MergedTaskPtr & t) +{ + std::lock_guard lock(mtx); + merged_task_pool.push_back(t); +} +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/MergedTask.h b/dbms/src/Storages/DeltaMerge/ReadThread/MergedTask.h new file mode 100644 index 00000000000..8233cd2e431 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/MergedTask.h @@ -0,0 +1,156 @@ +// Copyright 2022 PingCAP, Ltd. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include +#include +#include + +namespace DB::DM +{ +struct MergedUnit +{ + MergedUnit(const SegmentReadTaskPoolPtr & pool_, const SegmentReadTaskPtr & task_) + : pool(pool_) + , task(task_) + {} + + SegmentReadTaskPoolPtr pool; // The information of a read request. + SegmentReadTaskPtr task; // The information of a segment that we want to read. + BlockInputStreamPtr stream; // BlockInputStream of a segment, will be created by read threads. +}; + +// MergedTask merges the same segment of different SegmentReadTaskPools. +// It reads the segment input streams of different SegmentReadTaskPools sequentially to improve cache sharing. +// MergedTask is NOT thread-safe. +class MergedTask +{ +public: + static int64_t getPassiveMergedSegments() + { + return passive_merged_segments.load(std::memory_order_relaxed); + } + + MergedTask(uint64_t seg_id_, std::vector<MergedUnit> && units_) + : seg_id(seg_id_) + , units(std::move(units_)) + , inited(false) + , cur_idx(-1) + , finished_count(0) + , log(&Poco::Logger::get("MergedTask")) + { + passive_merged_segments.fetch_add(units.size() - 1, std::memory_order_relaxed); + GET_METRIC(tiflash_storage_read_thread_gauge, type_merged_task).Increment(); + } + ~MergedTask() + { + passive_merged_segments.fetch_sub(units.size() - 1, std::memory_order_relaxed); + GET_METRIC(tiflash_storage_read_thread_gauge, type_merged_task).Decrement(); + GET_METRIC(tiflash_storage_read_thread_seconds, type_merged_task).Observe(sw.elapsedSeconds()); + } + + int readBlock(); + + bool allStreamsFinished() const + { + return finished_count >= units.size(); + } + + uint64_t getSegmentId() const + { + return seg_id; + } + + size_t getPoolCount() const + { + return units.size(); + } + + std::vector<uint64_t> getPoolIds() const + { + std::vector<uint64_t> ids; + ids.reserve(units.size()); + for (const auto & unit : units) + { + if (unit.pool != nullptr) + { + ids.push_back(unit.pool->poolId()); + } + } + return ids; + } + + bool containPool(uint64_t pool_id) const + { + for (const auto & unit : units) + { + if (unit.pool != nullptr && unit.pool->poolId() == pool_id) + { + return true; + } + } + return false; + } + void setException(const DB::Exception & e); + +private: + void initOnce(); + int readOneBlock(); + + bool isStreamFinished(size_t i) const + { + return units[i].pool == nullptr && units[i].task == nullptr && units[i].stream == nullptr; + } + + void setStreamFinished(size_t i) + { + if (!isStreamFinished(i)) + { + units[i].pool = nullptr; + units[i].task = nullptr; + units[i].stream = nullptr; + finished_count++; + } + } + + uint64_t seg_id; + std::vector<MergedUnit> units; + bool inited; + int cur_idx; + size_t finished_count; + Poco::Logger * log; + Stopwatch sw; + inline static std::atomic<int64_t> passive_merged_segments{0}; +}; + +using MergedTaskPtr = std::shared_ptr<MergedTask>; + +// MergedTaskPool is a list of MergedTasks. +// When a SegmentReadTaskPool's block queue reaches its limit, the read threads will push the MergedTask back into it.
+// The scheduler thread will try to pop a MergedTask of a related pool_id before building a new MergedTask object. +class MergedTaskPool +{ +public: + MergedTaskPool() + : log(&Poco::Logger::get("MergedTaskPool")) + {} + + MergedTaskPtr pop(uint64_t pool_id); + void push(const MergedTaskPtr & t); + +private: + std::mutex mtx; + std::list<MergedTaskPtr> merged_task_pool; + Poco::Logger * log; +}; +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReadTaskScheduler.cpp b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReadTaskScheduler.cpp new file mode 100644 index 00000000000..9bec9e18afa --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReadTaskScheduler.cpp @@ -0,0 +1,211 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include + +namespace DB::DM +{ +SegmentReadTaskScheduler::SegmentReadTaskScheduler() + : stop(false) + , log(&Poco::Logger::get("SegmentReadTaskScheduler")) +{ + sched_thread = std::thread(&SegmentReadTaskScheduler::schedThread, this); +} + +SegmentReadTaskScheduler::~SegmentReadTaskScheduler() +{ + setStop(); + sched_thread.join(); +} + +void SegmentReadTaskScheduler::add(const SegmentReadTaskPoolPtr & pool) +{ + std::lock_guard lock(mtx); + + read_pools.add(pool); + + std::unordered_set<uint64_t> seg_ids; + for (const auto & task : pool->getTasks()) + { + auto seg_id = task->segment->segmentId(); + merging_segments[pool->tableId()][seg_id].push_back(pool->poolId()); + if (!seg_ids.insert(seg_id).second) + { + throw DB::Exception(fmt::format("Split segment tasks are not supported. seg_ids {} => seg_id {} already exists.", seg_ids, seg_id)); + } + } + auto block_slots = pool->getFreeBlockSlots(); + auto [unexpired, expired] = read_pools.count(pool->tableId()); + LOG_FMT_DEBUG(log, "add pool {} table {} block_slots {} segment count {} segments {} unexpired pool {} expired pool {}", // + pool->poolId(), + pool->tableId(), + block_slots, + seg_ids.size(), + seg_ids, + unexpired, + expired); +} + +std::pair<MergedTaskPtr, bool> SegmentReadTaskScheduler::scheduleMergedTask() +{ + std::lock_guard lock(mtx); + auto pool = scheduleSegmentReadTaskPoolUnlock(); + if (pool == nullptr) + { + // No SegmentReadTaskPool to schedule. Maybe there is no read request, or + // the block queue of each SegmentReadTaskPool has reached its limit. + return {nullptr, false}; + } + + auto merged_task = merged_task_pool.pop(pool->poolId()); + if (merged_task != nullptr) + { + GET_METRIC(tiflash_storage_read_thread_counter, type_sche_from_cache).Increment(); + return {merged_task, true}; + } + + auto segment = scheduleSegmentUnlock(pool); + if (!segment) + { + // The number of active segments reaches the limit. + GET_METRIC(tiflash_storage_read_thread_counter, type_sche_no_segment).Increment(); + return {nullptr, true}; + } + auto pools = getPoolsUnlock(segment->second); + if (pools.empty()) + { + // Maybe the SegmentReadTaskPools have expired because the upper-level threads finished the request.
+ return {nullptr, true}; + } + + std::vector units; + units.reserve(pools.size()); + for (auto & pool : pools) + { + units.emplace_back(pool, pool->getTask(segment->first)); + } + GET_METRIC(tiflash_storage_read_thread_counter, type_sche_new_task).Increment(); + + return {std::make_shared(segment->first, std::move(units)), true}; +} + +SegmentReadTaskPools SegmentReadTaskScheduler::getPoolsUnlock(const std::vector & pool_ids) +{ + SegmentReadTaskPools pools; + pools.reserve(pool_ids.size()); + for (uint64_t id : pool_ids) + { + auto p = read_pools.get(id); + if (p != nullptr) + { + pools.push_back(p); + } + } + return pools; +} + +SegmentReadTaskPoolPtr SegmentReadTaskScheduler::scheduleSegmentReadTaskPoolUnlock() +{ + auto [unexpired, expired] = read_pools.count(0); + for (int64_t i = 0; i < unexpired; i++) + { + auto pool = read_pools.next(); + if (pool != nullptr && pool->getFreeBlockSlots() > 0) + { + return pool; + } + } + if (unexpired == 0) + { + GET_METRIC(tiflash_storage_read_thread_counter, type_sche_no_pool).Increment(); + } + else + { + GET_METRIC(tiflash_storage_read_thread_counter, type_sche_no_slot).Increment(); + } + return nullptr; +} + +std::optional>> SegmentReadTaskScheduler::scheduleSegmentUnlock(const SegmentReadTaskPoolPtr & pool) +{ + auto [unexpired, expired] = read_pools.count(pool->tableId()); + auto expected_merge_seg_count = std::min(unexpired, 2); + auto itr = merging_segments.find(pool->tableId()); + if (itr == merging_segments.end()) + { + // No segment of tableId left. + return std::nullopt; + } + std::optional>> result; + auto & segments = itr->second; + auto target = pool->scheduleSegment(segments, expected_merge_seg_count); + if (target != segments.end()) + { + if (MergedTask::getPassiveMergedSegments() < 100 || target->second.size() == 1) + { + result = *target; + segments.erase(target); + if (segments.empty()) + { + merging_segments.erase(itr); + } + } + else + { + result = std::pair{target->first, std::vector(1, pool->poolId())}; + auto mutable_target = segments.find(target->first); + auto itr = std::find(mutable_target->second.begin(), mutable_target->second.end(), pool->poolId()); + *itr = mutable_target->second.back(); // SegmentReadTaskPool::scheduleSegment ensures `pool->poolId` must exists in `target`. 
+ mutable_target->second.resize(mutable_target->second.size() - 1); + } + } + return result; +} + +void SegmentReadTaskScheduler::setStop() +{ + stop.store(true, std::memory_order_relaxed); +} + +bool SegmentReadTaskScheduler::isStop() const +{ + return stop.load(std::memory_order_relaxed); +} + +bool SegmentReadTaskScheduler::schedule() +{ + Stopwatch sw; + auto [merged_task, run_sche] = scheduleMergedTask(); + if (merged_task != nullptr) + { + LOG_FMT_DEBUG(log, "scheduleMergedTask seg_id {} pools {} => {} ms", merged_task->getSegmentId(), merged_task->getPoolIds(), sw.elapsedMilliseconds()); + SegmentReaderPoolManager::instance().addTask(std::move(merged_task)); + } + return run_sche; +} + +void SegmentReadTaskScheduler::schedThread() +{ + while (!isStop()) + { + if (!schedule()) + { + using namespace std::chrono_literals; + std::this_thread::sleep_for(2ms); + } + } +} + +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReadTaskScheduler.h b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReadTaskScheduler.h new file mode 100644 index 00000000000..51aef4d176c --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReadTaskScheduler.h @@ -0,0 +1,83 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include + +#include +namespace DB::DM +{ +using SegmentReadTaskPoolList = CircularScanList<SegmentReadTaskPool>; + +// SegmentReadTaskScheduler is a global singleton. +// All SegmentReadTaskPools will be added to it and scheduled by it. + +// 1. DeltaMergeStore::read/readRaw will call SegmentReadTaskScheduler::add to add a SegmentReadTaskPool object to the `read_pools` list and +// index segment information into `merging_segments`. +// 2. A scheduling thread will schedule the read tasks: +// a. It scans the read_pools list and chooses a SegmentReadTaskPool. +// b. Chooses a segment of the SegmentReadTaskPool and builds a MergedTask. +// c. Sends the MergedTask to the read threads (SegmentReader). +class SegmentReadTaskScheduler +{ +public: + static SegmentReadTaskScheduler & instance() + { + static SegmentReadTaskScheduler scheduler; + return scheduler; + } + + ~SegmentReadTaskScheduler(); + DISALLOW_COPY_AND_MOVE(SegmentReadTaskScheduler); + + // Add a SegmentReadTaskPool to `read_pools` and index its segments into `merging_segments`. + void add(const SegmentReadTaskPoolPtr & pool); + + void pushMergedTask(const MergedTaskPtr & p) + { + merged_task_pool.push(p); + } + +private: + SegmentReadTaskScheduler(); + + // Choose segment to read.
+ // Returns + std::pair scheduleMergedTask(); + + void setStop(); + bool isStop() const; + bool schedule(); + void schedThread(); + + SegmentReadTaskPools getPoolsUnlock(const std::vector & pool_ids); + // + std::optional>> scheduleSegmentUnlock(const SegmentReadTaskPoolPtr & pool); + SegmentReadTaskPoolPtr scheduleSegmentReadTaskPoolUnlock(); + + std::mutex mtx; + SegmentReadTaskPoolList read_pools; + // table_id -> {seg_id -> pool_ids, seg_id -> pool_ids, ...} + std::unordered_map>> merging_segments; + + MergedTaskPool merged_task_pool; + + std::atomic stop; + std::thread sched_thread; + + Poco::Logger * log; +}; +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.cpp b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.cpp new file mode 100644 index 00000000000..77806fdef72 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.cpp @@ -0,0 +1,233 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include +#include +#include + +#include + +namespace DB::DM +{ +class SegmentReader +{ + inline static const std::string name{"SegmentReader"}; + +public: + SegmentReader(WorkQueue & task_queue_, const std::vector & cpus_) + : task_queue(task_queue_) + , stop(false) + , log(&Poco::Logger::get(name)) + , cpus(cpus_) + { + t = std::thread(&SegmentReader::run, this); + } + + void setStop() + { + stop.store(true, std::memory_order_relaxed); + } + + ~SegmentReader() + { + LOG_FMT_DEBUG(log, "SegmentReader stop begin"); + t.join(); + LOG_FMT_DEBUG(log, "SegmentReader stop end"); + } + + std::thread::id getId() const + { + return t.get_id(); + } + +private: + void setCPUAffinity() + { + if (cpus.empty()) + { + return; + } +#ifdef __linux__ + cpu_set_t cpu_set; + CPU_ZERO(&cpu_set); + for (int i : cpus) + { + CPU_SET(i, &cpu_set); + } + int ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set); + if (ret != 0) + { + LOG_FMT_ERROR(log, "sched_setaffinity fail: {}", std::strerror(errno)); + throw Exception(fmt::format("sched_setaffinity fail: {}", std::strerror(errno))); + } + LOG_FMT_DEBUG(log, "sched_setaffinity cpus {} succ", cpus); +#endif + } + + bool isStop() + { + return stop.load(std::memory_order_relaxed); + } + + void readSegments() + { + MergedTaskPtr merged_task; + try + { + if (!task_queue.pop(merged_task)) + { + LOG_FMT_INFO(log, "pop fail, stop {}", isStop()); + return; + } + + SCOPE_EXIT({ + if (!merged_task->allStreamsFinished()) + { + SegmentReadTaskScheduler::instance().pushMergedTask(merged_task); + } + }); + + int read_count = 0; + while (!merged_task->allStreamsFinished() && !isStop()) + { + auto c = merged_task->readBlock(); + read_count += c; + if (c <= 0) + { + break; + } + } + if (read_count <= 0) + { + LOG_FMT_DEBUG(log, "pool {} seg_id {} read_count {}", merged_task->getPoolIds(), merged_task->getSegmentId(), read_count); + } + } + catch (DB::Exception & e) + { + LOG_FMT_ERROR(log, "ErrMsg: {} StackTrace {}", e.message(), 
e.getStackTrace().toString()); + if (merged_task != nullptr) + { + merged_task->setException(e); + } + } + catch (std::exception & e) + { + LOG_FMT_ERROR(log, "ErrMsg: {}", e.what()); + if (merged_task != nullptr) + { + merged_task->setException(DB::Exception(e.what())); + } + } + catch (...) + { + tryLogCurrentException("exception thrown in SegmentReader"); + if (merged_task != nullptr) + { + merged_task->setException(DB::Exception("unknown exception thrown in SegmentReader")); + } + } + } + + void run() + { + setCPUAffinity(); + setThreadName(name.c_str()); + while (!isStop()) + { + readSegments(); + } + } + + WorkQueue<MergedTaskPtr> & task_queue; + std::atomic<bool> stop; + Poco::Logger * log; + std::thread t; + std::vector<int> cpus; +}; + +void SegmentReaderPool::addTask(MergedTaskPtr && task) +{ + if (!task_queue.push(std::forward<MergedTaskPtr>(task), nullptr)) + { + throw Exception("addTask failed"); + } +} + +SegmentReaderPool::SegmentReaderPool(int thread_count, const std::vector<int> & cpus) + : log(&Poco::Logger::get("SegmentReaderPool")) +{ + LOG_FMT_INFO(log, "Create SegmentReaderPool thread_count {} cpus {} start", thread_count, cpus); + for (int i = 0; i < thread_count; i++) + { + readers.push_back(std::make_unique<SegmentReader>(task_queue, cpus)); + } + LOG_FMT_INFO(log, "Create SegmentReaderPool thread_count {} cpus {} end", thread_count, cpus); +} + +SegmentReaderPool::~SegmentReaderPool() +{ + for (auto & reader : readers) + { + reader->setStop(); + } + task_queue.finish(); +} + +std::vector<std::thread::id> SegmentReaderPool::getReaderIds() const +{ + std::vector<std::thread::id> ids; + for (const auto & r : readers) + { + ids.push_back(r->getId()); + } + return ids; +} + +SegmentReaderPoolManager::SegmentReaderPoolManager() + : log(&Poco::Logger::get("SegmentReaderPoolManager")) +{} + +SegmentReaderPoolManager::~SegmentReaderPoolManager() = default; + +void SegmentReaderPoolManager::init(const ServerInfo & server_info) +{ + auto numa_nodes = getNumaNodes(log); + LOG_FMT_INFO(log, "numa_nodes {} => {}", numa_nodes.size(), numa_nodes); + for (const auto & node : numa_nodes) + { + int thread_count = node.empty() ? server_info.cpu_info.logical_cores : node.size(); + reader_pools.push_back(std::make_unique<SegmentReaderPool>(thread_count, node)); + auto ids = reader_pools.back()->getReaderIds(); + reader_ids.insert(ids.begin(), ids.end()); + } + LOG_FMT_INFO(log, "readers count {}", reader_ids.size()); +} + +void SegmentReaderPoolManager::addTask(MergedTaskPtr && task) +{ + static std::hash<uint64_t> hash_func; + auto idx = hash_func(task->getSegmentId()) % reader_pools.size(); + reader_pools[idx]->addTask(std::move(task)); +} + +// `isSegmentReader` checks whether this thread is a `SegmentReader`. +// Use this function in DMFileBlockInputStream to check whether read threads are enabled for this read request. +// Maybe we could pass the argument from DeltaMerge -> SegmentReadTaskPool -> ... -> DMFileBlockInputStream, +// but this is a long code path and can affect a lot of code. +bool SegmentReaderPoolManager::isSegmentReader() const +{ + return reader_ids.find(std::this_thread::get_id()) != reader_ids.end(); +} +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.h b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.h new file mode 100644 index 00000000000..5408dcb4fd6 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.h @@ -0,0 +1,77 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +namespace DB::DM +{ +class MergedTask; +using MergedTaskPtr = std::shared_ptr; + +class SegmentReader; +using SegmentReaderUPtr = std::unique_ptr; + +class SegmentReaderPool +{ +public: + SegmentReaderPool(int thread_count, const std::vector & cpus); + ~SegmentReaderPool(); + SegmentReaderPool(const SegmentReaderPool &) = delete; + SegmentReaderPool & operator=(const SegmentReaderPool &) = delete; + SegmentReaderPool(SegmentReaderPool &&) = delete; + SegmentReaderPool & operator=(SegmentReaderPool &&) = delete; + + void addTask(MergedTaskPtr && task); + std::vector getReaderIds() const; + +private: + void init(int thread_count, const std::vector & cpus); + + WorkQueue task_queue; + std::vector readers; + Poco::Logger * log; +}; + +// SegmentReaderPoolManager is a NUMA-aware singleton that manages several SegmentReaderPool objects. +// The number of SegmentReadPool object is the same as the number of CPU NUMA node. +// Thread number of a SegmentReadPool object is the same as the number of CPU logical core of a CPU NUMA node. +// Function `addTask` dispatches MergedTask to SegmentReadPool by their segment id, so a segment read task +// wouldn't be processed across NUMA nodes. +class SegmentReaderPoolManager +{ +public: + static SegmentReaderPoolManager & instance() + { + static SegmentReaderPoolManager pool_manager; + return pool_manager; + } + void init(const ServerInfo & server_info); + ~SegmentReaderPoolManager(); + DISALLOW_COPY_AND_MOVE(SegmentReaderPoolManager); + + void addTask(MergedTaskPtr && task); + bool isSegmentReader() const; + +private: + SegmentReaderPoolManager(); + std::vector> reader_pools; + std::unordered_set reader_ids; + Poco::Logger * log; +}; + +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/UnorderedInputStream.h b/dbms/src/Storages/DeltaMerge/ReadThread/UnorderedInputStream.h new file mode 100644 index 00000000000..5bdbc7ed59d --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/UnorderedInputStream.h @@ -0,0 +1,131 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include + +namespace DB::FailPoints +{ +extern const char pause_when_reading_from_dt_stream[]; +} + +namespace DB::DM +{ +class UnorderedInputStream : public IProfilingBlockInputStream +{ + static constexpr auto NAME = "UnorderedInputStream"; + +public: + UnorderedInputStream( + const SegmentReadTaskPoolPtr & task_pool_, + const ColumnDefines & columns_to_read_, + const int extra_table_id_index, + const TableID physical_table_id, + const String & req_id) + : task_pool(task_pool_) + , header(toEmptyBlock(columns_to_read_)) + , extra_table_id_index(extra_table_id_index) + , physical_table_id(physical_table_id) + , log(Logger::get(NAME, req_id)) + , ref_no(0) + { + if (extra_table_id_index != InvalidColumnID) + { + ColumnDefine extra_table_id_col_define = getExtraTableIDColumnDefine(); + ColumnWithTypeAndName col{extra_table_id_col_define.type->createColumn(), extra_table_id_col_define.type, extra_table_id_col_define.name, extra_table_id_col_define.id, extra_table_id_col_define.default_value}; + header.insert(extra_table_id_index, col); + } + ref_no = task_pool->increaseUnorderedInputStreamRefCount(); + LOG_FMT_DEBUG(log, "pool {} ref {} created", task_pool->poolId(), ref_no); + } + + ~UnorderedInputStream() + { + task_pool->decreaseUnorderedInputStreamRefCount(); + LOG_FMT_DEBUG(log, "pool {} ref {} destroy", task_pool->poolId(), ref_no); + } + + String getName() const override { return NAME; } + + Block getHeader() const override { return header; } + +protected: + Block readImpl() override + { + FilterPtr filter_ignored; + return readImpl(filter_ignored, false); + } + + // Currently, res_filter and return_filter are unused. + Block readImpl(FilterPtr & /*res_filter*/, bool /*return_filter*/) override + { + if (done) + { + return {}; + } + while (true) + { + FAIL_POINT_PAUSE(FailPoints::pause_when_reading_from_dt_stream); + + Block res; + task_pool->popBlock(res); + if (res) + { + if (extra_table_id_index != InvalidColumnID) + { + ColumnDefine extra_table_id_col_define = getExtraTableIDColumnDefine(); + ColumnWithTypeAndName col{{}, extra_table_id_col_define.type, extra_table_id_col_define.name, extra_table_id_col_define.id}; + size_t row_number = res.rows(); + auto col_data = col.type->createColumnConst(row_number, Field(physical_table_id)); + col.column = std::move(col_data); + res.insert(extra_table_id_index, std::move(col)); + } + if (!res.rows()) + { + continue; + } + else + { + total_rows += res.rows(); + return res; + } + } + else + { + done = true; + return {}; + } + } + } + + void readSuffixImpl() override + { + LOG_FMT_DEBUG(log, "pool {} ref {} finish read {} rows from storage", task_pool->poolId(), ref_no, total_rows); + } + +private: + SegmentReadTaskPoolPtr task_pool; + Block header; + // Position of the ExtraPhysTblID column in the `column_names` parameter of the StorageDeltaMerge::read function. + const int extra_table_id_index; + bool done = false; + TableID physical_table_id; + LoggerPtr log; + int64_t ref_no; + size_t total_rows = 0; +}; +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/WorkQueue.h b/dbms/src/Storages/DeltaMerge/ReadThread/WorkQueue.h new file mode 100644 index 00000000000..7726617ebef --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ReadThread/WorkQueue.h @@ -0,0 +1,174 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include + +#include +#include +#include + +namespace DB::DM +{ +template +class WorkQueue +{ + // Protects all member variable access + std::mutex mutex_; + std::condition_variable readerCv_; + std::condition_variable writerCv_; + std::condition_variable finishCv_; + std::queue queue_; + bool done_; + std::size_t maxSize_; + + std::size_t peak_queue_size; + int64_t pop_times; + int64_t pop_empty_times; + // Must have lock to call this function + bool full() const + { + if (maxSize_ == 0) + { + return false; + } + return queue_.size() >= maxSize_; + } + +public: + /** + * Constructs an empty work queue with an optional max size. + * If `maxSize == 0` the queue size is unbounded. + * + * @param maxSize The maximum allowed size of the work queue. + */ + WorkQueue(std::size_t maxSize = 0) + : done_(false) + , maxSize_(maxSize) + , peak_queue_size(0) + , pop_times(0) + , pop_empty_times(0) + {} + /** + * Push an item onto the work queue. Notify a single thread that work is + * available. If `finish()` has been called, do nothing and return false. + * If `push()` returns false, then `item` has not been copied from. + * + * @param item Item to push onto the queue. + * @returns True upon success, false if `finish()` has been called. An + * item was pushed iff `push()` returns true. + */ + template + bool push(U && item, size_t * size) + { + { + std::unique_lock lock(mutex_); + while (full() && !done_) + { + writerCv_.wait(lock); + } + if (done_) + { + return false; + } + queue_.push(std::forward(item)); + peak_queue_size = std::max(queue_.size(), peak_queue_size); + if (size != nullptr) + { + *size = queue_.size(); + } + } + readerCv_.notify_one(); + return true; + } + /** + * Attempts to pop an item off the work queue. It will block until data is + * available or `finish()` has been called. + * + * @param[out] item If `pop` returns `true`, it contains the popped item. + * If `pop` returns `false`, it is unmodified. + * @returns True upon success. False if the queue is empty and + * `finish()` has been called. + */ + bool pop(T & item) + { + { + std::unique_lock lock(mutex_); + pop_times++; + while (queue_.empty() && !done_) + { + pop_empty_times++; + readerCv_.wait(lock); + } + if (queue_.empty()) + { + assert(done_); + return false; + } + item = std::move(queue_.front()); + queue_.pop(); + } + writerCv_.notify_one(); + return true; + } + /** + * Sets the maximum queue size. If `maxSize == 0` then it is unbounded. + * + * @param maxSize The new maximum queue size. + */ + void setMaxSize(std::size_t maxSize) + { + { + std::lock_guard lock(mutex_); + maxSize_ = maxSize; + } + writerCv_.notify_all(); + } + /** + * Promise that `push()` won't be called again, so once the queue is empty + * there will never be any more work. + */ + void finish() + { + { + std::lock_guard lock(mutex_); + assert(!done_); + done_ = true; + } + readerCv_.notify_all(); + writerCv_.notify_all(); + finishCv_.notify_all(); + } + /// Blocks until `finish()` has been called (but the queue may not be empty).
+ void waitUntilFinished() + { + std::unique_lock lock(mutex_); + while (!done_) + { + finishCv_.wait(lock); + } + } + + size_t size() + { + std::lock_guard lock(mutex_); + return queue_.size(); + } + + std::tuple getStat() const + { + return std::tuple{pop_times, pop_empty_times, peak_queue_size}; + } +}; +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp index 0e61a01b01a..80ff8487c21 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp @@ -61,7 +61,7 @@ SegmentReadTasks SegmentReadTask::trySplitReadTasks(const SegmentReadTasks & tas return a->ranges.size() < b->ranges.size(); }; std::priority_queue, decltype(cmp)> largest_ranges_first(cmp); - for (auto & task : tasks) + for (const auto & task : tasks) largest_ranges_first.push(task); // Split the top task. @@ -95,4 +95,171 @@ SegmentReadTasks SegmentReadTask::trySplitReadTasks(const SegmentReadTasks & tas return result_tasks; } +BlockInputStreamPtr SegmentReadTaskPool::buildInputStream(SegmentReadTaskPtr & t) +{ + MemoryTrackerSetter setter(true, mem_tracker); + auto seg = t->segment; + BlockInputStreamPtr stream; + if (is_raw) + { + stream = seg->getInputStreamRaw(*dm_context, columns_to_read, t->read_snapshot, t->ranges, filter, do_range_filter_for_raw); + } + else + { + auto block_size = std::max(expected_block_size, static_cast(dm_context->db_context.getSettingsRef().dt_segment_stable_pack_rows)); + stream = seg->getInputStream(*dm_context, columns_to_read, t->read_snapshot, t->ranges, filter, max_version, block_size); + } + LOG_FMT_DEBUG(log, "getInputStream pool {} seg_id {} succ", pool_id, seg->segmentId()); + return stream; +} + +void SegmentReadTaskPool::finishSegment(const SegmentPtr & seg) +{ + after_segment_read(dm_context, seg); + bool pool_finished = false; + { + std::lock_guard lock(mutex); + active_segment_ids.erase(seg->segmentId()); + pool_finished = active_segment_ids.empty() && tasks.empty(); + } + LOG_FMT_DEBUG(log, "finishSegment pool {} seg_id {} pool_finished {}", pool_id, seg->segmentId(), pool_finished); + if (pool_finished) + { + q.finish(); + } +} + +SegmentReadTaskPtr SegmentReadTaskPool::getTask(uint64_t seg_id) +{ + std::lock_guard lock(mutex); + // TODO(jinhelin): use unordered_map + auto itr = std::find_if(tasks.begin(), tasks.end(), [seg_id](const SegmentReadTaskPtr & task) { return task->segment->segmentId() == seg_id; }); + if (itr == tasks.end()) + { + throw Exception(fmt::format("pool {} seg_id {} not found", pool_id, seg_id)); + } + auto t = *(itr); + tasks.erase(itr); + active_segment_ids.insert(seg_id); + return t; +} + +// Choose a segment to read. +// Returns an iterator to the chosen segment, or `segments.end()` if no segment can be scheduled.
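
Before scheduleSegment's definition below, a side note on the WorkQueue defined above: it is the hand-off point between SegmentReader threads (producers, via pushBlock) and UnorderedInputStream (consumer, via popBlock). A hedged usage sketch of its contract; the include path is the one this patch adds, everything else is illustrative:

```cpp
#include <Storages/DeltaMerge/ReadThread/WorkQueue.h>

#include <thread>
#include <vector>

int main()
{
    DB::DM::WorkQueue<int> q(/*maxSize=*/4); // bounded: push() blocks while 4 items are pending

    std::thread producer([&] {
        for (int i = 0; i < 100; ++i)
        {
            if (!q.push(i, /*size=*/nullptr))
                return; // finish() was called first; item i was not enqueued
        }
        q.finish(); // promise that push() won't be called again
    });

    std::vector<std::thread> consumers;
    for (int t = 0; t < 2; ++t)
    {
        consumers.emplace_back([&] {
            int v;
            // pop() blocks for data and returns false only when the queue
            // is empty *and* finish() has been called.
            while (q.pop(v))
            {
                // ... process v ...
            }
        });
    }

    producer.join();
    for (auto & c : consumers)
        c.join();
}
```

The three counters it keeps (pop_times, pop_empty_times, peak_queue_size) feed the statistics logged by ~SegmentReadTaskPool later in this diff.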
+std::unordered_map>::const_iterator SegmentReadTaskPool::scheduleSegment(const std::unordered_map> & segments, uint64_t expected_merge_count) +{ + auto target = segments.end(); + std::lock_guard lock(mutex); + if (getFreeActiveSegmentCountUnlock() <= 0) + { + return target; + } + for (const auto & task : tasks) + { + auto itr = segments.find(task->segment->segmentId()); + if (itr == segments.end()) + { + throw DB::Exception(fmt::format("seg_id {} not found from merging segments", task->segment->segmentId())); + } + if (std::find(itr->second.begin(), itr->second.end(), poolId()) == itr->second.end()) + { + throw DB::Exception(fmt::format("pool {} not found from merging segment {}=>{}", poolId(), itr->first, itr->second)); + } + if (target == segments.end() || itr->second.size() > target->second.size()) + { + target = itr; + } + if (target->second.size() >= expected_merge_count) + { + break; + } + } + return target; +} + +bool SegmentReadTaskPool::readOneBlock(BlockInputStreamPtr & stream, const SegmentPtr & seg) +{ + MemoryTrackerSetter setter(true, mem_tracker); + auto block = stream->read(); + if (block) + { + pushBlock(std::move(block)); + return true; + } + else + { + finishSegment(seg); + return false; + } +} + +void SegmentReadTaskPool::popBlock(Block & block) +{ + q.pop(block); + blk_stat.pop(block); + global_blk_stat.pop(block); + if (exceptionHappened()) + { + throw exception; + } +} + +void SegmentReadTaskPool::pushBlock(Block && block) +{ + blk_stat.push(block); + global_blk_stat.push(block); + q.push(std::move(block), nullptr); +} + +int64_t SegmentReadTaskPool::increaseUnorderedInputStreamRefCount() +{ + return unordered_input_stream_ref_count.fetch_add(1, std::memory_order_relaxed); +} +int64_t SegmentReadTaskPool::decreaseUnorderedInputStreamRefCount() +{ + return unordered_input_stream_ref_count.fetch_sub(1, std::memory_order_relaxed); +} + +int64_t SegmentReadTaskPool::getFreeBlockSlots() const +{ + double block_slots_scale = dm_context->db_context.getSettingsRef().dt_block_slots_scale; + auto block_slots = static_cast(std::ceil(unordered_input_stream_ref_count.load(std::memory_order_relaxed) * block_slots_scale)); + if (block_slots < 3) + { + block_slots = 3; + } + return block_slots - blk_stat.pendingCount(); +} + +int64_t SegmentReadTaskPool::getFreeActiveSegmentCountUnlock() +{ + double active_segments_scale = dm_context->db_context.getSettingsRef().dt_active_segments_scale; + auto active_segment_limit = static_cast(std::ceil(unordered_input_stream_ref_count.load(std::memory_order_relaxed) * active_segments_scale)); + if (active_segment_limit < 2) + { + active_segment_limit = 2; + } + return active_segment_limit - static_cast(active_segment_ids.size()); +} + +bool SegmentReadTaskPool::exceptionHappened() const +{ + return exception_happened.load(std::memory_order_relaxed); +} + +bool SegmentReadTaskPool::valid() const +{ + return !exceptionHappened() && unordered_input_stream_ref_count.load(std::memory_order_relaxed) > 0; +} +void SegmentReadTaskPool::setException(const DB::Exception & e) +{ + std::lock_guard lock(mutex); + if (!exceptionHappened()) + { + exception = e; + exception_happened.store(true, std::memory_order_relaxed); + q.finish(); + } +} + } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h index dc14716af80..e87099a22b4 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h @@ -13,23 +13,22 @@ // 
limitations under the License. #pragma once - +#include +#include +#include +#include #include -#include - namespace DB { namespace DM { -struct DMContext; struct SegmentReadTask; class Segment; using SegmentPtr = std::shared_ptr; struct SegmentSnapshot; using SegmentSnapshotPtr = std::shared_ptr; -using DMContextPtr = std::shared_ptr; using SegmentReadTaskPtr = std::shared_ptr; using SegmentReadTasks = std::list; using AfterSegmentRead = std::function; @@ -57,13 +56,113 @@ struct SegmentReadTask static SegmentReadTasks trySplitReadTasks(const SegmentReadTasks & tasks, size_t expected_size); }; +class BlockStat +{ +public: + BlockStat() + : pending_count(0) + , pending_bytes(0) + , total_count(0) + , total_bytes(0) + {} + + void push(const Block & blk) + { + pending_count.fetch_add(1, std::memory_order_relaxed); + total_count.fetch_add(1, std::memory_order_relaxed); + + auto b = blk.bytes(); + pending_bytes.fetch_add(b, std::memory_order_relaxed); + total_bytes.fetch_add(b, std::memory_order_relaxed); + } + + void pop(const Block & blk) + { + if (likely(blk)) + { + pending_count.fetch_sub(1, std::memory_order_relaxed); + pending_bytes.fetch_sub(blk.bytes(), std::memory_order_relaxed); + } + } + + int64_t pendingCount() const + { + return pending_count.load(std::memory_order_relaxed); + } + + int64_t pendingBytes() const + { + return pending_bytes.load(std::memory_order_relaxed); + } + + int64_t totalCount() const + { + return total_count.load(std::memory_order_relaxed); + } + int64_t totalBytes() const + { + return total_bytes.load(std::memory_order_relaxed); + } + +private: + std::atomic pending_count; + std::atomic pending_bytes; + std::atomic total_count; + std::atomic total_bytes; +}; + class SegmentReadTaskPool : private boost::noncopyable { public: - explicit SegmentReadTaskPool(SegmentReadTasks && tasks_) - : tasks(std::move(tasks_)) + explicit SegmentReadTaskPool( + int64_t table_id_, + const DMContextPtr & dm_context_, + const ColumnDefines & columns_to_read_, + const RSOperatorPtr & filter_, + uint64_t max_version_, + size_t expected_block_size_, + bool is_raw_, + bool do_range_filter_for_raw_, + SegmentReadTasks && tasks_, + AfterSegmentRead after_segment_read_) + : pool_id(nextPoolId()) + , table_id(table_id_) + , dm_context(dm_context_) + , columns_to_read(columns_to_read_) + , filter(filter_) + , max_version(max_version_) + , expected_block_size(expected_block_size_) + , is_raw(is_raw_) + , do_range_filter_for_raw(do_range_filter_for_raw_) + , tasks(std::move(tasks_)) + , after_segment_read(after_segment_read_) + , log(&Poco::Logger::get("SegmentReadTaskPool")) + , unordered_input_stream_ref_count(0) + , exception_happened(false) + , mem_tracker(current_memory_tracker) {} + ~SegmentReadTaskPool() + { + auto [pop_times, pop_empty_times, max_queue_size] = q.getStat(); + auto pop_empty_ratio = pop_times > 0 ? pop_empty_times * 1.0 / pop_times : 0.0; + auto total_count = blk_stat.totalCount(); + auto total_bytes = blk_stat.totalBytes(); + auto blk_avg_bytes = total_count > 0 ? 
total_bytes / total_count : 0; + auto approximate_max_pending_block_bytes = blk_avg_bytes * max_queue_size; + LOG_FMT_DEBUG(log, "pool {} table {} pop {} pop_empty {} pop_empty_ratio {} max_queue_size {} blk_avg_bytes {} approximate_max_pending_block_bytes {} MB total_count {} total_bytes {} MB", // + pool_id, + table_id, + pop_times, + pop_empty_times, + pop_empty_ratio, + max_queue_size, + blk_avg_bytes, + approximate_max_pending_block_bytes / 1024.0 / 1024.0, + total_count, + total_bytes / 1024.0 / 1024.0); + } + SegmentReadTaskPtr nextTask() { std::lock_guard lock(mutex); @@ -74,13 +173,68 @@ class SegmentReadTaskPool : private boost::noncopyable return task; } + uint64_t poolId() const { return pool_id; } + + int64_t tableId() const { return table_id; } + + const SegmentReadTasks & getTasks() const { return tasks; } + + BlockInputStreamPtr buildInputStream(SegmentReadTaskPtr & t); + + bool readOneBlock(BlockInputStreamPtr & stream, const SegmentPtr & seg); + void popBlock(Block & block); + + std::unordered_map>::const_iterator scheduleSegment( + const std::unordered_map> & segments, + uint64_t expected_merge_count); + + int64_t increaseUnorderedInputStreamRefCount(); + int64_t decreaseUnorderedInputStreamRefCount(); + int64_t getFreeBlockSlots() const; + bool valid() const; + void setException(const DB::Exception & e); + SegmentReadTaskPtr getTask(uint64_t seg_id); + private: + int64_t getFreeActiveSegmentCountUnlock(); + bool exceptionHappened() const; + void finishSegment(const SegmentPtr & seg); + void pushBlock(Block && block); + + const uint64_t pool_id; + const int64_t table_id; + DMContextPtr dm_context; + ColumnDefines columns_to_read; + RSOperatorPtr filter; + const uint64_t max_version; + const size_t expected_block_size; + const bool is_raw; + const bool do_range_filter_for_raw; SegmentReadTasks tasks; - + AfterSegmentRead after_segment_read; std::mutex mutex; + std::unordered_set active_segment_ids; + WorkQueue q; + BlockStat blk_stat; + Poco::Logger * log; + + std::atomic unordered_input_stream_ref_count; + + std::atomic exception_happened; + DB::Exception exception; + + MemoryTracker * mem_tracker; + + inline static std::atomic pool_id_gen{1}; + inline static BlockStat global_blk_stat; + static uint64_t nextPoolId() + { + return pool_id_gen.fetch_add(1, std::memory_order_relaxed); + } }; using SegmentReadTaskPoolPtr = std::shared_ptr; +using SegmentReadTaskPools = std::vector; } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.cpp b/dbms/src/Storages/DeltaMerge/StoragePool.cpp index 2791a74e9e3..523856578a0 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.cpp +++ b/dbms/src/Storages/DeltaMerge/StoragePool.cpp @@ -163,10 +163,11 @@ bool GlobalStoragePool::gc(const Settings & settings, bool immediately, const Se return done_anything; } -StoragePool::StoragePool(Context & global_ctx, NamespaceId ns_id_, StoragePathPool & storage_path_pool, const String & name) +StoragePool::StoragePool(Context & global_ctx, NamespaceId ns_id_, StoragePathPool & storage_path_pool_, const String & name) : logger(Logger::get("StoragePool", !name.empty() ? 
name : DB::toString(ns_id_))) , run_mode(global_ctx.getPageStorageRunMode()) , ns_id(ns_id_) + , storage_path_pool(storage_path_pool_) , global_context(global_ctx) , storage_pool_metrics(CurrentMetrics::StoragePoolV3Only, 0) { @@ -219,18 +220,42 @@ StoragePool::StoragePool(Context & global_ctx, NamespaceId ns_id_, StoragePathPo data_storage_v3 = global_storage_pool->data_storage; meta_storage_v3 = global_storage_pool->meta_storage; - log_storage_v2 = PageStorage::create(name + ".log", - storage_path_pool.getPSDiskDelegatorMulti("log"), - extractConfig(global_context.getSettingsRef(), StorageType::Log), - global_context.getFileProvider()); - data_storage_v2 = PageStorage::create(name + ".data", - storage_path_pool.getPSDiskDelegatorMulti("data"), - extractConfig(global_context.getSettingsRef(), StorageType::Data), - global_ctx.getFileProvider()); - meta_storage_v2 = PageStorage::create(name + ".meta", - storage_path_pool.getPSDiskDelegatorMulti("meta"), - extractConfig(global_context.getSettingsRef(), StorageType::Meta), - global_ctx.getFileProvider()); + if (storage_path_pool.isPSV2Deleted()) + { + LOG_FMT_INFO(logger, "PageStorage V2 is already marked deleted. Current pagestorage change from {} to {} [ns_id={}]", // + static_cast(PageStorageRunMode::MIX_MODE), // + static_cast(PageStorageRunMode::ONLY_V3), // + ns_id); + log_storage_v2 = nullptr; + data_storage_v2 = nullptr; + meta_storage_v2 = nullptr; + run_mode = PageStorageRunMode::ONLY_V3; + storage_path_pool.clearPSV2ObsoleteData(); + } + else + { + // Although there are no more writes to ps v2 in mixed mode, the ps instances will keep running if there is still data in the log storage at restart, + // so we keep their original config here. + // We rely on the mechanism that a writing file is rotated once the non-writing files contain no valid pages, which reduces the disk space usage of these ps instances.
+ log_storage_v2 = PageStorage::create(name + ".log", + storage_path_pool.getPSDiskDelegatorMulti("log"), + extractConfig(global_context.getSettingsRef(), StorageType::Log), + global_context.getFileProvider(), + /* use_v3 */ false, + /* no_more_write_to_v2 */ true); + data_storage_v2 = PageStorage::create(name + ".data", + storage_path_pool.getPSDiskDelegatorMulti("data"), + extractConfig(global_context.getSettingsRef(), StorageType::Data), + global_ctx.getFileProvider(), + /* use_v3 */ false, + /* no_more_write_to_v2 */ true); + meta_storage_v2 = PageStorage::create(name + ".meta", + storage_path_pool.getPSDiskDelegatorMulti("meta"), + extractConfig(global_context.getSettingsRef(), StorageType::Meta), + global_ctx.getFileProvider(), + /* use_v3 */ false, + /* no_more_write_to_v2 */ true); + } log_storage_reader = std::make_shared(run_mode, ns_id, log_storage_v2, log_storage_v3, nullptr); data_storage_reader = std::make_shared(run_mode, ns_id, data_storage_v2, data_storage_v3, nullptr); @@ -272,7 +297,7 @@ void StoragePool::forceTransformMetaV2toV3() const auto & page_transform_entry = meta_transform_storage_reader->getPageEntry(page_transform.page_id); if (!page_transform_entry.field_offsets.empty()) { - throw Exception(fmt::format("Can't transfrom meta from V2 to V3, [page_id={}] {}", // + throw Exception(fmt::format("Can't transform meta from V2 to V3, [page_id={}] {}", // page_transform.page_id, page_transform_entry.toDebugString()), ErrorCodes::LOGICAL_ERROR); @@ -431,18 +456,23 @@ PageStorageRunMode StoragePool::restore() } else { - LOG_FMT_INFO(logger, "Current pool.data translate already done before restored [ns_id={}] ", ns_id); + LOG_FMT_INFO(logger, "Current pool.data translate already done before restored [ns_id={}]", ns_id); } // Check number of valid pages in v2 // If V2 already have no any data in disk, Then change run_mode to ONLY_V3 if (log_storage_v2->getNumberOfPages() == 0 && data_storage_v2->getNumberOfPages() == 0 && meta_storage_v2->getNumberOfPages() == 0) { - // TODO: Need drop V2 in disk and check. 
- LOG_FMT_INFO(logger, "Current pagestorage change from {} to {}", // + LOG_FMT_INFO(logger, "Current pagestorage change from {} to {} [ns_id={}]", // static_cast(PageStorageRunMode::MIX_MODE), - static_cast(PageStorageRunMode::ONLY_V3)); - + static_cast(PageStorageRunMode::ONLY_V3), + ns_id); + if (storage_path_pool.createPSV2DeleteMarkFile()) + { + log_storage_v2->drop(); + data_storage_v2->drop(); + meta_storage_v2->drop(); + } log_storage_v2 = nullptr; data_storage_v2 = nullptr; meta_storage_v2 = nullptr; diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.h b/dbms/src/Storages/DeltaMerge/StoragePool.h index 77684ea46cb..2ab07a53ad9 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.h +++ b/dbms/src/Storages/DeltaMerge/StoragePool.h @@ -79,7 +79,7 @@ class StoragePool : private boost::noncopyable using Timepoint = Clock::time_point; using Seconds = std::chrono::seconds; - StoragePool(Context & global_ctx, NamespaceId ns_id_, StoragePathPool & path_pool, const String & name = ""); + StoragePool(Context & global_ctx, NamespaceId ns_id_, StoragePathPool & storage_path_pool_, const String & name = ""); PageStorageRunMode restore(); @@ -181,6 +181,8 @@ class StoragePool : private boost::noncopyable // whether the three storage instance is owned by this StoragePool const NamespaceId ns_id; + StoragePathPool & storage_path_pool; + PageStoragePtr log_storage_v2; PageStoragePtr data_storage_v2; PageStoragePtr meta_storage_v2; diff --git a/dbms/src/Storages/DeltaMerge/tests/CMakeLists.txt b/dbms/src/Storages/DeltaMerge/tests/CMakeLists.txt index 14ecdfc5e79..44ca6ccb367 100644 --- a/dbms/src/Storages/DeltaMerge/tests/CMakeLists.txt +++ b/dbms/src/Storages/DeltaMerge/tests/CMakeLists.txt @@ -63,6 +63,9 @@ target_link_libraries(dm_test_delta_index_manager dbms gtest_main clickhouse_fun add_executable(dm_test_delta_value_space EXCLUDE_FROM_ALL gtest_dm_delta_value_space.cpp) target_link_libraries(dm_test_delta_value_space dbms gtest_main clickhouse_functions) +add_executable(dm_test_column_file EXCLUDE_FROM_ALL gtest_dm_column_file.cpp) +target_link_libraries(dm_test_column_file dbms gtest_main clickhouse_functions) + add_subdirectory (bank EXCLUDE_FROM_ALL) add_subdirectory (stress EXCLUDE_FROM_ALL) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_circular_scan_list.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_circular_scan_list.cpp new file mode 100644 index 00000000000..ab44b3b2ec2 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_circular_scan_list.cpp @@ -0,0 +1,136 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
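
The next test file pins down the contract of CircularScanList, which is not itself part of this hunk. From the assertions: add() appends a pool, next() round-robins over valid pools and lazily removes invalid ones, get(id) looks up by pool id, and count() reports a (valid, invalid) pair. A hedged, list-based sketch consistent with those assertions (names and structure are mine, not the shipped implementation, and the table-id parameter of count() is ignored here):

```cpp
#include <cstddef>
#include <cstdint>
#include <list>
#include <memory>
#include <utility>

// Illustrative only: mimics the behavior asserted by the tests below.
template <typename T>
class CircularScanListSketch
{
public:
    using ValuePtr = std::shared_ptr<T>;

    void add(const ValuePtr & v) { l.push_back(v); }

    ValuePtr next()
    {
        // Visit at most one full cycle, erasing invalid entries on the way,
        // so repeated next() calls round-robin over the valid entries.
        for (size_t visited = 0, limit = l.size(); visited <= limit && !l.empty(); ++visited)
        {
            if (itr == l.end())
                itr = l.begin(); // wrap around: "circular"
            if ((*itr)->valid())
                return *itr++;
            itr = l.erase(itr);
        }
        return nullptr; // empty, or nothing valid left
    }

    ValuePtr get(uint64_t pool_id) const
    {
        for (const auto & v : l)
            if (v->poolId() == pool_id)
                return v;
        return nullptr;
    }

    std::pair<int64_t, int64_t> count() const
    {
        int64_t valid = 0, invalid = 0;
        for (const auto & v : l)
            (v->valid() ? valid : invalid)++;
        return {valid, invalid};
    }

private:
    std::list<ValuePtr> l;
    typename std::list<ValuePtr>::iterator itr = l.end();
};
```

Note the lazy cleanup this implies: invalidated pools still show up in count() until a next() sweep passes over them, which is exactly what the (7, 3) then (7, 0) assertions below check.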
+#include +#include + +#include + +namespace DB::DM::tests +{ +class Node +{ +public: + explicit Node(uint64_t id_) + : id(id_) + , table_id(1) + , v(true) + {} + + bool valid() const + { + return v; + } + uint64_t poolId() const + { + return id; + } + int64_t tableId() const + { + return table_id; + } + void setInvalid() + { + v = false; + } + +private: + uint64_t id; + int64_t table_id; + bool v; +}; + +TEST(CircularScanListTest, Normal) +{ + CircularScanList lst; + + { + ASSERT_EQ(lst.next(), nullptr); + auto [valid, invalid] = lst.count(0); + ASSERT_EQ(valid, 0); + ASSERT_EQ(invalid, 0); + ASSERT_EQ(lst.get(1), nullptr); + } + + for (uint64_t i = 0; i < 10; i++) + { + lst.add(std::make_shared(i)); + } + + { + auto [valid, invalid] = lst.count(0); + ASSERT_EQ(valid, 10); + ASSERT_EQ(invalid, 0); + } + + for (uint64_t i = 0; i < 20; i++) + { + auto sp = lst.next(); + ASSERT_EQ(sp->poolId(), i % 10); + } + + lst.get(1)->setInvalid(); + lst.get(3)->setInvalid(); + lst.get(5)->setInvalid(); + + { + auto [valid, invalid] = lst.count(0); + ASSERT_EQ(valid, 7); + ASSERT_EQ(invalid, 3); + } + + const std::vector valid_ids = {0, 2, 4, 6, 7, 8, 9}; + for (uint64_t i = 0; i < 20; i++) + { + auto sp = lst.next(); + ASSERT_EQ(sp->poolId(), valid_ids[i % valid_ids.size()]); + } + + { + auto [valid, invalid] = lst.count(0); + ASSERT_EQ(valid, 7); + ASSERT_EQ(invalid, 0); + } + + for (uint64_t id : valid_ids) + { + lst.get(id)->setInvalid(); + } + + { + auto [valid, invalid] = lst.count(0); + ASSERT_EQ(valid, 0); + ASSERT_EQ(invalid, 7); + } + + ASSERT_EQ(lst.next(), nullptr); +} + +TEST(CircularScanListTest, valid) +{ + CircularScanList l; + l.add(std::make_shared(1)); + + ASSERT_EQ(l.next()->poolId(), 1); + ASSERT_EQ(l.next()->poolId(), 1); + + l.next()->setInvalid(); + + ASSERT_EQ(l.next(), nullptr); + ASSERT_EQ(l.next(), nullptr); + l.add(std::make_shared(2)); + + ASSERT_EQ(l.next()->poolId(), 2); + ASSERT_EQ(l.next()->poolId(), 2); +} +} // namespace DB::DM::tests \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_column_sharing_cache.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_column_sharing_cache.cpp new file mode 100644 index 00000000000..82ac6bdecf2 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_column_sharing_cache.cpp @@ -0,0 +1,122 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
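
The cache semantics the next test file pins down are worth spelling out. ColumnSharingCache::get appears to behave as follows (inferred from the assertions below; the ColumnCacheStatus names are the ones the tests use, but their numeric order here is assumed): GET_HIT when an entry at the same start pack covers exactly the requested packs, GET_COPY when the cached entry covers more (a copy of the requested prefix is returned), GET_PART when it covers fewer, and GET_MISS when no entry exists at that start. A hedged sketch of that decision rule:

```cpp
#include <cstddef>

// Hedged sketch of the lookup rule the tests below exercise. The shipped
// ColumnSharingCache also materializes columns, supports del(), and tracks
// statistics, none of which is modeled here.
enum class ColumnCacheStatus { GET_MISS, GET_PART, GET_HIT, GET_COPY };

ColumnCacheStatus classifyLookup(bool entry_exists_at_start_pack, size_t cached_pack_count, size_t requested_pack_count)
{
    if (!entry_exists_at_start_pack)
        return ColumnCacheStatus::GET_MISS; // nothing cached for this start pack
    if (cached_pack_count < requested_pack_count)
        return ColumnCacheStatus::GET_PART; // cached range too short: caller reads from disk
    if (cached_pack_count == requested_pack_count)
        return ColumnCacheStatus::GET_HIT; // exact cover: share the cached column as-is
    return ColumnCacheStatus::GET_COPY; // cached range longer: copy the requested prefix
}
```

The AddAndGet test also implies that add() keeps the widest entry per start pack: after caching 9 packs at start 1, an 8-pack request at the same start becomes a GET_COPY against the 9-pack column, while an earlier 7-pack add() did not displace the 8-pack entry.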
+#include +#include +#include +#include + +#include "gtest/gtest.h" + +namespace DB::DM::tests +{ + +constexpr int TEST_PACK_ROWS = 1024; +DataTypePtr TEST_DATA_TYPE = std::make_shared(); + +ColumnPtr createColumn(int packs) +{ + int rows = packs * TEST_PACK_ROWS; + + auto mut_col = TEST_DATA_TYPE->createColumn(); + for (int i = 0; i < rows; i++) + { + Field f = std::to_string(packs * 100000 + i); + mut_col->insert(f); + } + return std::move(mut_col); +} + +void compareColumn(ColumnPtr & col1, ColumnPtr & col2, int rows) +{ + for (int i = 0; i < rows; i++) + { + ASSERT_EQ((*col1)[i].toString(), (*col2)[i].toString()); + } +} + +TEST(ColumnSharingCacheTest, AddAndGet) +{ + ColumnSharingCache cache; + + auto col = createColumn(8); + cache.add(1, 8, col); + + ColumnPtr col1; + auto st = cache.get(1, 8, 8 * TEST_PACK_ROWS, col1, TEST_DATA_TYPE); + ASSERT_EQ(st, ColumnCacheStatus::GET_HIT); + ASSERT_EQ(col1->size(), 8 * TEST_PACK_ROWS); + compareColumn(col1, col, col1->size()); + + ColumnPtr col2; + st = cache.get(1, 7, 7 * TEST_PACK_ROWS, col2, TEST_DATA_TYPE); + ASSERT_EQ(st, ColumnCacheStatus::GET_COPY); + ASSERT_EQ(col2->size(), 7 * TEST_PACK_ROWS); + compareColumn(col2, col, col2->size()); + + ColumnPtr col3; + st = cache.get(1, 9, 9 * TEST_PACK_ROWS, col3, TEST_DATA_TYPE); + ASSERT_EQ(st, ColumnCacheStatus::GET_PART); + ASSERT_EQ(col3, nullptr); + + ColumnPtr col4; + st = cache.get(2, 8, 8 * TEST_PACK_ROWS, col4, TEST_DATA_TYPE); + ASSERT_EQ(st, ColumnCacheStatus::GET_MISS); + ASSERT_EQ(col4, nullptr); + + auto col5 = createColumn(7); + cache.add(1, 7, col5); + ColumnPtr col6; + st = cache.get(1, 8, 8 * TEST_PACK_ROWS, col6, TEST_DATA_TYPE); + ASSERT_EQ(st, ColumnCacheStatus::GET_HIT); + ASSERT_EQ(col6->size(), 8 * TEST_PACK_ROWS); + compareColumn(col6, col, col6->size()); + + auto col7 = createColumn(9); + cache.add(1, 9, col7); + ColumnPtr col8; + st = cache.get(1, 8, 8 * TEST_PACK_ROWS, col8, TEST_DATA_TYPE); + ASSERT_EQ(st, ColumnCacheStatus::GET_COPY); + ASSERT_EQ(col8->size(), 8 * TEST_PACK_ROWS); + compareColumn(col8, col7, col8->size()); +} + +TEST(ColumnSharingCacheTest, Del) +{ + ColumnSharingCache cache; + + auto col1 = createColumn(8); + cache.add(1, 8, col1); + + auto col2 = createColumn(8); + cache.add(9, 8, col2); + + auto col3 = createColumn(8); + cache.add(17, 8, col3); + + cache.del(10); + + ColumnPtr col4; + auto st = cache.get(9, 8, 8 * TEST_PACK_ROWS, col4, TEST_DATA_TYPE); + ASSERT_EQ(st, ColumnCacheStatus::GET_HIT); + ASSERT_EQ(col4->size(), 8 * TEST_PACK_ROWS); + compareColumn(col4, col2, col4->size()); + + ColumnPtr col5; + st = cache.get(17, 6, 6 * TEST_PACK_ROWS, col5, TEST_DATA_TYPE); + ASSERT_EQ(st, ColumnCacheStatus::GET_COPY); + ASSERT_EQ(col5->size(), 6 * TEST_PACK_ROWS); + compareColumn(col5, col2, col5->size()); +} + +} // namespace DB::DM::tests \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_column_file.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_column_file.cpp new file mode 100644 index 00000000000..e8744f6fe22 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_column_file.cpp @@ -0,0 +1,167 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace DM +{ +namespace tests +{ +class ColumnFileTest + : public DB::base::TiFlashStorageTestBasic +{ +public: + ColumnFileTest() = default; + + static void SetUpTestCase() {} + + void SetUp() override + { + TiFlashStorageTestBasic::SetUp(); + + parent_path = TiFlashStorageTestBasic::getTemporaryPath(); + path_pool = std::make_unique(db_context->getPathPool().withTable("test", "DMFile_Test", false)); + storage_pool = std::make_unique(*db_context, /*ns_id*/ 100, *path_pool, "test.t1"); + column_cache = std::make_shared(); + dm_context = std::make_unique( // + *db_context, + *path_pool, + *storage_pool, + /*hash_salt*/ 0, + 0, + settings.not_compress_columns, + false, + 1, + db_context->getSettingsRef()); + } + + DMContext & dmContext() { return *dm_context; } + + Context & dbContext() { return *db_context; } + +private: + std::unique_ptr dm_context; + /// all these var live as ref in dm_context + std::unique_ptr path_pool; + std::unique_ptr storage_pool; + DeltaMergeStore::Settings settings; + +protected: + String parent_path; + ColumnCachePtr column_cache; +}; + +TEST_F(ColumnFileTest, ColumnFileBigRead) +try +{ + auto table_columns = DMTestEnv::getDefaultColumns(); + auto dm_file = DMFile::create(1, parent_path, false, std::make_optional()); + const size_t num_rows_write_per_batch = 8192; + const size_t batch_num = 3; + const UInt64 tso_value = 100; + { + auto stream = std::make_shared(dbContext(), dm_file, *table_columns); + stream->writePrefix(); + for (size_t i = 0; i < batch_num; i += 1) + { + Block block = DMTestEnv::prepareSimpleWriteBlock(num_rows_write_per_batch * i, num_rows_write_per_batch * (i + 1), false, 100); + stream->write(block, {}); + } + stream->writeSuffix(); + } + + // test read + ColumnFileBig column_file_big(dmContext(), dm_file, RowKeyRange::newAll(false, 1)); + ColumnDefinesPtr column_defines = std::make_shared(); + column_defines->emplace_back(getExtraHandleColumnDefine(/*is_common_handle=*/false)); + column_defines->emplace_back(getVersionColumnDefine()); + auto column_file_big_reader = column_file_big.getReader(dmContext(), /*storage_snap*/ nullptr, column_defines); + { + size_t num_rows_read = 0; + while (Block in = column_file_big_reader->readNextBlock()) + { + ASSERT_EQ(in.columns(), column_defines->size()); + ASSERT_TRUE(in.has(EXTRA_HANDLE_COLUMN_NAME)); + ASSERT_TRUE(in.has(VERSION_COLUMN_NAME)); + auto & pk_column = in.getByName(EXTRA_HANDLE_COLUMN_NAME).column; + for (size_t i = 0; i < pk_column->size(); i++) + { + ASSERT_EQ(pk_column->getInt(i), num_rows_read + i); + } + auto & version_column = in.getByName(VERSION_COLUMN_NAME).column; + for (size_t i = 0; i < version_column->size(); i++) + { + ASSERT_EQ(version_column->getInt(i), tso_value); + } + num_rows_read += in.rows(); + } + ASSERT_EQ(num_rows_read, num_rows_write_per_batch * batch_num); + } + + { + ColumnDefinesPtr column_defines_pk_and_del = std::make_shared(); + 
column_defines_pk_and_del->emplace_back(getExtraHandleColumnDefine(/*is_common_handle=*/false)); + column_defines_pk_and_del->emplace_back(getTagColumnDefine()); + auto column_file_big_reader2 = column_file_big_reader->createNewReader(column_defines_pk_and_del); + size_t num_rows_read = 0; + while (Block in = column_file_big_reader2->readNextBlock()) + { + ASSERT_EQ(in.columns(), column_defines_pk_and_del->size()); + ASSERT_TRUE(in.has(EXTRA_HANDLE_COLUMN_NAME)); + ASSERT_TRUE(in.has(TAG_COLUMN_NAME)); + auto & pk_column = in.getByName(EXTRA_HANDLE_COLUMN_NAME).column; + for (size_t i = 0; i < pk_column->size(); i++) + { + ASSERT_EQ(pk_column->getInt(i), num_rows_read + i); + } + auto & del_column = in.getByName(TAG_COLUMN_NAME).column; + for (size_t i = 0; i < del_column->size(); i++) + { + ASSERT_EQ(del_column->getInt(i), 0); + } + num_rows_read += in.rows(); + } + ASSERT_EQ(num_rows_read, num_rows_write_per_batch * batch_num); + } + + { + auto column_file_big_reader3 = column_file_big_reader->createNewReader(table_columns); + size_t num_rows_read = 0; + while (Block in = column_file_big_reader3->readNextBlock()) + { + ASSERT_EQ(in.columns(), table_columns->size()); + num_rows_read += in.rows(); + } + ASSERT_EQ(num_rows_read, num_rows_write_per_batch * batch_num); + } +} +CATCH + +} // namespace tests +} // namespace DM +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 79038e1a349..89fc676dfee 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -262,6 +262,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -301,7 +302,7 @@ try { // test readRaw const auto & columns = store->getTableColumns(); - BlockInputStreamPtr in = store->readRaw(*db_context, db_context->getSettingsRef(), columns, 1)[0]; + BlockInputStreamPtr in = store->readRaw(*db_context, db_context->getSettingsRef(), columns, 1, /* keep_order= */ false)[0]; size_t num_rows_read = 0; in->readPrefix(); @@ -405,6 +406,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -491,6 +493,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -563,6 +566,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -601,6 +605,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -687,6 +692,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -773,6 +779,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -805,6 +812,7 @@ try /* max_version= */ UInt64(1), 
EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -861,6 +869,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -901,6 +910,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -961,6 +971,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -985,6 +996,7 @@ try /* max_version= */ tso2, EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1009,6 +1021,7 @@ try /* max_version= */ tso1, EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1033,6 +1046,7 @@ try /* max_version= */ tso1 - 1, EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1095,6 +1109,7 @@ try /* max_version= */ tso1, EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1133,6 +1148,7 @@ try /* max_version= */ tso2 - 1, EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1172,6 +1188,7 @@ try /* max_version= */ tso3 - 1, EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1198,6 +1215,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1224,6 +1242,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1286,6 +1305,7 @@ try /* max_version= */ tso1, EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); @@ -1324,6 +1344,7 @@ try /* max_version= */ tso2 - 1, EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); @@ -1363,6 +1384,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); @@ -1420,6 +1442,7 @@ try /* max_version= */ tso1, EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); @@ -1458,6 +1481,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); @@ -1545,6 +1569,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* 
is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1654,6 +1679,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1762,6 +1788,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1851,6 +1878,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -1955,6 +1983,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -2032,6 +2061,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -2109,6 +2139,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -2186,6 +2217,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -2263,6 +2295,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -2338,6 +2371,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -2412,6 +2446,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -2503,6 +2538,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -2638,6 +2674,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -2700,6 +2737,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); @@ -2800,6 +2838,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -2854,6 +2893,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -2971,6 +3011,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; @@ -3009,7 +3050,7 @@ try { // test readRaw const auto & columns = store->getTableColumns(); - BlockInputStreamPtr in = store->readRaw(*db_context, db_context->getSettingsRef(), columns, 1)[0]; + BlockInputStreamPtr in = 
store->readRaw(*db_context, db_context->getSettingsRef(), columns, 1, /* keep_order= */ false)[0]; size_t num_rows_read = 0; in->readPrefix(); @@ -3101,6 +3142,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -3176,6 +3218,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -3208,6 +3251,7 @@ try /* max_version= */ UInt64(1), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -3267,6 +3311,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -3318,6 +3363,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -3392,6 +3438,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_for_fast_mode.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_for_fast_mode.cpp index 360f3e96315..8cfa7c07642 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_for_fast_mode.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_for_fast_mode.cpp @@ -89,6 +89,7 @@ TEST_P(DeltaMergeStoreRWTest, TestFastModeWithOnlyInsertWithoutRangeFilter) /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; @@ -192,6 +193,7 @@ TEST_P(DeltaMergeStoreRWTest, TestFastModeWithOnlyInsertWithRangeFilter) /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; @@ -287,6 +289,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -433,6 +436,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -580,6 +584,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -731,6 +736,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -826,6 +832,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -1011,6 +1018,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ 
true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -1052,6 +1060,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -1114,6 +1123,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -1153,6 +1163,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order = */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -1229,6 +1240,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -1331,6 +1343,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -1445,6 +1458,7 @@ try /* max_version= */ UInt64(1), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ false, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -1513,6 +1527,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -1571,6 +1586,7 @@ try /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, TRACING_NAME, + /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index a575d022476..f066237d8e2 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -124,7 +124,7 @@ bool checkMatch( store->mergeDeltaAll(context); const ColumnDefine & col_to_read = check_pk ? 
getExtraHandleColumnDefine(is_common_handle) : cd; - auto streams = store->read(context, context.getSettingsRef(), {col_to_read}, {all_range}, 1, std::numeric_limits::max(), filter, name); + auto streams = store->read(context, context.getSettingsRef(), {col_to_read}, {all_range}, 1, std::numeric_limits::max(), filter, name, false); streams[0]->readPrefix(); auto rows = streams[0]->read().rows(); streams[0]->readSuffix(); diff --git a/dbms/src/Storages/DeltaMerge/workload/DTWorkload.cpp b/dbms/src/Storages/DeltaMerge/workload/DTWorkload.cpp index faa0100e9e2..93877181e55 100644 --- a/dbms/src/Storages/DeltaMerge/workload/DTWorkload.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/DTWorkload.cpp @@ -190,7 +190,7 @@ void DTWorkload::read(const ColumnDefines & columns, int stream_count, T func) auto filter = EMPTY_FILTER; int excepted_block_size = 1024; uint64_t read_ts = ts_gen->get(); - auto streams = store->read(*context, context->getSettingsRef(), columns, ranges, stream_count, read_ts, filter, "DTWorkload", opts->is_fast_mode, excepted_block_size); + auto streams = store->read(*context, context->getSettingsRef(), columns, ranges, stream_count, read_ts, filter, "DTWorkload", false, opts->is_fast_mode, excepted_block_size); std::vector threads; threads.reserve(streams.size()); for (auto & stream : streams) diff --git a/dbms/src/Storages/Page/PageDefines.h b/dbms/src/Storages/Page/PageDefines.h index 7feea494eb4..330ed7776a3 100644 --- a/dbms/src/Storages/Page/PageDefines.h +++ b/dbms/src/Storages/Page/PageDefines.h @@ -41,7 +41,7 @@ static_assert(PAGE_SIZE_STEP >= ((1 << 10) * 16), "PAGE_SIZE_STEP should be at l static_assert((PAGE_SIZE_STEP & (PAGE_SIZE_STEP - 1)) == 0, "PAGE_SIZE_STEP should be power of 2"); // PageStorage V3 define -static constexpr UInt64 BLOBFILE_LIMIT_SIZE = 512 * MB; +static constexpr UInt64 BLOBFILE_LIMIT_SIZE = 256 * MB; static constexpr UInt64 BLOBSTORE_CACHED_FD_SIZE = 100; static constexpr UInt64 PAGE_META_ROLL_SIZE = 2 * MB; static constexpr UInt64 MAX_PERSISTED_LOG_FILES = 4; diff --git a/dbms/src/Storages/Page/PageStorage.cpp b/dbms/src/Storages/Page/PageStorage.cpp index d8b767a6c15..c56d47d16b3 100644 --- a/dbms/src/Storages/Page/PageStorage.cpp +++ b/dbms/src/Storages/Page/PageStorage.cpp @@ -23,12 +23,13 @@ PageStoragePtr PageStorage::create( PSDiskDelegatorPtr delegator, const PageStorage::Config & config, const FileProviderPtr & file_provider, - bool use_v3) + bool use_v3, + bool no_more_insert_to_v2) { if (use_v3) return std::make_shared(name, delegator, config, file_provider); else - return std::make_shared(name, delegator, config, file_provider); + return std::make_shared(name, delegator, config, file_provider, no_more_insert_to_v2); } /*************************** diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index 0059c0570c1..759f16f53e5 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -138,7 +138,7 @@ class PageStorage : private boost::noncopyable SettingUInt64 blob_file_limit_size = BLOBFILE_LIMIT_SIZE; SettingUInt64 blob_spacemap_type = 2; SettingUInt64 blob_cached_fd_size = BLOBSTORE_CACHED_FD_SIZE; - SettingDouble blob_heavy_gc_valid_rate = 0.2; + SettingDouble blob_heavy_gc_valid_rate = 0.5; SettingUInt64 blob_block_alignment_bytes = 0; SettingUInt64 wal_roll_size = PAGE_META_ROLL_SIZE; @@ -208,6 +208,14 @@ class PageStorage : private boost::noncopyable void reloadSettings(const Config & new_config) { config.reload(new_config); }; Config getSettings() 
const { return config; } + // Use an easier GC config for v2 when all of its data will be transformed to v3. + static Config getEasyGCConfig() + { + Config gc_config; + gc_config.file_roll_size = PAGE_FILE_SMALL_SIZE; + return gc_config; + } + public: static PageStoragePtr create( @@ -215,7 +223,8 @@ class PageStorage : private boost::noncopyable PSDiskDelegatorPtr delegator, const PageStorage::Config & config, const FileProviderPtr & file_provider, - bool use_v3 = false); + bool use_v3 = false, + bool no_more_insert_to_v2 = false); PageStorage( String name, diff --git a/dbms/src/Storages/Page/V2/PageStorage.cpp b/dbms/src/Storages/Page/V2/PageStorage.cpp index 662e9edd686..ba78cf6a75b 100644 --- a/dbms/src/Storages/Page/V2/PageStorage.cpp +++ b/dbms/src/Storages/Page/V2/PageStorage.cpp @@ -160,12 +160,14 @@ PageFileSet PageStorage::listAllPageFiles(const FileProviderPtr & file_provider, PageStorage::PageStorage(String name, PSDiskDelegatorPtr delegator_, // const Config & config_, - const FileProviderPtr & file_provider_) + const FileProviderPtr & file_provider_, + bool no_more_insert_) : DB::PageStorage(name, delegator_, config_, file_provider_) , write_files(std::max(1UL, config_.num_write_slots.get())) , page_file_log(&Poco::Logger::get("PageFile")) , log(&Poco::Logger::get("PageStorage")) , versioned_page_entries(storage_name, config.version_set_config, log) + , no_more_insert(no_more_insert_) { // at least 1 write slots config.num_write_slots = std::max(1UL, config.num_write_slots.get()); @@ -405,18 +407,20 @@ DB::PageEntry PageStorage::getEntryImpl(NamespaceId /*ns_id*/, PageId page_id, S // - Writable, reuse `old_writer` if it is not a nullptr, otherwise, create a new writer from `page_file` // - Not writable, renew the `page_file` and its belonging writer. // The id of the new `page_file` is the max file id among all `write_files` plus one, at level 0. +// If `force` is true, always create a new page file for writing.
PageStorage::WriterPtr PageStorage::checkAndRenewWriter( // WritingPageFile & writing_file, PageFileIdAndLevel max_page_file_id_lvl_hint, const String & parent_path_hint, PageStorage::WriterPtr && old_writer, - const String & logging_msg) + const String & logging_msg, + bool force) { WriterPtr write_file_writer; PageFile & page_file = writing_file.file; - bool is_writable = page_file.isValid() && page_file.getType() == PageFile::Type::Formal // - && isPageFileSizeFitsWritable(page_file, config); + bool is_writable = (!force && page_file.isValid() && page_file.getType() == PageFile::Type::Formal // + && isPageFileSizeFitsWritable(page_file, config)); if (is_writable) { if (old_writer) @@ -451,6 +455,7 @@ PageStorage::WriterPtr PageStorage::checkAndRenewWriter( // PageFileIdAndLevel max_writing_id_lvl{max_page_file_id_lvl_hint}; for (const auto & wf : write_files) max_writing_id_lvl = std::max(max_writing_id_lvl, wf.file.fileIdLevel()); + delegator->addPageFileUsedSize( // PageFileIdAndLevel(max_writing_id_lvl.first + 1, 0), 0, @@ -1086,13 +1091,55 @@ bool PageStorage::gcImpl(bool not_skip, const WriteLimiterPtr & write_limiter, c { external_pages_remover(external_pages, live_normal_pages); } + + // This instance will accept no more writes, so we check whether there is any valid page in the non-writing page files. + // If not, it's very likely that all data have been moved to v3, so we try to roll all writing files so that they can be GCed. + if (no_more_insert) + { + bool has_normal_non_writing_files = false; + bool has_non_empty_write_file = false; + for (const auto & page_file : page_files) + { + if (page_file.fileIdLevel() < min_writing_file_id_level) + { + if (page_file.getType() == PageFile::Type::Formal) + { + has_normal_non_writing_files = true; + } + } + else + { + // writing files + if (page_file.getDiskSize() > 0) + { + has_non_empty_write_file = true; + } + } + if (has_normal_non_writing_files && has_non_empty_write_file) + break; + } + if (!has_normal_non_writing_files && has_non_empty_write_file) + { + std::unique_lock lock(write_mutex); + LOG_FMT_DEBUG(log, "{} No valid pages in non-writing page files and the writing page files are not all empty. Try to roll all {} writing page files", storage_name, write_files.size()); + idle_writers.clear(); + for (auto & write_file : write_files) + { + auto writer = checkAndRenewWriter(write_file, {0, 0}, /*parent_path_hint=*/"", nullptr, "", /*force*/ true); + idle_writers.emplace_back(std::move(writer)); + } + } + } }; GCType gc_type = GCType::Normal; // Ignore page files that maybe writing to. do { - if (not_skip) // For page_storage_ctl, don't skip the GC + // Don't skip the GC in the following cases: + // 1. For page_storage_ctl + // 2. After transforming all data from v2 to v3 + if (not_skip) { gc_type = GCType::Normal; break; } @@ -1103,6 +1150,10 @@ bool PageStorage::gcImpl(bool not_skip, const WriteLimiterPtr & write_limiter, c // If only few page files, running gc is useless. gc_type = GCType::Skip; } + else if (no_more_insert) + { + gc_type = GCType::Normal; + } else if (last_gc_statistics.equals(statistics_snapshot)) { // No write since last gc.
Give it a chance for running GC, ensure that we are able to diff --git a/dbms/src/Storages/Page/V2/PageStorage.h b/dbms/src/Storages/Page/V2/PageStorage.h index b85bc2ff355..1fcb76885bb 100644 --- a/dbms/src/Storages/Page/V2/PageStorage.h +++ b/dbms/src/Storages/Page/V2/PageStorage.h @@ -88,7 +88,8 @@ class PageStorage : public DB::PageStorage PageStorage(String name, PSDiskDelegatorPtr delegator, // const Config & config_, - const FileProviderPtr & file_provider_); + const FileProviderPtr & file_provider_, + bool no_more_insert_ = false); ~PageStorage() = default; void restore() override; @@ -272,13 +273,17 @@ class PageStorage : public DB::PageStorage StatisticsInfo last_gc_statistics; + // true means this instance runs under mix mode and will accept no more inserts + bool no_more_insert = false; + private: WriterPtr checkAndRenewWriter( WritingPageFile & writing_file, PageFileIdAndLevel max_page_file_id_lvl_hint, const String & parent_path_hint, WriterPtr && old_writer = nullptr, - const String & logging_msg = ""); + const String & logging_msg = "", + bool force = false); }; } // namespace PS::V2 diff --git a/dbms/src/Storages/PathPool.cpp b/dbms/src/Storages/PathPool.cpp index 0ae42ec7313..2e7edd7435b 100644 --- a/dbms/src/Storages/PathPool.cpp +++ b/dbms/src/Storages/PathPool.cpp @@ -182,6 +182,52 @@ StoragePathPool & StoragePathPool::operator=(const StoragePathPool & rhs) return *this; } +bool StoragePathPool::createPSV2DeleteMarkFile() +{ + try + { + return Poco::File(getPSV2DeleteMarkFilePath()).createFile(); + } + catch (...) + { + tryLogCurrentException(log); + return false; + } +} + +bool StoragePathPool::isPSV2Deleted() const +{ + return Poco::File(getPSV2DeleteMarkFilePath()).exists(); +} + +void StoragePathPool::clearPSV2ObsoleteData() +{ + std::lock_guard lock{mutex}; + auto drop_instance_data = [&](const String & prefix) { + for (auto & path_info : latest_path_infos) + { + try + { + auto path = fmt::format("{}/{}", path_info.path, prefix); + if (Poco::File dir(path); dir.exists()) + { + // This function is used to clean obsolete data in the ps v2 instance at restart, + // so no need to update global_capacity here. + LOG_FMT_INFO(log, "Begin to drop obsolete data [dir={}]", path); + file_provider->deleteDirectory(dir.path(), false, true); + } + } + catch (...)
+ { + tryLogCurrentException(log); + } + } + }; + drop_instance_data("meta"); + drop_instance_data("log"); + drop_instance_data("data"); +} + void StoragePathPool::rename(const String & new_database, const String & new_table, bool clean_rename) { if (unlikely(new_database.empty() || new_table.empty())) @@ -311,6 +357,11 @@ void StoragePathPool::renamePath(const String & old_path, const String & new_pat LOG_FMT_WARNING(log, "Path \"{}\" is missed.", old_path); } +String StoragePathPool::getPSV2DeleteMarkFilePath() const +{ + return fmt::format("{}/{}", latest_path_infos[0].path, "PS_V2_DELETED"); +} + //========================================================================================== // Generic functions //========================================================================================== diff --git a/dbms/src/Storages/PathPool.h b/dbms/src/Storages/PathPool.h index 40a3cb9a636..4f16511feff 100644 --- a/dbms/src/Storages/PathPool.h +++ b/dbms/src/Storages/PathPool.h @@ -406,6 +406,12 @@ class StoragePathPool // Those paths are generated from the first path of `latest_path_infos` and `prefix` PSDiskDelegatorPtr getPSDiskDelegatorSingle(const String & prefix) { return std::make_shared(*this, prefix); } + bool createPSV2DeleteMarkFile(); + + bool isPSV2Deleted() const; + + void clearPSV2ObsoleteData(); + void rename(const String & new_database, const String & new_table, bool clean_rename); void drop(bool recursive, bool must_success = true); @@ -415,6 +421,8 @@ class StoragePathPool void renamePath(const String & old_path, const String & new_path); + String getPSV2DeleteMarkFilePath() const; + private: using DMFilePathMap = std::unordered_map; struct MainPathInfo diff --git a/dbms/src/Storages/SelectQueryInfo.cpp b/dbms/src/Storages/SelectQueryInfo.cpp index 075c0ad0631..9b5dbf0d30f 100644 --- a/dbms/src/Storages/SelectQueryInfo.cpp +++ b/dbms/src/Storages/SelectQueryInfo.cpp @@ -29,6 +29,7 @@ SelectQueryInfo::SelectQueryInfo(const SelectQueryInfo & rhs) , mvcc_query_info(rhs.mvcc_query_info != nullptr ? std::make_unique(*rhs.mvcc_query_info) : nullptr) , dag_query(rhs.dag_query != nullptr ? 
std::make_unique(*rhs.dag_query) : nullptr) , req_id(rhs.req_id) + , keep_order(rhs.keep_order) {} SelectQueryInfo::SelectQueryInfo(SelectQueryInfo && rhs) noexcept @@ -37,6 +38,7 @@ SelectQueryInfo::SelectQueryInfo(SelectQueryInfo && rhs) noexcept , mvcc_query_info(std::move(rhs.mvcc_query_info)) , dag_query(std::move(rhs.dag_query)) , req_id(std::move(rhs.req_id)) + , keep_order(rhs.keep_order) {} } // namespace DB diff --git a/dbms/src/Storages/SelectQueryInfo.h b/dbms/src/Storages/SelectQueryInfo.h index d49d4c831d1..0807647aa26 100644 --- a/dbms/src/Storages/SelectQueryInfo.h +++ b/dbms/src/Storages/SelectQueryInfo.h @@ -51,7 +51,7 @@ struct SelectQueryInfo std::unique_ptr dag_query; std::string req_id; - + bool keep_order = true; SelectQueryInfo(); ~SelectQueryInfo(); diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index c39a882d53c..1bf3e7c1b73 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -632,6 +632,7 @@ BlockInputStreams StorageDeltaMerge::read( context.getSettingsRef(), columns_to_read, num_streams, + query_info.keep_order, parseSegmentSet(select_query.segment_expression_list), extra_table_id_index); } @@ -756,6 +757,7 @@ BlockInputStreams StorageDeltaMerge::read( /*max_version=*/mvcc_query_info.read_tso, rs_operator, query_info.req_id, + query_info.keep_order, /* is_fast_mode */ tidb_table_info.tiflash_mode == TiDB::TiFlashMode::Fast, // read in normal mode or read in fast mode max_block_size, parseSegmentSet(select_query.segment_expression_list), @@ -838,7 +840,8 @@ size_t getRows(DM::DeltaMergeStorePtr & store, const Context & context, const DM 1, std::numeric_limits::max(), EMPTY_FILTER, - /*tracing_id*/ "getRows")[0]; + /*tracing_id*/ "getRows", + /*keep_order*/ false)[0]; stream->readPrefix(); Block block; while ((block = stream->read())) @@ -863,7 +866,8 @@ DM::RowKeyRange getRange(DM::DeltaMergeStorePtr & store, const Context & context 1, std::numeric_limits::max(), EMPTY_FILTER, - /*tracing_id*/ "getRange")[0]; + /*tracing_id*/ "getRange", + /*keep_order*/ false)[0]; stream->readPrefix(); Block block; size_t index = 0; diff --git a/dbms/src/Storages/Transaction/Collator.cpp b/dbms/src/Storages/Transaction/Collator.cpp index 1b0221a6829..fc40701e3c5 100644 --- a/dbms/src/Storages/Transaction/Collator.cpp +++ b/dbms/src/Storages/Transaction/Collator.cpp @@ -13,9 +13,9 @@ // limitations under the License. 
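The `keep_order` flag threaded through `SelectQueryInfo` and `StorageDeltaMerge::read` above lets a reader opt out of handle-ordered streams when row order does not matter (the internal `getRows`/`getRange` helpers already pass `false`). A hedged caller-side sketch, using the classic `IStorage::read` shape this fork inherits; `storage`, `column_names`, and the trailing arguments are assumed to exist in the calling scope:

```cpp
// Sketch only: an order-insensitive scan (e.g. one feeding an aggregation)
// can disable ordered reads via the new field.
SelectQueryInfo query_info;
query_info.req_id = "unordered-scan"; // illustrative value
query_info.keep_order = false;        // default stays true
auto streams = storage->read(column_names, query_info, context, processed_stage, max_block_size, num_streams);
```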
#include -#include #include +#include #include @@ -92,15 +92,12 @@ class Pattern : public ITiDBCollator::IPattern tp = MatchType::Match; if (offset < pattern.length()) { - auto old_offset = offset; - c = Collator::decodeChar(pattern.data(), old_offset); - if (c == escape || c == '_' || c == '%') - offset = old_offset; - else - { - assert(escape >= 0); - c = static_cast(escape); // NOLINT(bugprone-signed-char-misuse) - } + // use the next char to match + c = Collator::decodeChar(pattern.data(), offset); + } + else + { + // use `escape` itself to match } } else if (c == '_') @@ -186,22 +183,12 @@ class BinCollator final : public ITiDBCollator int compare(const char * s1, size_t length1, const char * s2, size_t length2) const override { - if constexpr (padding) - return DB::RtrimStrCompare({s1, length1}, {s2, length2}); - else - return DB::RawStrCompare({s1, length1}, {s2, length2}); + return DB::BinCollatorCompare(s1, length1, s2, length2); } StringRef sortKey(const char * s, size_t length, std::string &) const override { - if constexpr (padding) - { - return StringRef(rtrim(s, length)); - } - else - { - return StringRef(s, length); - } + return DB::BinCollatorSortKey(s, length); } std::unique_ptr pattern() const override { return std::make_unique>>(); } @@ -593,6 +580,9 @@ TiDBCollatorPtr ITiDBCollator::getCollator(int32_t id) { switch (id) { + case ITiDBCollator::UTF8MB4_BIN: + static const auto utf8mb4_collator = UTF8MB4_BIN_TYPE(UTF8MB4_BIN); + return &utf8mb4_collator; case ITiDBCollator::BINARY: static const auto binary_collator = BinCollator(BINARY); return &binary_collator; @@ -602,9 +592,6 @@ TiDBCollatorPtr ITiDBCollator::getCollator(int32_t id) case ITiDBCollator::LATIN1_BIN: static const auto latin1_collator = BinCollator(LATIN1_BIN); return &latin1_collator; - case ITiDBCollator::UTF8MB4_BIN: - static const auto utf8mb4_collator = UTF8MB4_BIN_TYPE(UTF8MB4_BIN); - return &utf8mb4_collator; case ITiDBCollator::UTF8_BIN: static const auto utf8_collator = UTF8MB4_BIN_TYPE(UTF8_BIN); return &utf8_collator; diff --git a/dbms/src/Storages/Transaction/CollatorUtils.h b/dbms/src/Storages/Transaction/CollatorUtils.h new file mode 100644 index 00000000000..3f318a5b700 --- /dev/null +++ b/dbms/src/Storages/Transaction/CollatorUtils.h @@ -0,0 +1,96 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include + +#define FLATTEN_INLINE_PURE __attribute__((flatten, always_inline, pure)) + +namespace DB +{ + +template <typename T> +ALWAYS_INLINE inline int signum(T val) +{ + return (0 < val) - (val < 0); +} + +// Checking equality is much faster than other comparisons. +// - check size first +// - return 0 if equal else 1 +FLATTEN_INLINE_PURE inline int RawStrEqualCompare(const std::string_view & lhs, const std::string_view & rhs) +{ + return StringRef(lhs) == StringRef(rhs) ?
0 : 1; +} + +// Compare string views by memcmp +FLATTEN_INLINE_PURE inline int RawStrCompare(const std::string_view & v1, const std::string_view & v2) +{ + return v1.compare(v2); +} + +constexpr char SPACE = ' '; + +FLATTEN_INLINE_PURE inline std::string_view RightTrimRaw(const std::string_view & v) +{ + size_t end = v.find_last_not_of(SPACE); + return end == std::string_view::npos ? std::string_view{} : std::string_view(v.data(), end + 1); +} + +// Remove trailing spaces +FLATTEN_INLINE_PURE inline std::string_view RightTrim(const std::string_view & v) +{ + if (likely(v.empty() || v.back() != SPACE)) + return v; + return RightTrimRaw(v); +} + +FLATTEN_INLINE_PURE inline std::string_view RightTrimNoEmpty(const std::string_view & v) +{ + if (likely(v.back() != SPACE)) + return v; + return RightTrimRaw(v); +} + +FLATTEN_INLINE_PURE inline int RtrimStrCompare(const std::string_view & va, const std::string_view & vb) +{ + return RawStrCompare(RightTrim(va), RightTrim(vb)); +} + +template <bool padding> +FLATTEN_INLINE_PURE inline int BinCollatorCompare(const char * s1, size_t length1, const char * s2, size_t length2) +{ + if constexpr (padding) + return DB::RtrimStrCompare({s1, length1}, {s2, length2}); + else + return DB::RawStrCompare({s1, length1}, {s2, length2}); +} + +template <bool padding> +FLATTEN_INLINE_PURE inline StringRef BinCollatorSortKey(const char * s, size_t length) +{ + if constexpr (padding) + { + return StringRef(RightTrim({s, length})); + } + else + { + return StringRef(s, length); + } +} +} // namespace DB diff --git a/dbms/src/Storages/Transaction/KVStore.cpp b/dbms/src/Storages/Transaction/KVStore.cpp index fb31e4476bb..c1394746e1a 100644 --- a/dbms/src/Storages/Transaction/KVStore.cpp +++ b/dbms/src/Storages/Transaction/KVStore.cpp @@ -410,20 +410,12 @@ EngineStoreApplyRes KVStore::handleUselessAdminRaftCmd( curr_region.handleWriteRaftCmd({}, index, term, tmt); - const auto check_sync_log = [&]() { - if (cmd_type != raft_cmdpb::AdminCmdType::CompactLog) - { - // ignore ComputeHash, VerifyHash or other useless cmd. - return false; - } - else - { - return canFlushRegionDataImpl(curr_region_ptr, true, /* try_until_succeed */ false, tmt, region_task_lock); - } - }; - - if (check_sync_log()) + if (cmd_type == raft_cmdpb::AdminCmdType::CompactLog) { + // Before CompactLog, we ought to make sure all data of this region is persisted. + // So the proxy will first call the FFI function `fn_try_flush_data` to trigger an attempt to flush data on TiFlash's side. + // If the attempt fails, the proxy will filter out execution of this CompactLog, which means every CompactLog observed by TiFlash can ALWAYS succeed now. + // ref.
https://github.com/pingcap/tidb-engine-ext/blob/e83a37d2d8d8ae1778fe279c5f06a851f8c9e56a/components/raftstore/src/engine_store_ffi/observer.rs#L175 return EngineStoreApplyRes::Persist; } return EngineStoreApplyRes::None; diff --git a/dbms/src/Storages/Transaction/RegionPersister.cpp b/dbms/src/Storages/Transaction/RegionPersister.cpp index 7ce52c6caa1..8aae893efdc 100644 --- a/dbms/src/Storages/Transaction/RegionPersister.cpp +++ b/dbms/src/Storages/Transaction/RegionPersister.cpp @@ -189,7 +189,7 @@ void RegionPersister::forceTransformKVStoreV2toV3() const auto & page_transform_entry = page_reader->getPageEntry(page.page_id); if (!page_transform_entry.field_offsets.empty()) { - throw Exception(fmt::format("Can't transfrom kvstore from V2 to V3, [page_id={}] {}", + throw Exception(fmt::format("Can't transform kvstore from V2 to V3, [page_id={}] {}", page.page_id, page_transform_entry.toDebugString()), ErrorCodes::LOGICAL_ERROR); @@ -276,10 +276,12 @@ RegionMap RegionPersister::restore(const TiFlashRaftProxyHelper * proxy_helper, } case PageStorageRunMode::MIX_MODE: { + // The ps v2 instance will be destroyed soon after transforming its data to v3, + // so we can safely use an aggressive GC config for it. auto page_storage_v2 = std::make_shared( "RegionPersister", delegator, - config, + PageStorage::getEasyGCConfig(), provider); // V3 should not used getPSDiskDelegatorRaft // Because V2 will delete all invalid(unrecognized) file when it restore @@ -300,7 +302,7 @@ RegionMap RegionPersister::restore(const TiFlashRaftProxyHelper * proxy_helper, LOG_FMT_INFO(log, "Current kvstore transform to V3 begin [pages_before_transform={}]", kvstore_remain_pages); forceTransformKVStoreV2toV3(); const auto & kvstore_remain_pages_after_transform = page_storage_v2->getNumberOfPages(); - LOG_FMT_INFO(log, "Current kvstore transfrom to V3 finished.
[ns_id={}] [done={}] [pages_before_transform={}] [pages_after_transform={}]", // ns_id, kvstore_remain_pages_after_transform == 0, kvstore_remain_pages, @@ -313,8 +315,10 @@ RegionMap RegionPersister::restore(const TiFlashRaftProxyHelper * proxy_helper, } else // no need do transform { - LOG_FMT_INFO(log, "Current kvstore translate already done before restored."); + LOG_FMT_INFO(log, "Current kvstore transform was already done before restore."); } + // Run GC on v2 to decrease its disk space usage + page_storage_v2->gcImpl(/*not_skip=*/true, nullptr, nullptr); // change run_mode to ONLY_V3 page_storage_v2 = nullptr; diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp index 77aab06f6cf..e157d711f1d 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp @@ -46,6 +46,7 @@ class RegionKVStoreTest : public ::testing::Test static void testKVStore(); static void testRegion(); static void testReadIndex(); + static void testNewProxy(); private: static void testRaftSplit(KVStore & kvs, TMTContext & tmt); @@ -54,6 +55,66 @@ class RegionKVStoreTest : public ::testing::Test static void testRaftMergeRollback(KVStore & kvs, TMTContext & tmt); }; +void RegionKVStoreTest::testNewProxy() +{ + std::string path = TiFlashTestEnv::getTemporaryPath("/region_kvs_tmp") + "/basic"; + + Poco::File file(path); + if (file.exists()) + file.remove(true); + file.createDirectories(); + + auto ctx = TiFlashTestEnv::getContext( + DB::Settings(), + Strings{ + path, + }); + KVStore & kvs = *ctx.getTMTContext().getKVStore(); + MockRaftStoreProxy proxy_instance; + TiFlashRaftProxyHelper proxy_helper; + { + proxy_helper = MockRaftStoreProxy::SetRaftStoreProxyFFIHelper(RaftStoreProxyPtr{&proxy_instance}); + proxy_instance.init(100); + } + kvs.restore(&proxy_helper); + { + auto store = metapb::Store{}; + store.set_id(1234); + kvs.setStore(store); + ASSERT_EQ(kvs.getStoreID(), store.id()); + } + { + ASSERT_EQ(kvs.getRegion(0), nullptr); + auto task_lock = kvs.genTaskLock(); + auto lock = kvs.genRegionWriteLock(task_lock); + { + auto region = makeRegion(1, RecordKVFormat::genKey(1, 0), RecordKVFormat::genKey(1, 10)); + lock.regions.emplace(1, region); + lock.index.add(region); + } + } + { + kvs.tryPersist(1); + kvs.gcRegionPersistedCache(Seconds{0}); + } + { + // test CompactLog + raft_cmdpb::AdminRequest request; + raft_cmdpb::AdminResponse response; + auto region = kvs.getRegion(1); + region->markCompactLog(); + kvs.setRegionCompactLogConfig(100000, 1000, 1000); + request.mutable_compact_log(); + request.set_cmd_type(::raft_cmdpb::AdminCmdType::CompactLog); + // CompactLog always returns true now, even if we can't do a flush. + // We use tryFlushData to pre-filter.
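Condensed, the contract that the KVStore change and this test rely on: the proxy pre-filters CompactLog with `fn_try_flush_data`, so TiFlash can persist unconditionally. A self-contained sketch of that decision (the enums mirror the real ones in name only, and `handleUselessAdminCmd` is an illustrative stand-in for `handleUselessAdminRaftCmd`):

```cpp
enum class EngineStoreApplyRes { None, Persist };
enum class AdminCmdType { CompactLog, ComputeHash, VerifyHash };

// Sketch of the new decision: the proxy has already called the FFI
// `fn_try_flush_data` and dropped the CompactLog if the flush failed,
// so any CompactLog that reaches TiFlash can always be persisted.
EngineStoreApplyRes handleUselessAdminCmd(AdminCmdType cmd_type)
{
    if (cmd_type == AdminCmdType::CompactLog)
        return EngineStoreApplyRes::Persist;
    // ComputeHash, VerifyHash and other useless admin commands: nothing to persist.
    return EngineStoreApplyRes::None;
}
```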
+ ASSERT_EQ(kvs.handleAdminRaftCmd(std::move(request), std::move(response), 1, 5, 1, ctx.getTMTContext()), EngineStoreApplyRes::Persist); + + // Filter + ASSERT_EQ(kvs.tryFlushRegionData(1, false, ctx.getTMTContext()), false); + } +} + void RegionKVStoreTest::testReadIndex() { std::string path = TiFlashTestEnv::getTemporaryPath("/region_kvs_tmp") + "/basic"; @@ -968,20 +1029,17 @@ void RegionKVStoreTest::testKVStore() request.mutable_compact_log(); request.set_cmd_type(::raft_cmdpb::AdminCmdType::CompactLog); - - raft_cmdpb::AdminRequest first_request = request; raft_cmdpb::AdminResponse first_response = response; - - ASSERT_EQ(kvs.handleAdminRaftCmd(std::move(first_request), std::move(first_response), 7, 22, 6, ctx.getTMTContext()), EngineStoreApplyRes::Persist); + ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(first_response), 7, 22, 6, ctx.getTMTContext()), EngineStoreApplyRes::Persist); raft_cmdpb::AdminResponse second_response = response; - ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(second_response), 7, 23, 6, ctx.getTMTContext()), EngineStoreApplyRes::None); - request.set_cmd_type(::raft_cmdpb::AdminCmdType::ComputeHash); + ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(second_response), 7, 23, 6, ctx.getTMTContext()), EngineStoreApplyRes::Persist); + request.set_cmd_type(::raft_cmdpb::AdminCmdType::ComputeHash); raft_cmdpb::AdminResponse third_response = response; ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(third_response), 7, 24, 6, ctx.getTMTContext()), EngineStoreApplyRes::None); - request.set_cmd_type(::raft_cmdpb::AdminCmdType::VerifyHash); + request.set_cmd_type(::raft_cmdpb::AdminCmdType::VerifyHash); raft_cmdpb::AdminResponse fourth_response = response; ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(fourth_response), 7, 25, 6, ctx.getTMTContext()), EngineStoreApplyRes::None); @@ -1422,5 +1480,12 @@ try } CATCH +TEST_F(RegionKVStoreTest, NewProxy) +try +{ + testNewProxy(); +} +CATCH + } // namespace tests } // namespace DB diff --git a/dbms/src/Storages/Transaction/tests/gtest_tidb_collator.cpp b/dbms/src/Storages/Transaction/tests/gtest_tidb_collator.cpp index 9a6dae3db08..13c51dba2db 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_tidb_collator.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_tidb_collator.cpp @@ -13,6 +13,7 @@ // limitations under the License. 
#include +#include #include namespace DB::tests @@ -66,59 +67,45 @@ const typename CollatorCases::SortKeyCase CollatorCases::sk_cases[] = { {"a", {PREVENT_TRUNC("\x61"), PREVENT_TRUNC("\x61"), PREVENT_TRUNC("\x00\x41"), PREVENT_TRUNC("\x61"), PREVENT_TRUNC("\x0e\x33")}}, {"A", {PREVENT_TRUNC("\x41"), PREVENT_TRUNC("\x41"), PREVENT_TRUNC("\x00\x41"), PREVENT_TRUNC("\x41"), PREVENT_TRUNC("\x0e\x33")}}, {"😃", - {PREVENT_TRUNC("\xf0\x9f\x98\x83"), PREVENT_TRUNC("\xf0\x9f\x98\x83"), PREVENT_TRUNC("\xff\xfd"), PREVENT_TRUNC("\xf0\x9f\x98\x83"), - PREVENT_TRUNC("\xff\xfd")}}, + {PREVENT_TRUNC("\xf0\x9f\x98\x83"), PREVENT_TRUNC("\xf0\x9f\x98\x83"), PREVENT_TRUNC("\xff\xfd"), PREVENT_TRUNC("\xf0\x9f\x98\x83"), PREVENT_TRUNC("\xff\xfd")}}, {"Foo © bar 𝌆 baz ☃ qux", - {PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), - PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), - PREVENT_TRUNC("\x00\x46\x00\x4f\x00\x4f\x00\x20\x00\xa9\x00\x20\x00\x42\x00\x41\x00\x52\x00\x20\xff\xfd\x00\x20\x00\x42\x00\x41" - "\x00\x5a\x00\x20\x26\x03\x00\x20\x00\x51\x00\x55\x00\x58"), - PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), - PREVENT_TRUNC("\x0E\xB9\x0F\x82\x0F\x82\x02\x09\x02\xC5\x02\x09\x0E\x4A\x0E\x33\x0F\xC0\x02\x09\xFF\xFD\x02\x09\x0E\x4A\x0E\x33" - "\x10\x6A\x02\x09\x06\xFF\x02\x09\x0F\xB4\x10\x1F\x10\x5A")}}, + {PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), + PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), + PREVENT_TRUNC("\x00\x46\x00\x4f\x00\x4f\x00\x20\x00\xa9\x00\x20\x00\x42\x00\x41\x00\x52\x00\x20\xff\xfd\x00\x20\x00\x42\x00\x41" + "\x00\x5a\x00\x20\x26\x03\x00\x20\x00\x51\x00\x55\x00\x58"), + PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), + PREVENT_TRUNC("\x0E\xB9\x0F\x82\x0F\x82\x02\x09\x02\xC5\x02\x09\x0E\x4A\x0E\x33\x0F\xC0\x02\x09\xFF\xFD\x02\x09\x0E\x4A\x0E\x33" + "\x10\x6A\x02\x09\x06\xFF\x02\x09\x0F\xB4\x10\x1F\x10\x5A")}}, {"a ", {PREVENT_TRUNC("\x61\x20"), PREVENT_TRUNC("\x61"), PREVENT_TRUNC("\x00\x41"), PREVENT_TRUNC("\x61"), PREVENT_TRUNC("\x0e\x33")}}, {"", {PREVENT_TRUNC(""), PREVENT_TRUNC(""), PREVENT_TRUNC(""), PREVENT_TRUNC(""), PREVENT_TRUNC("")}}, {"ß", - {PREVENT_TRUNC("\xc3\x9f"), PREVENT_TRUNC("\xc3\x9f"), PREVENT_TRUNC("\x00\x53"), PREVENT_TRUNC("\xc3\x9f"), - PREVENT_TRUNC("\x0F\xEA\x0F\xEA")}}, + {PREVENT_TRUNC("\xc3\x9f"), PREVENT_TRUNC("\xc3\x9f"), PREVENT_TRUNC("\x00\x53"), PREVENT_TRUNC("\xc3\x9f"), PREVENT_TRUNC("\x0F\xEA\x0F\xEA")}}, }; const typename CollatorCases::PatternCase CollatorCases::pattern_cases[] = { {"A", - {{"a", {false, false, true, false, true}}, {"A", {true, true, true, true, true}}, {"À", {false, false, true, false, true}}, - {"", {false, false, false, false, false}}}}, + {{"a", {false, false, true, false, true}}, {"A", {true, true, true, true, true}}, {"À", {false, false, true, false, true}}, {"", {false, false, false, false, false}}}}, {"_A", - {{"aA", {true, true, true, true, true}}, {"ÀA", {false, false, true, true, true}}, {"ÀÀ", {false, false, true, false, true}}, - {"", {false, false, false, false, false}}}}, + {{"aA", {true, true, true, true, true}}, {"ÀA", {false, false, true, true, 
true}}, {"ÀÀ", {false, false, true, false, true}}, {"", {false, false, false, false, false}}}}, {"%A", - {{"a", {false, false, true, false, true}}, {"ÀA", {true, true, true, true, true}}, {"À", {false, false, true, false, true}}, - {"", {false, false, false, false, false}}}}, + {{"a", {false, false, true, false, true}}, {"ÀA", {true, true, true, true, true}}, {"À", {false, false, true, false, true}}, {"", {false, false, false, false, false}}}}, {"À", - {{"a", {false, false, true, false, true}}, {"A", {false, false, true, false, true}}, {"À", {true, true, true, true, true}}, - {"", {false, false, false, false, false}}}}, + {{"a", {false, false, true, false, true}}, {"A", {false, false, true, false, true}}, {"À", {true, true, true, true, true}}, {"", {false, false, false, false, false}}}}, {"_À", - {{" À", {true, true, true, true, true}}, {"ÀA", {false, false, true, false, true}}, {"ÀÀ", {false, false, true, true, true}}, - {"", {false, false, false, false, false}}}}, + {{" À", {true, true, true, true, true}}, {"ÀA", {false, false, true, false, true}}, {"ÀÀ", {false, false, true, true, true}}, {"", {false, false, false, false, false}}}}, {"%À", - {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"ÀA", {false, false, true, false, true}}, - {"", {false, false, false, false, false}}}}, + {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"ÀA", {false, false, true, false, true}}, {"", {false, false, false, false, false}}}}, {"À_", - {{"À ", {true, true, true, true, true}}, {"ÀAA", {false, false, false, false, false}}, {"À", {false, false, false, false, false}}, - {"", {false, false, false, false, false}}}}, + {{"À ", {true, true, true, true, true}}, {"ÀAA", {false, false, false, false, false}}, {"À", {false, false, false, false, false}}, {"", {false, false, false, false, false}}}}, {"À%", - {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"AÀ", {false, false, true, false, true}}, - {"", {false, false, false, false, false}}}}, + {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"AÀ", {false, false, true, false, true}}, {"", {false, false, false, false, false}}}}, {"", - {{"À", {false, false, false, false, false}}, {"ÀÀÀ", {false, false, false, false, false}}, - {"AÀ", {false, false, false, false, false}}, {"", {true, true, true, true, true}}}}, + {{"À", {false, false, false, false, false}}, {"ÀÀÀ", {false, false, false, false, false}}, {"AÀ", {false, false, false, false, false}}, {"", {true, true, true, true, true}}}}, {"%", - {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"AÀ", {true, true, true, true, true}}, - {"", {true, true, true, true, true}}}}, + {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"AÀ", {true, true, true, true, true}}, {"", {true, true, true, true, true}}}}, {"a_%À", - {{"ÀÀ", {false, false, false, false, false}}, {"aÀÀ", {true, true, true, true, true}}, {"ÀÀÀÀ", {false, false, true, false, true}}, - {"ÀÀÀa", {false, false, true, false, true}}}}, + {{"ÀÀ", {false, false, false, false, false}}, {"aÀÀ", {true, true, true, true, true}}, {"ÀÀÀÀ", {false, false, true, false, true}}, {"ÀÀÀa", {false, false, true, false, true}}}}, {"À%_a", - {{"ÀÀ", {false, false, false, false, false}}, {"aÀÀ", {false, false, true, false, true}}, {"ÀÀÀa", {true, true, true, true, true}}, - {"aÀÀÀ", {false, false, true, false, true}}}}, + {{"ÀÀ", {false, false, false, false, false}}, {"aÀÀ", {false, false, 
true, false, true}}, {"ÀÀÀa", {true, true, true, true, true}}, {"aÀÀÀ", {false, false, true, false, true}}}}, {"___a", {{"中a", {true, true, false, false, false}}, {"中文字a", {false, false, true, true, true}}}}, {"𐐭", {{"𐐨", {false, false, true, false, false}}}}, }; @@ -133,7 +120,7 @@ void testCollator() const std::string & s2 = std::get<1>(c); int ans = std::get(std::get<2>(c)); std::cout << "Compare case (" << s1 << ", " << s2 << ", " << ans << ")" << std::endl; - ASSERT_EQ(collator->compare(s1.data(), s1.length(), s2.data(), s2.length()), ans); + ASSERT_EQ(signum((collator->compare(s1.data(), s1.length(), s2.data(), s2.length()))), ans); } for (const auto & c : CollatorCases::sk_cases) { @@ -189,14 +176,29 @@ struct UnicodeCICollator static constexpr auto collation_case = CollatorCases::UnicodeCI; }; -TEST(CollatorSuite, BinCollator) { testCollator(); } +TEST(CollatorSuite, BinCollator) +{ + testCollator(); +} -TEST(CollatorSuite, BinPaddingCollator) { testCollator(); } +TEST(CollatorSuite, BinPaddingCollator) +{ + testCollator(); +} -TEST(CollatorSuite, Utf8BinPaddingCollator) { testCollator(); } +TEST(CollatorSuite, Utf8BinPaddingCollator) +{ + testCollator(); +} -TEST(CollatorSuite, GeneralCICollator) { testCollator(); } +TEST(CollatorSuite, GeneralCICollator) +{ + testCollator(); +} -TEST(CollatorSuite, UnicodeCICollator) { testCollator(); } +TEST(CollatorSuite, UnicodeCICollator) +{ + testCollator(); +} } // namespace DB::tests diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index 54fed31f88d..01c923f3d7e 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -30,6 +30,14 @@ ::testing::AssertionResult check_columns_equality(const ColumnsWithTypeAndName & DB::ColumnsWithTypeAndName readBlock(BlockInputStreamPtr stream); +#define WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN \ + std::vector bools{false, true}; \ + for (auto flag : bools) \ + { \ + enablePlanner(flag); + +#define WRAP_FOR_DIS_ENABLE_PLANNER_END } + class ExecutorTest : public ::testing::Test { protected: @@ -51,16 +59,7 @@ class ExecutorTest : public ::testing::Test DAGContext & getDAGContext(); - /// for planner void enablePlanner(bool is_enable); - template - void wrapForDisEnablePlanner(FF && ff) - { - enablePlanner(false); - ff(); - enablePlanner(true); - ff(); - } static void dagRequestEqual(const String & expected_string, const std::shared_ptr & actual); diff --git a/dbms/src/TestUtils/gtests_dbms_main.cpp b/dbms/src/TestUtils/gtests_dbms_main.cpp index 26c456b5b31..9c53ccb9084 100644 --- a/dbms/src/TestUtils/gtests_dbms_main.cpp +++ b/dbms/src/TestUtils/gtests_dbms_main.cpp @@ -13,6 +13,9 @@ // limitations under the License. 
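The `signum` wrapper added to `testCollator` above deserves a note: like `memcmp`, a collator's `compare` only guarantees the sign of its result, not an exact -1/0/1, so after the `BinCollatorCompare` refactor the magnitude may differ between implementations. A small self-contained illustration (the `signum` definition is the one introduced in CollatorUtils.h):

```cpp
#include <cassert>
#include <cstring>

template <typename T>
inline int signum(T val)
{
    return (0 < val) - (val < 0);
}

int main()
{
    // memcmp-style comparators may return any negative/positive value,
    // so tests should assert on the normalized sign, not the raw result.
    const int raw = std::memcmp("abc", "abd", 3); // negative, exact value unspecified
    assert(signum(raw) == -1);
    assert(signum(std::memcmp("abc", "abc", 3)) == 0);
    return 0;
}
```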
#include +#include +#include +#include #include namespace DB::FailPoints @@ -25,6 +28,10 @@ int main(int argc, char ** argv) { DB::tests::TiFlashTestEnv::setupLogger(); DB::tests::TiFlashTestEnv::initializeGlobalContext(); + DB::ServerInfo server_info; + DB::DM::SegmentReaderPoolManager::instance().init(server_info); + DB::DM::SegmentReadTaskScheduler::instance(); + DB::DM::DMFileReaderPool::instance(); #ifdef FIU_ENABLE fiu_init(0); // init failpoint diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index cc8160761e6..c5c25d1adff 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -270,8 +270,10 @@ DAGRequestBuilder & DAGRequestBuilder::aggregation(ASTPtr agg_func, ASTPtr group { auto agg_funcs = std::make_shared(); auto group_by_exprs = std::make_shared(); - agg_funcs->children.push_back(agg_func); - group_by_exprs->children.push_back(group_by_expr); + if (agg_func) + agg_funcs->children.push_back(agg_func); + if (group_by_expr) + group_by_exprs->children.push_back(group_by_expr); return buildAggregation(agg_funcs, group_by_exprs); } diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index db0dbef929e..317b2e362fb 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -176,8 +176,13 @@ MockWindowFrame buildDefaultRowsFrame(); #define And(expr1, expr2) makeASTFunction("and", (expr1), (expr2)) #define Or(expr1, expr2) makeASTFunction("or", (expr1), (expr2)) #define NOT(expr) makeASTFunction("not", (expr)) + +// Aggregation functions #define Max(expr) makeASTFunction("max", (expr)) +#define Min(expr) makeASTFunction("min", (expr)) +#define Count(expr) makeASTFunction("count", (expr)) #define Sum(expr) makeASTFunction("sum", (expr)) + /// Window functions #define RowNumber() makeASTFunction("RowNumber") #define Rank() makeASTFunction("Rank") diff --git a/dbms/src/TiDB/Schema/SchemaBuilder.cpp b/dbms/src/TiDB/Schema/SchemaBuilder.cpp index 93a36d8e3ee..3486895d7f2 100644 --- a/dbms/src/TiDB/Schema/SchemaBuilder.cpp +++ b/dbms/src/TiDB/Schema/SchemaBuilder.cpp @@ -1323,6 +1323,7 @@ void SchemaBuilder::applySetTiFlashModeOnPhysicalTable( TiDB::TableInfo table_info = storage->getTableInfo(); table_info.tiflash_mode = latest_table_info->tiflash_mode; + table_info.schema_version = target_version; AlterCommands commands; LOG_FMT_INFO(log, "Updating tiflash mode for {} to {}", name_mapper.debugCanonicalName(*db_info, table_info), TiFlashModeToString(table_info.tiflash_mode)); diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 2bedb312d07..536cc97081f 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -100,19 +100,6 @@ add_library (common ${SPLIT_SHARED} if (USE_JEMALLOC) message (STATUS "Link jemalloc: ${JEMALLOC_LIBRARIES}") set (MALLOC_LIBRARIES ${JEMALLOC_LIBRARIES}) -elseif (USE_TCMALLOC) - - if (DEBUG_TCMALLOC AND NOT GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG) - message (WARNING "Requested DEBUG_TCMALLOC but debug lib not found. 
try install libgoogle-perftools-dev") - endif () - - if (DEBUG_TCMALLOC AND GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG) - message (STATUS "Link libtcmalloc_minimal_debug for testing: ${GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG}") - set (MALLOC_LIBRARIES ${GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG}) - else () - message (STATUS "Link libtcmalloc_minimal: ${GPERFTOOLS_TCMALLOC_MINIMAL}") - set (MALLOC_LIBRARIES ${GPERFTOOLS_TCMALLOC_MINIMAL}) - endif () elseif(USE_MIMALLOC) message (STATUS "Link mimalloc: ${MIMALLOC_LIBRARIES}") set (MALLOC_LIBRARIES ${MIMALLOC_LIBRARIES}) @@ -136,6 +123,12 @@ target_include_directories (common PUBLIC ${COMMON_INCLUDE_DIR}) target_include_directories (common BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) +if(APPLE AND ARCH_AARCH64) + set(CPU_FEATURES_LIBRARY) +else() + set(CPU_FEATURES_LIBRARY cpu_features) +endif() + # `libcommon` provides a bridge for many other libs. # We expose the linkage to public because we want headers to be inherited. target_link_libraries ( @@ -151,7 +144,7 @@ target_link_libraries ( ${GLIBC_COMPATIBILITY_LIBRARIES} ${MEMCPY_LIBRARIES} fmt - cpu_features + ${CPU_FEATURES_LIBRARY} ) if (RT_LIBRARY) diff --git a/libs/libcommon/cmake/find_gperftools.cmake b/libs/libcommon/cmake/find_gperftools.cmake deleted file mode 100644 index 98016f0e818..00000000000 --- a/libs/libcommon/cmake/find_gperftools.cmake +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2022 PingCAP, Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -if (ARCH_FREEBSD OR ARCH_32) - option (USE_INTERNAL_GPERFTOOLS_LIBRARY "Set to FALSE to use system gperftools (tcmalloc) library instead of bundled" OFF) -else () - option (USE_INTERNAL_GPERFTOOLS_LIBRARY "Set to FALSE to use system gperftools (tcmalloc) library instead of bundled" ${NOT_UNBUNDLED}) -endif () - -if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") - set(ENABLE_TCMALLOC_DEFAULT 0) -elseif(${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") - set(ENABLE_TCMALLOC_DEFAULT 1) -elseif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - set(ENABLE_TCMALLOC_DEFAULT 1) -endif() - -option (ENABLE_TCMALLOC "Set to TRUE to enable tcmalloc" ${ENABLE_TCMALLOC_DEFAULT}) - -option (DEBUG_TCMALLOC "Set to TRUE to use debug version of libtcmalloc" OFF) - -if (ENABLE_TCMALLOC) - #contrib/libtcmalloc doesnt build debug version, try find in system - if (DEBUG_TCMALLOC OR NOT USE_INTERNAL_GPERFTOOLS_LIBRARY) - find_package (Gperftools) - endif () - - if (NOT (GPERFTOOLS_FOUND AND GPERFTOOLS_INCLUDE_DIR AND GPERFTOOLS_TCMALLOC_MINIMAL) AND NOT (ARCH_FREEBSD OR ARCH_32)) - set (USE_INTERNAL_GPERFTOOLS_LIBRARY 1) - set (GPERFTOOLS_INCLUDE_DIR "${TiFlash_SOURCE_DIR}/contrib/libtcmalloc/include") - set (GPERFTOOLS_TCMALLOC_MINIMAL tcmalloc_minimal_internal) - endif () - - if (GPERFTOOLS_FOUND OR USE_INTERNAL_GPERFTOOLS_LIBRARY) - set (USE_TCMALLOC 1) - endif () - - message (STATUS "Using tcmalloc=${USE_TCMALLOC}: ${GPERFTOOLS_INCLUDE_DIR} : ${GPERFTOOLS_TCMALLOC_MINIMAL}") -endif () diff --git a/libs/libcommon/cmake/find_mimalloc.cmake b/libs/libcommon/cmake/find_mimalloc.cmake index 92d50a199f0..1468361a912 100644 --- a/libs/libcommon/cmake/find_mimalloc.cmake +++ b/libs/libcommon/cmake/find_mimalloc.cmake @@ -19,7 +19,7 @@ if (ENABLE_MIMALLOC) set (USE_MIMALLOC 1) message (STATUS "Using mimalloc=${USE_MIMALLOC}: ${MIMALLOC_LIBRARIES}") - if (ENABLE_JEMALLOC OR ENABLE_TCMALLOC) + if (ENABLE_JEMALLOC) message(FATAL_ERROR "multiple global allocator detected!") endif() endif () diff --git a/libs/libcommon/include/common/config_common.h.in b/libs/libcommon/include/common/config_common.h.in index 0ec263216f4..6f6dc90168c 100644 --- a/libs/libcommon/include/common/config_common.h.in +++ b/libs/libcommon/include/common/config_common.h.in @@ -3,7 +3,6 @@ // .h autogenerated by cmake ! #cmakedefine01 APPLE_SIERRA_OR_NEWER -#cmakedefine01 USE_TCMALLOC #cmakedefine01 USE_JEMALLOC #cmakedefine01 USE_JEMALLOC_PROF #cmakedefine01 USE_MIMALLOC diff --git a/libs/libcommon/include/common/detect_features.h b/libs/libcommon/include/common/detect_features.h index 0291015ead4..c8d0c238209 100644 --- a/libs/libcommon/include/common/detect_features.h +++ b/libs/libcommon/include/common/detect_features.h @@ -15,6 +15,7 @@ #pragma once #if defined(__aarch64__) || defined(__arm64__) || defined(__arm64) || defined(__ARM64) || defined(__AARCH64__) +#ifndef __APPLE__ #include namespace common { @@ -24,6 +25,42 @@ using CPUInfo = cpu_features::Aarch64Info; extern const CPUInfo cpu_info; static inline const CPUFeatures & cpu_feature_flags = cpu_info.features; } // namespace common +#else +#include +#include +namespace common +{ +static inline int detect(const char * name) +{ + int enabled; + size_t enabled_len = sizeof(enabled); + const int failure = ::sysctlbyname(name, &enabled, &enabled_len, NULL, 0); + return failure ? 
0 : enabled; +} +static inline const struct CPUFeatures +{ + bool asimd = detect("hw.optional.neon"); + bool sve = false; // currently not supported + bool sve2 = false; // currently not supported + + bool atomics = detect("hw.optional.armv8_1_atomics"); + bool hpfp = detect("hw.optional.neon_hpfp"); + bool fp16 = detect("hw.optional.neon_fp16"); + bool fhm = detect("hw.optional.armv8_2_fhm"); + bool crc32 = detect("hw.optional.armv8_crc32"); + bool sha512 = detect("hw.optional.armv8_2_sha512"); + bool sha3 = detect("hw.optional.armv8_2_sha3"); + + // the following are assumed to be true. + // see https://github.com/golang/sys/pull/114/files + bool cpuid = true; + bool aes = true; + bool pmull = true; + bool sha1 = true; + bool sha2 = true; +} cpu_feature_flags; +} // namespace common +#endif #endif #if defined(__x86_64__) || defined(__x86_64) || defined(__amd64) || defined(__amd64__) diff --git a/tests/fullstack-test/expr/like.test b/tests/fullstack-test/expr/like.test index af76095d157..5f13c7a8159 100644 --- a/tests/fullstack-test/expr/like.test +++ b/tests/fullstack-test/expr/like.test @@ -38,4 +38,29 @@ mysql> set @@tidb_isolation_read_engines='tiflash'; select * from test.t where ' | a | b | +------+------+ | aaaa | %a% | -+------+------+ \ No newline at end of file ++------+------+ + +mysql> insert into test.t values ('1234', ''); + +mysql> set @@tidb_isolation_read_engines='tiflash'; select a from test.t where a like '1234' escape '4'; ++------+ +| a | ++------+ +| 1234 | ++------+ + +mysql> set @@tidb_isolation_read_engines='tiflash'; select a from test.t where a like '1234' escape '2'; + +mysql> set @@tidb_isolation_read_engines='tiflash'; select a from test.t where a like '15234' escape '5'; ++------+ +| a | ++------+ +| 1234 | ++------+ + +mysql> set @@tidb_isolation_read_engines='tiflash'; select a from test.t where a like '_223_' escape '2'; ++------+ +| a | ++------+ +| 1234 | ++------+ diff --git a/tests/fullstack-test2/ddl/alter_table_tiflash_replica_and_mode.test b/tests/fullstack-test2/ddl/alter_table_tiflash_replica_and_mode.test index 684948145ba..0d22a8a242e 100644 --- a/tests/fullstack-test2/ddl/alter_table_tiflash_replica_and_mode.test +++ b/tests/fullstack-test2/ddl/alter_table_tiflash_replica_and_mode.test @@ -142,4 +142,27 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t; | 1 | 2| | 2 | 4| | 5 | 7| -+------+-----+ \ No newline at end of file ++------+-----+ + +## test the case across 'do query' and 'set tiflash mode' + +=> DBGInvoke __enable_schema_sync_service('false') + +## TODO: case 1 cannot be simulated in this script +# case 1 : start query (set failpoint) --> set tiflash mode and refresh schema --> continue query; since query_version < local_schema, the query fails + +# case 2 : set tiflash mode first, then do query --> local_schema < query_schema_version --> sync schema and do query in the new mode +mysql> alter table test.t set tiflash mode normal; + +# query in normal mode
mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t; ++------+-----+ +| a | b | ++------+-----+ +| 2 | 4| +| 5 | 7| ++------+-----+ + + +=> DBGInvoke __enable_schema_sync_service('true') + diff --git a/tests/tidb-ci/new_collation_fullstack/expr.test b/tests/tidb-ci/new_collation_fullstack/expr.test index 1e2135c4f2d..c88db20ba2f 100644 --- a/tests/tidb-ci/new_collation_fullstack/expr.test +++ b/tests/tidb-ci/new_collation_fullstack/expr.test @@ -88,4 +88,20 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select
/*+ read_from_s | min(value) | max(value) | min(value1) | max(value1) | +------------+------------+-------------+-------------+ | abc | def | abc | def | -+------------+------------+-------------+-------------+ \ No newline at end of file ++------------+------------+-------------+-------------+ + +mysql> insert into test.t values (4, '', 'def\n'), (5, '', 'def '); + +mysql> select /*+ read_from_storage(tiflash[t]) */ hex(max(value1)) from test.t; ++------------------+ +| hex(max(value1)) | ++------------------+ +| 6465660A | ++------------------+ + +mysql> select /*+ read_from_storage(tiflash[t]) */ hex(min(value1)) from test.t; ++------------------+ +| hex(min(value1)) | ++------------------+ +| 61626320 | ++------------------+
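Why these expectations hold: `utf8mb4_bin` is a PADDING SPACE collation, so the binary comparison path trims trailing spaces and nothing else. The following self-contained replay (logic copied from the `RightTrim`/`RtrimStrCompare` pair added in CollatorUtils.h above) reproduces both results: `'def\n'` wins `max(value1)` because `'\n'` is not trimmed, while `'abc '` (hex `61626320`) compares equal to `'abc'` and can be returned as `min(value1)` with its trailing space intact.

```cpp
#include <cassert>
#include <string_view>

constexpr char SPACE = ' ';

// Copied from CollatorUtils.h (introduced earlier in this patch).
inline std::string_view RightTrim(const std::string_view & v)
{
    if (v.empty() || v.back() != SPACE)
        return v;
    const size_t end = v.find_last_not_of(SPACE);
    return end == std::string_view::npos ? std::string_view{} : std::string_view(v.data(), end + 1);
}

inline int RtrimStrCompare(const std::string_view & va, const std::string_view & vb)
{
    return RightTrim(va).compare(RightTrim(vb));
}

int main()
{
    assert(RtrimStrCompare("def ", "def") == 0);  // trailing spaces are ignored...
    assert(RtrimStrCompare("def\n", "def ") > 0); // ...but '\n' is compared, so 'def\n' is the max
    assert(RtrimStrCompare("abc ", "abc") == 0);  // 'abc ' ties with 'abc'; min keeps the stored bytes
    return 0;
}
```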