diff --git a/.github/workflows/build-cachelib-docker.yml b/.github/workflows/build-cachelib-docker.yml
index f73339e0d9..be28bc233c 100644
--- a/.github/workflows/build-cachelib-docker.yml
+++ b/.github/workflows/build-cachelib-docker.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     env:
       REPO: cachelib
-      GITHUB_REPO: pmem/CacheLib
+      GITHUB_REPO: intel/CacheLib
       CONTAINER_REG: ghcr.io/pmem/cachelib
       CONTAINER_REG_USER: ${{ secrets.GH_CR_USER }}
       CONTAINER_REG_PASS: ${{ secrets.GH_CR_PAT }}
diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h
index 46b903c22f..5f48c6de58 100644
--- a/cachelib/allocator/CacheAllocator-inl.h
+++ b/cachelib/allocator/CacheAllocator-inl.h
@@ -125,6 +125,7 @@ ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts(TierId tid) {
   ShmSegmentOpts opts;
   opts.alignment = sizeof(Slab);
   opts.typeOpts = memoryTierConfigs[tid].getShmTypeOpts();
+  opts.memBindNumaNodes = memoryTierConfigs[tid].getMemBind();
   if (auto* v = std::get_if<PosixSysVSegmentOpts>(&opts.typeOpts)) {
     v->usePosix = config_.usePosixShm;
   }
@@ -1307,7 +1308,7 @@ CacheAllocator<CacheTrait>::moveRegularItemWithSync(
   // make sure that no other thread removed it, and only then replaces it.
   if (!replaceInMMContainer(oldItem, *newItemHdl)) {
     accessContainer_->remove(*newItemHdl);
-    return {};
+    return acquire(&oldItem);
   }
 
   // Replacing into the MM container was successful, but someone could have
@@ -1315,7 +1316,7 @@ CacheAllocator<CacheTrait>::moveRegularItemWithSync(
   // replaceInMMContainer() operation, which would invalidate newItemHdl.
   if (!newItemHdl->isAccessible()) {
     removeFromMMContainer(*newItemHdl);
-    return {};
+    return acquire(&oldItem);
   }
 
   // no one can add or remove chained items at this point
@@ -1640,7 +1641,13 @@ typename CacheAllocator<CacheTrait>::WriteHandle
 CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
     TierId tid, PoolId pid, Item& item) {
   if (item.isChainedItem()) return {}; // TODO: We do not support ChainedItem yet
-  if (item.isExpired()) return acquire(&item);
+  if (item.isExpired()) {
+    auto handle = removeIf(item, [](const Item& it) {
+      return it.getRefCount() == 0;
+    });
+
+    if (handle) { return handle; }
+  }
 
   TierId nextTier = tid; // TODO - calculate this based on some admission policy
   while (++nextTier < getNumTiers()) { // try to evict down to the next memory tiers
@@ -3066,16 +3073,12 @@ CacheAllocator<CacheTrait>::evictNormalItem(Item& item,
   // We remove the item from both access and mm containers. It doesn't matter
   // if someone else calls remove on the item at this moment, the item cannot
   // be freed as long as we have the moving bit set.
-  auto handle = accessContainer_->removeIf(item, std::move(predicate));
-
+  auto handle = removeIf(item, std::move(predicate));
   if (!handle) {
     return handle;
   }
 
-  XDCHECK_EQ(reinterpret_cast<uintptr_t>(handle.get()),
-             reinterpret_cast<uintptr_t>(&item));
   XDCHECK_EQ(1u, handle->getRefCount());
-  removeFromMMContainer(item);
 
   // now that we are the only handle and we actually removed something from
   // the RAM cache, we enqueue it to nvmcache.
@@ -3187,6 +3190,21 @@ CacheAllocator<CacheTrait>::evictChainedItemForSlabRelease(ChainedItem& child) {
   return parentHandle;
 }
 
+template <typename CacheTrait>
+template <typename Fn>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::removeIf(Item& item, Fn&& predicate) {
+  auto handle = accessContainer_->removeIf(item, std::forward<Fn>(predicate));
+
+  if (handle) {
+    XDCHECK_EQ(reinterpret_cast<uintptr_t>(handle.get()),
+               reinterpret_cast<uintptr_t>(&item));
+    removeFromMMContainer(item);
+  }
+
+  return handle;
+}
+
 template <typename CacheTrait>
 bool CacheAllocator<CacheTrait>::removeIfExpired(const ReadHandle& handle) {
   if (!handle) {
@@ -3195,14 +3213,7 @@ bool CacheAllocator<CacheTrait>::removeIfExpired(const ReadHandle& handle) {
 
   // We remove the item from both access and mm containers.
   // We want to make sure the caller is the only one holding the handle.
-  auto removedHandle =
-      accessContainer_->removeIf(*(handle.getInternal()), itemExpiryPredicate);
-  if (removedHandle) {
-    removeFromMMContainer(*(handle.getInternal()));
-    return true;
-  }
-
-  return false;
+  return (bool)removeIf(*(handle.getInternal()), itemExpiryPredicate);
 }
 
 template <typename CacheTrait>
diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h
index 02557dfe24..9cf04cc1a9 100644
--- a/cachelib/allocator/CacheAllocator.h
+++ b/cachelib/allocator/CacheAllocator.h
@@ -1496,8 +1496,9 @@ class CacheAllocator : public CacheBase {
   // @param oldItem     Reference to the item being moved
   // @param newItemHdl  Reference to the handle of the new item being moved into
   //
-  // @return true  If the move was completed, and the containers were updated
-  //               successfully.
+  // @return the handle to the oldItem if the move was completed
+  //         and the oldItem can be recycled.
+  //         Otherwise an empty handle is returned.
   template <typename P>
   WriteHandle moveRegularItemWithSync(Item& oldItem, WriteHandle& newItemHdl,
                                       P&& predicate);
@@ -1806,6 +1807,12 @@ class CacheAllocator : public CacheBase {
   // handle on failure. caller can retry.
   WriteHandle evictChainedItemForSlabRelease(ChainedItem& item);
 
+  // Helper function to remove an item if the predicate returns true.
+  //
+  // @return the last handle to the item on success; an empty handle on failure.
+  template <typename Fn>
+  WriteHandle removeIf(Item& item, Fn&& predicate);
+
   // Helper function to remove an item if it has expired.
   //
   // @return true if the item was expired and removed successfully.
diff --git a/cachelib/allocator/MemoryTierCacheConfig.h b/cachelib/allocator/MemoryTierCacheConfig.h
index ae07a92516..662983ea84 100644
--- a/cachelib/allocator/MemoryTierCacheConfig.h
+++ b/cachelib/allocator/MemoryTierCacheConfig.h
@@ -53,6 +53,16 @@ class MemoryTierCacheConfig {
 
   size_t getRatio() const noexcept { return ratio; }
 
+  // Allocate memory only from the specified NUMA nodes
+  MemoryTierCacheConfig& setMemBind(const std::vector<size_t>& _numaNodes) {
+    numaNodes = _numaNodes;
+    return *this;
+  }
+
+  std::vector<size_t> getMemBind() const { return numaNodes; }
+
   size_t calculateTierSize(size_t totalCacheSize, size_t partitionNum) const {
     // TODO: Call this method when tiers are enabled in allocator
     // to calculate tier sizes in bytes.
@@ -82,6 +92,9 @@ class MemoryTierCacheConfig {
   // Options specific to shm type
   ShmTypeOpts shmOpts;
 
+  // NUMA node(s) to bind the tier to
+  std::vector<size_t> numaNodes;
+
   MemoryTierCacheConfig() = default;
 };
 } // namespace cachelib
 } // namespace facebook
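For reference, a minimal usage sketch of the setMemBind()/getMemBind() API added above (not part of the patch; the LruAllocator alias and the node IDs are illustrative assumptions):

    #include "cachelib/allocator/CacheAllocator.h"
    #include "cachelib/allocator/MemoryTierCacheConfig.h"

    using Cache = facebook::cachelib::LruAllocator;
    using facebook::cachelib::MemoryTierCacheConfig;

    int main() {
      Cache::Config config;
      config.setCacheSize(100 * facebook::cachelib::Slab::kSize);
      config.enableCachePersistence("/tmp");
      // Two equally sized shm tiers, each bound to its own NUMA node.
      config.configureMemoryTiers(
          {MemoryTierCacheConfig::fromShm().setRatio(1).setMemBind({0}),
           MemoryTierCacheConfig::fromShm().setRatio(1).setMemBind({1})});
      Cache cache(Cache::SharedMemNew, config);
    }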
diff --git a/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp b/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp
index 90ef34be41..0484b843f2 100644
--- a/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp
+++ b/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp
@@ -23,9 +23,13 @@ namespace tests {
 using LruAllocatorMemoryTiersTest = AllocatorMemoryTiersTest<LruAllocator>;
 
 // TODO(MEMORY_TIER): add more tests with different eviction policies
-TEST_F(LruAllocatorMemoryTiersTest, MultiTiersInvalid) { this->testMultiTiersInvalid(); }
-TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValid) { this->testMultiTiersValid(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersFromFileInvalid) { this->testMultiTiersFromFileInvalid(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersFromFileValid) { this->testMultiTiersFromFileValid(); }
 TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValidMixed) { this->testMultiTiersValidMixed(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersNumaBindingsSysVValid) { this->testMultiTiersNumaBindingsSysVValid(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersNumaBindingsPosixValid) { this->testMultiTiersNumaBindingsPosixValid(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersRemoveDuringEviction) { this->testMultiTiersRemoveDuringEviction(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersReplaceDuringEviction) { this->testMultiTiersReplaceDuringEviction(); }
 
 } // end of namespace tests
 } // end of namespace cachelib
diff --git a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h
index dba8cfd2dd..3ff6c6a90a 100644
--- a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h
+++ b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h
@@ -20,14 +20,44 @@
 #include "cachelib/allocator/MemoryTierCacheConfig.h"
 #include "cachelib/allocator/tests/TestBase.h"
 
+#include <folly/synchronization/Latch.h>
+
 namespace facebook {
 namespace cachelib {
 namespace tests {
 
 template <typename AllocatorT>
 class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
+ private:
+  template <typename MvCallback>
+  void testMultiTiersAsyncOpDuringMove(std::unique_ptr<AllocatorT>& alloc,
+                                       PoolId& pool, bool& quit,
+                                       MvCallback&& moveCb) {
+    typename AllocatorT::Config config;
+    config.setCacheSize(4 * Slab::kSize);
+    config.enableCachePersistence("/tmp");
+    config.configureMemoryTiers({
+        MemoryTierCacheConfig::fromShm().setRatio(1).setMemBind({0}),
+        MemoryTierCacheConfig::fromShm().setRatio(1).setMemBind({0})});
+
+    config.enableMovingOnSlabRelease(moveCb, {} /* ChainedItemsMoveSync */,
+                                     -1 /* movingAttemptsLimit */);
+
+    alloc = std::make_unique<AllocatorT>(AllocatorT::SharedMemNew, config);
+    ASSERT(alloc != nullptr);
+    pool = alloc->addPool("default", alloc->getCacheMemoryStats().cacheSize);
+
+    int i = 0;
+    while (!quit) {
+      auto handle = alloc->allocate(pool, std::to_string(++i),
+                                    std::string("value").size());
+      ASSERT(handle != nullptr);
+      ASSERT_NO_THROW(alloc->insertOrReplace(handle));
+    }
+  }
+
  public:
-  void testMultiTiersInvalid() {
+  void testMultiTiersFromFileInvalid() {
     typename AllocatorT::Config config;
     config.setCacheSize(100 * Slab::kSize);
     config.configureMemoryTiers({
@@ -42,7 +72,7 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
         std::invalid_argument);
   }
 
-  void testMultiTiersValid() {
+  void testMultiTiersFromFileValid() {
     typename AllocatorT::Config config;
     config.setCacheSize(100 * Slab::kSize);
     config.enableCachePersistence("/tmp");
@@ -83,6 +113,111 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
     ASSERT(handle != nullptr);
     ASSERT_NO_THROW(alloc->insertOrReplace(handle));
   }
+
+  void testMultiTiersNumaBindingsSysVValid() {
+    typename AllocatorT::Config config;
+    config.setCacheSize(100 * Slab::kSize);
+    config.enableCachePersistence("/tmp");
+    config.configureMemoryTiers({
+        MemoryTierCacheConfig::fromShm().setRatio(1).setMemBind({0}),
+        MemoryTierCacheConfig::fromShm().setRatio(1).setMemBind({0})});
+
+    auto alloc = std::make_unique<AllocatorT>(AllocatorT::SharedMemNew, config);
+    ASSERT(alloc != nullptr);
+
+    auto pool = alloc->addPool("default", alloc->getCacheMemoryStats().cacheSize);
+    auto handle = alloc->allocate(pool, "key", std::string("value").size());
+    ASSERT(handle != nullptr);
+    ASSERT_NO_THROW(alloc->insertOrReplace(handle));
+  }
+
+  void testMultiTiersNumaBindingsPosixValid() {
+    typename AllocatorT::Config config;
+    config.setCacheSize(100 * Slab::kSize);
+    config.enableCachePersistence("/tmp");
+    config.usePosixForShm();
+    config.configureMemoryTiers({
+        MemoryTierCacheConfig::fromShm().setRatio(1).setMemBind({0}),
+        MemoryTierCacheConfig::fromShm().setRatio(1).setMemBind({0})});
+
+    auto alloc = std::make_unique<AllocatorT>(AllocatorT::SharedMemNew, config);
+    ASSERT(alloc != nullptr);
+
+    auto pool = alloc->addPool("default", alloc->getCacheMemoryStats().cacheSize);
+    auto handle = alloc->allocate(pool, "key", std::string("value").size());
+    ASSERT(handle != nullptr);
+    ASSERT_NO_THROW(alloc->insertOrReplace(handle));
+  }
+
+  void testMultiTiersRemoveDuringEviction() {
+    std::unique_ptr<AllocatorT> alloc;
+    PoolId pool;
+    std::unique_ptr<std::thread> t;
+    folly::Latch latch(1);
+    bool quit = false;
+
+    auto moveCb = [&](typename AllocatorT::Item& oldItem,
+                      typename AllocatorT::Item& newItem,
+                      typename AllocatorT::Item* /* parentPtr */) {
+      auto key = oldItem.getKey();
+      t = std::make_unique<std::thread>([&]() {
+        // remove() is blocked by the wait context until the item is moved
+        // to the next tier, so we must notify the latch before calling
+        // remove().
+        latch.count_down();
+        alloc->remove(key);
+      });
+      // wait until the async thread is running
+      latch.wait();
+      memcpy(newItem.getMemory(), oldItem.getMemory(), oldItem.getSize());
+      quit = true;
+    };
+
+    testMultiTiersAsyncOpDuringMove(alloc, pool, quit, moveCb);
+
+    t->join();
+  }
+
+  void testMultiTiersReplaceDuringEviction() {
+    std::unique_ptr<AllocatorT> alloc;
+    PoolId pool;
+    std::unique_ptr<std::thread> t;
+    folly::Latch latch(1);
+    bool quit = false;
+
+    auto moveCb = [&](typename AllocatorT::Item& oldItem,
+                      typename AllocatorT::Item& newItem,
+                      typename AllocatorT::Item* /* parentPtr */) {
+      auto key = oldItem.getKey();
+      if (!quit) {
+        // we need to replace only once because subsequent allocate calls
+        // will cause evictions recursively
+        quit = true;
+        t = std::make_unique<std::thread>([&]() {
+          auto handle = alloc->allocate(pool, key, std::string("new value").size());
+          // insertOrReplace() is blocked by the wait context until the item
+          // is moved to the next tier, so we must notify the latch before
+          // calling insertOrReplace().
+          latch.count_down();
+          ASSERT_NO_THROW(alloc->insertOrReplace(handle));
+        });
+        // wait until the async thread is running
+        latch.wait();
+      }
+      memcpy(newItem.getMemory(), oldItem.getMemory(), oldItem.getSize());
+    };
+
+    testMultiTiersAsyncOpDuringMove(alloc, pool, quit, moveCb);
+
+    t->join();
+  }
 };
 } // namespace tests
 } // namespace cachelib
diff --git a/cachelib/cachebench/CMakeLists.txt b/cachelib/cachebench/CMakeLists.txt
index 1a1063104c..f935e6e706 100644
--- a/cachelib/cachebench/CMakeLists.txt
+++ b/cachelib/cachebench/CMakeLists.txt
@@ -89,5 +89,6 @@ if (BUILD_TESTS)
   add_test (consistency/tests/ValueHistoryTest.cpp)
   add_test (consistency/tests/ValueTrackerTest.cpp)
   add_test (util/tests/NandWritesTest.cpp)
+  add_test (util/tests/MemoryTierConfigTest.cpp)
   add_test (cache/tests/TimeStampTickerTest.cpp)
 endif()
diff --git a/cachelib/cachebench/util/CacheConfig.cpp b/cachelib/cachebench/util/CacheConfig.cpp
index f12992dd9e..29cd9cb6a3 100644
--- a/cachelib/cachebench/util/CacheConfig.cpp
+++ b/cachelib/cachebench/util/CacheConfig.cpp
@@ -137,8 +137,53 @@ std::shared_ptr<RebalanceStrategy> CacheConfig::getRebalanceStrategy() const {
 MemoryTierConfig::MemoryTierConfig(const folly::dynamic& configJson) {
   JSONSetVal(configJson, file);
   JSONSetVal(configJson, ratio);
+  JSONSetVal(configJson, memBindNodes);
 
   checkCorrectSize<MemoryTierConfig>();
 }
 
+std::vector<size_t> MemoryTierConfig::parseNumaNodes() {
+  std::vector<size_t> numaNodes;
+
+  std::vector<folly::StringPiece> tokens;
+  folly::split(",", memBindNodes, tokens, true /* ignore empty */);
+  for (const auto& token : tokens) {
+    if (token.startsWith("!")) {
+      throw std::invalid_argument(folly::sformat(
+          "invalid NUMA nodes binding in memory tier config: {} "
+          "inverse !N or !N-N is not supported. "
+          "Nodes may be specified as N,N,N or N-N or N,N-N or N-N,N-N and so forth.",
+          token));
+    } else if (token.startsWith("+")) {
+      throw std::invalid_argument(folly::sformat(
+          "invalid NUMA nodes binding in memory tier config: {} "
+          "relative nodes are not supported. "
+          "Nodes may be specified as N,N,N or N-N or N,N-N or N-N,N-N and so forth.",
+          token));
+    } else if (token.contains("-")) {
+      size_t begin, end;
+      if (folly::split("-", token, begin, end) && begin < end) {
+        while (begin <= end) {
+          numaNodes.push_back(begin++);
+        }
+      } else {
" + "nodes may be specified as N,N,N or N-N or N,N-N or N-N,N-N and so forth.", + token)); + } + } + else { + numaNodes.push_back(folly::to(token)); + } + } + + return numaNodes; } } // namespace cachebench diff --git a/cachelib/cachebench/util/CacheConfig.h b/cachelib/cachebench/util/CacheConfig.h index b7829e28c7..7a8c9020b0 100644 --- a/cachelib/cachebench/util/CacheConfig.h +++ b/cachelib/cachebench/util/CacheConfig.h @@ -48,11 +48,13 @@ struct MemoryTierConfig : public JSONConfig { MemoryTierCacheConfig getMemoryTierCacheConfig() { MemoryTierCacheConfig config = memoryTierCacheConfigFromSource(); config.setRatio(ratio); + config.setMemBind(parseNumaNodes()); return config; } std::string file{""}; size_t ratio{0}; + std::string memBindNodes{""}; private: MemoryTierCacheConfig memoryTierCacheConfigFromSource() { @@ -62,6 +64,8 @@ struct MemoryTierConfig : public JSONConfig { return MemoryTierCacheConfig::fromFile(file); } } + + std::vector parseNumaNodes(); }; struct CacheConfig : public JSONConfig { diff --git a/cachelib/cachebench/util/tests/MemoryTierConfigTest.cpp b/cachelib/cachebench/util/tests/MemoryTierConfigTest.cpp new file mode 100644 index 0000000000..afd2bf80ad --- /dev/null +++ b/cachelib/cachebench/util/tests/MemoryTierConfigTest.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Copyright 2022-present Facebook. All Rights Reserved. 
diff --git a/cachelib/cachebench/util/tests/MemoryTierConfigTest.cpp b/cachelib/cachebench/util/tests/MemoryTierConfigTest.cpp
new file mode 100644
index 0000000000..afd2bf80ad
--- /dev/null
+++ b/cachelib/cachebench/util/tests/MemoryTierConfigTest.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Copyright 2022-present Facebook. All Rights Reserved.
+
+#include <gtest/gtest.h>
+
+#include <folly/dynamic.h>
+#include <folly/json.h>
+
+#include "cachelib/cachebench/util/CacheConfig.h"
+
+namespace facebook {
+namespace cachelib {
+namespace cachebench {
+
+TEST(MemoryTierConfigTest, MemBind_SingleNumaNode) {
+  const std::string configString =
+      "{"
+      "  \"ratio\": 1,"
+      "  \"memBindNodes\": 1"
+      "}";
+
+  const std::vector<size_t> expectedNumaNodes = {1};
+
+  auto configJson = folly::parseJson(folly::json::stripComments(configString));
+
+  MemoryTierConfig memoryTierConfig(configJson);
+  MemoryTierCacheConfig tierCacheConfig =
+      memoryTierConfig.getMemoryTierCacheConfig();
+
+  auto parsedNumaNodes = tierCacheConfig.getMemBind();
+  ASSERT_TRUE(std::equal(expectedNumaNodes.begin(), expectedNumaNodes.end(),
+                         parsedNumaNodes.begin()));
+}
+
+TEST(MemoryTierConfigTest, MemBind_RangeNumaNodes) {
+  const std::string configString =
+      "{"
+      "  \"ratio\": 1,"
+      "  \"memBindNodes\": \"0-2\""
+      "}";
+
+  const std::vector<size_t> expectedNumaNodes = {0, 1, 2};
+
+  auto configJson = folly::parseJson(folly::json::stripComments(configString));
+
+  MemoryTierConfig memoryTierConfig(configJson);
+  MemoryTierCacheConfig tierCacheConfig =
+      memoryTierConfig.getMemoryTierCacheConfig();
+
+  auto parsedNumaNodes = tierCacheConfig.getMemBind();
+  ASSERT_TRUE(std::equal(expectedNumaNodes.begin(), expectedNumaNodes.end(),
+                         parsedNumaNodes.begin()));
+}
+
+TEST(MemoryTierConfigTest, MemBind_SingleAndRangeNumaNodes) {
+  const std::string configString =
+      "{"
+      "  \"ratio\": 1,"
+      "  \"memBindNodes\": \"0,2-5\""
+      "}";
+
+  const std::vector<size_t> expectedNumaNodes = {0, 2, 3, 4, 5};
+
+  auto configJson = folly::parseJson(folly::json::stripComments(configString));
+
+  MemoryTierConfig memoryTierConfig(configJson);
+  MemoryTierCacheConfig tierCacheConfig =
+      memoryTierConfig.getMemoryTierCacheConfig();
+
+  auto parsedNumaNodes = tierCacheConfig.getMemBind();
+  ASSERT_TRUE(std::equal(expectedNumaNodes.begin(), expectedNumaNodes.end(),
+                         parsedNumaNodes.begin()));
+}
+
+} // namespace cachebench
+} // namespace cachelib
+} // namespace facebook
\ No newline at end of file
diff --git a/cachelib/shm/CMakeLists.txt b/cachelib/shm/CMakeLists.txt
index 4f97c0e763..83a798949c 100644
--- a/cachelib/shm/CMakeLists.txt
+++ b/cachelib/shm/CMakeLists.txt
@@ -25,6 +25,7 @@ add_library (cachelib_shm
 add_dependencies(cachelib_shm thrift_generated_files)
 target_link_libraries(cachelib_shm PUBLIC
   cachelib_common
+  numa
   )
 
 install(TARGETS cachelib_shm
diff --git a/cachelib/shm/PosixShmSegment.cpp b/cachelib/shm/PosixShmSegment.cpp
index 027fee8bb8..1bdeec253d 100644
--- a/cachelib/shm/PosixShmSegment.cpp
+++ b/cachelib/shm/PosixShmSegment.cpp
@@ -21,6 +21,8 @@
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <numa.h>
+#include <numaif.h>
 
 #include "cachelib/common/Utils.h"
 
@@ -176,6 +178,7 @@ void* PosixShmSegment::mapAddress(void* addr) const {
     util::throwSystemError(EINVAL, "Address already mapped");
   }
   XDCHECK(retAddr == addr || addr == nullptr);
+  memBind(addr);
   return retAddr;
 }
 
@@ -183,6 +186,44 @@ void PosixShmSegment::unMap(void* addr) const {
   detail::munmapImpl(addr, getSize());
 }
 
+static void forcePageAllocation(void* addr, size_t size, size_t pageSize) {
+  for (volatile char* curAddr = (char*)addr; curAddr < (char*)addr + size;
+       curAddr += pageSize) {
+    *curAddr = *curAddr;
+  }
+}
+
+void PosixShmSegment::memBind(void* addr) const {
+  if (opts_.memBindNumaNodes.empty()) {
+    return;
+  }
+
+  struct bitmask* oldNodeMask = numa_allocate_nodemask();
+  int oldMode = 0;
+  struct bitmask* nodesMask = numa_allocate_nodemask();
+  auto guard = folly::makeGuard([&] {
+    numa_bitmask_free(nodesMask);
+    numa_bitmask_free(oldNodeMask);
+  });
+
+  for (auto node : opts_.memBindNumaNodes) {
+    numa_bitmask_setbit(nodesMask, node);
+  }
+
+  // mbind() cannot be used because mmap was called with the MAP_SHARED flag,
+  // but we can set the memory policy for the current thread and force page
+  // allocation. The following logic is used:
+  // 1. Remember the current memory policy of the current thread
+  // 2. Set the new memory policy as specified by the config
+  // 3. Force page allocation by touching every page in the segment
+  // 4. Restore the memory policy
+
+  // Remember the current memory policy
+  get_mempolicy(&oldMode, oldNodeMask->maskp, oldNodeMask->size, nullptr, 0);
+
+  // Set memory bindings
+  set_mempolicy(MPOL_BIND, nodesMask->maskp, nodesMask->size);
+
+  forcePageAllocation(addr, getSize(), detail::getPageSize(opts_.pageSize));
+
+  // Restore the memory policy for the thread
+  set_mempolicy(oldMode, oldNodeMask->maskp, oldNodeMask->size);
+}
+
 std::string PosixShmSegment::createKeyForName(
     const std::string& name) noexcept {
   // ensure that the slash is always there in the head. repetitive
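Because the POSIX path above binds pages by temporarily switching the calling thread's memory policy and touching each page, placement can be verified after the fact. An illustrative helper (not part of the patch) using get_mempolicy(2) with MPOL_F_NODE | MPOL_F_ADDR:

    #include <numaif.h>

    // Returns the NUMA node backing the (already faulted-in) page at addr,
    // or -1 if the lookup fails.
    int nodeOfPage(void* addr) {
      int node = -1;
      if (get_mempolicy(&node, nullptr, 0, addr,
                        MPOL_F_NODE | MPOL_F_ADDR) != 0) {
        return -1;
      }
      return node;
    }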
diff --git a/cachelib/shm/PosixShmSegment.h b/cachelib/shm/PosixShmSegment.h
index 6aaeb004e7..bf43b2ca55 100644
--- a/cachelib/shm/PosixShmSegment.h
+++ b/cachelib/shm/PosixShmSegment.h
@@ -108,6 +108,8 @@ class PosixShmSegment : public ShmBase {
   void createReferenceMapping();
   void deleteReferenceMapping() const;
 
+  void memBind(void* addr) const;
+
   // file descriptor associated with the shm. This has FD_CLOEXEC set
   // and once opened, we close this only on destruction of this object
   int fd_{kInvalidFD};
diff --git a/cachelib/shm/ShmCommon.h b/cachelib/shm/ShmCommon.h
index 0998f2f951..8ed5202b62 100644
--- a/cachelib/shm/ShmCommon.h
+++ b/cachelib/shm/ShmCommon.h
@@ -93,6 +93,7 @@ struct ShmSegmentOpts {
   PageSizeT pageSize{PageSizeT::NORMAL};
   bool readOnly{false};
   size_t alignment{1}; // alignment for mapping.
+  std::vector<size_t> memBindNumaNodes;
 
   // opts specific to segment type
   ShmTypeOpts typeOpts{PosixSysVSegmentOpts(false)};
diff --git a/cachelib/shm/SysVShmSegment.cpp b/cachelib/shm/SysVShmSegment.cpp
index e13d605aa5..8b13246ded 100644
--- a/cachelib/shm/SysVShmSegment.cpp
+++ b/cachelib/shm/SysVShmSegment.cpp
@@ -18,8 +18,11 @@
 #include <fcntl.h>
 #include <folly/logging/xlog.h>
+#include <folly/ScopeGuard.h>
 #include <sys/ipc.h>
 #include <sys/shm.h>
+#include <numa.h>
+#include <numaif.h>
 
 #include "cachelib/common/Utils.h"
 
@@ -184,6 +187,50 @@ void shmCtlImpl(int shmid, int cmd, shmid_ds* buf) {
   }
 }
 
+void mbindImpl(void* addr,
+               unsigned long len,
+               int mode,
+               const std::vector<size_t>& memBindNumaNodes,
+               unsigned int flags) {
+  struct bitmask* nodesMask = numa_allocate_nodemask();
+  auto guard = folly::makeGuard([&] { numa_bitmask_free(nodesMask); });
+
+  for (auto node : memBindNumaNodes) {
+    numa_bitmask_setbit(nodesMask, node);
+  }
+
+  long ret = mbind(addr, len, mode, nodesMask->maskp, nodesMask->size, flags);
+  if (ret == 0) {
+    return;
+  }
+
+  switch (errno) {
+  case EFAULT:
+    util::throwSystemError(errno);
+    break;
+  case EINVAL:
+    util::throwSystemError(
+        errno, "Invalid parameters when binding segment to NUMA node(s)");
+    break;
+  case EIO:
+    if (flags & MPOL_MF_STRICT) {
+      util::throwSystemError(errno,
+                             "Segment already allocated on another NUMA node "
+                             "that does not follow the policy.");
+    }
+    if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+      util::throwSystemError(errno,
+                             "Segment already allocated but kernel was unable "
+                             "to move it to the specified NUMA node(s).");
+    }
+    util::throwSystemError(errno, "Invalid errno");
+    break;
+  case ENOMEM:
+    util::throwSystemError(errno,
+                           "Could not bind memory. Insufficient kernel memory "
+                           "was available.");
+    break;
+  case EPERM:
+    if (flags & MPOL_MF_MOVE_ALL) {
+      util::throwSystemError(errno,
+                             "Process does not have the CAP_SYS_NICE privilege "
+                             "to bind segment with the MPOL_MF_MOVE_ALL flag");
+    }
+    util::throwSystemError(errno, "Invalid errno");
+    break;
+  default:
+    XDCHECK(false);
+    util::throwSystemError(errno, "Invalid errno");
+  }
+}
+
 } // namespace detail
 
 void ensureSizeforHugePage(size_t size) {
@@ -270,11 +317,17 @@ void* SysVShmSegment::mapAddress(void* addr) const {
   void* retAddr = detail::shmAttachImpl(shmid_, addr, shmFlags);
   XDCHECK(retAddr == addr || addr == nullptr);
+  memBind(retAddr);
   return retAddr;
 }
 
 void SysVShmSegment::unMap(void* addr) const { detail::shmDtImpl(addr); }
 
+void SysVShmSegment::memBind(void* addr) const {
+  if (opts_.memBindNumaNodes.empty()) {
+    return;
+  }
+  detail::mbindImpl(addr, getSize(), MPOL_BIND, opts_.memBindNumaNodes, 0);
+}
+
 void SysVShmSegment::markForRemoval() {
   if (isMarkedForRemoval()) {
     return;
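For contrast with the POSIX path, the SysV path above applies the policy directly to the attached mapping via mbind(2). A minimal standalone sketch of that call (node ID illustrative, error handling reduced to perror):

    #include <cstddef>
    #include <cstdio>
    #include <numa.h>
    #include <numaif.h>

    // Bind an existing mapping [addr, addr + len) to NUMA node 0. With no
    // MPOL_MF_* flags, only pages faulted in after this call are affected.
    void bindToNode0(void* addr, size_t len) {
      struct bitmask* mask = numa_allocate_nodemask();
      numa_bitmask_setbit(mask, 0);
      if (mbind(addr, len, MPOL_BIND, mask->maskp, mask->size, 0) != 0) {
        perror("mbind");
      }
      numa_bitmask_free(mask);
    }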
diff --git a/cachelib/shm/SysVShmSegment.h b/cachelib/shm/SysVShmSegment.h
index fcebe03eb1..5a57215508 100644
--- a/cachelib/shm/SysVShmSegment.h
+++ b/cachelib/shm/SysVShmSegment.h
@@ -100,6 +100,7 @@ class SysVShmSegment : public ShmBase {
   void lockPagesInMemory() const;
   void createReferenceMapping();
   void deleteReferenceMapping() const;
+  void memBind(void* addr) const;
 
   // the key identifier for the shared memory
   KeyType key_{kInvalidKey};
diff --git a/contrib/prerequisites-centos8.sh b/contrib/prerequisites-centos8.sh
index 7e6cfad1d8..26be9201b3 100755
--- a/contrib/prerequisites-centos8.sh
+++ b/contrib/prerequisites-centos8.sh
@@ -57,7 +57,8 @@ sudo dnf --enablerepo="$POWERTOOLS_REPO" install -y \
   libsodium-static \
   libdwarf-static \
   boost-static \
-  double-conversion-static
+  double-conversion-static \
+  numactl-devel
 
 #Do not install these from OS packages - they are typically outdated.
 #gflags-devel \
diff --git a/docker/images/install-cachelib-deps.sh b/docker/images/install-cachelib-deps.sh
index dd920d9064..6d8fbdef7b 100755
--- a/docker/images/install-cachelib-deps.sh
+++ b/docker/images/install-cachelib-deps.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright 2022, Intel Corporation
 
-git clone https://github.com/pmem/CacheLib CacheLib
+git clone -b develop https://github.com/intel/CacheLib CacheLib
 
 ./CacheLib/contrib/prerequisites-centos8.sh
diff --git a/examples/multitier_cache/main.cpp b/examples/multitier_cache/main.cpp
index 28990c341f..800c0c7cfa 100644
--- a/examples/multitier_cache/main.cpp
+++ b/examples/multitier_cache/main.cpp
@@ -57,7 +57,7 @@ bool put(CacheKey key, const std::string& value) {
   if (!handle) {
     return false; // cache may fail to evict due to too many pending writes
   }
-  std::memcpy(handle->getWritableMemory(), value.data(), value.size());
+  std::memcpy(handle->getMemory(), value.data(), value.size());
   gCache_->insertOrReplace(handle);
   return true;
 }
diff --git a/run_tests.sh b/run_tests.sh
index 97fc7cda72..f7814f5edc 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -1,12 +1,7 @@
 #!/bin/bash
 
 # Newline separated list of tests to ignore
-BLACKLIST="allocator-test-AllocationClassTest
-allocator-test-AllocatorTypeTest
-allocator-test-NvmCacheTests
-allocator-test-NavySetupTest
-common-test-TimeTests
-common-test-UtilTests
+BLACKLIST="allocator-test-NavySetupTest
 shm-test-test_page_size"
 
 if [ "$1" == "long" ]; then