NavyConfig: add an option to configure the fiber stack size of NavyThread

Summary:
We increased the fiber stack size to 64KB in D50756161 for VCache, which can be too big for many
applications, including memcache, and wastes DRAM unnecessarily: 64KB * 100 preallocated guarded
stacks * 8 threads = 51.2MB. This change adds an option to specify the fiber stack size used by the
NavyThreads.
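
For illustration, a minimal sketch of the new call (the thread counts, queue depths, 16KB stack size, and the fully qualified namespace are assumptions for this example, not part of the diff). Using the same 100-stacks-per-thread figure as above, a 16KB stack would preallocate roughly 16KB * 100 * 8 = 12.8MB instead of 51.2MB:

#include "cachelib/allocator/nvmcache/NavyConfig.h"

facebook::cachelib::navy::NavyConfig navyConfig;
// maxNumReads/maxNumWrites must be multiples of the reader/writer thread
// counts, and either both zero or both non-zero.
navyConfig.setReaderAndWriterThreads(/* readerThreads */ 8,
                                     /* writerThreads */ 8,
                                     /* maxNumReads */ 64,
                                     /* maxNumWrites */ 32,
                                     /* stackSizeKB */ 16);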

Reviewed By: therealgymmy

Differential Revision: D51644816

fbshipit-source-id: 2b5f363998405654c61a90ed758c5d33935e402b
Jaesoo Lee authored and facebook-github-bot committed Dec 9, 2023
1 parent dfe0af8 commit 2315a36
Showing 21 changed files with 88 additions and 30 deletions.
13 changes: 12 additions & 1 deletion cachelib/allocator/nvmcache/NavyConfig.cpp
@@ -160,18 +160,28 @@ BigHashConfig& BigHashConfig::setSizePctAndMaxItemSize(
void NavyConfig::setReaderAndWriterThreads(unsigned int readerThreads,
unsigned int writerThreads,
unsigned int maxNumReads,
unsigned int maxNumWrites) {
unsigned int maxNumWrites,
unsigned int stackSizeKB) {
readerThreads_ = readerThreads;
writerThreads_ = writerThreads;
maxNumReads_ = maxNumReads;
maxNumWrites_ = maxNumWrites;
stackSize_ = stackSizeKB * 1024;

if ((maxNumReads > 0 && maxNumWrites == 0) ||
(maxNumReads == 0 && maxNumWrites > 0)) {
throw std::invalid_argument(
"maxNumReads and maxNumWrites should be both 0 or both >0");
}

// Limit the fiber stack size to below 1MB to prevent misconfiguration;
// a 1MB stack is already too large for most use cases and would waste
// a large amount of memory (>800MB per thread).
if (stackSizeKB >= 1024) {
throw std::invalid_argument(
"Maximum fiber stack size for each thread should be less than 1024 KB");
}

if (maxNumReads > 0 || maxNumWrites > 0) {
if ((maxNumReads % readerThreads_) || (maxNumWrites % writerThreads_)) {
throw std::invalid_argument(folly::sformat(
@@ -278,6 +288,7 @@ std::map<std::string, std::string> NavyConfig::serialize() const {
folly::to<std::string>(navyReqOrderingShards_);
configMap["navyConfig::maxNumReads"] = folly::to<std::string>(maxNumReads_);
configMap["navyConfig::maxNumWrites"] = folly::to<std::string>(maxNumWrites_);
configMap["navyConfig::stackSize"] = folly::to<std::string>(stackSize_);

// Other settings
configMap["navyConfig::maxConcurrentInserts"] =
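
A minimal sketch of the guard added above, reusing the navyConfig object from the summary example; stack sizes of 1024KB or more are rejected:

try {
  // 2048KB is over the 1024KB limit enforced in setReaderAndWriterThreads.
  navyConfig.setReaderAndWriterThreads(8, 8, 64, 32, /* stackSizeKB */ 2048);
} catch (const std::invalid_argument& ex) {
  // "Maximum fiber stack size for each thread should be less than 1024 KB"
}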
13 changes: 11 additions & 2 deletions cachelib/allocator/nvmcache/NavyConfig.h
@@ -529,6 +529,7 @@ class NavyConfig {

unsigned int getMaxNumReads() const { return maxNumReads_; }
unsigned int getMaxNumWrites() const { return maxNumWrites_; }
unsigned int getStackSize() const { return stackSize_; }
// ============ other settings =============
uint32_t getMaxConcurrentInserts() const { return maxConcurrentInserts_; }
uint64_t getMaxParcelMemoryMB() const { return maxParcelMemoryMB_; }
@@ -599,7 +600,8 @@
void setReaderAndWriterThreads(unsigned int readerThreads,
unsigned int writerThreads,
unsigned int maxNumReads = 0,
unsigned int maxNumWrites = 0);
unsigned int maxNumWrites = 0,
unsigned int stackSizeKB = 0);

// Set Navy request ordering shards (expressed as power of two).
// @throw std::invalid_argument if the input value is 0.
@@ -671,9 +673,16 @@
// This value needs to be non-zero.
uint64_t navyReqOrderingShards_{20};

// Max number of concurrent reads/writes in whole Navy
// Max number of concurrent reads/writes in whole Navy.
// This needs to be a multiple of the number of readers and writers.
// Setting these to non-zero enables async IO, where fibers are used
// for Navy operations including device IO.
unsigned int maxNumReads_{0};
unsigned int maxNumWrites_{0};

// Stack size of fibers when async IO is enabled; 0 means the default.
unsigned int stackSize_{0};

// ============ Other settings =============
// Maximum number of concurrent inserts we allow globally for Navy.
// 0 means unlimited.
7 changes: 6 additions & 1 deletion cachelib/allocator/nvmcache/NavySetup.cpp
@@ -122,6 +122,7 @@ uint64_t setupBigHash(const navy::BigHashConfig& bigHashConfig,
// @param blockCacheOffset this block cache starts from this address (inclusive)
// @param useRaidFiles if set to true, the device will setup using raid.
// @param itemDestructorEnabled
// @param stackSize fiber stack size used by the region_manager threads
// @param proto
//
// @return The end offset (exclusive) of the setup blockcache.
@@ -131,6 +132,7 @@ uint64_t setupBlockCache(const navy::BlockCacheConfig& blockCacheConfig,
uint64_t blockCacheOffset,
bool usesRaidFiles,
bool itemDestructorEnabled,
uint32_t stackSize,
cachelib::navy::EnginePairProto& proto) {
auto regionSize = blockCacheConfig.getRegionSize();
if (regionSize != alignUp(regionSize, ioAlignSize)) {
@@ -174,6 +176,7 @@ uint64_t setupBlockCache(const navy::BlockCacheConfig& blockCacheConfig,

blockCache->setNumInMemBuffers(blockCacheConfig.getNumInMemBuffers());
blockCache->setItemDestructorEnabled(itemDestructorEnabled);
blockCache->setStackSize(stackSize);
blockCache->setPreciseRemove(blockCacheConfig.isPreciseRemove());

proto.setBlockCache(std::move(blockCache));
@@ -267,7 +270,7 @@ void setupCacheProtos(const navy::NavyConfig& config,
blockCacheEndOffset = setupBlockCache(
enginesConfig.blockCache(), blockCacheSize, ioAlignSize,
blockCacheStartOffset, config.usesRaidFiles(), itemDestructorEnabled,
*enginePairProto);
config.getStackSize(), *enginePairProto);
}
if (blockCacheEndOffset > bigHashStartOffset) {
throw std::invalid_argument(folly::sformat(
@@ -304,6 +307,7 @@ std::unique_ptr<cachelib::navy::JobScheduler> createJobScheduler(
auto writerThreads = config.getWriterThreads();
auto maxNumReads = config.getMaxNumReads();
auto maxNumWrites = config.getMaxNumWrites();
auto stackSize = config.getStackSize();
auto reqOrderShardsPower = config.getNavyReqOrderingShards();
if (maxNumReads == 0 && maxNumWrites == 0) {
return cachelib::navy::createOrderedThreadPoolJobScheduler(
@@ -314,6 +318,7 @@ std::unique_ptr<cachelib::navy::JobScheduler> createJobScheduler(
writerThreads,
maxNumReads,
maxNumWrites,
stackSize,
reqOrderShardsPower);
}
} // namespace
1 change: 1 addition & 0 deletions cachelib/allocator/nvmcache/tests/NavyConfigTest.cpp
@@ -214,6 +214,7 @@ TEST(NavyConfigTest, Serialization) {
expectedConfigMap["navyConfig::navyReqOrderingShards"] = "30";
expectedConfigMap["navyConfig::maxNumReads"] = "0";
expectedConfigMap["navyConfig::maxNumWrites"] = "0";
expectedConfigMap["navyConfig::stackSize"] = "0";

EXPECT_EQ(configMap, expectedConfigMap);
}
3 changes: 2 additions & 1 deletion cachelib/cachebench/cache/Cache-inl.h
@@ -214,7 +214,8 @@ Cache<Allocator>::Cache(const CacheConfig& config,
nvmConfig.navyConfig.setReaderAndWriterThreads(config_.navyReaderThreads,
config_.navyWriterThreads,
config_.navyMaxNumReads,
config_.navyMaxNumWrites);
config_.navyMaxNumWrites,
config_.navyStackSizeKB);

// Set enableIoUring (and override qDepth) if async io is enabled
if (config_.navyMaxNumReads || config_.navyMaxNumWrites ||
1 change: 1 addition & 0 deletions cachelib/cachebench/util/CacheConfig.cpp
@@ -76,6 +76,7 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
JSONSetVal(configJson, navyWriterThreads);
JSONSetVal(configJson, navyMaxNumReads);
JSONSetVal(configJson, navyMaxNumWrites);
JSONSetVal(configJson, navyStackSizeKB);
JSONSetVal(configJson, navyQDepth);
JSONSetVal(configJson, navyEnableIoUring);
JSONSetVal(configJson, navyCleanRegions);
3 changes: 3 additions & 0 deletions cachelib/cachebench/util/CacheConfig.h
@@ -188,6 +188,9 @@ struct CacheConfig : public JSONConfig {
uint32_t navyMaxNumReads{0};
uint32_t navyMaxNumWrites{0};

// Stack size (in KB) of Navy fibers when async IO is enabled
uint32_t navyStackSizeKB{16};

// qdepth to be used; override if already set automatically
// by navyMaxNumReads and navyMaxNumWrites
uint32_t navyQDepth{0};
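
In cachebench, the new navyStackSizeKB field is picked up by JSONSetVal like the neighboring fields. A minimal sketch, assuming the config is built as a folly::dynamic the way cachebench parses its JSON (the surrounding values and the cachebench namespace are illustrative assumptions):

#include <folly/dynamic.h>
#include "cachelib/cachebench/util/CacheConfig.h"

folly::dynamic configJson = folly::dynamic::object
    ("navyReaderThreads", 8)
    ("navyWriterThreads", 8)
    ("navyMaxNumReads", 64)
    ("navyMaxNumWrites", 32)
    ("navyStackSizeKB", 32);  // overrides the 16KB default declared above
facebook::cachelib::cachebench::CacheConfig cacheConfig(configJson);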
4 changes: 4 additions & 0 deletions cachelib/navy/Factory.cpp
@@ -110,6 +110,10 @@ class BlockCacheProtoImpl final : public BlockCacheProto {
config_.itemDestructorEnabled = itemDestructorEnabled;
}

void setStackSize(uint32_t stackSize) override {
config_.stackSize = stackSize;
}

void setPreciseRemove(bool preciseRemove) override {
config_.preciseRemove = preciseRemove;
}
3 changes: 3 additions & 0 deletions cachelib/navy/Factory.h
@@ -81,6 +81,9 @@ class BlockCacheProto {
// (Optional) Set if the item destructor feature is enabled.
virtual void setItemDestructorEnabled(bool itemDestructorEnabled) = 0;

// (Optional) Set the fiber stack size of the region_manager threads.
virtual void setStackSize(uint32_t stackSize) = 0;

// (Optional) Set the preciseRemove flag.
virtual void setPreciseRemove(bool preciseRemove) = 0;
};
1 change: 1 addition & 0 deletions cachelib/navy/block_cache/BlockCache.cpp
@@ -132,6 +132,7 @@ BlockCache::BlockCache(Config&& config, ValidConfigTag)
*config.device,
config.cleanRegionsPool,
config.cleanRegionThreads,
config.stackSize,
bindThis(&BlockCache::onRegionReclaim, *this),
bindThis(&BlockCache::onRegionCleanup, *this),
std::move(config.evictionPolicy),
4 changes: 3 additions & 1 deletion cachelib/navy/block_cache/BlockCache.h
@@ -66,8 +66,10 @@ class BlockCache final : public Engine {
JobScheduler* scheduler{};
// Clean region pool size
uint32_t cleanRegionsPool{1};
// The number of threads for reclaim and flush
// The number of region_manager threads for reclaim and flush
uint32_t cleanRegionThreads{1};
// The fiber stack size of region_manager threads
uint32_t stackSize{0};
// Number of in-memory buffers where writes are buffered before flushed
// on to the device
uint32_t numInMemBuffers{1};
4 changes: 3 additions & 1 deletion cachelib/navy/block_cache/RegionManager.cpp
@@ -27,6 +27,7 @@ RegionManager::RegionManager(uint32_t numRegions,
Device& device,
uint32_t numCleanRegions,
uint32_t numWorkers,
uint32_t stackSize,
RegionEvictCallback evictCb,
RegionCleanupCallback cleanupCb,
std::unique_ptr<EvictionPolicy> policy,
@@ -59,7 +60,8 @@ RegionManager::RegionManager(uint32_t numRegions,

for (uint32_t i = 0; i < numWorkers; i++) {
auto name = fmt::format("region_manager_{}", i);
workers_.emplace_back(std::make_unique<NavyThread>(name));
workers_.emplace_back(
std::make_unique<NavyThread>(name, NavyThread::Options(stackSize)));
workers_.back()->addTaskRemote(
[name]() { XLOGF(INFO, "{} started", name); });
}
4 changes: 4 additions & 0 deletions cachelib/navy/block_cache/RegionManager.h
@@ -70,6 +70,9 @@ class RegionManager {
// @param numCleanRegions How many regions reclamator maintains in
// the clean pool
// @param scheduler JobScheduler to run reclamation jobs
// @param numWorkers Number of threads to run reclamation jobs
// @param stackSize Fiber stack size for each worker thread.
// 0 for default
// @param evictCb Callback invoked when region evicted
// @param cleanupCb Callback invoked when region cleaned up
// @param policy eviction policy
@@ -84,6 +87,7 @@
Device& device,
uint32_t numCleanRegions,
uint32_t numWorkers,
uint32_t stackSize,
RegionEvictCallback evictCb,
RegionCleanupCallback cleanupCb,
std::unique_ptr<EvictionPolicy> policy,
6 changes: 3 additions & 3 deletions cachelib/navy/block_cache/tests/AllocatorTest.cpp
@@ -44,7 +44,7 @@ TEST(Allocator, RegionSyncInMemBuffers) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy), 3, 0, kFlushRetryLimit);
Allocator allocator{*rm, kNumPriorities};

@@ -141,7 +141,7 @@ TEST(Allocator, TestInMemBufferStates) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy), 3, 0, kFlushRetryLimit);
Allocator allocator{*rm, kNumPriorities};

@@ -231,7 +231,7 @@ TEST(Allocator, UsePriorities) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy),
kNumRegions /* numInMemBuffers */, 3 /* numPriorities */,
kFlushRetryLimit);
24 changes: 12 additions & 12 deletions cachelib/navy/block_cache/tests/RegionManagerTest.cpp
@@ -54,7 +54,7 @@ TEST(RegionManager, ReclaimLruAsFifo) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy),
kNumRegions /* numInMemBuffers */, 0, kFlushRetryLimit);

@@ -80,7 +80,7 @@ TEST(RegionManager, ReclaimLru) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy),
kNumRegions /* numInMemBuffers */, 0, kFlushRetryLimit);

@@ -107,7 +107,7 @@ TEST(RegionManager, Recovery) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy),
kNumRegions /* numInMemBuffers */, 0, kFlushRetryLimit);

@@ -143,7 +143,7 @@ TEST(RegionManager, Recovery) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy),
kNumRegions /* numInMemBuffers */, 0, kFlushRetryLimit);

@@ -176,8 +176,8 @@ TEST(RegionManager, ReadWrite) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, kBaseOffset, *device, 1, 1, std::move(evictCb),
std::move(cleanupCb), std::make_unique<LruPolicy>(4),
kNumRegions, kRegionSize, kBaseOffset, *device, 1, 1, 0,
std::move(evictCb), std::move(cleanupCb), std::make_unique<LruPolicy>(4),
kNumRegions /* numInMemBuffers */, 0, kFlushRetryLimit);

ENABLE_INJECT_PAUSE_IN_SCOPE();
@@ -231,7 +231,7 @@ TEST(RegionManager, RecoveryLRUOrder) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy),
kNumRegions /* numInMemBuffers */, 0, kFlushRetryLimit);

@@ -255,7 +255,7 @@ TEST(RegionManager, RecoveryLRUOrder) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy),
kNumRegions /* numInMemBuffers */, 0, kFlushRetryLimit);

@@ -283,7 +283,7 @@ TEST(RegionManager, Fragmentation) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy),
kNumRegions /* numInMemBuffers */, 0, kFlushRetryLimit);

@@ -317,7 +317,7 @@ TEST(RegionManager, Fragmentation) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy),
kNumRegions /* numInMemBuffers */, 0, kFlushRetryLimit);

@@ -357,7 +357,7 @@ TEST(RegionManager, cleanupRegionFailureSync) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy), kNumInMemBuffer, 0,
kFlushRetryLimit);

@@ -465,7 +465,7 @@ TEST(RegionManager, cleanupRegionFailureAsync) {
RegionEvictCallback evictCb{[](RegionId, BufferView) { return 0; }};
RegionCleanupCallback cleanupCb{[](RegionId, BufferView) {}};
auto rm = std::make_unique<RegionManager>(
kNumRegions, kRegionSize, 0, *device, 1, 1, std::move(evictCb),
kNumRegions, kRegionSize, 0, *device, 1, 1, 0, std::move(evictCb),
std::move(cleanupCb), std::move(policy), kNumInMemBuffer, 0,
kFlushRetryLimit);

6 changes: 4 additions & 2 deletions cachelib/navy/common/NavyThread.h
@@ -21,7 +21,6 @@
#include <folly/fibers/TimedMutex.h>
#include <folly/io/async/EventBase.h>
#include <folly/io/async/ScopedEventBaseThread.h>
#include <folly/logging/xlog.h>

#include <atomic>
#include <memory>
@@ -47,6 +46,8 @@ class NavyThread {
static constexpr size_t kDefaultStackSize{64 * 1024};
constexpr Options() {}

explicit Options(size_t size) : stackSize(size) {}

/**
* Maximum stack size for fibers which will be used for executing all the
* tasks.
@@ -61,7 +62,8 @@
th_ = std::make_unique<folly::ScopedEventBaseThread>(name.str());

folly::fibers::FiberManager::Options opts;
opts.stackSize = options.stackSize;
opts.stackSize =
options.stackSize ? options.stackSize : Options::kDefaultStackSize;
fm_ = &folly::fibers::getFiberManager(*th_->getEventBase(), opts);
}

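
A minimal sketch of the resulting NavyThread behavior (the thread name, the 32KB size, and the namespace are illustrative assumptions): a non-zero stack size is honored, while 0 or a default-constructed Options falls back to kDefaultStackSize (64KB).

#include "cachelib/navy/common/NavyThread.h"

using facebook::cachelib::navy::NavyThread;

NavyThread worker("example_worker", NavyThread::Options(32 * 1024));
worker.addTaskRemote([]() {
  // Runs on a fiber whose stack is 32KB rather than the 64KB default.
});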