Skip to content

Commit bf02f04

Browse files
committed
Fix race in acquire
The assumption for moving items was that once item is unmarked no one can add new waiters for that item. However, since incrementing item ref count was not done under the MoveMap lock, there was a race: item could have been unmarked right after incRef returned incFailedMoving.
1 parent 08bf0b4 commit bf02f04

File tree

2 files changed

+24
-15
lines changed

2 files changed

+24
-15
lines changed

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,17 +1023,21 @@ CacheAllocator<CacheTrait>::acquire(Item* it) {
10231023

10241024
SCOPE_FAIL { stats_.numRefcountOverflow.inc(); };
10251025

1026-
// TODO: do not block incRef for child items to avoid deadlock
1027-
auto failIfMoving = getNumTiers() > 1 && !it->isChainedItem();
1028-
auto incRes = incRef(*it, failIfMoving);
1029-
if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) {
1030-
return WriteHandle{it, *this};
1031-
} else if (incRes == RefcountWithFlags::incResult::incFailedEviction){
1032-
// item is being evicted
1033-
return WriteHandle{};
1034-
} else {
1035-
// item is being moved - wait for completion
1036-
return handleWithWaitContextForMovingItem(*it);
1026+
while (true) {
1027+
// TODO: do not block incRef for child items to avoid deadlock
1028+
auto failIfMoving = getNumTiers() > 1 && !it->isChainedItem();
1029+
auto incRes = incRef(*it, failIfMoving);
1030+
if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) {
1031+
return WriteHandle{it, *this};
1032+
} else if (incRes == RefcountWithFlags::incResult::incFailedEviction){
1033+
// item is being evicted
1034+
return WriteHandle{};
1035+
} else {
1036+
// item is being moved - wait for completion
1037+
WriteHandle handle;
1038+
if (tryGetHandleWithWaitContextForMovingItem(*it, handle))
1039+
return handle;
1040+
}
10371041
}
10381042
}
10391043

@@ -1255,20 +1259,25 @@ CacheAllocator<CacheTrait>::insertOrReplace(const WriteHandle& handle) {
12551259
* 3. Wait until the moving thread will complete its job.
12561260
*/
12571261
template <typename CacheTrait>
1258-
typename CacheAllocator<CacheTrait>::WriteHandle
1259-
CacheAllocator<CacheTrait>::handleWithWaitContextForMovingItem(Item& item) {
1262+
bool
1263+
CacheAllocator<CacheTrait>::tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle) {
12601264
auto shard = getShardForKey(item.getKey());
12611265
auto& movesMap = getMoveMapForShard(shard);
12621266
{
12631267
auto lock = getMoveLockForShard(shard);
12641268

1269+
// item might have been evicted or moved before the lock was acquired
1270+
if (!item.isMoving())
1271+
return false;
1272+
12651273
WriteHandle hdl{*this};
12661274
auto waitContext = hdl.getItemWaitContext();
12671275

12681276
auto ret = movesMap.try_emplace(item.getKey(), std::make_unique<MoveCtx>());
12691277
ret.first->second->addWaiter(std::move(waitContext));
12701278

1271-
return hdl;
1279+
handle = std::move(hdl);
1280+
return true;
12721281
}
12731282
}
12741283

cachelib/allocator/CacheAllocator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2250,7 +2250,7 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
22502250

22512251
size_t memoryTierSize(TierId tid) const;
22522252

2253-
WriteHandle handleWithWaitContextForMovingItem(Item& item);
2253+
bool tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle);
22542254

22552255
size_t wakeUpWaitersLocked(folly::StringPiece key, WriteHandle&& handle);
22562256

0 commit comments

Comments
 (0)