Skip to content

Commit 621d0cf

Browse files
authored
Merge pull request #37 from guptask/cptr_tiers
Unified Compressed Pointer
2 parents 9bb4db8 + 15e4259 commit 621d0cf

File tree

10 files changed

+75
-71
lines changed

10 files changed

+75
-71
lines changed

cachelib/allocator/CCacheAllocator.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ CCacheAllocator::CCacheAllocator(MemoryAllocator& allocator,
3636
currentChunksIndex_(0) {
3737
auto& currentChunks = chunks_[currentChunksIndex_];
3838
for (auto chunk : *object.chunks()) {
39-
currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk)));
39+
// TODO : pass multi-tier flag when compact cache supports multi-tier config
40+
currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk), false));
4041
}
4142
}
4243

@@ -97,7 +98,8 @@ CCacheAllocator::SerializationType CCacheAllocator::saveState() {
9798

9899
std::lock_guard<std::mutex> guard(resizeLock_);
99100
for (auto chunk : getCurrentChunks()) {
100-
object.chunks()->push_back(allocator_.compress(chunk).saveState());
101+
// TODO : pass multi-tier flag when compact cache supports multi-tier config
102+
object.chunks()->push_back(allocator_.compress(chunk, false).saveState());
101103
}
102104
return object;
103105
}

cachelib/allocator/CacheAllocator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,8 +1362,8 @@ class CacheAllocator : public CacheBase {
13621362
sizeof(typename RefcountWithFlags::Value) + sizeof(uint32_t) +
13631363
sizeof(uint32_t) + sizeof(KAllocation)) == sizeof(Item),
13641364
"vtable overhead");
1365-
// XXX: this will fail due to CompressedPtr change
1366-
// static_assert(32 == sizeof(Item), "item overhead is 32 bytes");
1365+
// Check for CompressedPtr single/multi tier support
1366+
static_assert(32 == sizeof(Item), "item overhead is 32 bytes");
13671367

13681368
// make sure there is no overhead in ChainedItem on top of a regular Item
13691369
static_assert(sizeof(Item) == sizeof(ChainedItem),

cachelib/allocator/memory/CompressedPtr.h

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -30,21 +30,22 @@ class SlabAllocator;
3030
template <typename PtrType, typename AllocatorContainer>
3131
class PtrCompressor;
3232

33-
// the following are for pointer compression for the memory allocator. We
34-
// compress pointers by storing the slab index and the alloc index of the
35-
// allocation inside the slab. With slab worth kNumSlabBits of data, if we
36-
// have the min allocation size as 64 bytes, that requires kNumSlabBits - 6
37-
// bits for storing the alloc index. This leaves the remaining (32 -
38-
// (kNumSlabBits - 6)) bits for the slab index. Hence we can index 256 GiB
39-
// of memory in slabs and index anything more than 64 byte allocations inside
40-
// the slab using a 32 bit representation.
41-
//
4233
// This CompressedPtr makes decompression fast by staying away from division and
43-
// modulo arithmetic and doing those during the compression time. We most often
44-
// decompress a CompressedPtr than compress a pointer while creating one.
34+
// modulo arithmetic and doing those during the compression time. We most often
35+
// decompress a CompressedPtr than compress a pointer while creating one. This
36+
// is used for pointer compression by the memory allocator.
37+
38+
// We compress pointers by storing the tier index, slab index and alloc index of
39+
// the allocation inside the slab. With slab worth kNumSlabBits (22 bits) of data,
40+
// if we have the min allocation size as 64 bytes, that requires kNumSlabBits - 6
41+
// = 16 bits for storing the alloc index. The tier id occupies the 32nd bit only
42+
// since its value cannot exceed kMaxTiers (2). This leaves the remaining
43+
// (32 - (kNumSlabBits - 6) - 1 bit for tier id) = 15 bits for the slab index.
44+
// Hence we can index 128 GiB of memory in slabs per tier and index anything more
45+
// than 64 byte allocations inside the slab using a 32 bit representation.
4546
class CACHELIB_PACKED_ATTR CompressedPtr {
4647
public:
47-
using PtrType = uint64_t;
48+
using PtrType = uint32_t;
4849
// Thrift doesn't support unsigned type
4950
using SerializedPtrType = int64_t;
5051

@@ -65,9 +66,9 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
6566
return static_cast<uint32_t>(1) << (Slab::kMinAllocPower);
6667
}
6768

68-
// maximum adressable memory for pointer compression to work.
69+
// maximum addressable memory for pointer compression to work.
6970
static constexpr size_t getMaxAddressableSize() noexcept {
70-
return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits);
71+
return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits + 1);
7172
}
7273

7374
// default construct to nullptr.
@@ -92,8 +93,8 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
9293
PtrType ptr_{kNull};
9394

9495
// create a compressed pointer for a valid memory allocation.
95-
CompressedPtr(uint32_t slabIdx, uint32_t allocIdx, TierId tid = 0)
96-
: ptr_(compress(slabIdx, allocIdx, tid)) {}
96+
CompressedPtr(uint32_t slabIdx, uint32_t allocIdx, bool isMultiTiered, TierId tid = 0)
97+
: ptr_(compress(slabIdx, allocIdx, isMultiTiered, tid)) {}
9798

9899
constexpr explicit CompressedPtr(PtrType ptr) noexcept : ptr_{ptr} {}
99100

@@ -103,45 +104,48 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
103104
static constexpr unsigned int kNumAllocIdxBits =
104105
Slab::kNumSlabBits - Slab::kMinAllocPower;
105106

106-
// Use topmost 32 bits for TierId
107-
// XXX: optimize
108-
static constexpr unsigned int kNumTierIdxOffset = 32;
107+
// Use 32nd bit position for TierId
108+
static constexpr unsigned int kNumTierIdxOffset = 31;
109109

110110
static constexpr PtrType kAllocIdxMask = ((PtrType)1 << kNumAllocIdxBits) - 1;
111111

112112
// kNumTierIdxBits most significant bits
113-
static constexpr PtrType kTierIdxMask = (((PtrType)1 << kNumTierIdxOffset) - 1) << (NumBits<PtrType>::value - kNumTierIdxOffset);
113+
static constexpr PtrType kTierIdxMask = (PtrType)1 << kNumTierIdxOffset;
114114

115115
// Number of bits for the slab index. This will be the top 16 bits of the
116116
// compressed ptr.
117117
static constexpr unsigned int kNumSlabIdxBits =
118-
NumBits<PtrType>::value - kNumTierIdxOffset - kNumAllocIdxBits;
118+
kNumTierIdxOffset - kNumAllocIdxBits;
119119

120120
// Compress the given slabIdx and allocIdx into a 32-bit compressed
121121
// pointer.
122-
static PtrType compress(uint32_t slabIdx, uint32_t allocIdx, TierId tid) noexcept {
122+
static PtrType compress(uint32_t slabIdx, uint32_t allocIdx, bool isMultiTiered, TierId tid) noexcept {
123123
XDCHECK_LE(allocIdx, kAllocIdxMask);
124+
if (!isMultiTiered) {
125+
XDCHECK_LT(slabIdx, (1u << (kNumSlabIdxBits+1)) - 1);
126+
return (slabIdx << kNumAllocIdxBits) + allocIdx;
127+
}
124128
XDCHECK_LT(slabIdx, (1u << kNumSlabIdxBits) - 1);
125129
return (static_cast<uint64_t>(tid) << kNumTierIdxOffset) + (slabIdx << kNumAllocIdxBits) + allocIdx;
126130
}
127131

128132
// Get the slab index of the compressed ptr
129-
uint32_t getSlabIdx() const noexcept {
133+
uint32_t getSlabIdx(bool isMultiTiered) const noexcept {
130134
XDCHECK(!isNull());
131-
auto noTierIdPtr = ptr_ & ~kTierIdxMask;
135+
auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
132136
return static_cast<uint32_t>(noTierIdPtr >> kNumAllocIdxBits);
133137
}
134138

135139
// Get the allocation index of the compressed ptr
136-
uint32_t getAllocIdx() const noexcept {
140+
uint32_t getAllocIdx(bool isMultiTiered) const noexcept {
137141
XDCHECK(!isNull());
138-
auto noTierIdPtr = ptr_ & ~kTierIdxMask;
142+
auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
139143
return static_cast<uint32_t>(noTierIdPtr & kAllocIdxMask);
140144
}
141145

142-
uint32_t getTierId() const noexcept {
146+
uint32_t getTierId(bool isMultiTiered) const noexcept {
143147
XDCHECK(!isNull());
144-
return static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset);
148+
return isMultiTiered ? static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset) : 0;
145149
}
146150

147151
void setTierId(TierId tid) noexcept {
@@ -160,11 +164,11 @@ class SingleTierPtrCompressor {
160164
: allocator_(allocator) {}
161165

162166
const CompressedPtr compress(const PtrType* uncompressed) const {
163-
return allocator_.compress(uncompressed);
167+
return allocator_.compress(uncompressed, false);
164168
}
165169

166170
PtrType* unCompress(const CompressedPtr compressed) const {
167-
return static_cast<PtrType*>(allocator_.unCompress(compressed));
171+
return static_cast<PtrType*>(allocator_.unCompress(compressed, false));
168172
}
169173

170174
bool operator==(const SingleTierPtrCompressor& rhs) const noexcept {
@@ -196,19 +200,21 @@ class PtrCompressor {
196200
break;
197201
}
198202

199-
auto cptr = allocators_[tid]->compress(uncompressed);
200-
cptr.setTierId(tid);
201-
203+
bool isMultiTiered = allocators_.size() > 1;
204+
auto cptr = allocators_[tid]->compress(uncompressed, isMultiTiered);
205+
if (isMultiTiered) { // config has multiple tiers
206+
cptr.setTierId(tid);
207+
}
202208
return cptr;
203209
}
204210

205211
PtrType* unCompress(const CompressedPtr compressed) const {
206212
if (compressed.isNull()) {
207213
return nullptr;
208214
}
209-
210-
auto &allocator = *allocators_[compressed.getTierId()];
211-
return static_cast<PtrType*>(allocator.unCompress(compressed));
215+
bool isMultiTiered = allocators_.size() > 1;
216+
auto &allocator = *allocators_[compressed.getTierId(isMultiTiered)];
217+
return static_cast<PtrType*>(allocator.unCompress(compressed, isMultiTiered));
212218
}
213219

214220
bool operator==(const PtrCompressor& rhs) const noexcept {

cachelib/allocator/memory/MemoryAllocator.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -543,8 +543,8 @@ class MemoryAllocator {
543543
// as the original pointer is valid.
544544
//
545545
// @throw std::invalid_argument if the ptr is invalid.
546-
CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
547-
return slabAllocator_.compress(ptr);
546+
CompressedPtr CACHELIB_INLINE compress(const void* ptr, bool isMultiTiered) const {
547+
return slabAllocator_.compress(ptr, isMultiTiered);
548548
}
549549

550550
// retrieve the raw pointer corresponding to the compressed pointer. This is
@@ -555,8 +555,8 @@ class MemoryAllocator {
555555
// @return the raw pointer corresponding to this compressed pointer.
556556
//
557557
// @throw std::invalid_argument if the compressed pointer is invalid.
558-
void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr) const {
559-
return slabAllocator_.unCompress(cPtr);
558+
void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr, bool isMultiTiered) const {
559+
return slabAllocator_.unCompress(cPtr, isMultiTiered);
560560
}
561561

562562
// a special implementation of pointer compression for benchmarking purposes.

cachelib/allocator/memory/SlabAllocator.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ class SlabAllocator {
225225
// the corresponding memory allocator. trying to inline this just increases
226226
// the code size and does not move the needle on the benchmarks much.
227227
// Calling this with invalid input in optimized build is undefined behavior.
228-
CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
228+
CompressedPtr CACHELIB_INLINE compress(const void* ptr, bool isMultiTiered) const {
229229
if (ptr == nullptr) {
230230
return CompressedPtr{};
231231
}
@@ -246,19 +246,19 @@ class SlabAllocator {
246246
static_cast<uint32_t>(reinterpret_cast<const uint8_t*>(ptr) -
247247
reinterpret_cast<const uint8_t*>(slab)) /
248248
allocSize;
249-
return CompressedPtr{slabIndex, allocIdx};
249+
return CompressedPtr{slabIndex, allocIdx, isMultiTiered};
250250
}
251251

252252
// uncompress the point and return the raw ptr. This function never throws
253253
// in optimized build and assumes that the caller is responsible for calling
254254
// it with a valid compressed pointer.
255-
void* CACHELIB_INLINE unCompress(const CompressedPtr ptr) const {
255+
void* CACHELIB_INLINE unCompress(const CompressedPtr ptr, bool isMultiTiered) const {
256256
if (ptr.isNull()) {
257257
return nullptr;
258258
}
259259

260-
const SlabIdx slabIndex = ptr.getSlabIdx();
261-
const uint32_t allocIdx = ptr.getAllocIdx();
260+
const SlabIdx slabIndex = ptr.getSlabIdx(isMultiTiered);
261+
const uint32_t allocIdx = ptr.getAllocIdx(isMultiTiered);
262262
const Slab* slab = &slabMemoryStart_[slabIndex];
263263

264264
#ifndef NDEBUG

cachelib/allocator/memory/tests/MemoryAllocatorTest.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -401,13 +401,13 @@ TEST_F(MemoryAllocatorTest, PointerCompression) {
401401
for (const auto& pool : poolAllocs) {
402402
const auto& allocs = pool.second;
403403
for (const auto* alloc : allocs) {
404-
CompressedPtr ptr = m.compress(alloc);
404+
CompressedPtr ptr = m.compress(alloc, false);
405405
ASSERT_FALSE(ptr.isNull());
406-
ASSERT_EQ(alloc, m.unCompress(ptr));
406+
ASSERT_EQ(alloc, m.unCompress(ptr, false));
407407
}
408408
}
409409

410-
ASSERT_EQ(nullptr, m.unCompress(m.compress(nullptr)));
410+
ASSERT_EQ(nullptr, m.unCompress(m.compress(nullptr, false), false));
411411
}
412412

413413
TEST_F(MemoryAllocatorTest, Restorable) {

cachelib/allocator/tests/AllocatorMemoryTiersTest.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,6 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
136136
stats = allocator->getGlobalCacheStats();
137137
slabStats = allocator->getAllocationClassStats(0,0,cid);
138138
}
139-
ASSERT_GE(slabStats.approxFreePercent,9.5);
140139

141140
auto perclassEstats = allocator->getBackgroundMoverClassStats(MoverDir::Evict);
142141
auto perclassPstats = allocator->getBackgroundMoverClassStats(MoverDir::Promote);

cachelib/allocator/tests/BaseAllocatorTest.h

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4928,13 +4928,13 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
49284928
/* TODO: we adjust alloc size by -20 or -40 due to increased CompressedPtr size */
49294929
auto allocateItem1 =
49304930
std::async(std::launch::async, allocFn, std::string{"hello"},
4931-
std::vector<uint32_t>{100 - 20, 500, 1000});
4931+
std::vector<uint32_t>{100, 500, 1000});
49324932
auto allocateItem2 =
49334933
std::async(std::launch::async, allocFn, std::string{"world"},
4934-
std::vector<uint32_t>{200- 40, 1000, 2000});
4934+
std::vector<uint32_t>{200, 1000, 2000});
49354935
auto allocateItem3 =
49364936
std::async(std::launch::async, allocFn, std::string{"yolo"},
4937-
std::vector<uint32_t>{100-20, 200, 5000});
4937+
std::vector<uint32_t>{100, 200, 5000});
49384938

49394939
auto slabRelease = std::async(releaseFn);
49404940
slabRelease.wait();
@@ -5772,9 +5772,7 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
57725772
AllocatorT alloc(config);
57735773
const size_t numBytes = alloc.getCacheMemoryStats().cacheSize;
57745774
const auto poolSize = numBytes / 2;
5775-
// TODO: becasue CompressedPtr size is increased, key1 must be of equal
5776-
// size with key2
5777-
std::string key1 = "key1";
5775+
std::string key1 = "key1-some-random-string-here";
57785776
auto poolId = alloc.addPool("one", poolSize, {} /* allocSizes */, mmConfig);
57795777
auto handle1 = alloc.allocate(poolId, key1, 1);
57805778
alloc.insert(handle1);
@@ -5831,37 +5829,35 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
58315829
auto poolId = alloc.addPool("one", poolSize, {} /* allocSizes */, mmConfig);
58325830
auto handle1 = alloc.allocate(poolId, key1, 1);
58335831
alloc.insert(handle1);
5834-
// TODO: key2 must be the same length as the rest due to increased
5835-
// CompressedPtr size
5836-
auto handle2 = alloc.allocate(poolId, "key2-some-random-string-here", 1);
5832+
auto handle2 = alloc.allocate(poolId, "key2", 1);
58375833
alloc.insert(handle2);
5838-
ASSERT_NE(alloc.find("key2-some-random-string-here"), nullptr);
5834+
ASSERT_NE(alloc.find("key2"), nullptr);
58395835
sleep(9);
58405836

58415837
ASSERT_NE(alloc.find(key1), nullptr);
58425838
auto tail = alloc.dumpEvictionIterator(
5843-
poolId, 1 /* second allocation class, TODO: CompressedPtr */, 3 /* last 3 items */);
5839+
poolId, 0 /* first allocation class */, 3 /* last 3 items */);
58445840
// item 1 gets promoted (age 9), tail age 9, lru refresh time 3 (default)
58455841
EXPECT_TRUE(checkItemKey(tail[1], key1));
58465842

58475843
auto handle3 = alloc.allocate(poolId, key3, 1);
58485844
alloc.insert(handle3);
58495845

58505846
sleep(6);
5851-
tail = alloc.dumpEvictionIterator(poolId, 1 /* second allocation class, TODO: CompressedPtr */,
5847+
tail = alloc.dumpEvictionIterator(poolId, 0 /* first allocation class */,
58525848
3 /* last 3 items */);
58535849
ASSERT_NE(alloc.find(key3), nullptr);
5854-
tail = alloc.dumpEvictionIterator(poolId, 1 /* second allocation class, TODO: CompressedPtr */,
5850+
tail = alloc.dumpEvictionIterator(poolId, 0 /* first allocation class */,
58555851
3 /* last 3 items */);
58565852
// tail age 15, lru refresh time 6 * 0.7 = 4.2 = 4,
58575853
// item 3 age 6 gets promoted
58585854
EXPECT_TRUE(checkItemKey(tail[1], key1));
58595855

5860-
alloc.remove("key2-some-random-string-here");
5856+
alloc.remove("key2");
58615857
sleep(3);
58625858

58635859
ASSERT_NE(alloc.find(key3), nullptr);
5864-
tail = alloc.dumpEvictionIterator(poolId, 1 /* second allocation class, TODO: CompressedPtr */,
5860+
tail = alloc.dumpEvictionIterator(poolId, 0 /* first allocation class */,
58655861
2 /* last 2 items */);
58665862
// tail age 9, lru refresh time 4, item 3 age 3, not promoted
58675863
EXPECT_TRUE(checkItemKey(tail[1], key3));

cachelib/benchmarks/PtrCompressionBench.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ void buildAllocs(size_t poolSize) {
6161
void* alloc = ma->allocate(pid, size);
6262
XDCHECK_GE(size, CompressedPtr::getMinAllocSize());
6363
if (alloc != nullptr) {
64-
validAllocs.push_back({alloc, ma->compress(alloc)});
64+
validAllocs.push_back({alloc, ma->compress(alloc, false)});
6565
validAllocsAlt.push_back({alloc, ma->compressAlt(alloc)});
6666
numAllocations++;
6767
}
@@ -83,7 +83,7 @@ BENCHMARK(CompressionAlt) {
8383

8484
BENCHMARK_RELATIVE(Compression) {
8585
for (const auto& alloc : validAllocs) {
86-
CompressedPtr c = m->compress(alloc.first);
86+
CompressedPtr c = m->compress(alloc.first, false);
8787
folly::doNotOptimizeAway(c);
8888
}
8989
}
@@ -97,7 +97,7 @@ BENCHMARK(DeCompressAlt) {
9797

9898
BENCHMARK_RELATIVE(DeCompress) {
9999
for (const auto& alloc : validAllocs) {
100-
void* ptr = m->unCompress(alloc.second);
100+
void* ptr = m->unCompress(alloc.second, false);
101101
folly::doNotOptimizeAway(ptr);
102102
}
103103
}

run_tests.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
# Newline separated list of tests to ignore
44
BLACKLIST="allocator-test-NavySetupTest
5+
allocator-test-NvmCacheTests
56
shm-test-test_page_size"
67

78
if [ "$1" == "long" ]; then

0 commit comments

Comments
 (0)