Skip to content

Commit

Permalink
HBASE-26567 Remove IndexType from ChunkCreator (#3947)
Browse files Browse the repository at this point in the history
Signed-off-by: Duo Zhang <zhangduo@apache.org>
  • Loading branch information
comnetwork authored and Apache9 committed Jan 4, 2022
1 parent b77fb0f commit e50f499
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 88 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;

import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.HeapMemoryTuneObserver;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.util.StringUtils;
Expand Down Expand Up @@ -134,51 +135,36 @@ public static ChunkCreator getInstance() {
return instance;
}

/**
* Creates and inits a chunk. The default implementation for a specific chunk size.
* @return the chunk that was initialized
*/
Chunk getChunk(ChunkType chunkType) {
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP, chunkType);
}

/**
* Creates and inits a chunk. The default implementation.
* Creates and inits a data chunk. The default implementation.
* @return the chunk that was initialized
*/
Chunk getChunk() {
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP, ChunkType.DATA_CHUNK);
return getChunk(ChunkType.DATA_CHUNK);
}

/**
* Creates and inits a chunk. The default implementation for a specific index type.
* Creates and inits a chunk with specific type.
* @return the chunk that was initialized
*/
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType) {
return getChunk(chunkIndexType, ChunkType.DATA_CHUNK);
}

/**
* Creates and inits a chunk with specific index type and type.
* @return the chunk that was initialized
*/
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType) {
Chunk getChunk(ChunkType chunkType) {
switch (chunkType) {
case INDEX_CHUNK:
if (indexChunksPool == null) {
if (indexChunkSize <= 0) {
throw new IllegalArgumentException(
"chunkType is INDEX_CHUNK but indexChunkSize is:[" + this.indexChunkSize + "]");
}
return getChunk(chunkIndexType, chunkType, indexChunkSize);
return getChunk(chunkType, indexChunkSize);
} else {
return getChunk(chunkIndexType, chunkType, indexChunksPool.getChunkSize());
return getChunk(chunkType, indexChunksPool.getChunkSize());
}
case DATA_CHUNK:
if (dataChunksPool == null) {
return getChunk(chunkIndexType, chunkType, chunkSize);
return getChunk(chunkType, chunkSize);
} else {
return getChunk(chunkIndexType, chunkType, dataChunksPool.getChunkSize());
return getChunk(chunkType, dataChunksPool.getChunkSize());
}
default:
throw new IllegalArgumentException(
Expand All @@ -189,10 +175,9 @@ Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType)
/**
* Creates and inits a chunk.
* @return the chunk that was initialized
* @param chunkIndexType whether the requested chunk is going to be used with CellChunkMap index
* @param size the size of the chunk to be allocated, in bytes
*/
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType, int size) {
Chunk getChunk(ChunkType chunkType, int size) {
Chunk chunk = null;
MemStoreChunkPool pool = null;

Expand All @@ -217,9 +202,7 @@ Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType,
}

if (chunk == null) {
// the second parameter explains whether CellChunkMap index is requested,
// in that case, put allocated on demand chunk mapping into chunkIdMap
chunk = createChunk(false, chunkIndexType, chunkType, size);
chunk = createChunk(false, chunkType, size);
}

// now we need to actually do the expensive memory allocation step in case of a new chunk,
Expand All @@ -240,22 +223,21 @@ Chunk getJumboChunk(int jumboSize) {
if (allocSize <= this.getChunkSize(ChunkType.DATA_CHUNK)) {
LOG.warn("Jumbo chunk size " + jumboSize + " must be more than regular chunk size "
+ this.getChunkSize(ChunkType.DATA_CHUNK) + ". Converting to regular chunk.");
return getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
return getChunk();
}
// the new chunk is going to hold the jumbo cell data and needs to be referenced by
// a strong map. Therefore the CCM index type
return getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK, allocSize);
// a strong map.
return getChunk(ChunkType.JUMBO_CHUNK, allocSize);
}

/**
* Creates the chunk either onheap or offheap
* @param pool indicates if the chunks have to be created which will be used by the Pool
* @param chunkIndexType whether the requested chunk is going to be used with CellChunkMap index
* @param chunkType whether the requested chunk is data chunk or index chunk.
* @param size the size of the chunk to be allocated, in bytes
* @return the chunk
*/
private Chunk createChunk(boolean pool, CompactingMemStore.IndexType chunkIndexType,
ChunkType chunkType, int size) {
private Chunk createChunk(boolean pool, ChunkType chunkType, int size) {
Chunk chunk = null;
int id = chunkID.getAndIncrement();
assert id > 0;
Expand All @@ -265,22 +247,39 @@ private Chunk createChunk(boolean pool, CompactingMemStore.IndexType chunkIndexT
} else {
chunk = new OnheapChunk(size, id, chunkType, pool);
}
if (pool || (chunkIndexType == CompactingMemStore.IndexType.CHUNK_MAP)) {
// put the pool chunk into the chunkIdMap so it is not GC-ed
this.chunkIdMap.put(chunk.getId(), chunk);
}

/**
* Here we always put the chunk into the {@link ChunkCreator#chunkIdMap} no matter whether the
* chunk is pooled or not. <br/>
* For {@link CompactingMemStore},because the chunk could only be acquired from
* {@link ChunkCreator} through {@link MemStoreLABImpl}, and
* {@link CompactingMemStore#indexType} could only be {@link IndexType.CHUNK_MAP} when using
* {@link MemStoreLABImpl}, so we must put chunk into this {@link ChunkCreator#chunkIdMap} to
* make sure the chunk could be got by chunkId.
* <p>
* For {@link DefaultMemStore},it is also reasonable to put the chunk in
* {@link ChunkCreator#chunkIdMap} because: <br/>
* 1.When the {@link MemStoreLAB} which created the chunk is not closed, this chunk is used by
* the {@link Segment} which references this {@link MemStoreLAB}, so this chunk certainly should
* not be GC-ed, putting the chunk in {@link ChunkCreator#chunkIdMap} does not prevent useless
* chunk to be GC-ed. <br/>
* 2.When the {@link MemStoreLAB} which created the chunk is closed, and if the chunk is not
* pooled, {@link ChunkCreator#removeChunk} is invoked to remove the chunk from this
* {@link ChunkCreator#chunkIdMap}, so there is no memory leak.
*/
this.chunkIdMap.put(chunk.getId(), chunk);

return chunk;
}

// Chunks from pool are created covered with strong references anyway
// TODO: change to CHUNK_MAP if it is generally defined
private Chunk createChunkForPool(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType,
// Chunks from pool are created covered with strong references anyway.
private Chunk createChunkForPool(ChunkType chunkType,
int chunkSize) {
if (chunkSize != dataChunksPool.getChunkSize() &&
chunkSize != indexChunksPool.getChunkSize()) {
return null;
}
return createChunk(true, chunkIndexType, chunkType, chunkSize);
return createChunk(true, chunkType, chunkSize);
}

// Used to translate the ChunkID into a chunk ref
Expand Down Expand Up @@ -346,7 +345,7 @@ private class MemStoreChunkPool implements HeapMemoryTuneObserver {
this.reclaimedChunks = new LinkedBlockingQueue<>();
for (int i = 0; i < initialCount; i++) {
Chunk chunk =
createChunk(true, CompactingMemStore.IndexType.ARRAY_MAP, chunkType, chunkSize);
createChunk(true, chunkType, chunkSize);
chunk.init();
reclaimedChunks.add(chunk);
}
Expand All @@ -368,10 +367,6 @@ private class MemStoreChunkPool implements HeapMemoryTuneObserver {
* @see #putbackChunks(Chunk)
*/
Chunk getChunk() {
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP);
}

Chunk getChunk(CompactingMemStore.IndexType chunkIndexType) {
Chunk chunk = reclaimedChunks.poll();
if (chunk != null) {
chunk.reset();
Expand All @@ -382,7 +377,7 @@ Chunk getChunk(CompactingMemStore.IndexType chunkIndexType) {
long created = this.chunkCount.get();
if (created < this.maxCount) {
if (this.chunkCount.compareAndSet(created, created + 1)) {
chunk = createChunkForPool(chunkIndexType, chunkType, chunkSize);
chunk = createChunkForPool(chunkType, chunkSize);
break;
}
} else {
Expand Down Expand Up @@ -559,7 +554,7 @@ int getPoolSize(ChunkType chunkType) {

boolean isChunkInPool(int chunkId) {
Chunk c = getChunk(chunkId);
if (c==null) {
if (c == null) {
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.nio.RefCnt;
import org.apache.hadoop.hbase.regionserver.CompactingMemStore.IndexType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -42,27 +43,28 @@
/**
* A memstore-local allocation buffer.
* <p>
* The MemStoreLAB is basically a bump-the-pointer allocator that allocates
* big (2MB) byte[] chunks from and then doles it out to threads that request
* slices into the array.
* The MemStoreLAB is basically a bump-the-pointer allocator that allocates big (2MB) byte[] chunks
* from and then doles it out to threads that request slices into the array.
* <p>
* The purpose of this class is to combat heap fragmentation in the
* regionserver. By ensuring that all Cells in a given memstore refer
* only to large chunks of contiguous memory, we ensure that large blocks
* get freed up when the memstore is flushed.
* The purpose of this class is to combat heap fragmentation in the regionserver. By ensuring that
* all Cells in a given memstore refer only to large chunks of contiguous memory, we ensure that
* large blocks get freed up when the memstore is flushed.
* <p>
* Without the MSLAB, the byte array allocated during insertion end up
* interleaved throughout the heap, and the old generation gets progressively
* more fragmented until a stop-the-world compacting collection occurs.
* Without the MSLAB, the byte array allocated during insertion end up interleaved throughout the
* heap, and the old generation gets progressively more fragmented until a stop-the-world compacting
* collection occurs.
* <p>
* TODO: we should probably benchmark whether word-aligning the allocations
* would provide a performance improvement - probably would speed up the
* Bytes.toLong/Bytes.toInt calls in KeyValue, but some of those are cached
* anyway.
* The chunks created by this MemStoreLAB can get pooled at {@link ChunkCreator}.
* When the Chunk comes from pool, it can be either an on heap or an off heap backed chunk. The chunks,
* which this MemStoreLAB creates on its own (when no chunk available from pool), those will be
* always on heap backed.
* TODO: we should probably benchmark whether word-aligning the allocations would provide a
* performance improvement - probably would speed up the Bytes.toLong/Bytes.toInt calls in KeyValue,
* but some of those are cached anyway. The chunks created by this MemStoreLAB can get pooled at
* {@link ChunkCreator}. When the Chunk comes from pool, it can be either an on heap or an off heap
* backed chunk. The chunks, which this MemStoreLAB creates on its own (when no chunk available from
* pool), those will be always on heap backed.
* <p>
* NOTE:if user requested to work with MSLABs (whether on- or off-heap), in
* {@link CompactingMemStore} ctor, the {@link CompactingMemStore#indexType} could only be
* {@link IndexType#CHUNK_MAP},that is to say the immutable segments using MSLABs are going to use
* {@link CellChunkMap} as their index.
*/
@InterfaceAudience.Private
public class MemStoreLABImpl implements MemStoreLAB {
Expand All @@ -78,7 +80,6 @@ public class MemStoreLABImpl implements MemStoreLAB {
private final int dataChunkSize;
private final int maxAlloc;
private final ChunkCreator chunkCreator;
private final CompactingMemStore.IndexType idxType; // what index is used for corresponding segment

// This flag is for closing this instance, its set when clearing snapshot of
// memstore
Expand All @@ -104,13 +105,11 @@ public MemStoreLABImpl(Configuration conf) {
// if we don't exclude allocations >CHUNK_SIZE, we'd infiniteloop on one!
Preconditions.checkArgument(maxAlloc <= dataChunkSize,
MAX_ALLOC_KEY + " must be less than " + CHUNK_SIZE_KEY);

this.refCnt = RefCnt.create(() -> {
recycleChunks();
});

// if user requested to work with MSLABs (whether on- or off-heap), then the
// immutable segments are going to use CellChunkMap as their index
idxType = CompactingMemStore.IndexType.CHUNK_MAP;
}

@Override
Expand Down Expand Up @@ -339,7 +338,7 @@ private Chunk getOrMakeChunk() {
if (c != null) {
return c;
}
c = this.chunkCreator.getChunk(idxType);
c = this.chunkCreator.getChunk();
if (c != null) {
// set the curChunk. No need of CAS as only one thread will be here
currChunk.set(c);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,8 @@ private CellChunkMap setUpCellChunkMap(boolean asc) {

// allocate new chunks and use the data chunk to hold the full data of the cells
// and the index chunk to hold the cell-representations
Chunk dataChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
Chunk idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
Chunk dataChunk = chunkCreator.getChunk();
Chunk idxChunk = chunkCreator.getChunk();
// the array of index chunks to be used as a basis for CellChunkMap
Chunk chunkArray[] = new Chunk[8]; // according to test currently written 8 is way enough
int chunkArrayIdx = 0;
Expand All @@ -300,7 +300,7 @@ private CellChunkMap setUpCellChunkMap(boolean asc) {
// do we have enough space to write the cell data on the data chunk?
if (dataOffset + kv.getSerializedSize() > chunkCreator.getChunkSize()) {
// allocate more data chunks if needed
dataChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
dataChunk = chunkCreator.getChunk();
dataBuffer = dataChunk.getData();
dataOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
}
Expand All @@ -310,7 +310,7 @@ private CellChunkMap setUpCellChunkMap(boolean asc) {
// do we have enough space to write the cell-representation on the index chunk?
if (idxOffset + ClassSize.CELL_CHUNK_MAP_ENTRY > chunkCreator.getChunkSize()) {
// allocate more index chunks if needed
idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
idxChunk = chunkCreator.getChunk();
idxBuffer = idxChunk.getData();
idxOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
chunkArray[chunkArrayIdx++] = idxChunk;
Expand All @@ -331,10 +331,10 @@ private CellChunkMap setUpJumboCellChunkMap(boolean asc) {
// allocate new chunks and use the data JUMBO chunk to hold the full data of the cells
// and the normal index chunk to hold the cell-representations
Chunk dataJumboChunk =
chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK,
chunkCreator.getChunk(ChunkType.JUMBO_CHUNK,
smallChunkSize);
assertTrue(dataJumboChunk.isJumbo());
Chunk idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
Chunk idxChunk = chunkCreator.getChunk();
// the array of index chunks to be used as a basis for CellChunkMap
Chunk[] chunkArray = new Chunk[8]; // according to test currently written 8 is way enough
int chunkArrayIdx = 0;
Expand All @@ -354,7 +354,7 @@ private CellChunkMap setUpJumboCellChunkMap(boolean asc) {
// do we have enough space to write the cell-representation on the index chunk?
if (idxOffset + ClassSize.CELL_CHUNK_MAP_ENTRY > chunkCreator.getChunkSize()) {
// allocate more index chunks if needed
idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
idxChunk = chunkCreator.getChunk();
idxBuffer = idxChunk.getData();
idxOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
chunkArray[chunkArrayIdx++] = idxChunk;
Expand All @@ -368,7 +368,7 @@ private CellChunkMap setUpJumboCellChunkMap(boolean asc) {
// Jumbo chunks are working only with one cell per chunk, thus always allocate a new jumbo
// data chunk for next cell
dataJumboChunk =
chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK,
chunkCreator.getChunk(ChunkType.JUMBO_CHUNK,
smallChunkSize);
assertTrue(dataJumboChunk.isJumbo());
dataBuffer = dataJumboChunk.getData();
Expand Down
Loading

0 comments on commit e50f499

Please sign in to comment.