-
Notifications
You must be signed in to change notification settings - Fork 3.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HBASE-27264 Add options to consider compressed size when delimiting blocks during hfile writes (#4675)
Signed-off-by: Tak Lon (Stephen) Wu <taklwu@apache.org> Signed-off-by: Ankit Singhal <ankit@apache.org>
- Loading branch information
1 parent
aaad3a7
commit 3acf920
Showing
6 changed files
with
283 additions
and
6 deletions.
There are no files selected for viewing
59 changes: 59 additions & 0 deletions
59
...-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCompressedSizePredicator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.hadoop.hbase.io.hfile; | ||
|
||
import org.apache.yetus.audience.InterfaceAudience; | ||
|
||
/** | ||
* Allows for defining different compression rate predicates on its implementing classes. Useful | ||
* when compression is in place, and we want to define block size based on the compressed size, | ||
* rather than the default behaviour that considers the uncompressed size only. Since we don't | ||
* actually know the compressed size until we actual apply compression in the block byte buffer, we | ||
* need to "predicate" this compression rate and minimize compression execution to avoid excessive | ||
* resources usage. Different approaches for predicating the compressed block size can be defined by | ||
* implementing classes. The <code>updateLatestBlockSizes</code> allows for updating uncompressed | ||
* and compressed size values, and is called during block finishing (when we finally apply | ||
* compression on the block data). Final block size predicate logic is implemented in | ||
* <code>shouldFinishBlock</code>, which is called by the block writer once uncompressed size has | ||
* reached the configured BLOCK size, and additional checks should be applied to decide if the block | ||
* can be finished. | ||
*/ | ||
@InterfaceAudience.Private | ||
public interface BlockCompressedSizePredicator { | ||
|
||
String BLOCK_COMPRESSED_SIZE_PREDICATOR = "hbase.block.compressed.size.predicator"; | ||
|
||
String MAX_BLOCK_SIZE_UNCOMPRESSED = "hbase.block.max.size.uncompressed"; | ||
|
||
/** | ||
* Updates the predicator with both compressed and uncompressed sizes of latest block written. To | ||
* be called once the block is finshed and flushed to disk after compression. | ||
* @param context the HFileContext containg the configured max block size. | ||
* @param uncompressed the uncompressed size of last block written. | ||
* @param compressed the compressed size of last block written. | ||
*/ | ||
void updateLatestBlockSizes(HFileContext context, int uncompressed, int compressed); | ||
|
||
/** | ||
* Decides if the block should be finished based on the comparison of its uncompressed size | ||
* against an adjusted size based on a predicated compression factor. | ||
* @param uncompressed true if the block should be finished. n | ||
*/ | ||
boolean shouldFinishBlock(int uncompressed); | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
62 changes: 62 additions & 0 deletions
62
...rc/main/java/org/apache/hadoop/hbase/io/hfile/PreviousBlockCompressionRatePredicator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.hadoop.hbase.io.hfile; | ||
|
||
import org.apache.yetus.audience.InterfaceAudience; | ||
|
||
/** | ||
* This BlockCompressedSizePredicator implementation adjusts the block size limit based on the | ||
* compression rate of the block contents read so far. For the first block, adjusted size would be | ||
* zero, so it performs a compression of current block contents and calculate compression rate and | ||
* adjusted size. For subsequent blocks, decision whether the block should be finished or not will | ||
* be based on the compression rate calculated for the previous block. | ||
*/ | ||
@InterfaceAudience.Private | ||
public class PreviousBlockCompressionRatePredicator implements BlockCompressedSizePredicator { | ||
|
||
private int adjustedBlockSize; | ||
private int compressionRatio = 1; | ||
private int configuredMaxBlockSize; | ||
|
||
/** | ||
* Recalculates compression rate for the last block and adjusts the block size limit as: | ||
* BLOCK_SIZE * (uncompressed/compressed). | ||
* @param context HFIleContext containing the configured max block size. | ||
* @param uncompressed the uncompressed size of last block written. | ||
* @param compressed the compressed size of last block written. | ||
*/ | ||
@Override | ||
public void updateLatestBlockSizes(HFileContext context, int uncompressed, int compressed) { | ||
configuredMaxBlockSize = context.getBlocksize(); | ||
compressionRatio = uncompressed / compressed; | ||
adjustedBlockSize = context.getBlocksize() * compressionRatio; | ||
} | ||
|
||
/** | ||
* Returns <b>true</b> if the passed uncompressed size is larger than the limit calculated by | ||
* <code>updateLatestBlockSizes</code>. | ||
* @param uncompressed true if the block should be finished. n | ||
*/ | ||
@Override | ||
public boolean shouldFinishBlock(int uncompressed) { | ||
if (uncompressed >= configuredMaxBlockSize) { | ||
return uncompressed >= adjustedBlockSize; | ||
} | ||
return false; | ||
} | ||
} |
49 changes: 49 additions & 0 deletions
49
...erver/src/main/java/org/apache/hadoop/hbase/io/hfile/UncompressedBlockSizePredicator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.hadoop.hbase.io.hfile; | ||
|
||
import org.apache.yetus.audience.InterfaceAudience; | ||
|
||
/** | ||
* This BlockCompressedSizePredicator implementation doesn't actually performs any predicate and | ||
* simply returns <b>true</b> on <code>shouldFinishBlock</code>. This is the default implementation | ||
* if <b>hbase.block.compressed.size.predicator</b> property is not defined. | ||
*/ | ||
@InterfaceAudience.Private | ||
public class UncompressedBlockSizePredicator implements BlockCompressedSizePredicator { | ||
|
||
/** | ||
* Empty implementation. Does nothing. | ||
* @param uncompressed the uncompressed size of last block written. | ||
* @param compressed the compressed size of last block written. | ||
*/ | ||
@Override | ||
public void updateLatestBlockSizes(HFileContext context, int uncompressed, int compressed) { | ||
} | ||
|
||
/** | ||
* Dummy implementation that always returns true. This means, we will be only considering the | ||
* block uncompressed size for deciding when to finish a block. | ||
* @param uncompressed true if the block should be finished. n | ||
*/ | ||
@Override | ||
public boolean shouldFinishBlock(int uncompressed) { | ||
return true; | ||
} | ||
|
||
} |
Oops, something went wrong.