-
Notifications
You must be signed in to change notification settings - Fork 3.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HBASE-25869 WAL value compression #3244
Merged
Merged
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
bb4a06b
HBASE-25869 WAL value compression
apurtell 34698a6
- We were fully flushing the deflater at the end of every value
apurtell f678ce6
Address first round of review feedback
apurtell 1924a4f
Remove invalid unit test after latest changes
apurtell 7f20cd7
WALCellCodec#compressValue should properly handle the case where our …
apurtell 86f7a57
Address another round of review feedback
apurtell 47b392a
Switch to Hadoop compression codecs.
apurtell 80d346a
Fix findbugs warning in CompressionContext
apurtell b032da3
Address final round of review feedback
apurtell a0a3384
Extend DelegatingInputStream from FilterInputStream
apurtell f2202ac
Avoid a copy during decompression with new BoundedDelegatingInputStream.
apurtell e4f8f7f
Fix whitespace and javadoc formatting nits in latest precommit report
apurtell File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
111 changes: 111 additions & 0 deletions
111
hbase-common/src/main/java/org/apache/hadoop/hbase/io/BoundedDelegatingInputStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.hadoop.hbase.io; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
|
||
import org.apache.yetus.audience.InterfaceAudience; | ||
|
||
/** | ||
* This is a stream that will only supply bytes from its delegate up to a certain limit. | ||
* When there is an attempt to set the position beyond that it will signal that the input | ||
* is finished. | ||
*/ | ||
@InterfaceAudience.Private | ||
public class BoundedDelegatingInputStream extends DelegatingInputStream { | ||
|
||
protected long limit; | ||
protected long pos; | ||
|
||
public BoundedDelegatingInputStream(InputStream in, long limit) { | ||
super(in); | ||
this.limit = limit; | ||
this.pos = 0; | ||
} | ||
|
||
public void setDelegate(InputStream in, long limit) { | ||
this.in = in; | ||
this.limit = limit; | ||
this.pos = 0; | ||
} | ||
|
||
/** | ||
* Call the delegate's {@code read()} method if the current position is less than the limit. | ||
* @return the byte read or -1 if the end of stream or the limit has been reached. | ||
*/ | ||
@Override | ||
public int read() throws IOException { | ||
if (pos >= limit) { | ||
return -1; | ||
} | ||
int result = in.read(); | ||
pos++; | ||
return result; | ||
} | ||
|
||
/** | ||
* Call the delegate's {@code read(byte[], int, int)} method if the current position is less | ||
* than the limit. | ||
* @param b read buffer | ||
* @param off Start offset | ||
* @param len The number of bytes to read | ||
* @return the number of bytes read or -1 if the end of stream or the limit has been reached. | ||
*/ | ||
@Override | ||
public int read(final byte[] b, final int off, final int len) throws IOException { | ||
if (pos >= limit) { | ||
return -1; | ||
} | ||
long readLen = Math.min(len, limit - pos); | ||
int read = in.read(b, off, (int)readLen); | ||
if (read < 0) { | ||
return -1; | ||
} | ||
pos += read; | ||
return read; | ||
} | ||
|
||
/** | ||
* Call the delegate's {@code skip(long)} method. | ||
* @param len the number of bytes to skip | ||
* @return the actual number of bytes skipped | ||
*/ | ||
@Override | ||
public long skip(final long len) throws IOException { | ||
long skipped = in.skip(Math.min(len, limit - pos)); | ||
pos += skipped; | ||
return skipped; | ||
} | ||
|
||
/** | ||
* Call the delegate's {@code available()} method. | ||
* @return the delegate's available bytes if the current position is less than the | ||
* limit, or 0 otherwise. | ||
*/ | ||
@Override | ||
public int available() throws IOException { | ||
if (pos >= limit) { | ||
return 0; | ||
} | ||
int available = in.available(); | ||
return (int) Math.min(available, limit - pos); | ||
} | ||
|
||
} |
54 changes: 54 additions & 0 deletions
54
hbase-common/src/main/java/org/apache/hadoop/hbase/io/DelegatingInputStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.hadoop.hbase.io; | ||
|
||
import java.io.FilterInputStream; | ||
import java.io.InputStream; | ||
|
||
import org.apache.yetus.audience.InterfaceAudience; | ||
|
||
/** | ||
* An input stream that delegates all operations to another input stream. | ||
* The delegate can be switched out for another at any time but to minimize the | ||
* possibility of violating the InputStream contract it would be best to replace | ||
* the delegate only once it has been fully consumed. <p> For example, a | ||
* ByteArrayInputStream, which is implicitly bounded by the size of the underlying | ||
* byte array can be converted into an unbounded stream fed by multiple instances | ||
* of ByteArrayInputStream, switched out one for the other in sequence. | ||
* <p> | ||
* Although multithreaded access is allowed, users of this class will want to take | ||
* care to order operations on this stream and the swap out of one delegate for | ||
* another in a way that provides a valid view of stream contents. | ||
*/ | ||
@InterfaceAudience.Private | ||
public class DelegatingInputStream extends FilterInputStream { | ||
|
||
public DelegatingInputStream(InputStream in) { | ||
super(in); | ||
} | ||
|
||
public InputStream getDelegate() { | ||
return this.in; | ||
} | ||
|
||
public void setDelegate(InputStream in) { | ||
this.in = in; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ | |
import org.apache.hadoop.hbase.HBaseInterfaceAudience; | ||
import org.apache.hadoop.hbase.HConstants; | ||
import org.apache.hadoop.hbase.codec.Codec; | ||
import org.apache.hadoop.hbase.io.compress.Compression; | ||
import org.apache.hadoop.hbase.io.crypto.Cipher; | ||
import org.apache.hadoop.hbase.io.crypto.Encryption; | ||
import org.apache.hadoop.hbase.io.crypto.Encryptor; | ||
|
@@ -144,9 +145,22 @@ private boolean initializeCompressionContext(Configuration conf, Path path) thro | |
boolean doCompress = conf.getBoolean(HConstants.ENABLE_WAL_COMPRESSION, false); | ||
if (doCompress) { | ||
try { | ||
final boolean useTagCompression = | ||
conf.getBoolean(CompressionContext.ENABLE_WAL_TAGS_COMPRESSION, true); | ||
final boolean useValueCompression = | ||
conf.getBoolean(CompressionContext.ENABLE_WAL_VALUE_COMPRESSION, false); | ||
final Compression.Algorithm valueCompressionType = | ||
useValueCompression ? CompressionContext.getValueCompressionAlgorithm(conf) : | ||
Compression.Algorithm.NONE; | ||
if (LOG.isTraceEnabled()) { | ||
LOG.trace("Initializing compression context for {}: isRecoveredEdits={}" + | ||
", hasTagCompression={}, hasValueCompression={}, valueCompressionType={}", path, | ||
CommonFSUtils.isRecoveredEdits(path), useTagCompression, useValueCompression, | ||
valueCompressionType); | ||
} | ||
this.compressionContext = | ||
new CompressionContext(LRUDictionary.class, CommonFSUtils.isRecoveredEdits(path), | ||
conf.getBoolean(CompressionContext.ENABLE_WAL_TAGS_COMPRESSION, true)); | ||
useTagCompression, useValueCompression, valueCompressionType); | ||
} catch (Exception e) { | ||
throw new IOException("Failed to initiate CompressionContext", e); | ||
} | ||
|
@@ -165,17 +179,29 @@ public void init(FileSystem fs, Path path, Configuration conf, boolean overwrita | |
|
||
initOutput(fs, path, overwritable, bufferSize, replication, blocksize); | ||
|
||
boolean doTagCompress = doCompress | ||
&& conf.getBoolean(CompressionContext.ENABLE_WAL_TAGS_COMPRESSION, true); | ||
length.set(writeMagicAndWALHeader(ProtobufLogReader.PB_WAL_MAGIC, buildWALHeader(conf, | ||
WALHeader.newBuilder().setHasCompression(doCompress).setHasTagCompression(doTagCompress)))); | ||
boolean doTagCompress = doCompress && | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: Duplication of config parsing between |
||
conf.getBoolean(CompressionContext.ENABLE_WAL_TAGS_COMPRESSION, true); | ||
boolean doValueCompress = doCompress && | ||
conf.getBoolean(CompressionContext.ENABLE_WAL_VALUE_COMPRESSION, false); | ||
WALHeader.Builder headerBuilder = WALHeader.newBuilder() | ||
.setHasCompression(doCompress) | ||
.setHasTagCompression(doTagCompress) | ||
.setHasValueCompression(doValueCompress); | ||
if (doValueCompress) { | ||
headerBuilder.setValueCompressionAlgorithm( | ||
CompressionContext.getValueCompressionAlgorithm(conf).ordinal()); | ||
} | ||
length.set(writeMagicAndWALHeader(ProtobufLogReader.PB_WAL_MAGIC, | ||
buildWALHeader(conf, headerBuilder))); | ||
|
||
initAfterHeader(doCompress); | ||
|
||
// instantiate trailer to default value. | ||
trailer = WALTrailer.newBuilder().build(); | ||
|
||
if (LOG.isTraceEnabled()) { | ||
LOG.trace("Initialized protobuf WAL=" + path + ", compression=" + doCompress); | ||
LOG.trace("Initialized protobuf WAL={}, compression={}, tagCompression={}" + | ||
", valueCompression={}", path, doCompress, doTagCompress, doValueCompress); | ||
} | ||
} | ||
|
||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can merge this and DelegatingInputStream? Don't think we need both of them separately.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought we could keep them both.