-
Notifications
You must be signed in to change notification settings - Fork 3.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HBASE-25869 WAL value compression (#3244)
WAL storage can be expensive, especially if the cell values represented in the edits are large, consisting of blobs or significant lengths of text. Such WALs might need to be kept around for a fairly long time to satisfy replication constraints on a space limited (or space-contended) filesystem. We have a custom dictionary compression scheme for cell metadata that is engaged when WAL compression is enabled in site configuration. This is fine for that application, where we can expect the universe of values and their lengths in the custom dictionaries to be constrained. For arbitrary cell values it is better to use one of the available compression codecs, which are suitable for arbitrary albeit compressible data. Signed-off-by: Bharath Vissapragada <bharathv@apache.org> Signed-off-by: Duo Zhang <zhangduo@apache.org> Signed-off-by: Nick Dimiduk <ndimiduk@apache.org>
- Loading branch information
Showing
14 changed files
with
847 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
111 changes: 111 additions & 0 deletions
111
hbase-common/src/main/java/org/apache/hadoop/hbase/io/BoundedDelegatingInputStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.hadoop.hbase.io; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
|
||
import org.apache.yetus.audience.InterfaceAudience; | ||
|
||
/** | ||
* This is a stream that will only supply bytes from its delegate up to a certain limit. | ||
* When there is an attempt to set the position beyond that it will signal that the input | ||
* is finished. | ||
*/ | ||
@InterfaceAudience.Private | ||
public class BoundedDelegatingInputStream extends DelegatingInputStream { | ||
|
||
protected long limit; | ||
protected long pos; | ||
|
||
public BoundedDelegatingInputStream(InputStream in, long limit) { | ||
super(in); | ||
this.limit = limit; | ||
this.pos = 0; | ||
} | ||
|
||
public void setDelegate(InputStream in, long limit) { | ||
this.in = in; | ||
this.limit = limit; | ||
this.pos = 0; | ||
} | ||
|
||
/** | ||
* Call the delegate's {@code read()} method if the current position is less than the limit. | ||
* @return the byte read or -1 if the end of stream or the limit has been reached. | ||
*/ | ||
@Override | ||
public int read() throws IOException { | ||
if (pos >= limit) { | ||
return -1; | ||
} | ||
int result = in.read(); | ||
pos++; | ||
return result; | ||
} | ||
|
||
/** | ||
* Call the delegate's {@code read(byte[], int, int)} method if the current position is less | ||
* than the limit. | ||
* @param b read buffer | ||
* @param off Start offset | ||
* @param len The number of bytes to read | ||
* @return the number of bytes read or -1 if the end of stream or the limit has been reached. | ||
*/ | ||
@Override | ||
public int read(final byte[] b, final int off, final int len) throws IOException { | ||
if (pos >= limit) { | ||
return -1; | ||
} | ||
long readLen = Math.min(len, limit - pos); | ||
int read = in.read(b, off, (int)readLen); | ||
if (read < 0) { | ||
return -1; | ||
} | ||
pos += read; | ||
return read; | ||
} | ||
|
||
/** | ||
* Call the delegate's {@code skip(long)} method. | ||
* @param len the number of bytes to skip | ||
* @return the actual number of bytes skipped | ||
*/ | ||
@Override | ||
public long skip(final long len) throws IOException { | ||
long skipped = in.skip(Math.min(len, limit - pos)); | ||
pos += skipped; | ||
return skipped; | ||
} | ||
|
||
/** | ||
* Call the delegate's {@code available()} method. | ||
* @return the delegate's available bytes if the current position is less than the | ||
* limit, or 0 otherwise. | ||
*/ | ||
@Override | ||
public int available() throws IOException { | ||
if (pos >= limit) { | ||
return 0; | ||
} | ||
int available = in.available(); | ||
return (int) Math.min(available, limit - pos); | ||
} | ||
|
||
} |
54 changes: 54 additions & 0 deletions
54
hbase-common/src/main/java/org/apache/hadoop/hbase/io/DelegatingInputStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.hadoop.hbase.io; | ||
|
||
import java.io.FilterInputStream; | ||
import java.io.InputStream; | ||
|
||
import org.apache.yetus.audience.InterfaceAudience; | ||
|
||
/** | ||
* An input stream that delegates all operations to another input stream. | ||
* The delegate can be switched out for another at any time but to minimize the | ||
* possibility of violating the InputStream contract it would be best to replace | ||
* the delegate only once it has been fully consumed. <p> For example, a | ||
* ByteArrayInputStream, which is implicitly bounded by the size of the underlying | ||
* byte array can be converted into an unbounded stream fed by multiple instances | ||
* of ByteArrayInputStream, switched out one for the other in sequence. | ||
* <p> | ||
* Although multithreaded access is allowed, users of this class will want to take | ||
* care to order operations on this stream and the swap out of one delegate for | ||
* another in a way that provides a valid view of stream contents. | ||
*/ | ||
@InterfaceAudience.Private | ||
public class DelegatingInputStream extends FilterInputStream { | ||
|
||
public DelegatingInputStream(InputStream in) { | ||
super(in); | ||
} | ||
|
||
public InputStream getDelegate() { | ||
return this.in; | ||
} | ||
|
||
public void setDelegate(InputStream in) { | ||
this.in = in; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.