Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add retained size to compressor #234

Merged
merged 2 commits into from
Sep 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/main/java/io/airlift/compress/v3/Compressor.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,9 @@ public interface Compressor
* @return number of bytes written to the output
*/
int compress(MemorySegment input, MemorySegment output);

/**
 * Reports the retained size, in bytes, for an input of the given length.
 * This default implementation always returns 0; implementing classes may override it.
 * NOTE(review): the exact meaning of "retained size" is not specified here - presumably
 * the memory held by the compressor's working state; confirm and document.
 *
 * @param inputLength length of the input; ignored by this default implementation
 * @return 0 in this default implementation
 */
default int getRetainedSizeInBytes(int inputLength)
{
return 0;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ public int compress(MemorySegment input, MemorySegment output)
}
}

/**
 * Returns the value of {@code Lz4RawCompressor.computeTableSize(inputLength)}.
 * NOTE(review): that helper computes a table size; confirm the value is expressed in
 * bytes rather than in table entries before relying on it for memory accounting.
 *
 * @param inputLength input length forwarded to {@code computeTableSize}
 * @return the table size computed for {@code inputLength}
 */
@Override
public int getRetainedSizeInBytes(int inputLength)
{
return Lz4RawCompressor.computeTableSize(inputLength);
}

private static void verifyRange(byte[] data, int offset, int length)
{
requireNonNull(data, "data is null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static io.airlift.compress.v3.lz4.Lz4Constants.SIZE_OF_LONG;
import static io.airlift.compress.v3.lz4.Lz4Constants.SIZE_OF_SHORT;
import static io.airlift.compress.v3.lz4.UnsafeUtil.UNSAFE;
import static java.lang.Math.clamp;

final class Lz4RawCompressor
{
Expand Down Expand Up @@ -300,12 +301,12 @@ private static long encodeRunLength(
return output;
}

private static int computeTableSize(int inputSize)
static int computeTableSize(int inputSize)
{
// smallest power of 2 that is greater than or equal to inputSize (0 when inputSize is 0 or 1)
// NOTE(review): for inputSize > 2^30 the shift overflows to Integer.MIN_VALUE,
// so the clamp below yields MIN_TABLE_SIZE rather than MAX_TABLE_SIZE - confirm this is intended
int target = Integer.highestOneBit(inputSize - 1) << 1;

// keep it between MIN_TABLE_SIZE and MAX_TABLE_SIZE
return Math.max(Math.min(target, MAX_TABLE_SIZE), MIN_TABLE_SIZE);
return clamp(target, MIN_TABLE_SIZE, MAX_TABLE_SIZE);
}
}
6 changes: 6 additions & 0 deletions src/main/java/io/airlift/compress/v3/lzo/LzoCompressor.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ public int compress(MemorySegment input, MemorySegment output)
}
}

/**
 * Returns {@code MAX_TABLE_SIZE} regardless of the input length.
 * NOTE(review): confirm that {@code MAX_TABLE_SIZE} is a byte count rather than a
 * number of table entries.
 *
 * @param inputLength ignored; the same constant is returned for every input length
 * @return {@code MAX_TABLE_SIZE}
 */
@Override
public int getRetainedSizeInBytes(int inputLength)
{
return MAX_TABLE_SIZE;
}

private static void verifyRange(byte[] data, int offset, int length)
{
requireNonNull(data, "data is null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import static io.airlift.compress.v3.lzo.LzoConstants.SIZE_OF_LONG;
import static io.airlift.compress.v3.lzo.LzoConstants.SIZE_OF_SHORT;
import static io.airlift.compress.v3.lzo.UnsafeUtil.UNSAFE;
import static java.lang.Math.clamp;

final class LzoRawCompressor
{
Expand Down Expand Up @@ -384,6 +385,6 @@ private static int computeTableSize(int inputSize)
int target = Integer.highestOneBit(inputSize - 1) << 1;

// keep it between MIN_TABLE_SIZE and MAX_TABLE_SIZE
return Math.max(Math.min(target, MAX_TABLE_SIZE), MIN_TABLE_SIZE);
return clamp(target, MIN_TABLE_SIZE, MAX_TABLE_SIZE);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ public int compress(MemorySegment input, MemorySegment output)
}
}

/**
 * Returns the value of {@code SnappyRawCompressor.getHashTableSize(inputLength)}.
 * NOTE(review): that helper computes a hash table size; confirm the value is expressed
 * in bytes rather than in table entries.
 *
 * @param inputLength input length forwarded to {@code getHashTableSize}
 * @return the hash table size computed for {@code inputLength}
 */
@Override
public int getRetainedSizeInBytes(int inputLength)
{
return SnappyRawCompressor.getHashTableSize(inputLength);
}

private static void verifyRange(byte[] data, int offset, int length)
{
requireNonNull(data, "data is null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static io.airlift.compress.v3.snappy.SnappyConstants.SIZE_OF_LONG;
import static io.airlift.compress.v3.snappy.SnappyConstants.SIZE_OF_SHORT;
import static io.airlift.compress.v3.snappy.UnsafeUtil.UNSAFE;
import static java.lang.Math.clamp;

final class SnappyRawCompressor
{
Expand Down Expand Up @@ -344,7 +345,7 @@ private static long emitCopy(Object outputBase, long output, long input, long ma
}

@SuppressWarnings("IllegalToken")
private static int getHashTableSize(int inputSize)
static int getHashTableSize(int inputSize)
{
// Use smaller hash table when input.size() is smaller, since we
// fill the table, incurring O(hash table size) overhead for
Expand All @@ -356,7 +357,7 @@ private static int getHashTableSize(int inputSize)
int target = Integer.highestOneBit(inputSize - 1) << 1;

// keep it between 256 and MAX_HASH_TABLE_SIZE
return Math.max(Math.min(target, MAX_HASH_TABLE_SIZE), 256);
return clamp(target, 256, MAX_HASH_TABLE_SIZE);
}

// Any hash function will produce a valid compressed stream, but a good
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import static io.airlift.compress.v3.zstd.Constants.MAX_BLOCK_SIZE;
import static io.airlift.compress.v3.zstd.Util.checkArgument;
import static java.lang.Math.clamp;

class CompressionContext
{
Expand All @@ -31,7 +32,7 @@ public CompressionContext(CompressionParameters parameters, long baseAddress, in
{
this.parameters = parameters;

int windowSize = Math.max(1, Math.min(parameters.getWindowSize(), inputSize));
int windowSize = clamp(inputSize, 1, parameters.getWindowSize());
int blockSize = Math.min(MAX_BLOCK_SIZE, windowSize);
int divider = (parameters.getSearchLength() == 3) ? 3 : 4;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import static io.airlift.compress.v3.zstd.Constants.MIN_WINDOW_LOG;
import static io.airlift.compress.v3.zstd.Util.cycleLog;
import static io.airlift.compress.v3.zstd.Util.highestBit;
import static java.lang.Math.clamp;

class CompressionParameters
{
Expand Down Expand Up @@ -316,7 +317,7 @@ else if (estimatedInputSize <= 256 * 1024) {
int row = DEFAULT_COMPRESSION_LEVEL;

if (compressionLevel != 0) { // TODO: figure out better way to indicate default compression level
row = Math.min(Math.max(0, compressionLevel), MAX_COMPRESSION_LEVEL);
row = clamp(compressionLevel, 0, MAX_COMPRESSION_LEVEL);
}

return DEFAULT_COMPRESSION_PARAMETERS[table][row];
Expand Down