Skip to content

Commit d59b32a

Browse files
JohnPJenkins authored and julienledem committed
PARQUET-852: Slowly ramp up sizes of byte[] in ByteBasedBitPackingEncoder
https://issues.apache.org/jira/browse/PARQUET-852
Author: John Jenkins <jjenkins@kcg.com>
Closes #4022 from JohnPJenkins/PARQUET-852 and squashes the following commits:
334acec [John Jenkins] PARQUET-852: Slowly ramp up sizes of byte[] in ByteBasedBitPackingEncoder
1 parent 4297134 commit d59b32a

File tree

2 files changed

+34
-14
lines changed

2 files changed

+34
-14
lines changed

parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,14 @@
1-
/*
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
55
* regarding copyright ownership. The ASF licenses this file
66
* to you under the Apache License, Version 2.0 (the
77
* "License"); you may not use this file except in compliance
88
* with the License. You may obtain a copy of the License at
9-
*
9+
*
1010
* http://www.apache.org/licenses/LICENSE-2.0
11-
*
11+
*
1212
* Unless required by applicable law or agreed to in writing,
1313
* software distributed under the License is distributed on an
1414
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -39,11 +39,14 @@ public class ByteBasedBitPackingEncoder {
3939
private static final Logger LOG = LoggerFactory.getLogger(ByteBasedBitPackingEncoder.class);
4040

4141
private static final int VALUES_WRITTEN_AT_A_TIME = 8;
42+
private static final int MAX_SLAB_SIZE_MULT = 64 * 1024;
43+
private static final int INITIAL_SLAB_SIZE_MULT = 1024;
4244

4345
private final int bitWidth;
4446
private final BytePacker packer;
4547
private final int[] input = new int[VALUES_WRITTEN_AT_A_TIME];
46-
private final int slabSize;
48+
private int slabSize;
49+
private long totalFullSlabSize;
4750
private int inputSize;
4851
private byte[] packed;
4952
private int packedPosition;
@@ -56,8 +59,9 @@ public class ByteBasedBitPackingEncoder {
5659
public ByteBasedBitPackingEncoder(int bitWidth, Packer packer) {
5760
this.bitWidth = bitWidth;
5861
this.inputSize = 0;
62+
this.totalFullSlabSize = 0;
5963
// must be a multiple of bitWidth
60-
this.slabSize = bitWidth * 64 * 1024;
64+
this.slabSize = (bitWidth == 0) ? 1 : (bitWidth * INITIAL_SLAB_SIZE_MULT);
6165
initPackedSlab();
6266
this.packer = packer.newBytePacker(bitWidth);
6367
}
@@ -75,6 +79,10 @@ public void writeInt(int value) throws IOException {
7579
pack();
7680
if (packedPosition == slabSize) {
7781
slabs.add(BytesInput.from(packed));
82+
totalFullSlabSize += slabSize;
83+
if (slabSize < bitWidth * MAX_SLAB_SIZE_MULT) {
84+
slabSize *= 2;
85+
}
7886
initPackedSlab();
7987
}
8088
}
@@ -99,7 +107,7 @@ private void initPackedSlab() {
99107
public BytesInput toBytes() throws IOException {
100108
int packedByteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);
101109

102-
LOG.debug("writing {} bytes", (slabs.size() * slabSize + packedByteLength));
110+
LOG.debug("writing {} bytes", (totalFullSlabSize + packedByteLength));
103111
if (inputSize > 0) {
104112
for (int i = inputSize; i < input.length; i++) {
105113
input[i] = 0;
@@ -113,18 +121,24 @@ public BytesInput toBytes() throws IOException {
113121
* @return size of the data as it would be written
114122
*/
115123
public long getBufferSize() {
116-
return BytesUtils.paddedByteCountFromBits(totalValues * bitWidth);
124+
return BytesUtils.paddedByteCountFromBits((totalValues + inputSize) * bitWidth);
117125
}
118126

119127
/**
120128
* @return total memory allocated
121129
*/
122130
public long getAllocatedSize() {
123-
return (slabs.size() * slabSize) + packed.length + input.length * 4;
131+
return totalFullSlabSize + packed.length + input.length * 4;
124132
}
125133

126134
public String memUsageString(String prefix) {
127135
return String.format("%s ByteBitPacking %d slabs, %d bytes", prefix, slabs.size(), getAllocatedSize());
128136
}
129137

138+
/**
139+
* @return number of full slabs along with the current slab (debug aid)
140+
*/
141+
int getNumSlabs() {
142+
return slabs.size() + 1;
143+
}
130144
}

parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,14 @@
1-
/*
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
55
* regarding copyright ownership. The ASF licenses this file
66
* to you under the Apache License, Version 2.0 (the
77
* "License"); you may not use this file except in compliance
88
* with the License. You may obtain a copy of the License at
9-
*
9+
*
1010
* http://www.apache.org/licenses/LICENSE-2.0
11-
*
11+
*
1212
* Unless required by applicable law or agreed to in writing,
1313
* software distributed under the License is distributed on an
1414
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -18,22 +18,28 @@
1818
*/
1919
package org.apache.parquet.column.values.bitpacking;
2020

21+
import org.apache.parquet.bytes.BytesUtils;
2122
import org.junit.Test;
2223

24+
import static org.junit.Assert.assertEquals;
25+
2326
public class TestByteBasedBitPackingEncoder {
2427

2528
@Test
2629
public void testSlabBoundary() {
27-
for (int i = 0; i < 32; i++) {
30+
for (int i = 0; i <= 32; i++) {
2831
final ByteBasedBitPackingEncoder encoder = new ByteBasedBitPackingEncoder(i, Packer.BIG_ENDIAN);
29-
// make sure to write more than a slab
30-
for (int j = 0; j < 64 * 1024 * 32 + 10; j++) {
32+
// make sure to write through the progression of slabs
33+
final int totalValues = 191 * 1024 * 8 + 10;
34+
for (int j = 0; j < totalValues; j++) {
3135
try {
3236
encoder.writeInt(j);
3337
} catch (Exception e) {
3438
throw new RuntimeException(i + ": error writing " + j, e);
3539
}
3640
}
41+
assertEquals(BytesUtils.paddedByteCountFromBits(totalValues * i), encoder.getBufferSize());
42+
assertEquals(i == 0 ? 1 : 9, encoder.getNumSlabs());
3743
}
3844
}
3945

0 commit comments

Comments
 (0)