Skip to content

Commit 87b63f4

Browse files
authored Apr 24, 2024
fix: update GzipReadableByteChannel to be tolerant of one byte reads (#2512)
If the ReadChannel chunkSize is 0, no library buffering is performed during read calls. If gzip decompression support is enabled on the read channel, a caller could then read fewer than the 4 bytes we initially read to probe the object. This change reduces the initial probe to a single byte and tracks any probed bytes that have not yet been copied to the caller, ensuring single-byte reads are supported.
1 parent 7055cfc commit 87b63f4

File tree

2 files changed

+84
-9
lines changed

2 files changed

+84
-9
lines changed
 

‎google-cloud-storage/src/main/java/com/google/cloud/storage/GzipReadableByteChannel.java

+18-9
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ final class GzipReadableByteChannel implements UnbufferedReadableByteChannel {
3636

3737
private boolean retEOF = false;
3838
private ScatteringByteChannel delegate;
39+
private ByteBuffer leftovers;
3940

4041
GzipReadableByteChannel(UnbufferedReadableByteChannel source, ApiFuture<String> contentEncoding) {
4142
this.source = source;
@@ -51,11 +52,11 @@ public long read(ByteBuffer[] dsts, int offset, int length) throws IOException {
5152
// if our delegate is null, that means this is the first read attempt
5253
if (delegate == null) {
5354
// try to determine if the underlying data coming out of `source` is gzip
54-
byte[] first4 = new byte[4]; // 4 bytes = 32-bits
55-
final ByteBuffer wrap = ByteBuffer.wrap(first4);
56-
// Step 1: initiate a read of the first 4 bytes of the object
55+
byte[] firstByte = new byte[1];
56+
ByteBuffer wrap = ByteBuffer.wrap(firstByte);
57+
// Step 1: initiate a read of the first byte of the object
5758
// this will have minimal overhead as the messages coming from gcs are inherently windowed
58-
// if the object size is between 5 and 2MiB the remaining bytes will be held in the channel
59+
// if the object size is between 2 bytes and 2MiB, the remaining bytes will be held in the channel
5960
// for later read.
6061
source.read(wrap);
6162
try {
@@ -65,13 +66,13 @@ public long read(ByteBuffer[] dsts, int offset, int length) throws IOException {
6566
// this will have a copy impact as we are no longer controlling all the buffers
6667
if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
6768
// to wire gzip decompression into the byte path:
68-
// Create an input stream of the first4 bytes we already read
69-
ByteArrayInputStream first4again = new ByteArrayInputStream(first4);
69+
// Create an input stream of the first byte we already read
70+
ByteArrayInputStream firstByteAgain = new ByteArrayInputStream(firstByte);
7071
// Create an InputStream facade of source
7172
InputStream sourceInputStream = Channels.newInputStream(source);
72-
// create a new InputStream with the first4 bytes prepended to source
73+
// create a new InputStream with the first byte prepended to source
7374
SequenceInputStream first4AndSource =
74-
new SequenceInputStream(first4again, sourceInputStream);
75+
new SequenceInputStream(firstByteAgain, sourceInputStream);
7576
// add gzip decompression
7677
GZIPInputStream decompress =
7778
new GZIPInputStream(new OptimisticAvailabilityInputStream(first4AndSource));
@@ -84,14 +85,22 @@ public long read(ByteBuffer[] dsts, int offset, int length) throws IOException {
8485
// to source
8586
wrap.flip();
8687
bytesRead += Buffers.copy(wrap, dsts, offset, length);
88+
if (wrap.hasRemaining()) {
89+
leftovers = wrap;
90+
}
8791
delegate = source;
8892
}
8993
} catch (InterruptedException | ExecutionException e) {
9094
throw new IOException(e);
9195
}
96+
} else if (leftovers != null && leftovers.hasRemaining()) {
97+
bytesRead += Buffers.copy(leftovers, dsts, offset, length);
98+
if (!leftovers.hasRemaining()) {
99+
leftovers = null;
100+
}
92101
}
93102

94-
// Because we're pre-reading a few bytes of the object in order to determine if we need to
103+
// Because we're pre-reading a byte of the object in order to determine if we need to
95104
// plumb in gzip decompress, there is the possibility we will reach EOF while probing.
96105
// In order to maintain correctness of EOF propagation, determine if we will need to signal EOF
97106
// upon the next read.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.storage;
18+
19+
import static com.google.common.truth.Truth.assertThat;
20+
21+
import com.google.cloud.ReadChannel;
22+
import com.google.cloud.storage.Storage.BlobSourceOption;
23+
import com.google.cloud.storage.TransportCompatibility.Transport;
24+
import com.google.cloud.storage.it.runner.StorageITRunner;
25+
import com.google.cloud.storage.it.runner.annotations.Backend;
26+
import com.google.cloud.storage.it.runner.annotations.CrossRun;
27+
import com.google.cloud.storage.it.runner.annotations.Inject;
28+
import com.google.cloud.storage.it.runner.registry.ObjectsFixture;
29+
import com.google.cloud.storage.it.runner.registry.ObjectsFixture.ObjectAndContent;
30+
import java.io.IOException;
31+
import java.nio.ByteBuffer;
32+
import org.junit.Test;
33+
import org.junit.runner.RunWith;
34+
35+
@RunWith(StorageITRunner.class)
36+
@CrossRun(
37+
backends = {Backend.PROD},
38+
transports = {Transport.HTTP, Transport.GRPC})
39+
public final class ITReadChannelGzipHandlingTest {
40+
41+
@Inject public Storage storage;
42+
@Inject public ObjectsFixture objFixture;
43+
44+
@Test
45+
public void nonGzipObjectReadOneByteAtATimeNoLibraryBuffering() throws IOException {
46+
ObjectAndContent obj512KiB = objFixture.getObj512KiB();
47+
BlobInfo info = obj512KiB.getInfo();
48+
BlobId blobId = info.getBlobId();
49+
byte[] bytes = new byte[1];
50+
BlobSourceOption attemptGzipDecompression = BlobSourceOption.shouldReturnRawInputStream(false);
51+
try (ReadChannel reader = storage.reader(blobId, attemptGzipDecompression)) {
52+
reader.setChunkSize(0);
53+
54+
// read zero bytes, to trigger things to start up but don't actually pull out any bytes yet
55+
reader.read(ByteBuffer.allocate(0));
56+
57+
byte[] content = obj512KiB.getContent().getBytes();
58+
for (int i = 0; i < info.getSize(); i++) {
59+
int read = reader.read(ByteBuffer.wrap(bytes));
60+
assertThat(read).isEqualTo(1);
61+
byte b = bytes[0];
62+
assertThat(b).isEqualTo(content[i]);
63+
}
64+
}
65+
}
66+
}

0 commit comments

Comments
 (0)
Please sign in to comment.