forked from awslabs/analytics-accelerator-s3
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement very first version of seekable stream (awslabs#15)
Implement very first version of seekable stream. In this commit we are adding the first version of a seekable stream that calls the object client under the hood. For now, seeking is extremely trivial and reading data amounts to proxying read calls down to the GetObject stream. The testing part is more important. On top of unit tests we set up the ability to test against a mocked version of S3. In the test scope we also add an in-memory stream backed by a byte array. Reading out of this stream and seeking in this stream is very easy to implement correctly and this can provide a good base for ensuring correctness in the future. We also add Checkstyle to enforce Javadoc comments.
- Loading branch information
Showing
16 changed files
with
573 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ plugins { | |
|
||
// Formatting | ||
id("com.diffplug.spotless") | ||
checkstyle | ||
} | ||
|
||
jacoco { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
<?xml version="1.0"?>
<!DOCTYPE module PUBLIC
    "-//Checkstyle//DTD Checkstyle Configuration 1.3//EN"
    "https://checkstyle.org/dtds/configuration_1_3.dtd">

<!--
  Checkstyle configuration that enforces the presence of Javadoc.
  Formatting is not checked here.
-->
<module name="Checker">
  <!-- Suppressions are read from suppression.xml in the same config directory. -->
  <module name="SuppressionFilter">
    <property name="file" value="${config_loc}/suppression.xml"/>
  </module>

  <module name="TreeWalker">
    <!-- Public methods need Javadoc unless they carry one of the listed annotations. -->
    <module name="MissingJavadocMethod">
      <property name="scope" value="public"/>
      <property name="allowedAnnotations" value="Override,BeforeAll,Before,Test"/>
    </module>
    <!-- Every package needs a package-info.java with Javadoc. -->
    <module name="MissingJavadocPackage"/>
    <!-- Public types need Javadoc. -->
    <module name="MissingJavadocType">
      <property name="scope" value="public"/>
    </module>
  </module>
</module>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
<?xml version="1.0"?>
<!DOCTYPE suppressions PUBLIC "-//Checkstyle//DTD SuppressionFilter Configuration 1.0//EN"
    "https://checkstyle.org/dtds/suppressions_1_0.dtd">

<!-- Checkstyle suppressions: files under src/test/java are exempt from MissingJavadocType. -->
<suppressions>
  <suppress
      files=".*/src/test/java/.*"
      checks="MissingJavadocType"/>
</suppressions>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 0 additions & 18 deletions
18
input-stream/src/main/java/com/amazon/connector/s3/InputStream.java
This file was deleted.
Oops, something went wrong.
88 changes: 88 additions & 0 deletions
88
input-stream/src/main/java/com/amazon/connector/s3/S3SeekableInputStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
package com.amazon.connector.s3; | ||
|
||
import com.amazon.connector.s3.util.S3URI; | ||
import com.google.common.base.Preconditions; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.Objects; | ||
import software.amazon.awssdk.services.s3.model.GetObjectRequest; | ||
|
||
/** | ||
* High throughput seekable stream used to read data from Amazon S3. | ||
* | ||
* <p>Don't share between threads. The current implementation is not thread safe in that calling | ||
* {@link #seek(long) seek} will modify the position of the stream and the behaviour of calling | ||
* {@link #seek(long) seek} and {@link #read() read} concurrently from two different threads is | ||
* undefined. | ||
*/ | ||
public class S3SeekableInputStream extends SeekableInputStream { | ||
private final ObjectClient objectClient; | ||
private final S3URI uri; | ||
|
||
private long position; | ||
private InputStream stream; | ||
|
||
/** | ||
* Creates a new instance of {@link S3SeekableInputStream}. | ||
* | ||
* @param objectClient an instance of {@link ObjectClient}. | ||
* @param uri location of the S3 object this stream is fetching data from | ||
*/ | ||
public S3SeekableInputStream(ObjectClient objectClient, S3URI uri) throws IOException { | ||
Preconditions.checkNotNull(objectClient, "objectClient must not be null"); | ||
Preconditions.checkNotNull(uri, "S3 URI must not be null"); | ||
|
||
this.objectClient = objectClient; | ||
this.uri = uri; | ||
|
||
this.position = 0; | ||
requestBytes(position); | ||
} | ||
|
||
@Override | ||
public int read() throws IOException { | ||
int byteRead = stream.read(); | ||
|
||
if (byteRead < 0) { | ||
return -1; | ||
} | ||
|
||
this.position++; | ||
return byteRead; | ||
} | ||
|
||
@Override | ||
public void seek(long pos) throws IOException { | ||
try { | ||
requestBytes(pos); | ||
this.position = pos; | ||
} catch (Exception e) { | ||
throw new IOException(String.format("Unable to seek to position %s", pos)); | ||
} | ||
} | ||
|
||
@Override | ||
public long getPos() { | ||
return this.position; | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
super.close(); | ||
this.stream.close(); | ||
} | ||
|
||
private void requestBytes(long pos) throws IOException { | ||
if (Objects.nonNull(this.stream)) { | ||
this.stream.close(); | ||
} | ||
|
||
this.stream = | ||
this.objectClient.getObject( | ||
GetObjectRequest.builder() | ||
.bucket(uri.getBucket()) | ||
.key(uri.getKey()) | ||
.range(String.format("bytes=%s-", pos)) | ||
.build()); | ||
} | ||
} |
30 changes: 30 additions & 0 deletions
30
input-stream/src/main/java/com/amazon/connector/s3/SeekableInputStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package com.amazon.connector.s3; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
|
||
/**
 * An {@link InputStream} extended with two operations for random access: {@link #seek(long) seek}
 * to jump to a position and {@link #getPos() getPos} to report the current one. Seekable streams
 * are typically used when data access is not strictly sequential (i.e., it jumps around or needs
 * backwards seeks).
 *
 * <p>Implementations should implement {@link #close() close} to release resources.
 */
public abstract class SeekableInputStream extends InputStream {

  /**
   * Reports the current position in the stream.
   *
   * @return the position in the stream
   */
  public abstract long getPos();

  /**
   * Jumps to the given position inside the stream.
   *
   * @param pos the target position in the stream, in bytes (zero-indexed)
   * @throws IOException if the implementation is unable to perform the seek
   */
  public abstract void seek(long pos) throws IOException;
}
25 changes: 25 additions & 0 deletions
25
input-stream/src/main/java/com/amazon/connector/s3/util/S3URI.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package com.amazon.connector.s3.util; | ||
|
||
import com.google.common.base.Preconditions; | ||
import lombok.Data; | ||
|
||
/** Container for representing an 's3://' or 's3a://'-style S3 location. */ | ||
@Data | ||
public class S3URI { | ||
|
||
private final String bucket; | ||
private final String key; | ||
|
||
private S3URI(String bucket, String key) { | ||
this.bucket = bucket; | ||
this.key = key; | ||
} | ||
|
||
/** Given a bucket and a key, creates an S3URI object. */ | ||
public static S3URI of(String bucket, String key) { | ||
Preconditions.checkNotNull(bucket, "bucket must be non-null"); | ||
Preconditions.checkNotNull(key, "key must be non-null"); | ||
|
||
return new S3URI(bucket, key); | ||
} | ||
} |
25 changes: 0 additions & 25 deletions
25
input-stream/src/test/java/com/amazon/connector/s3/InputStreamTest.java
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.