Skip to content

Commit 3d9fc1d

Browse files
authored
[AWS] S3FileIO - Add Cross-Region Bucket Access (#11259)
1 parent 6a5ae1a commit 3d9fc1d

File tree

4 files changed

+76
-2
lines changed

4 files changed

+76
-2
lines changed

aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,29 @@ public void testNewInputStreamWithAccessPoint() throws Exception {
182182
validateRead(s3FileIO);
183183
}
184184

185+
@Test
186+
public void testCrossRegionAccessEnabled() throws Exception {
187+
clientFactory.initialize(
188+
ImmutableMap.of(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true"));
189+
S3Client s3Client = clientFactory.s3();
190+
String crossBucketObjectKey = String.format("%s/%s", prefix, UUID.randomUUID());
191+
String crossBucketObjectUri =
192+
String.format("s3://%s/%s", crossRegionBucketName, crossBucketObjectKey);
193+
try {
194+
s3Client.putObject(
195+
PutObjectRequest.builder()
196+
.bucket(crossRegionBucketName)
197+
.key(crossBucketObjectKey)
198+
.build(),
199+
RequestBody.fromBytes(contentBytes));
200+
// read the object back from the cross-region bucket through S3FileIO
201+
S3FileIO s3FileIO = new S3FileIO(clientFactory::s3);
202+
validateRead(s3FileIO, crossBucketObjectUri);
203+
} finally {
204+
AwsIntegTestUtil.cleanS3Bucket(s3Client, crossRegionBucketName, crossBucketObjectKey);
205+
}
206+
}
207+
185208
@Test
186209
public void testNewInputStreamWithCrossRegionAccessPoint() throws Exception {
187210
clientFactory.initialize(ImmutableMap.of(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true"));
@@ -550,7 +573,11 @@ private void write(S3FileIO s3FileIO, String uri) throws Exception {
550573
}
551574

552575
private void validateRead(S3FileIO s3FileIO) throws Exception {
553-
InputFile file = s3FileIO.newInputFile(objectUri);
576+
validateRead(s3FileIO, objectUri);
577+
}
578+
579+
private void validateRead(S3FileIO s3FileIO, String s3Uri) throws Exception {
580+
InputFile file = s3FileIO.newInputFile(s3Uri);
554581
assertThat(file.getLength()).isEqualTo(contentBytes.length);
555582
try (InputStream stream = file.newStream()) {
556583
String result = IoUtils.toUtf8String(stream);

aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,16 @@ public class S3FileIOProperties implements Serializable {
376376

377377
public static final boolean DUALSTACK_ENABLED_DEFAULT = false;
378378

379+
/**
380+
* Determines whether the S3 client allows cross-region bucket access. Defaults to false.
381+
*
382+
* <p>For more details, see
383+
* https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html
384+
*/
385+
public static final String CROSS_REGION_ACCESS_ENABLED = "s3.cross-region-access-enabled";
386+
387+
public static final boolean CROSS_REGION_ACCESS_ENABLED_DEFAULT = false;
388+
379389
/**
380390
* Used by {@link S3FileIO}, prefix used for bucket access point configuration. To set, we can
381391
* pass a catalog property.
@@ -442,6 +452,7 @@ public class S3FileIOProperties implements Serializable {
442452
private final Map<String, String> bucketToAccessPointMapping;
443453
private boolean isPreloadClientEnabled;
444454
private final boolean isDualStackEnabled;
455+
private final boolean isCrossRegionAccessEnabled;
445456
private final boolean isPathStyleAccess;
446457
private final boolean isUseArnRegionEnabled;
447458
private final boolean isAccelerationEnabled;
@@ -477,6 +488,7 @@ public S3FileIOProperties() {
477488
this.bucketToAccessPointMapping = Collections.emptyMap();
478489
this.isPreloadClientEnabled = PRELOAD_CLIENT_ENABLED_DEFAULT;
479490
this.isDualStackEnabled = DUALSTACK_ENABLED_DEFAULT;
491+
this.isCrossRegionAccessEnabled = CROSS_REGION_ACCESS_ENABLED_DEFAULT;
480492
this.isPathStyleAccess = PATH_STYLE_ACCESS_DEFAULT;
481493
this.isUseArnRegionEnabled = USE_ARN_REGION_ENABLED_DEFAULT;
482494
this.isAccelerationEnabled = ACCELERATION_ENABLED_DEFAULT;
@@ -521,6 +533,9 @@ public S3FileIOProperties(Map<String, String> properties) {
521533
properties, ACCELERATION_ENABLED, ACCELERATION_ENABLED_DEFAULT);
522534
this.isDualStackEnabled =
523535
PropertyUtil.propertyAsBoolean(properties, DUALSTACK_ENABLED, DUALSTACK_ENABLED_DEFAULT);
536+
this.isCrossRegionAccessEnabled =
537+
PropertyUtil.propertyAsBoolean(
538+
properties, CROSS_REGION_ACCESS_ENABLED, CROSS_REGION_ACCESS_ENABLED_DEFAULT);
524539
try {
525540
this.multiPartSize =
526541
PropertyUtil.propertyAsInt(properties, MULTIPART_SIZE, MULTIPART_SIZE_DEFAULT);
@@ -680,6 +695,10 @@ public boolean isDualStackEnabled() {
680695
return this.isDualStackEnabled;
681696
}
682697

698+
public boolean isCrossRegionAccessEnabled() {
699+
return this.isCrossRegionAccessEnabled;
700+
}
701+
683702
public boolean isPathStyleAccess() {
684703
return this.isPathStyleAccess;
685704
}
@@ -832,7 +851,7 @@ public <T extends S3ClientBuilder> void applyCredentialConfigurations(
832851

833852
/**
834853
* Configure services settings for an S3 client. The settings include: s3DualStack,
835-
* s3UseArnRegion, s3PathStyleAccess, and s3Acceleration
854+
* crossRegionAccessEnabled, s3UseArnRegion, s3PathStyleAccess, and s3Acceleration
836855
*
837856
* <p>Sample usage:
838857
*
@@ -843,6 +862,7 @@ public <T extends S3ClientBuilder> void applyCredentialConfigurations(
843862
public <T extends S3ClientBuilder> void applyServiceConfigurations(T builder) {
844863
builder
845864
.dualstackEnabled(isDualStackEnabled)
865+
.crossRegionAccessEnabled(isCrossRegionAccessEnabled)
846866
.serviceConfiguration(
847867
S3Configuration.builder()
848868
.pathStyleAccessEnabled(isPathStyleAccess)

aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ public void testS3FileIOPropertiesDefaultValues() {
7272
assertThat(S3FileIOProperties.DUALSTACK_ENABLED_DEFAULT)
7373
.isEqualTo(s3FileIOProperties.isDualStackEnabled());
7474

75+
assertThat(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED_DEFAULT)
76+
.isEqualTo(s3FileIOProperties.isCrossRegionAccessEnabled());
77+
7578
assertThat(S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT)
7679
.isEqualTo(s3FileIOProperties.isPathStyleAccess());
7780

@@ -155,6 +158,11 @@ public void testS3FileIOProperties() {
155158
S3FileIOProperties.DUALSTACK_ENABLED,
156159
String.valueOf(s3FileIOProperties.isDualStackEnabled()));
157160

161+
assertThat(map)
162+
.containsEntry(
163+
S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED,
164+
String.valueOf(s3FileIOProperties.isCrossRegionAccessEnabled()));
165+
158166
assertThat(map)
159167
.containsEntry(
160168
S3FileIOProperties.PATH_STYLE_ACCESS,
@@ -382,6 +390,7 @@ private Map<String, String> getTestProperties() {
382390
map.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
383391
map.put(S3FileIOProperties.ACCELERATION_ENABLED, "true");
384392
map.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
393+
map.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
385394
map.put(
386395
S3FileIOProperties.MULTIPART_SIZE,
387396
String.valueOf(S3FileIOProperties.MULTIPART_SIZE_DEFAULT));
@@ -427,6 +436,7 @@ public void testApplyCredentialConfigurations() {
427436
public void testApplyS3ServiceConfigurations() {
428437
Map<String, String> properties = Maps.newHashMap();
429438
properties.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
439+
properties.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
430440
properties.put(S3FileIOProperties.PATH_STYLE_ACCESS, "true");
431441
properties.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
432442
// acceleration enabled has to be set to false if path style is true
@@ -438,6 +448,7 @@ public void testApplyS3ServiceConfigurations() {
438448
ArgumentCaptor.forClass(S3Configuration.class);
439449

440450
Mockito.doReturn(mockA).when(mockA).dualstackEnabled(Mockito.anyBoolean());
451+
Mockito.doReturn(mockA).when(mockA).crossRegionAccessEnabled(Mockito.anyBoolean());
441452
Mockito.doReturn(mockA).when(mockA).serviceConfiguration(Mockito.any(S3Configuration.class));
442453

443454
s3FileIOProperties.applyServiceConfigurations(mockA);

docs/docs/aws.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,22 @@ spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCata
514514

515515
For more details on using S3 Access Grants, please refer to [Managing access with S3 Access Grants](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-grants.html).
516516

517+
### S3 Cross-Region Access
518+
519+
S3 Cross-Region bucket access can be turned on by setting catalog property `s3.cross-region-access-enabled` to `true`.
520+
This is turned off by default to avoid increased latency on the first S3 API call.
521+
522+
For example, to enable S3 Cross-Region bucket access with Spark 3.3, you can start the Spark SQL shell with:
523+
```
524+
spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
525+
--conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
526+
--conf spark.sql.catalog.my_catalog.type=glue \
527+
--conf spark.sql.catalog.my_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO \
528+
--conf spark.sql.catalog.my_catalog.s3.cross-region-access-enabled=true
529+
```
530+
531+
For more details, please refer to [Cross-Region access for Amazon S3](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html).
532+
517533
### S3 Acceleration
518534

519535
[S3 Acceleration](https://aws.amazon.com/s3/transfer-acceleration/) can be used to speed up transfers to and from Amazon S3 by as much as 50-500% for long-distance transfer of larger objects.

0 commit comments

Comments
 (0)