Skip to content

Commit f1e775b

Browse files
committed
HADOOP-18830. Cut S3 Select (apache#6144)
Cut out S3 Select * leave public/unstable constants alone * s3guard tool will fail with error * s3afs. path capability will fail * openFile() will fail with specific error * s3 select doc updated * New test: ITestSelectUnsupported verifies new failure handling above This is the v1 SDK version of the feature. Contributed by Steve Loughran Change-Id: Ic18c49562e5143a2a2204d66840be149db486b02
1 parent 8a7fb26 commit f1e775b

33 files changed

+738
-5520
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 8 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,6 @@
7272
import com.amazonaws.services.s3.model.PutObjectRequest;
7373
import com.amazonaws.services.s3.model.PutObjectResult;
7474
import com.amazonaws.services.s3.model.S3Object;
75-
import com.amazonaws.services.s3.model.SelectObjectContentRequest;
76-
import com.amazonaws.services.s3.model.SelectObjectContentResult;
7775
import com.amazonaws.services.s3.model.StorageClass;
7876
import com.amazonaws.services.s3.model.UploadPartRequest;
7977
import com.amazonaws.services.s3.model.UploadPartResult;
@@ -180,8 +178,6 @@
180178
import org.apache.hadoop.fs.s3a.commit.PutTracker;
181179
import org.apache.hadoop.fs.s3a.commit.MagicCommitIntegration;
182180
import org.apache.hadoop.fs.s3a.impl.ChangeTracker;
183-
import org.apache.hadoop.fs.s3a.select.SelectBinding;
184-
import org.apache.hadoop.fs.s3a.select.SelectConstants;
185181
import org.apache.hadoop.fs.s3a.s3guard.S3Guard;
186182
import org.apache.hadoop.fs.s3a.statistics.BlockOutputStreamStatistics;
187183
import org.apache.hadoop.fs.s3a.statistics.CommitterStatistics;
@@ -1562,8 +1558,7 @@ public FSDataInputStream open(Path f, int bufferSize)
15621558
/**
15631559
* Opens an FSDataInputStream at the indicated Path.
15641560
* The {@code fileInformation} parameter controls how the file
1565-
* is opened, whether it is normal vs. an S3 select call,
1566-
* can a HEAD be skipped, etc.
1561+
* is opened, can a HEAD be skipped, etc.
15671562
* @param path the file to open
15681563
* @param fileInformation information about the file to open
15691564
* @throws IOException IO failure.
@@ -1689,11 +1684,6 @@ public <T> CompletableFuture<T> submit(final CallableRaisingIOE<T> operation) {
16891684
private final class WriteOperationHelperCallbacksImpl
16901685
implements WriteOperationHelper.WriteOperationHelperCallbacks {
16911686

1692-
@Override
1693-
public SelectObjectContentResult selectObjectContent(SelectObjectContentRequest request) {
1694-
return s3.selectObjectContent(request);
1695-
}
1696-
16971687
@Override
16981688
public CompleteMultipartUploadResult completeMultipartUpload(
16991689
CompleteMultipartUploadRequest request) {
@@ -1707,7 +1697,7 @@ public CompleteMultipartUploadResult completeMultipartUpload(
17071697
* using FS state as well as the status.
17081698
* @param fileStatus file status.
17091699
* @param auditSpan audit span.
1710-
* @return a context for read and select operations.
1700+
* @return a context for read operations.
17111701
*/
17121702
@VisibleForTesting
17131703
protected S3AReadOpContext createReadContext(
@@ -5135,11 +5125,6 @@ public boolean hasPathCapability(final Path path, final String capability)
51355125
// capability depends on FS configuration
51365126
return isMagicCommitEnabled();
51375127

5138-
case SelectConstants.S3_SELECT_CAPABILITY:
5139-
// select is only supported if enabled and client side encryption is
5140-
// disabled.
5141-
return !isCSEEnabled && SelectBinding.isSelectEnabled(getConf());
5142-
51435128
case CommonPathCapabilities.FS_CHECKSUMS:
51445129
// capability depends on FS configuration
51455130
return getConf().getBoolean(ETAG_CHECKSUM_ENABLED,
@@ -5224,85 +5209,6 @@ public AWSCredentialProviderList shareCredentials(final String purpose) {
52245209
return credentials.share();
52255210
}
52265211

5227-
/**
5228-
* This is a proof of concept of a select API.
5229-
* @param source path to source data
5230-
* @param options request configuration from the builder.
5231-
* @param fileInformation any passed in information.
5232-
* @return the stream of the results
5233-
* @throws IOException IO failure
5234-
*/
5235-
@Retries.RetryTranslated
5236-
@AuditEntryPoint
5237-
private FSDataInputStream select(final Path source,
5238-
final Configuration options,
5239-
final OpenFileSupport.OpenFileInformation fileInformation)
5240-
throws IOException {
5241-
requireSelectSupport(source);
5242-
final AuditSpan auditSpan = entryPoint(OBJECT_SELECT_REQUESTS, source);
5243-
final Path path = makeQualified(source);
5244-
String expression = fileInformation.getSql();
5245-
final S3AFileStatus fileStatus = extractOrFetchSimpleFileStatus(path,
5246-
fileInformation);
5247-
5248-
// readahead range can be dynamically set
5249-
S3ObjectAttributes objectAttributes = createObjectAttributes(
5250-
path, fileStatus);
5251-
ChangeDetectionPolicy changePolicy = fileInformation.getChangePolicy();
5252-
S3AReadOpContext readContext = createReadContext(
5253-
fileStatus,
5254-
auditSpan);
5255-
fileInformation.applyOptions(readContext);
5256-
5257-
if (changePolicy.getSource() != ChangeDetectionPolicy.Source.None
5258-
&& fileStatus.getEtag() != null) {
5259-
// if there is change detection, and the status includes at least an
5260-
// etag,
5261-
// check that the object metadata lines up with what is expected
5262-
// based on the object attributes (which may contain an eTag or
5263-
// versionId).
5264-
// This is because the select API doesn't offer this.
5265-
// (note: this is trouble for version checking as cannot force the old
5266-
// version in the final read; nor can we check the etag match)
5267-
ChangeTracker changeTracker =
5268-
new ChangeTracker(uri.toString(),
5269-
changePolicy,
5270-
readContext.getS3AStatisticsContext()
5271-
.newInputStreamStatistics()
5272-
.getChangeTrackerStatistics(),
5273-
objectAttributes);
5274-
5275-
// will retry internally if wrong version detected
5276-
Invoker readInvoker = readContext.getReadInvoker();
5277-
getObjectMetadata(path, changeTracker, readInvoker, "select");
5278-
}
5279-
// instantiate S3 Select support using the current span
5280-
// as the active span for operations.
5281-
SelectBinding selectBinding = new SelectBinding(
5282-
createWriteOperationHelper(auditSpan));
5283-
5284-
// build and execute the request
5285-
return selectBinding.select(
5286-
readContext,
5287-
expression,
5288-
options,
5289-
objectAttributes);
5290-
}
5291-
5292-
/**
5293-
* Verify the FS supports S3 Select.
5294-
* @param source source file.
5295-
* @throws UnsupportedOperationException if not.
5296-
*/
5297-
private void requireSelectSupport(final Path source) throws
5298-
UnsupportedOperationException {
5299-
if (!isCSEEnabled && !SelectBinding.isSelectEnabled(getConf())) {
5300-
5301-
throw new UnsupportedOperationException(
5302-
SelectConstants.SELECT_UNSUPPORTED);
5303-
}
5304-
}
5305-
53065212
/**
53075213
* Get the file status of the source file.
53085214
* If in the fileInformation parameter return that
@@ -5333,16 +5239,14 @@ private S3AFileStatus extractOrFetchSimpleFileStatus(
53335239
}
53345240

53355241
/**
5336-
* Initiate the open() or select() operation.
5242+
* Initiate the open() operation.
53375243
* This is invoked from both the FileSystem and FileContext APIs.
53385244
* It's declared as an audit entry point but the span creation is pushed
5339-
* down into the open/select methods it ultimately calls.
5245+
* down into the open operation s it ultimately calls.
53405246
* @param rawPath path to the file
53415247
* @param parameters open file parameters from the builder.
5342-
* @return a future which will evaluate to the opened/selected file.
5248+
* @return a future which will evaluate to the opened file.
53435249
* @throws IOException failure to resolve the link.
5344-
* @throws PathIOException operation is a select request but S3 select is
5345-
* disabled
53465250
* @throws IllegalArgumentException unknown mandatory key
53475251
*/
53485252
@Override
@@ -5358,20 +5262,9 @@ public CompletableFuture<FSDataInputStream> openFileWithOptions(
53585262
parameters,
53595263
getDefaultBlockSize());
53605264
CompletableFuture<FSDataInputStream> result = new CompletableFuture<>();
5361-
if (!fileInformation.isS3Select()) {
5362-
// normal path.
5363-
unboundedThreadPool.submit(() ->
5364-
LambdaUtils.eval(result,
5365-
() -> executeOpen(path, fileInformation)));
5366-
} else {
5367-
// it is a select statement.
5368-
// fail fast if the operation is not available
5369-
requireSelectSupport(path);
5370-
// submit the query
5371-
unboundedThreadPool.submit(() ->
5372-
LambdaUtils.eval(result,
5373-
() -> select(path, parameters.getOptions(), fileInformation)));
5374-
}
5265+
unboundedThreadPool.submit(() ->
5266+
LambdaUtils.eval(result,
5267+
() -> executeOpen(path, fileInformation)));
53755268
return result;
53765269
}
53775270

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
/**
2828
* This class holds attributes of an object independent of the
2929
* file status type.
30-
* It is used in {@link S3AInputStream} and the select equivalent.
30+
* It is used in {@link S3AInputStream} and elsewhere.
3131
* as a way to reduce parameters being passed
3232
* to the constructor of such class,
3333
* and elsewhere to be a source-neutral representation of a file status.

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,10 +262,6 @@ public enum Statistic {
262262
StoreStatisticNames.OBJECT_PUT_BYTES_PENDING,
263263
"number of bytes queued for upload/being actively uploaded",
264264
TYPE_GAUGE),
265-
OBJECT_SELECT_REQUESTS(
266-
StoreStatisticNames.OBJECT_SELECT_REQUESTS,
267-
"Count of S3 Select requests issued",
268-
TYPE_COUNTER),
269265
STREAM_READ_ABORTED(
270266
StreamStatisticNames.STREAM_READ_ABORTED,
271267
"Count of times the TCP stream was aborted",

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java

Lines changed: 0 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@
3535
import com.amazonaws.services.s3.model.PartETag;
3636
import com.amazonaws.services.s3.model.PutObjectRequest;
3737
import com.amazonaws.services.s3.model.PutObjectResult;
38-
import com.amazonaws.services.s3.model.SelectObjectContentRequest;
39-
import com.amazonaws.services.s3.model.SelectObjectContentResult;
4038
import com.amazonaws.services.s3.model.UploadPartRequest;
4139
import com.amazonaws.services.s3.model.UploadPartResult;
4240
import org.slf4j.Logger;
@@ -51,13 +49,10 @@
5149
import org.apache.hadoop.fs.s3a.impl.PutObjectOptions;
5250
import org.apache.hadoop.fs.s3a.impl.StoreContext;
5351
import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext;
54-
import org.apache.hadoop.fs.s3a.select.SelectBinding;
5552
import org.apache.hadoop.fs.statistics.DurationTrackerFactory;
5653
import org.apache.hadoop.fs.store.audit.AuditSpan;
5754
import org.apache.hadoop.fs.store.audit.AuditSpanSource;
58-
import org.apache.hadoop.util.DurationInfo;
5955
import org.apache.hadoop.util.functional.CallableRaisingIOE;
60-
import org.apache.hadoop.util.Preconditions;
6156

6257
import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull;
6358
import static org.apache.hadoop.fs.s3a.Invoker.*;
@@ -81,7 +76,6 @@
8176
* <li>Other low-level access to S3 functions, for private use.</li>
8277
* <li>Failure handling, including converting exceptions to IOEs.</li>
8378
* <li>Integration with instrumentation.</li>
84-
* <li>Evolution to add more low-level operations, such as S3 select.</li>
8579
* </ul>
8680
*
8781
* This API is for internal use only.
@@ -659,67 +653,6 @@ public Configuration getConf() {
659653
return conf;
660654
}
661655

662-
/**
663-
* Create a S3 Select request for the destination path.
664-
* This does not build the query.
665-
* @param path pre-qualified path for query
666-
* @return the request
667-
*/
668-
public SelectObjectContentRequest newSelectRequest(Path path) {
669-
try (AuditSpan span = getAuditSpan()) {
670-
return getRequestFactory().newSelectRequest(
671-
storeContext.pathToKey(path));
672-
}
673-
}
674-
675-
/**
676-
* Execute an S3 Select operation.
677-
* On a failure, the request is only logged at debug to avoid the
678-
* select exception being printed.
679-
* @param source source for selection
680-
* @param request Select request to issue.
681-
* @param action the action for use in exception creation
682-
* @return response
683-
* @throws IOException failure
684-
*/
685-
@Retries.RetryTranslated
686-
public SelectObjectContentResult select(
687-
final Path source,
688-
final SelectObjectContentRequest request,
689-
final String action)
690-
throws IOException {
691-
// no setting of span here as the select binding is (statically) created
692-
// without any span.
693-
String bucketName = request.getBucketName();
694-
Preconditions.checkArgument(bucket.equals(bucketName),
695-
"wrong bucket: %s", bucketName);
696-
if (LOG.isDebugEnabled()) {
697-
LOG.debug("Initiating select call {} {}",
698-
source, request.getExpression());
699-
LOG.debug(SelectBinding.toString(request));
700-
}
701-
return invoker.retry(
702-
action,
703-
source.toString(),
704-
true,
705-
withinAuditSpan(getAuditSpan(), () -> {
706-
try (DurationInfo ignored =
707-
new DurationInfo(LOG, "S3 Select operation")) {
708-
try {
709-
return writeOperationHelperCallbacks.selectObjectContent(request);
710-
} catch (AmazonS3Exception e) {
711-
LOG.error("Failure of S3 Select request against {}",
712-
source);
713-
LOG.debug("S3 Select request against {}:\n{}",
714-
source,
715-
SelectBinding.toString(request),
716-
e);
717-
throw e;
718-
}
719-
}
720-
}));
721-
}
722-
723656
@Override
724657
public AuditSpan createSpan(final String operation,
725658
@Nullable final String path1,
@@ -753,13 +686,6 @@ public RequestFactory getRequestFactory() {
753686
*/
754687
public interface WriteOperationHelperCallbacks {
755688

756-
/**
757-
* Initiates a select request.
758-
* @param request selectObjectContent request
759-
* @return selectObjectContentResult
760-
*/
761-
SelectObjectContentResult selectObjectContent(SelectObjectContentRequest request);
762-
763689
/**
764690
* Initiates a complete multi-part upload request.
765691
* @param request Complete multi-part upload request

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,10 @@
3333
import com.amazonaws.services.s3.model.PartETag;
3434
import com.amazonaws.services.s3.model.PutObjectRequest;
3535
import com.amazonaws.services.s3.model.PutObjectResult;
36-
import com.amazonaws.services.s3.model.SelectObjectContentRequest;
37-
import com.amazonaws.services.s3.model.SelectObjectContentResult;
3836
import com.amazonaws.services.s3.model.UploadPartRequest;
3937
import com.amazonaws.services.s3.model.UploadPartResult;
4038

4139
import org.apache.hadoop.conf.Configuration;
42-
import org.apache.hadoop.fs.Path;
4340
import org.apache.hadoop.fs.PathIOException;
4441
import org.apache.hadoop.fs.s3a.impl.PutObjectOptions;
4542
import org.apache.hadoop.fs.statistics.DurationTrackerFactory;
@@ -305,31 +302,6 @@ UploadPartResult uploadPart(UploadPartRequest request,
305302
*/
306303
Configuration getConf();
307304

308-
/**
309-
* Create a S3 Select request for the destination path.
310-
* This does not build the query.
311-
* @param path pre-qualified path for query
312-
* @return the request
313-
*/
314-
SelectObjectContentRequest newSelectRequest(Path path);
315-
316-
/**
317-
* Execute an S3 Select operation.
318-
* On a failure, the request is only logged at debug to avoid the
319-
* select exception being printed.
320-
* @param source source for selection
321-
* @param request Select request to issue.
322-
* @param action the action for use in exception creation
323-
* @return response
324-
* @throws IOException failure
325-
*/
326-
@Retries.RetryTranslated
327-
SelectObjectContentResult select(
328-
Path source,
329-
SelectObjectContentRequest request,
330-
String action)
331-
throws IOException;
332-
333305
/**
334306
* Increment the write operation counter
335307
* of the filesystem.

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
import com.amazonaws.services.s3.model.PutObjectRequest;
4444
import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams;
4545
import com.amazonaws.services.s3.model.SSECustomerKey;
46-
import com.amazonaws.services.s3.model.SelectObjectContentRequest;
4746
import com.amazonaws.services.s3.model.StorageClass;
4847
import com.amazonaws.services.s3.model.UploadPartRequest;
4948

@@ -254,14 +253,6 @@ UploadPartRequest newUploadPartRequest(
254253
File sourceFile,
255254
long offset) throws PathIOException;
256255

257-
/**
258-
* Create a S3 Select request for the destination object.
259-
* This does not build the query.
260-
* @param key object key
261-
* @return the request
262-
*/
263-
SelectObjectContentRequest newSelectRequest(String key);
264-
265256
/**
266257
* Create the (legacy) V1 list request.
267258
* @param key key to list under

0 commit comments

Comments
 (0)