Skip to content

Commit 6ed7389

Browse files
authored
HADOOP-18656. [ABFS] Add Support for Paginated Delete for Large Directories in HNS Account (#6409)
Contributed by Anuj Modi
1 parent d7157b4 commit 6ed7389

File tree

13 files changed

+456
-23
lines changed

13 files changed

+456
-23
lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,10 @@ public class AbfsConfiguration{
363363
FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, DefaultValue = DEFAULT_ENABLE_ABFS_CHECKSUM_VALIDATION)
364364
private boolean isChecksumValidationEnabled;
365365

366+
@BooleanConfigurationValidatorAnnotation(ConfigurationKey =
367+
FS_AZURE_ENABLE_PAGINATED_DELETE, DefaultValue = DEFAULT_ENABLE_PAGINATED_DELETE)
368+
private boolean isPaginatedDeleteEnabled;
369+
366370
private String clientProvidedEncryptionKey;
367371
private String clientProvidedEncryptionKeySHA;
368372

@@ -1240,8 +1244,8 @@ public boolean getRenameResilience() {
12401244
return renameResilience;
12411245
}
12421246

1243-
void setRenameResilience(boolean actualResilience) {
1244-
renameResilience = actualResilience;
1247+
public boolean isPaginatedDeleteEnabled() {
1248+
return isPaginatedDeleteEnabled;
12451249
}
12461250

12471251
public boolean getIsChecksumValidationEnabled() {

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,8 +1077,8 @@ public void delete(final Path path, final boolean recursive,
10771077

10781078
do {
10791079
try (AbfsPerfInfo perfInfo = startTracking("delete", "deletePath")) {
1080-
AbfsRestOperation op = client
1081-
.deletePath(relativePath, recursive, continuation, tracingContext);
1080+
AbfsRestOperation op = client.deletePath(relativePath, recursive,
1081+
continuation, tracingContext, getIsNamespaceEnabled(tracingContext));
10821082
perfInfo.registerResult(op.getResult());
10831083
continuation = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION);
10841084
perfInfo.registerSuccess(true);

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,37 @@ public final class AbfsHttpConstants {
121121
public static final char CHAR_EQUALS = '=';
122122
public static final char CHAR_STAR = '*';
123123
public static final char CHAR_PLUS = '+';
124-
public static final String DECEMBER_2019_API_VERSION = "2019-12-12";
125-
public static final String APRIL_2021_API_VERSION = "2021-04-10";
124+
125+
/**
126+
* Specifies the version of the REST protocol used for processing the request.
127+
* Versions must be added to the enum list in ascending chronological order, because callers compare versions with compareTo, which follows declaration order.
128+
* Latest one should be added last in the list.
129+
* When upgrading the version for the whole driver, update {@link #getCurrentVersion()}.
130+
*/
131+
public enum ApiVersion {
132+
133+
DEC_12_2019("2019-12-12"),
134+
APR_10_2021("2021-04-10"),
135+
AUG_03_2023("2023-08-03");
136+
137+
private final String xMsApiVersion;
138+
139+
ApiVersion(String xMsApiVersion) {
140+
this.xMsApiVersion = xMsApiVersion;
141+
}
142+
143+
@Override
144+
public String toString() {
145+
return xMsApiVersion;
146+
}
147+
148+
public static ApiVersion getCurrentVersion() {
149+
return DEC_12_2019;
150+
}
151+
}
152+
153+
@Deprecated
154+
public static final String DECEMBER_2019_API_VERSION = ApiVersion.DEC_12_2019.toString();
126155

127156
/**
128157
* Value that differentiates categories of the http_status.

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,11 @@ public final class ConfigurationKeys {
275275
/** Add extra resilience to rename failures, at the expense of performance. */
276276
public static final String FS_AZURE_ABFS_RENAME_RESILIENCE = "fs.azure.enable.rename.resilience";
277277

278+
/**
279+
* Specifies whether paginated behavior is expected in the delete path: {@value}.
280+
*/
281+
public static final String FS_AZURE_ENABLE_PAGINATED_DELETE = "fs.azure.enable.paginated.delete";
282+
278283
/** Add extra layer of verification of the integrity of the request content during transport: {@value}. */
279284
public static final String FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION = "fs.azure.enable.checksum.validation";
280285

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ public final class FileSystemConfigurations {
133133
public static final int STREAM_ID_LEN = 12;
134134
public static final boolean DEFAULT_ENABLE_ABFS_LIST_ITERATOR = true;
135135
public static final boolean DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE = true;
136+
public static final boolean DEFAULT_ENABLE_PAGINATED_DELETE = false;
136137
public static final boolean DEFAULT_ENABLE_ABFS_CHECKSUM_VALIDATION = false;
137138

138139
/**

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ public final class HttpQueryParams {
4040
public static final String QUERY_PARAM_CLOSE = "close";
4141
public static final String QUERY_PARAM_UPN = "upn";
4242
public static final String QUERY_PARAM_BLOBTYPE = "blobtype";
43+
public static final String QUERY_PARAM_PAGINATED = "paginated";
4344

4445
//query params for SAS
4546
public static final String QUERY_PARAM_SAOID = "saoid";

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ public class AbfsClient implements Closeable {
100100

101101
private final URL baseUrl;
102102
private final SharedKeyCredentials sharedKeyCredentials;
103-
private String xMsVersion = DECEMBER_2019_API_VERSION;
103+
private ApiVersion xMsVersion = ApiVersion.getCurrentVersion();
104104
private final ExponentialRetryPolicy exponentialRetryPolicy;
105105
private final StaticRetryPolicy staticRetryPolicy;
106106
private final String filesystem;
@@ -122,7 +122,6 @@ public class AbfsClient implements Closeable {
122122
private final ListeningScheduledExecutorService executorService;
123123
private Boolean isNamespaceEnabled;
124124

125-
126125
private boolean renameResilience;
127126

128127
/**
@@ -149,7 +148,7 @@ private AbfsClient(final URL baseUrl,
149148

150149
if (encryptionContextProvider != null) {
151150
this.encryptionContextProvider = encryptionContextProvider;
152-
xMsVersion = APRIL_2021_API_VERSION; // will be default once server change deployed
151+
xMsVersion = ApiVersion.APR_10_2021; // will be default once server change deployed
153152
encryptionType = EncryptionType.ENCRYPTION_CONTEXT;
154153
} else if (abfsConfiguration.getEncodedClientProvidedEncryptionKey() != null) {
155154
clientProvidedEncryptionKey =
@@ -259,13 +258,27 @@ AbfsThrottlingIntercept getIntercept() {
259258
return intercept;
260259
}
261260

262-
List<AbfsHttpHeader> createDefaultHeaders() {
261+
/**
262+
* Create request headers for Rest Operation using the current API version.
263+
* @return default request headers
264+
*/
265+
@VisibleForTesting
266+
protected List<AbfsHttpHeader> createDefaultHeaders() {
267+
return createDefaultHeaders(this.xMsVersion);
268+
}
269+
270+
/**
271+
* Create request headers for Rest Operation using the specified API version.
272+
* @param xMsVersion API version to set in the X_MS_VERSION request header
273+
* @return default request headers
274+
*/
275+
private List<AbfsHttpHeader> createDefaultHeaders(ApiVersion xMsVersion) {
263276
final List<AbfsHttpHeader> requestHeaders = new ArrayList<AbfsHttpHeader>();
264-
requestHeaders.add(new AbfsHttpHeader(X_MS_VERSION, xMsVersion));
277+
requestHeaders.add(new AbfsHttpHeader(X_MS_VERSION, xMsVersion.toString()));
265278
requestHeaders.add(new AbfsHttpHeader(ACCEPT, APPLICATION_JSON
266-
+ COMMA + SINGLE_WHITE_SPACE + APPLICATION_OCTET_STREAM));
279+
+ COMMA + SINGLE_WHITE_SPACE + APPLICATION_OCTET_STREAM));
267280
requestHeaders.add(new AbfsHttpHeader(ACCEPT_CHARSET,
268-
UTF_8));
281+
UTF_8));
269282
requestHeaders.add(new AbfsHttpHeader(CONTENT_TYPE, EMPTY_STRING));
270283
requestHeaders.add(new AbfsHttpHeader(USER_AGENT, userAgent));
271284
return requestHeaders;
@@ -1117,12 +1130,29 @@ public AbfsRestOperation read(final String path,
11171130
return op;
11181131
}
11191132

1120-
public AbfsRestOperation deletePath(final String path, final boolean recursive, final String continuation,
1121-
TracingContext tracingContext)
1133+
public AbfsRestOperation deletePath(final String path, final boolean recursive,
1134+
final String continuation,
1135+
TracingContext tracingContext,
1136+
final boolean isNamespaceEnabled)
11221137
throws AzureBlobFileSystemException {
1123-
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
1124-
1138+
/*
1139+
* If Pagination is enabled and current API version is old,
1140+
* use the minimum required version for pagination.
1141+
* If Pagination is enabled and current API version is later than minimum required
1142+
* version for pagination, use the current version, since the Azure service is backward compatible.
1143+
* If pagination is disabled, use the current API version only.
1144+
*/
1145+
final List<AbfsHttpHeader> requestHeaders = (isPaginatedDelete(recursive,
1146+
isNamespaceEnabled) && xMsVersion.compareTo(ApiVersion.AUG_03_2023) < 0)
1147+
? createDefaultHeaders(ApiVersion.AUG_03_2023)
1148+
: createDefaultHeaders();
11251149
final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
1150+
1151+
if (isPaginatedDelete(recursive, isNamespaceEnabled)) {
1152+
// Add paginated query parameter
1153+
abfsUriQueryBuilder.addQuery(QUERY_PARAM_PAGINATED, TRUE);
1154+
}
1155+
11261156
abfsUriQueryBuilder.addQuery(QUERY_PARAM_RECURSIVE, String.valueOf(recursive));
11271157
abfsUriQueryBuilder.addQuery(QUERY_PARAM_CONTINUATION, continuation);
11281158
String operation = recursive ? SASTokenProvider.DELETE_RECURSIVE_OPERATION : SASTokenProvider.DELETE_OPERATION;
@@ -1465,6 +1495,14 @@ private synchronized Boolean getIsNamespaceEnabled(TracingContext tracingContext
14651495
return isNamespaceEnabled;
14661496
}
14671497

1498+
protected Boolean getIsPaginatedDeleteEnabled() {
1499+
return abfsConfiguration.isPaginatedDeleteEnabled();
1500+
}
1501+
1502+
private Boolean isPaginatedDelete(boolean isRecursiveDelete, boolean isNamespaceEnabled) {
1503+
return getIsPaginatedDeleteEnabled() && isNamespaceEnabled && isRecursiveDelete;
1504+
}
1505+
14681506
public AuthType getAuthType() {
14691507
return authType;
14701508
}
@@ -1659,7 +1697,7 @@ protected AbfsCounters getAbfsCounters() {
16591697
return abfsCounters;
16601698
}
16611699

1662-
public String getxMsVersion() {
1700+
public ApiVersion getxMsVersion() {
16631701
return xMsVersion;
16641702
}
16651703

hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import java.util.concurrent.Callable;
2727

2828
import org.junit.After;
29+
import org.junit.Assume;
2930
import org.junit.Before;
3031
import org.slf4j.Logger;
3132
import org.slf4j.LoggerFactory;
@@ -215,6 +216,7 @@ public void setup() throws Exception {
215216
wasb = new NativeAzureFileSystem(azureNativeFileSystemStore);
216217
wasb.initialize(wasbUri, rawConfig);
217218
}
219+
// TODO: To be fixed in HADOOP-19137.
218220
AbfsClientUtils.setIsNamespaceEnabled(abfs.getAbfsClient(), true);
219221
}
220222

@@ -532,4 +534,10 @@ protected long assertAbfsStatistics(AbfsStatistic statistic,
532534
(long) metricMap.get(statistic.getStatName()));
533535
return expectedValue;
534536
}
537+
538+
protected void assumeValidTestConfigPresent(final Configuration conf, final String key) {
539+
String configuredValue = conf.get(key);
540+
Assume.assumeTrue(String.format("Missing Required Test Config: %s.", key),
541+
configuredValue != null && !configuredValue.isEmpty());
542+
}
535543
}

hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,8 +323,9 @@ private AbfsRestOperation callOperation(AzureBlobFileSystem fs,
323323
return client.renamePath(path, new Path(path + "_2").toString(),
324324
null, tc, null, false, fs.getIsNamespaceEnabled(tc)).getOp();
325325
case DELETE:
326+
TracingContext testTC = getTestTracingContext(fs, false);
326327
return client.deletePath(path, false, null,
327-
getTestTracingContext(fs, false));
328+
testTC, fs.getIsNamespaceEnabled(testTC));
328329
case GET_ATTR:
329330
return client.getPathStatus(path, true,
330331
getTestTracingContext(fs, false),

hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,8 @@ public void testDeleteIdempotencyTriggerHttp404() throws Exception {
242242
"/NonExistingPath",
243243
false,
244244
null,
245-
getTestTracingContext(fs, true)));
245+
getTestTracingContext(fs, true),
246+
fs.getIsNamespaceEnabled(getTestTracingContext(fs, true))));
246247

247248
// mock idempotency check to mimic retried case
248249
AbfsClient mockClient = ITestAbfsClient.getMockAbfsClient(
@@ -269,14 +270,15 @@ public void testDeleteIdempotencyTriggerHttp404() throws Exception {
269270
doReturn(idempotencyRetOp).when(mockClient).deleteIdempotencyCheckOp(any());
270271
TracingContext tracingContext = getTestTracingContext(fs, false);
271272
doReturn(tracingContext).when(idempotencyRetOp).createNewTracingContext(any());
272-
when(mockClient.deletePath("/NonExistingPath", false, null, tracingContext))
273+
when(mockClient.deletePath("/NonExistingPath", false, null,
274+
tracingContext, fs.getIsNamespaceEnabled(tracingContext)))
273275
.thenCallRealMethod();
274276

275277
Assertions.assertThat(mockClient.deletePath(
276278
"/NonExistingPath",
277279
false,
278280
null,
279-
tracingContext)
281+
tracingContext, fs.getIsNamespaceEnabled(tracingContext))
280282
.getResult()
281283
.getStatusCode())
282284
.describedAs("Idempotency check reports successful "

0 commit comments

Comments
 (0)