Skip to content

Commit 8f78aeb

Browse files
authored
HADOOP-17015. ABFS: Handling Rename and Delete idempotency
Contributed by Sneha Vijayarajan.
1 parent d4e3640 commit 8f78aeb

File tree

12 files changed

+409
-18
lines changed

12 files changed

+409
-18
lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -764,6 +764,11 @@ public void setMaxIoRetries(int maxIoRetries) {
764764
this.maxIoRetries = maxIoRetries;
765765
}
766766

767+
@VisibleForTesting
// Test-only hook: overrides the configured maximum retry backoff interval (ms)
// so tests can shorten retry waits.
void setMaxBackoffIntervalMilliseconds(int maxBackoffInterval) {
  this.maxBackoffInterval = maxBackoffInterval;
}
771+
767772
@VisibleForTesting
768773
void setIsNamespaceEnabledAccount(String isNamespaceEnabledAccount) {
769774
this.isNamespaceEnabledAccount = isNamespaceEnabledAccount;

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
import java.nio.charset.CharsetDecoder;
3535
import java.nio.charset.CharsetEncoder;
3636
import java.nio.charset.StandardCharsets;
37-
import java.text.ParseException;
3837
import java.text.SimpleDateFormat;
3938
import java.time.Instant;
4039
import java.util.ArrayList;
@@ -96,6 +95,7 @@
9695
import org.apache.hadoop.fs.azurebfs.services.AbfsPerfInfo;
9796
import org.apache.hadoop.fs.azurebfs.utils.Base64;
9897
import org.apache.hadoop.fs.azurebfs.utils.CRC64;
98+
import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils;
9999
import org.apache.hadoop.fs.azurebfs.utils.UriUtils;
100100
import org.apache.hadoop.fs.permission.AclEntry;
101101
import org.apache.hadoop.fs.permission.AclStatus;
@@ -128,7 +128,6 @@ public class AzureBlobFileSystemStore implements Closeable {
128128
private URI uri;
129129
private String userName;
130130
private String primaryUserGroup;
131-
private static final String DATE_TIME_PATTERN = "E, dd MMM yyyy HH:mm:ss z";
132131
private static final String TOKEN_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSSSSSS'Z'";
133132
private static final String XMS_PROPERTIES_ENCODING = "ISO-8859-1";
134133
private static final int GET_SET_AGGREGATE_COUNT = 2;
@@ -672,7 +671,7 @@ public FileStatus getFileStatus(final Path path) throws IOException {
672671
resourceIsDir,
673672
1,
674673
blockSize,
675-
parseLastModifiedTime(lastModified),
674+
DateTimeUtils.parseLastModifiedTime(lastModified),
676675
path,
677676
eTag);
678677
}
@@ -748,7 +747,8 @@ public FileStatus[] listStatus(final Path path, final String startFrom) throws I
748747
long contentLength = entry.contentLength() == null ? 0 : entry.contentLength();
749748
boolean isDirectory = entry.isDirectory() == null ? false : entry.isDirectory();
750749
if (entry.lastModified() != null && !entry.lastModified().isEmpty()) {
751-
lastModifiedMillis = parseLastModifiedTime(entry.lastModified());
750+
lastModifiedMillis = DateTimeUtils.parseLastModifiedTime(
751+
entry.lastModified());
752752
}
753753

754754
Path entryPath = new Path(File.separator + entry.name());
@@ -1235,18 +1235,6 @@ private boolean parseIsDirectory(final String resourceType) {
12351235
&& resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY);
12361236
}
12371237

1238-
private long parseLastModifiedTime(final String lastModifiedTime) {
1239-
long parsedTime = 0;
1240-
try {
1241-
Date utcDate = new SimpleDateFormat(DATE_TIME_PATTERN, Locale.US).parse(lastModifiedTime);
1242-
parsedTime = utcDate.getTime();
1243-
} catch (ParseException e) {
1244-
LOG.error("Failed to parse the date {}", lastModifiedTime);
1245-
} finally {
1246-
return parsedTime;
1247-
}
1248-
}
1249-
12501238
private String convertXmsPropertiesToCommaSeparatedString(final Hashtable<String, String> properties) throws
12511239
CharacterCodingException {
12521240
StringBuilder commaSeparatedProperties = new StringBuilder();

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,5 +81,8 @@ public final class FileSystemConfigurations {
8181
public static final String DEFAULT_FS_AZURE_USER_AGENT_PREFIX = EMPTY_STRING;
8282
public static final String DEFAULT_VALUE_UNKNOWN = "UNKNOWN";
8383

84+
public static final boolean DEFAULT_DELETE_CONSIDERED_IDEMPOTENT = true;
85+
public static final int DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS = 5 * 60 * 1000; // 5 mins
86+
8487
private FileSystemConfigurations() {}
8588
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@
2121
import java.io.Closeable;
2222
import java.io.IOException;
2323
import java.io.UnsupportedEncodingException;
24+
import java.net.HttpURLConnection;
2425
import java.net.MalformedURLException;
2526
import java.net.URL;
2627
import java.net.URLEncoder;
28+
import java.time.Instant;
2729
import java.util.ArrayList;
2830
import java.util.List;
2931
import java.util.Locale;
@@ -44,9 +46,11 @@
4446
import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider;
4547
import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
4648
import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider;
49+
import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils;
4750
import org.apache.hadoop.io.IOUtils;
4851

4952
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.*;
53+
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_DELETE_CONSIDERED_IDEMPOTENT;
5054
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME;
5155
import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*;
5256
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*;
@@ -320,7 +324,51 @@ public AbfsRestOperation renamePath(String source, final String destination, fin
320324
HTTP_METHOD_PUT,
321325
url,
322326
requestHeaders);
327+
Instant renameRequestStartTime = Instant.now();
323328
op.execute();
329+
330+
if (op.getResult().getStatusCode() != HttpURLConnection.HTTP_OK) {
331+
return renameIdempotencyCheckOp(renameRequestStartTime, op, destination);
332+
}
333+
334+
return op;
335+
}
336+
337+
/**
 * Check if the rename request failure is post a retry and if earlier rename
 * request might have succeeded at back-end.
 *
 * If there is a parallel rename activity happening from any other store
 * interface, the logic here will detect the rename to have happened due to
 * the one initiated from this ABFS filesystem instance as it was retried. This
 * should be a corner case hence going ahead with LMT check.
 * @param renameRequestStartTime startTime for the rename request
 * @param op Rename request REST operation response
 * @param destination rename destination path
 * @return REST operation response post idempotency check
 * @throws AzureBlobFileSystemException if GetFileStatus hits any exception
 */
public AbfsRestOperation renameIdempotencyCheckOp(
    final Instant renameRequestStartTime,
    final AbfsRestOperation op,
    final String destination) throws AzureBlobFileSystemException {
  // Only applies when the request was actually retried; a first-attempt 404
  // genuinely means the source is missing.
  if ((op.isARetriedRequest())
      && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND)) {
    // Server has returned HTTP 404, which means rename source no longer
    // exists. Check on destination status and if it has a recent LMT timestamp.
    // If yes, return success, else fall back to original rename request failure response.

    final AbfsRestOperation destStatusOp = getPathStatus(destination, false);
    if (destStatusOp.getResult().getStatusCode() == HttpURLConnection.HTTP_OK) {
      String lmt = destStatusOp.getResult().getResponseHeader(
          HttpHeaderConfigurations.LAST_MODIFIED);

      // Destination modified at/after the original request start (within
      // clock skew) implies the earlier, retried rename already succeeded.
      if (DateTimeUtils.isRecentlyModified(lmt, renameRequestStartTime)) {
        return destStatusOp;
      }
    }
  }

  // Idempotency check did not confirm success; surface the original failure.
  return op;
}
326374

@@ -476,6 +524,45 @@ public AbfsRestOperation deletePath(final String path, final boolean recursive,
476524
url,
477525
requestHeaders);
478526
op.execute();
527+
528+
if (op.getResult().getStatusCode() != HttpURLConnection.HTTP_OK) {
529+
return deleteIdempotencyCheckOp(op);
530+
}
531+
532+
return op;
533+
}
534+
535+
/**
 * Check if the delete request failure is post a retry and if delete failure
 * qualifies to be a success response assuming idempotency.
 *
 * There are below scenarios where delete could be incorrectly deducted as
 * success post request retry:
 * 1. Target was originally not existing and initial delete request had to be
 * re-tried.
 * 2. Parallel delete issued from any other store interface rather than
 * delete issued from this filesystem instance.
 * These are few corner cases and usually returning a success at this stage
 * should help the job to continue.
 * @param op Delete request REST operation response
 * @return REST operation response post idempotency check
 */
public AbfsRestOperation deleteIdempotencyCheckOp(final AbfsRestOperation op) {
  // Only applies on a retried request; DEFAULT_DELETE_CONSIDERED_IDEMPOTENT
  // is the compile-time switch for this behavior.
  if ((op.isARetriedRequest())
      && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND)
      && DEFAULT_DELETE_CONSIDERED_IDEMPOTENT) {
    // Server has returned HTTP 404, which means path no longer
    // exists. Assuming delete result to be idempotent, return success.
    final AbfsRestOperation successOp = new AbfsRestOperation(
        AbfsRestOperationType.DeletePath,
        this,
        HTTP_METHOD_DELETE,
        op.getUrl(),
        op.getRequestHeaders());
    // Synthesize an HTTP 200 result without issuing another request.
    successOp.hardSetResult(HttpURLConnection.HTTP_OK);
    return successOp;
  }

  return op;
}
}
481568

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,19 @@ public class AbfsHttpOperation implements AbfsPerfLoggable {
8181
private long sendRequestTimeMs;
8282
private long recvResponseTimeMs;
8383

84+
/**
 * Creates an AbfsHttpOperation that reports a fixed HTTP status without
 * performing any network call — used to synthesize a result for an
 * operation whose outcome is decided locally (e.g. idempotency checks).
 * @param url request URL the result is associated with
 * @param method HTTP method name
 * @param httpStatus status code the operation should report
 * @return operation instance carrying only the fixed result fields
 */
public static AbfsHttpOperation getAbfsHttpOperationWithFixedResult(final URL url,
    final String method, final int httpStatus) {
  return new AbfsHttpOperation(url, method, httpStatus);
}

/**
 * Fixed-result constructor: sets only url/method/status; no connection is
 * opened, so response-derived fields stay at their defaults.
 */
private AbfsHttpOperation(final URL url, final String method,
    final int httpStatus) {
  this.isTraceEnabled = LOG.isTraceEnabled();
  this.url = url;
  this.method = method;
  this.statusCode = httpStatus;
}
8497
protected HttpURLConnection getConnection() {
8598
return connection;
8699
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,31 @@ public class AbfsRestOperation {
6363
private byte[] buffer;
6464
private int bufferOffset;
6565
private int bufferLength;
66+
private int retryCount = 0;
6667

6768
private AbfsHttpOperation result;
6869

6970
public AbfsHttpOperation getResult() {
7071
return result;
7172
}
7273

74+
/**
 * Overrides the operation result with a synthetic response carrying the
 * given HTTP status; no request is issued. Preserves this operation's URL
 * and method in the synthesized result.
 * @param httpStatus status code the synthetic result reports
 */
public void hardSetResult(int httpStatus) {
  result = AbfsHttpOperation.getAbfsHttpOperationWithFixedResult(this.url,
      this.method, httpStatus);
}

/** @return the request URL of this operation. */
public URL getUrl() {
  return url;
}

/** @return the HTTP headers sent with this request. */
public List<AbfsHttpHeader> getRequestHeaders() {
  return requestHeaders;
}

/** @return true if this operation was attempted more than once. */
public boolean isARetriedRequest() {
  return (retryCount > 0);
}
90+
7391
String getSasToken() {
7492
return sasToken;
7593
}
@@ -157,7 +175,7 @@ void execute() throws AzureBlobFileSystemException {
157175
requestHeaders.add(httpHeader);
158176
}
159177

160-
int retryCount = 0;
178+
retryCount = 0;
161179
LOG.debug("First execution of REST operation - {}", operationType);
162180
while (!executeHttpOperation(retryCount++)) {
163181
try {
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.azurebfs.utils;
20+
21+
import java.text.ParseException;
22+
import java.text.SimpleDateFormat;
23+
import java.time.Instant;
24+
import java.util.Date;
25+
import java.util.Locale;
26+
27+
import org.slf4j.Logger;
28+
import org.slf4j.LoggerFactory;
29+
30+
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS;
31+
32+
public final class DateTimeUtils {
33+
private static final Logger LOG = LoggerFactory.getLogger(DateTimeUtils.class);
34+
private static final String DATE_TIME_PATTERN = "E, dd MMM yyyy HH:mm:ss z";
35+
36+
public static long parseLastModifiedTime(final String lastModifiedTime) {
37+
long parsedTime = 0;
38+
try {
39+
Date utcDate = new SimpleDateFormat(DATE_TIME_PATTERN, Locale.US)
40+
.parse(lastModifiedTime);
41+
parsedTime = utcDate.getTime();
42+
} catch (ParseException e) {
43+
LOG.error("Failed to parse the date {}", lastModifiedTime);
44+
} finally {
45+
return parsedTime;
46+
}
47+
}
48+
49+
/**
50+
* Tries to identify if an operation was recently executed based on the LMT of
51+
* a file or folder. LMT needs to be more recent that the original request
52+
* start time. To include any clock skew with server, LMT within
53+
* DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS from the request start time is going
54+
* to be considered to qualify for recent operation.
55+
* @param lastModifiedTime File/Folder LMT
56+
* @param expectedLMTUpdateTime original request timestamp which should
57+
* have updated the LMT on target
58+
* @return true if the LMT is within timespan for recent operation, else false
59+
*/
60+
public static boolean isRecentlyModified(final String lastModifiedTime,
61+
final Instant expectedLMTUpdateTime) {
62+
long lmtEpochTime = DateTimeUtils.parseLastModifiedTime(lastModifiedTime);
63+
long currentEpochTime = expectedLMTUpdateTime.toEpochMilli();
64+
65+
return ((lmtEpochTime > currentEpochTime)
66+
|| ((currentEpochTime - lmtEpochTime) <= DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS));
67+
}
68+
69+
private DateTimeUtils() {
70+
}
71+
}

hadoop-tools/hadoop-azure/src/site/markdown/abfs.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,18 @@ Config `fs.azure.account.hns.enabled` provides an option to specify whether
740740
Config `fs.azure.enable.check.access` needs to be set true to enable
741741
the AzureBlobFileSystem.access().
742742

743+
### <a name="idempotency"></a> Operation Idempotency
744+
745+
Requests failing due to server timeouts and network failures will be retried.
746+
PUT/POST operations are idempotent and need no specific handling
747+
except for Rename and Delete operations.
748+
749+
Rename idempotency checks are made by ensuring the LastModifiedTime on destination
750+
is recent if source path is found to be non-existent on retry.
751+
752+
Delete is considered to be idempotent by default if the target does not exist on
753+
retry.
754+
743755
### <a name="featureconfigoptions"></a> Primary User Group Options
744756
The group name which is part of FileStatus and AclStatus will be set the same as
745757
the username if the following config is set to true

0 commit comments

Comments
 (0)