Skip to content

Commit 49df838

Browse files
committed
HADOOP-16697. Tune/audit S3A authoritative mode.
Contains: HADOOP-16474. S3Guard ProgressiveRenameTracker to mark destination directory as authoritative on success. HADOOP-16684. S3guard bucket info to list a bit more about authoritative paths. HADOOP-16722. S3GuardTool to support FilterFileSystem. This patch improves the marking of newly created/imported directory trees in S3Guard DynamoDB tables as authoritative. Specific changes: * Renamed directories are marked as authoritative if the entire operation succeeded (HADOOP-16474). * When updating parent table entries as part of any table write, there's no overwriting of their authoritative flag. s3guard import changes: * new -verbose flag to print out what is going on. * The "s3guard import" command lets you declare that a directory tree is to be marked as authoritative: hadoop s3guard import -authoritative -verbose s3a://bucket/path When importing a listing and a file is found, the import tool queries the metastore and only updates the entry if the file is different from before, where different == new timestamp, etag, or length. S3Guard can get timestamp differences due to clock skew in PUT operations. As the recursive list performed by the import command doesn't retrieve the versionID, the existing entry may in fact be more complete. When updating an existing entry due to clock skew the existing version ID is propagated to the new entry (note: the etags must match; this is needed to deal with inconsistent listings). There is a new s3guard command to audit a s3guard bucket/path's authoritative state: hadoop s3guard authoritative -check-config s3a://bucket/path This is primarily for testing/auditing. The s3guard bucket-info command also provides some more details on the authoritative state of a store (HADOOP-16684). Change-Id: I58001341c04f6f3597fcb4fcb1581ccefeb77d91
1 parent 9da294a commit 49df838

32 files changed

+2586
-364
lines changed

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,10 @@ public void testRenameWithNonEmptySubDir() throws Throwable {
204204
assertPathExists("not created in src/sub dir",
205205
new Path(srcSubDir, "subfile.txt"));
206206

207-
fs.rename(srcDir, finalDir);
207+
boolean rename = fs.rename(srcDir, finalDir);
208+
assertTrue("rename(" + srcDir + ", " + finalDir + ") failed",
209+
rename);
210+
208211
// Accept both POSIX rename behavior and CLI rename behavior
209212
if (renameRemoveEmptyDest) {
210213
// POSIX rename behavior

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,15 @@ public static void nameTestThread() {
8282
Thread.currentThread().setName("JUnit");
8383
}
8484

85+
@Before
86+
public void nameThread() {
87+
Thread.currentThread().setName("JUnit-" + getMethodName());
88+
}
89+
90+
protected String getMethodName() {
91+
return methodName.getMethodName();
92+
}
93+
8594
/**
8695
* This must be implemented by all instantiated test cases.
8796
* -provide the FS contract
@@ -172,6 +181,7 @@ protected int getTestTimeoutMillis() {
172181
*/
173182
@Before
174183
public void setup() throws Exception {
184+
Thread.currentThread().setName("setup");
175185
LOG.debug("== Setup ==");
176186
contract = createContract(createConfiguration());
177187
contract.init();
@@ -200,6 +210,7 @@ public void setup() throws Exception {
200210
*/
201211
@After
202212
public void teardown() throws Exception {
213+
Thread.currentThread().setName("teardown");
203214
LOG.debug("== Teardown ==");
204215
deleteTestDirInTeardown();
205216
LOG.debug("== Teardown complete ==");

hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,5 +68,10 @@
6868
<Method name="openFileWithOptions"/>
6969
<Bug pattern="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE"/>
7070
</Match>
71+
<Match>
72+
<Class name="org.apache.hadoop.fs.s3a.s3guard.S3GuardTool$BucketInfo"/>
73+
<Method name="run"/>
74+
<Bug pattern="SF_SWITCH_FALLTHROUGH"/>
75+
</Match>
7176

7277
</FindBugsFilter>

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,14 @@ public String getVersionId() {
178178
return versionId;
179179
}
180180

181+
/**
182+
* set the S3 object versionId, else null.
183+
* @param versionId version ID or null.
184+
*/
185+
public void setVersionId(final String versionId) {
186+
this.versionId = versionId;
187+
}
188+
181189
/** Compare if this object is equal to another object.
182190
* @param o the object to be compared.
183191
* @return true if two file status has the same path name; false if not.

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,7 +1245,8 @@ public boolean rename(Path src, Path dst) throws IOException {
12451245
} catch (AmazonClientException e) {
12461246
throw translateException("rename(" + src +", " + dst + ")", src, e);
12471247
} catch (RenameFailedException e) {
1248-
LOG.debug(e.getMessage());
1248+
LOG.info("{}", e.getMessage());
1249+
LOG.debug("rename failure", e);
12491250
return e.getExitCode();
12501251
} catch (FileNotFoundException e) {
12511252
LOG.debug(e.toString());
@@ -2477,7 +2478,7 @@ public FileStatus[] innerListStatus(Path f) throws FileNotFoundException,
24772478
* @param path path
24782479
* @return true if the path is auth
24792480
*/
2480-
protected boolean allowAuthoritative(final Path path) {
2481+
public boolean allowAuthoritative(final Path path) {
24812482
return S3Guard.allowAuthoritative(path, this,
24822483
allowAuthoritativeMetadataStore, allowAuthoritativePaths);
24832484
}
@@ -2720,7 +2721,7 @@ S3AFileStatus innerGetFileStatus(final Path f,
27202721
+ " s3modtime={}; msModTime={} updating metastore",
27212722
path, s3ModTime, msModTime);
27222723
return S3Guard.putAndReturn(metadataStore, s3AFileStatus,
2723-
instrumentation, ttlTimeProvider);
2724+
ttlTimeProvider);
27242725
}
27252726
}
27262727
}
@@ -2755,13 +2756,12 @@ S3AFileStatus innerGetFileStatus(final Path f,
27552756
}
27562757
// entry was found, save in S3Guard
27572758
return S3Guard.putAndReturn(metadataStore, s3FileStatus,
2758-
instrumentation, ttlTimeProvider);
2759+
ttlTimeProvider);
27592760
} else {
27602761
// there was no entry in S3Guard
27612762
// retrieve the data and update the metadata store in the process.
27622763
return S3Guard.putAndReturn(metadataStore,
27632764
s3GetFileStatus(path, key, probes, tombstones),
2764-
instrumentation,
27652765
ttlTimeProvider);
27662766
}
27672767
}
@@ -3177,12 +3177,12 @@ protected synchronized void stopAllServices() {
31773177
HadoopExecutors.shutdown(unboundedThreadPool, LOG,
31783178
THREAD_POOL_SHUTDOWN_DELAY_SECONDS, TimeUnit.SECONDS);
31793179
unboundedThreadPool = null;
3180-
closeAutocloseables(LOG, credentials);
31813180
cleanupWithLogger(LOG,
31823181
metadataStore,
31833182
instrumentation,
31843183
delegationTokens.orElse(null),
31853184
signerManager);
3185+
closeAutocloseables(LOG, credentials);
31863186
delegationTokens = Optional.empty();
31873187
signerManager = null;
31883188
credentials = null;
@@ -3529,13 +3529,21 @@ void finishedWrite(String key, long length, String eTag, String versionId,
35293529
activeState = stateToClose;
35303530
}
35313531
S3Guard.addAncestors(metadataStore, p, ttlTimeProvider, activeState);
3532+
final boolean isDir = objectRepresentsDirectory(key, length);
35323533
S3AFileStatus status = createUploadFileStatus(p,
3533-
S3AUtils.objectRepresentsDirectory(key, length), length,
3534+
isDir, length,
35343535
getDefaultBlockSize(p), username, eTag, versionId);
3535-
S3Guard.putAndReturn(metadataStore, status,
3536-
instrumentation,
3537-
ttlTimeProvider,
3538-
activeState);
3536+
if (!isDir) {
3537+
S3Guard.putAndReturn(metadataStore, status,
3538+
ttlTimeProvider,
3539+
activeState);
3540+
} else {
3541+
// this is a directory marker so put it as such.
3542+
status.setIsEmptyDirectory(Tristate.TRUE);
3543+
S3Guard.putAuthDirectoryMarker(metadataStore, status,
3544+
ttlTimeProvider,
3545+
activeState);
3546+
}
35393547
}
35403548
} catch (IOException e) {
35413549
if (failOnMetadataWriteError) {

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.hadoop.classification.InterfaceAudience;
2626
import org.apache.hadoop.classification.InterfaceStability;
2727
import org.apache.hadoop.fs.FileSystem.Statistics;
28+
import org.apache.hadoop.fs.s3a.s3guard.MetastoreInstrumentation;
2829
import org.apache.hadoop.metrics2.AbstractMetric;
2930
import org.apache.hadoop.metrics2.MetricStringBuilder;
3031
import org.apache.hadoop.metrics2.MetricsCollector;
@@ -188,6 +189,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource {
188189
S3GUARD_METADATASTORE_RECORD_WRITES,
189190
S3GUARD_METADATASTORE_RETRY,
190191
S3GUARD_METADATASTORE_THROTTLED,
192+
S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED,
191193
STORE_IO_THROTTLED,
192194
DELEGATION_TOKENS_ISSUED,
193195
FILES_DELETE_REJECTED
@@ -562,11 +564,11 @@ public InputStreamStatistics newInputStreamStatistics() {
562564
}
563565

564566
/**
565-
* Create a S3Guard instrumentation instance.
567+
* Create a MetastoreInstrumentation instrumentation instance.
566568
* There's likely to be at most one instance of this per FS instance.
567569
* @return the S3Guard instrumentation point.
568570
*/
569-
public S3GuardInstrumentation getS3GuardInstrumentation() {
571+
public MetastoreInstrumentation getS3GuardInstrumentation() {
570572
return s3GuardInstrumentation;
571573
}
572574

@@ -1127,43 +1129,35 @@ public String toString() {
11271129
/**
11281130
* Instrumentation exported to S3Guard.
11291131
*/
1130-
public final class S3GuardInstrumentation {
1132+
private final class S3GuardInstrumentation
1133+
implements MetastoreInstrumentation {
11311134

1132-
/** Initialized event. */
1135+
@Override
11331136
public void initialized() {
11341137
incrementCounter(S3GUARD_METADATASTORE_INITIALIZATION, 1);
11351138
}
11361139

1140+
@Override
11371141
public void storeClosed() {
11381142

11391143
}
11401144

1141-
/**
1142-
* Throttled request.
1143-
*/
1145+
@Override
11441146
public void throttled() {
11451147
// counters are incremented by owner.
11461148
}
11471149

1148-
/**
1149-
* S3Guard is retrying after a (retryable) failure.
1150-
*/
1150+
@Override
11511151
public void retrying() {
11521152
// counters are incremented by owner.
11531153
}
11541154

1155-
/**
1156-
* Records have been read.
1157-
* @param count the number of records read
1158-
*/
1155+
@Override
11591156
public void recordsDeleted(int count) {
11601157
incrementCounter(S3GUARD_METADATASTORE_RECORD_DELETES, count);
11611158
}
11621159

1163-
/**
1164-
* Records have been read.
1165-
* @param count the number of records read
1166-
*/
1160+
@Override
11671161
public void recordsRead(int count) {
11681162
incrementCounter(S3GUARD_METADATASTORE_RECORD_READS, count);
11691163
}
@@ -1172,10 +1166,25 @@ public void recordsRead(int count) {
11721166
* records have been written (including deleted).
11731167
* @param count number of records written.
11741168
*/
1169+
@Override
11751170
public void recordsWritten(int count) {
11761171
incrementCounter(S3GUARD_METADATASTORE_RECORD_WRITES, count);
11771172
}
11781173

1174+
@Override
1175+
public void directoryMarkedAuthoritative() {
1176+
incrementCounter(S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED,
1177+
1);
1178+
}
1179+
1180+
@Override
1181+
public void entryAdded(final long durationNanos) {
1182+
addValueToQuantiles(
1183+
S3GUARD_METADATASTORE_PUT_PATH_LATENCY,
1184+
durationNanos);
1185+
incrementCounter(S3GUARD_METADATASTORE_PUT_PATH_REQUEST, 1);
1186+
}
1187+
11791188
}
11801189

11811190
/**

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,9 @@ public enum Statistic {
225225
S3GUARD_METADATASTORE_THROTTLE_RATE(
226226
"s3guard_metadatastore_throttle_rate",
227227
"S3Guard metadata store throttle rate"),
228+
S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED(
229+
"s3guard_metadatastore_authoritative_directories_updated",
230+
"S3Guard metadata store authoritative directories updated from S3"),
228231

229232
STORE_IO_THROTTLED("store_io_throttled", "Requests throttled and retried"),
230233

0 commit comments

Comments
 (0)