Skip to content

Commit cfe8d59

Browse files
committed
HADOOP-17227. Marker Tool tuning
* move from -expect to -min and -max; easier for CLI testing. Plus works * in -nonauth mode, even when policy == keep, files not in an auth path count as failure. * bucket-info option also prints out the authoritative path, so you have more idea what is happening * reporting of command failure more informative The reason for change #2 is a workflow where you want to audit a dir, even though you are in keep mode, and you don't have any auth path. You'd expect -nonauth to say "no auth path", but instead it treats the whole dir as auth. Change-Id: Ib310e321e5862957fbd92bebfade93231f92b16f
1 parent 5e12dc5 commit cfe8d59

File tree

6 files changed

+175
-63
lines changed

6 files changed

+175
-63
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1463,9 +1463,12 @@ public int run(String[] args, PrintStream out)
14631463
private void processMarkerOption(final PrintStream out,
14641464
final S3AFileSystem fs,
14651465
final String marker) {
1466+
println(out, "%nSecurity");
14661467
DirectoryPolicy markerPolicy = fs.getDirectoryMarkerPolicy();
14671468
String desc = markerPolicy.describe();
1468-
println(out, "%nThe directory marker policy is \"%s\"%n", desc);
1469+
println(out, "\tThe directory marker policy is \"%s\"%n", desc);
1470+
printOption(out, "\tAuthoritative paths",
1471+
AUTHORITATIVE_PATH, "");
14691472

14701473
DirectoryPolicy.MarkerPolicy mp = markerPolicy.getMarkerPolicy();
14711474

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java

Lines changed: 155 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
import org.apache.hadoop.fs.s3a.UnknownStoreException;
5454
import org.apache.hadoop.fs.s3a.impl.DirMarkerTracker;
5555
import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy;
56+
import org.apache.hadoop.fs.s3a.impl.DirectoryPolicyImpl;
5657
import org.apache.hadoop.fs.s3a.impl.StoreContext;
5758
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool;
5859
import org.apache.hadoop.fs.shell.CommandFormat;
@@ -113,9 +114,14 @@ public final class MarkerTool extends S3GuardTool {
113114
public static final String CLEAN = "-" + OPT_CLEAN;
114115

115116
/**
116-
* Expected number of markers to find: {@value}.
117+
* Min number of markers to find: {@value}.
117118
*/
118-
public static final String OPT_EXPECTED = "expected";
119+
public static final String OPT_MIN = "min";
120+
121+
/**
122+
* Max number of markers to find: {@value}.
123+
*/
124+
public static final String OPT_MAX = "max";
119125

120126
/**
121127
* Name of a file to save the list of markers to: {@value}.
@@ -151,13 +157,21 @@ public final class MarkerTool extends S3GuardTool {
151157
public static final int UNLIMITED_LISTING = 0;
152158

153159

160+
/**
161+
* Constant to use when there is no minimum number of
162+
* markers: {@value}.
163+
*/
164+
public static final int UNLIMITED_MIN_MARKERS = -1;
165+
166+
154167
/**
155168
* Usage string: {@value}.
156169
*/
157170
private static final String USAGE = MARKERS
158171
+ " (-" + OPT_AUDIT
159172
+ " | -" + OPT_CLEAN + ")"
160-
+ " [-" + OPT_EXPECTED + " <count>]"
173+
+ " [-" + OPT_MIN + " <count>]"
174+
+ " [-" + OPT_MAX + " <count>]"
161175
+ " [-" + OPT_OUT + " <filename>]"
162176
+ " [-" + OPT_LIMIT + " <limit>]"
163177
+ " [-" + OPT_NONAUTH + "]"
@@ -195,7 +209,8 @@ public MarkerTool(final Configuration conf) {
195209
VERBOSE,
196210
OPT_NONAUTH);
197211
CommandFormat format = getCommandFormat();
198-
format.addOptionWithValue(OPT_EXPECTED);
212+
format.addOptionWithValue(OPT_MIN);
213+
format.addOptionWithValue(OPT_MAX);
199214
format.addOptionWithValue(OPT_LIMIT);
200215
format.addOptionWithValue(OPT_OUT);
201216
}
@@ -231,8 +246,7 @@ public int run(final String[] args, final PrintStream stream)
231246
if (parsedArgs.size() != 1) {
232247
errorln(getUsage());
233248
println(out, "Supplied arguments: ["
234-
+ parsedArgs.stream()
235-
.collect(Collectors.joining(", "))
249+
+ String.join(", ", parsedArgs)
236250
+ "]");
237251
throw new ExitUtil.ExitException(EXIT_USAGE,
238252
String.format(E_ARGUMENTS, parsedArgs.size()));
@@ -241,12 +255,11 @@ public int run(final String[] args, final PrintStream stream)
241255
CommandFormat command = getCommandFormat();
242256
verbose = command.getOpt(VERBOSE);
243257

244-
// How many markers are expected?
245-
int expected = 0;
246-
String value = command.getOptValue(OPT_EXPECTED);
247-
if (value != null && !value.isEmpty()) {
248-
expected = Integer.parseInt(value);
249-
}
258+
// minimum number of markers expected
259+
int expectedMin = getOptValue(OPT_MIN, 0);
260+
// max number of markers allowed
261+
int expectedMax = getOptValue(OPT_MAX, 0);
262+
250263

251264
// determine the action
252265
boolean audit = command.getOpt(OPT_AUDIT);
@@ -258,11 +271,7 @@ public int run(final String[] args, final PrintStream stream)
258271
throw new ExitUtil.ExitException(EXIT_USAGE,
259272
"Exactly one of " + AUDIT + " and " + CLEAN);
260273
}
261-
int limit = UNLIMITED_LISTING;
262-
value = command.getOptValue(OPT_LIMIT);
263-
if (value != null && !value.isEmpty()) {
264-
limit = Integer.parseInt(value);
265-
}
274+
int limit = getOptValue(OPT_LIMIT, UNLIMITED_LISTING);
266275
final String dir = parsedArgs.get(0);
267276
Path path = new Path(dir);
268277
URI uri = path.toUri();
@@ -275,7 +284,8 @@ public int run(final String[] args, final PrintStream stream)
275284
fs,
276285
path,
277286
clean,
278-
expected,
287+
expectedMin,
288+
expectedMax,
279289
limit,
280290
command.getOpt(OPT_NONAUTH));
281291
if (verbose) {
@@ -300,16 +310,40 @@ public int run(final String[] args, final PrintStream stream)
300310
IOUtils.writeLines(surplus, "\n", writer);
301311
}
302312
}
303-
return result.exitCode;
313+
314+
return result.finish();
315+
}
316+
317+
/**
318+
* Get the value of an option, or the default if the option
319+
* is unset/empty.
320+
* @param option option key
321+
* @param defVal default
322+
* @return the value to use
323+
*/
324+
private int getOptValue(String option, int defVal) {
325+
CommandFormat command = getCommandFormat();
326+
String value = command.getOptValue(option);
327+
if (value != null && !value.isEmpty()) {
328+
try {
329+
return Integer.parseInt(value);
330+
} catch (NumberFormatException e) {
331+
throw new ExitUtil.ExitException(EXIT_USAGE,
332+
String.format("Argument for %s is not a number: %s", option, value));
333+
}
334+
} else {
335+
return defVal;
336+
}
304337
}
305338

306339
/**
307340
* Execute the scan/purge.
308341
* @param sourceFS source FS; must be or wrap an S3A FS.
309342
* @param path path to scan.
310343
* @param doPurge purge?
311-
* @param expectedMarkerCount expected marker count
312-
* @param limit limit of files to scan; -1 for 'unlimited'
344+
* @param minMarkerCount min marker count (ignored on purge)
345+
* @param maxMarkerCount max marker count (ignored on purge)
346+
* @param limit limit of files to scan; 0 for 'unlimited'
313347
* @param nonAuth consider only markers in nonauth paths as errors
314348
* @return scan+purge result.
315349
* @throws IOException failure
@@ -319,7 +353,8 @@ ScanResult execute(
319353
final FileSystem sourceFS,
320354
final Path path,
321355
final boolean doPurge,
322-
final int expectedMarkerCount,
356+
final int minMarkerCount,
357+
final int maxMarkerCount,
323358
final int limit,
324359
final boolean nonAuth)
325360
throws IOException {
@@ -360,10 +395,19 @@ ScanResult execute(
360395
}
361396

362397
// the default filter policy is that all entries should be deleted
363-
DirectoryPolicy filterPolicy = nonAuth
364-
? activePolicy
365-
: null;
366-
ScanResult result = scan(target, doPurge, expectedMarkerCount, limit,
398+
DirectoryPolicy filterPolicy;
399+
if (nonAuth) {
400+
filterPolicy = new DirectoryPolicyImpl(
401+
DirectoryPolicy.MarkerPolicy.Authoritative,
402+
fs::allowAuthoritative);
403+
} else {
404+
filterPolicy = null;
405+
}
406+
ScanResult result = scan(target,
407+
doPurge,
408+
maxMarkerCount,
409+
minMarkerCount,
410+
limit,
367411
filterPolicy);
368412
return result;
369413
}
@@ -378,6 +422,22 @@ public static final class ScanResult {
378422
*/
379423
private int exitCode;
380424

425+
/**
426+
* Text to include if raising an exception.
427+
*/
428+
private String exitText = "";
429+
430+
/**
431+
* Count of all markers found.
432+
*/
433+
private int totalMarkerCount;
434+
435+
/**
436+
* Count of all markers found after excluding
437+
* any from a [-nonauth] qualification;
438+
*/
439+
private int filteredMarkerCount;
440+
381441
/**
382442
* The tracker.
383443
*/
@@ -395,6 +455,9 @@ private ScanResult() {
395455
public String toString() {
396456
return "ScanResult{" +
397457
"exitCode=" + exitCode +
458+
", exitText=" + exitText +
459+
", totalMarkerCount=" + totalMarkerCount +
460+
", filteredMarkerCount=" + filteredMarkerCount +
398461
", tracker=" + tracker +
399462
", purgeSummary=" + purgeSummary +
400463
'}';
@@ -414,13 +477,34 @@ public DirMarkerTracker getTracker() {
414477
public MarkerPurgeSummary getPurgeSummary() {
415478
return purgeSummary;
416479
}
480+
481+
public int getTotalMarkerCount() {
482+
return totalMarkerCount;
483+
}
484+
485+
public int getFilteredMarkerCount() {
486+
return filteredMarkerCount;
487+
}
488+
489+
/**
490+
* Throw an exception if the exit code is non-zero.
491+
* @return 0 if everything is good.
492+
* @throws ExitUtil.ExitException if code != 0
493+
*/
494+
public int finish() throws ExitUtil.ExitException {
495+
if (exitCode != 0) {
496+
throw new ExitUtil.ExitException(exitCode, exitText);
497+
}
498+
return 0;
499+
}
417500
}
418501

419502
/**
420503
* Do the scan/purge.
421504
* @param path path to scan.
422-
* @param clean purge?
423-
* @param expectedMarkerCount expected marker count
505+
* @param doPurge purge rather than just scan/audit?
506+
* @param minMarkerCount min marker count (ignored on purge)
507+
* @param maxMarkerCount max marker count (ignored on purge)
424508
* @param limit limit of files to scan; 0 for 'unlimited'
425509
* @param filterPolicy filter policy on a nonauth scan; may be null
426510
* @return result.
@@ -430,8 +514,9 @@ public MarkerPurgeSummary getPurgeSummary() {
430514
@Retries.RetryTranslated
431515
private ScanResult scan(
432516
final Path path,
433-
final boolean clean,
434-
final int expectedMarkerCount,
517+
final boolean doPurge,
518+
final int minMarkerCount,
519+
final int maxMarkerCount,
435520
final int limit,
436521
final DirectoryPolicy filterPolicy)
437522
throws IOException, ExitUtil.ExitException {
@@ -458,13 +543,16 @@ private ScanResult scan(
458543
= tracker.getSurplusMarkers();
459544
Map<Path, DirMarkerTracker.Marker> leafMarkers
460545
= tracker.getLeafMarkers();
461-
int surplus = surplusMarkers.size();
462-
if (surplus == 0) {
546+
// determine marker count
547+
int markerCount = surplusMarkers.size();
548+
result.totalMarkerCount = markerCount;
549+
result.filteredMarkerCount = markerCount;
550+
if (markerCount == 0) {
463551
println(out, "No surplus directory markers were found under %s", path);
464552
} else {
465553
println(out, "Found %d surplus directory marker%s under %s",
466-
surplus,
467-
suffix(surplus),
554+
markerCount,
555+
suffix(markerCount),
468556
path);
469557

470558
for (Path markers : surplusMarkers.keySet()) {
@@ -482,9 +570,9 @@ private ScanResult scan(
482570
println(out, "These are required to indicate empty directories");
483571
}
484572

485-
if (clean) {
573+
if (doPurge) {
486574
// clean: remove the markers, do not worry about their
487-
// presence when reporting success/failiure
575+
// presence when reporting success/failure
488576
int deletePageSize = storeContext.getConfiguration()
489577
.getInt(BULK_DELETE_PAGE_SIZE,
490578
BULK_DELETE_PAGE_SIZE_DEFAULT);
@@ -503,28 +591,41 @@ private ScanResult scan(
503591
allowed.forEach(p -> println(out, p.toString()));
504592
}
505593
// recalculate the marker size
506-
surplus = surplusMarkers.size();
594+
markerCount = surplusMarkers.size();
595+
result.filteredMarkerCount = markerCount;
507596
}
508-
if (surplus > expectedMarkerCount) {
597+
if (markerCount < minMarkerCount || markerCount > maxMarkerCount) {
509598
// failure
510-
if (expectedMarkerCount > 0) {
511-
println(out, "Expected %d marker%s", expectedMarkerCount,
512-
suffix(surplus));
513-
}
514-
println(out, "Surplus markers were found -failing audit");
515-
516-
result.exitCode = EXIT_NOT_ACCEPTABLE;
599+
return failScan(result, EXIT_NOT_ACCEPTABLE,
600+
"Marker count %d out of range "
601+
+ "[%d - %d]",
602+
markerCount, minMarkerCount, maxMarkerCount);
517603
}
518604
}
519605

520606
// now one little check for whether a limit was reached.
521607
if (!completed) {
522-
println(out, "Listing limit reached before completing the scan");
523-
result.exitCode = EXIT_INTERRUPTED;
608+
failScan(result, EXIT_INTERRUPTED,
609+
"Listing limit (%d) reached before completing the scan", limit);
524610
}
525611
return result;
526612
}
527613

614+
/**
615+
* Fail the scan; print the formatted error and update the result.
616+
* @param result result to update
617+
* @param code Exit code
618+
* @param message Error message
619+
* @param args arguments for the error message
620+
* @return scan result
621+
*/
622+
private ScanResult failScan(ScanResult result, int code, String message, Object...args) {
623+
String text = String.format(message, args);
624+
result.exitCode = code;
625+
result.exitText = text;
626+
return result;
627+
}
628+
528629
/**
529630
* Suffix for plurals.
530631
* @param size size to generate a suffix for
@@ -702,7 +803,8 @@ public void setVerbose(final boolean verbose) {
702803
* @param sourceFS filesystem to use
703804
* @param path path to scan
704805
* @param doPurge should markers be purged
705-
* @param expectedMarkers number of markers expected
806+
* @param minMarkerCount min marker count (ignored on purge)
807+
* @param maxMarkerCount max marker count (ignored on purge)
706808
* @param limit limit of files to scan; -1 for 'unlimited'
707809
* @param nonAuth only use nonauth path count for failure rules
708810
* @return the result
@@ -712,12 +814,14 @@ public static MarkerTool.ScanResult execMarkerTool(
712814
final FileSystem sourceFS,
713815
final Path path,
714816
final boolean doPurge,
715-
final int expectedMarkers,
716-
final int limit, boolean nonAuth) throws IOException {
817+
final int minMarkerCount,
818+
final int maxMarkerCount,
819+
final int limit,
820+
boolean nonAuth) throws IOException {
717821
MarkerTool tool = new MarkerTool(sourceFS.getConf());
718822
tool.setVerbose(LOG.isDebugEnabled());
719823

720824
return tool.execute(sourceFS, path, doPurge,
721-
expectedMarkers, limit, nonAuth);
825+
minMarkerCount, maxMarkerCount, limit, nonAuth);
722826
}
723827
}

0 commit comments

Comments
 (0)