Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class AppendOp extends Operation {
* @return Path
*/
protected Path getAppendFile() {
  // Fix: the pasted diff kept both the old and new call, declaring `fn`
  // twice (a compile error). Keep only the post-change call; the "APPEND"
  // operation type lets PathFinder select an existing file when the new
  // (scan-based) algorithm is enabled.
  Path fn = getFinder().getFile("APPEND");
  return fn;
}

Expand Down Expand Up @@ -100,7 +100,7 @@ List<OperationOutput> run(FileSystem fs) {
} catch (FileNotFoundException e) {
out.add(new OperationOutput(OutputType.LONG, getType(),
ReportWriter.NOT_FOUND, 1L));
LOG.warn("Error with appending", e);
LOG.warn("AppendOp failed: File not found", e);
} catch (IOException | UnsupportedOperationException e) {
out.add(new OperationOutput(OutputType.LONG, getType(),
ReportWriter.FAILURES, 1L));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,29 @@ boolean shouldExitOnFirstError(String primary) {
return Boolean.parseBoolean(val);
}

/**
 * Resolves whether the new (scan-based, existing-path) selection algorithm
 * should be used, reading the value from configuration and falling back to
 * the option default; see {@link #shouldUseNewAlgorithm(String)}.
 *
 * @return true|false for whether to use the new algorithm
 */
boolean shouldUseNewAlgorithm() {
  return shouldUseNewAlgorithm(null);
}

/**
 * Resolves whether the new (scan-based) path selection algorithm should be
 * used, taking the value from primary, then the configuration, then the
 * option default (in that order).
 *
 * @param primary
 *          the initial string to be used for the value of this (may be null)
 * @return true|false for whether to use the new algorithm from
 *         primary,config,default (in that order)
 */
boolean shouldUseNewAlgorithm(String primary) {
  String val = (primary != null) ? primary
      : config.get(ConfigOption.USE_NEW_ALGORITHM.getCfgOption());
  if (val == null) {
    val = ConfigOption.USE_NEW_ALGORITHM.getDefault().toString();
  }
  return Boolean.parseBoolean(val);
}

/**
* @return whether the mapper or reducer should wait for truncate recovery
*/
Expand Down Expand Up @@ -735,6 +758,7 @@ static void dumpOptions(ConfigExtractor cfg) {
LOG.info("Operation amount = " + cfg.getOpCount());
LOG.info("Total file limit = " + cfg.getTotalFiles());
LOG.info("Total dir file limit = " + cfg.getDirSize());
LOG.info("Use new algorithm = " + cfg.shouldUseNewAlgorithm());
{
String read = "Read size = ";
if (cfg.shouldReadFullFile()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ class ConfigOption<T> extends Option {
"exitOnError", false, "Exit on first error", SLIVE_PREFIX
+ ".exit.on.error", false);

static final ConfigOption<Boolean> USE_NEW_ALGORITHM = new ConfigOption<Boolean>(
"useNewAlgorithm", false, "Use new algorithm for slivetest read/delete etc. operations", SLIVE_PREFIX
+ ".use.new.algorithm", false);

static final ConfigOption<Integer> FILES = new ConfigOption<Integer>(
"files", true, "Max total number of files",
SLIVE_PREFIX + ".total.files", 10);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.hadoop.fs.slive;

import java.io.IOException;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.fs.FileAlreadyExistsException;

Expand Down Expand Up @@ -111,7 +112,7 @@ private int getBufferSize() {
* @return Path
*/
protected Path getCreateFile() {
  // Fix: the pasted diff kept both the old and new call, declaring `fn`
  // twice (a compile error). Keep only the post-change call; the "CREATE"
  // operation type tells PathFinder to generate a unique new path when the
  // new (scan-based) algorithm is enabled.
  Path fn = getFinder().getFile("CREATE");
  return fn;
}

Expand Down Expand Up @@ -164,10 +165,14 @@ List<OperationOutput> run(FileSystem fs) {
ReportWriter.BYTES_WRITTEN, bytesWritten));
out.add(new OperationOutput(OutputType.LONG, getType(),
ReportWriter.SUCCESSES, 1L));
} catch (FileAlreadyExistsException e) {
out.add(new OperationOutput(OutputType.LONG, getType(),
ReportWriter.FILE_ALREADY_EXISTS, 1L));
LOG.warn("CreateOp failed: File already exists", e);
} catch (IOException e) {
out.add(new OperationOutput(OutputType.LONG, getType(),
ReportWriter.FAILURES, 1L));
LOG.warn("Error with creating", e);
LOG.warn("CreateOp failed: IO error creating file", e);
} finally {
if (os != null) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class DeleteOp extends Operation {
* Gets the file to delete
*/
protected Path getDeleteFile() {
  // Fix: the pasted diff kept both the old and new call, declaring `fn`
  // twice (a compile error). Keep only the post-change call; the "DELETE"
  // operation type lets PathFinder pick an existing file when the new
  // (scan-based) algorithm is enabled.
  Path fn = getFinder().getFile("DELETE");
  return fn;
}

Expand All @@ -58,6 +58,7 @@ List<OperationOutput> run(FileSystem fs) {
List<OperationOutput> out = super.run(fs);
try {
Path fn = getDeleteFile();
LOG.info("Deleting file: " + fn);
long timeTaken = 0;
boolean deleteStatus = false;
{
Expand All @@ -80,7 +81,7 @@ List<OperationOutput> run(FileSystem fs) {
} catch (FileNotFoundException e) {
out.add(new OperationOutput(OutputType.LONG, getType(),
ReportWriter.NOT_FOUND, 1L));
LOG.warn("Error with deleting", e);
LOG.warn("DeleteOp failed: File not found", e);
} catch (IOException e) {
out.add(new OperationOutput(OutputType.LONG, getType(),
ReportWriter.FAILURES, 1L));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class ListOp extends Operation {
* @return Path
*/
protected Path getDirectory() {
  // Fix: the pasted diff kept both the old and new call, declaring `dir`
  // twice (a compile error). Keep only the post-change call; the "LS"
  // operation type lets PathFinder select an existing directory when the
  // new (scan-based) algorithm is enabled.
  Path dir = getFinder().getDirectory("LS");
  return dir;
}

Expand Down Expand Up @@ -81,7 +81,7 @@ List<OperationOutput> run(FileSystem fs) {
} catch (FileNotFoundException e) {
out.add(new OperationOutput(OutputType.LONG, getType(),
ReportWriter.NOT_FOUND, 1L));
LOG.warn("Error with listing", e);
LOG.warn("ListOp failed: File not found", e);
} catch (IOException e) {
out.add(new OperationOutput(OutputType.LONG, getType(),
ReportWriter.FAILURES, 1L));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ List<OperationOutput> run(FileSystem fs) {
} catch (FileNotFoundException e) {
out.add(new OperationOutput(OutputType.LONG, getType(),
ReportWriter.NOT_FOUND, 1L));
LOG.warn("Error with mkdir", e);
LOG.warn("MkdirOp failed: File not found", e);
} catch (IOException e) {
out.add(new OperationOutput(OutputType.LONG, getType(),
ReportWriter.FAILURES, 1L));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,26 @@

package org.apache.hadoop.fs.slive;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.UUID;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Class which generates a file or directory path using a simple random
* generation algorithm stated in http://issues.apache.org/jira/browse/HDFS-708
*/
class PathFinder {

private static final Logger LOG = LoggerFactory.getLogger(PathFinder.class);

private enum Type {
FILE, DIRECTORY
}
Expand All @@ -38,11 +48,96 @@ private enum Type {
private Path basePath;
private ConfigExtractor config;
private Random rnd;

// Used to store scanned existing paths
private List<Path> existingFiles;
private List<Path> existingDirs;

/**
 * Builds a finder rooted at the configured data path.
 *
 * @param cfg source of the base path and algorithm settings
 * @param rnd random generator used for path selection
 */
PathFinder(ConfigExtractor cfg, Random rnd) {
  this.config = cfg;
  this.rnd = rnd;
  this.basePath = cfg.getDataPath();
  // Caches populated by scanBaseDirectory(); start out empty.
  this.existingFiles = new ArrayList<>();
  this.existingDirs = new ArrayList<>();
}

/**
 * Scan all paths under base_dir and record existing files and directories
 * into the existingFiles/existingDirs caches. On any IO failure the caches
 * are emptied rather than left partially populated.
 */
private void scanBaseDirectory() {
  try {
    FileSystem fs = basePath.getFileSystem(config.getConfig());
    LOG.info("Starting to scan base_dir: " + basePath);
    // Clear existing lists so a re-scan never accumulates stale entries
    clearExistingPaths();

    // Recursively scan directories
    scanDirectoryRecursively(fs, basePath);

    // Print summary only (avoid huge log output)
    LOG.info("Scan complete: found " + existingFiles.size() + " files, "
        + existingDirs.size() + " directories");

  } catch (IOException e) {
    LOG.error("Error scanning base_dir: " + e.getMessage(), e);
    // Callers treat empty caches as "nothing found" — don't keep a partial scan
    clearExistingPaths();
  }
}

/** Empties both caches of previously-discovered paths. */
private void clearExistingPaths() {
  existingDirs.clear();
  existingFiles.clear();
}

/**
 * Walks the directory tree rooted at dir, recording every file and every
 * directory encountered into the existing-path caches.
 *
 * @param fs the file system to query
 * @param dir the directory to scan (silently skipped when absent or empty)
 * @throws IOException if a listing fails
 */
private void scanDirectoryRecursively(FileSystem fs, Path dir) throws IOException {
  if (!fs.exists(dir)) {
    return;
  }

  final FileStatus[] entries = fs.listStatus(dir);
  if (entries == null || entries.length == 0) {
    return;
  }

  for (FileStatus entry : entries) {
    final Path p = entry.getPath();
    if (entry.isFile()) {
      existingFiles.add(p);
    } else if (entry.isDirectory()) {
      existingDirs.add(p);
      // Descend into the subdirectory
      scanDirectoryRecursively(fs, p);
    }
  }
}

/**
 * Picks one path uniformly at random from the cached existing files.
 *
 * @return a randomly chosen existing file path
 * @throws RuntimeException when the cache holds no files
 */
private Path getExistingFile() {
  if (existingFiles.isEmpty()) {
    throw new RuntimeException("No files found in base_dir, cannot perform read/delete operations");
  }
  final Path chosen = existingFiles.get(rnd.nextInt(existingFiles.size()));
  LOG.info("Selected from existing files: " + chosen);
  return chosen;
}

/**
 * Picks one path uniformly at random from the cached existing directories.
 *
 * @return a randomly chosen existing directory path
 * @throws RuntimeException when the cache holds no directories
 */
private Path getExistingDirectory() {
  if (existingDirs.isEmpty()) {
    throw new RuntimeException("No directories found in base_dir, cannot perform ls operations");
  }
  final Path chosen = existingDirs.get(rnd.nextInt(existingDirs.size()));
  LOG.info("Selected from existing directories: " + chosen);
  return chosen;
}

/**
Expand All @@ -62,6 +157,10 @@ private enum Type {
* @return Path
*/
private Path getPath(int curId, int limitPerDir, Type type) {
  // Delegates to the suffix-aware overload with no suffix (original behavior).
  return getPath(curId, limitPerDir, type, null);
}

private Path getPath(int curId, int limitPerDir, Type type, String suffix) {
if (curId <= 0) {
return basePath;
}
Expand All @@ -74,6 +173,9 @@ private Path getPath(int curId, int limitPerDir, Type type) {
name = DIR_PREFIX + new Integer(curId % limitPerDir).toString();
break;
}
if (suffix != null) {
name += "_" + suffix;
}
Path base = getPath((curId / limitPerDir), limitPerDir, Type.DIRECTORY);
return new Path(base, name);
}
Expand All @@ -85,6 +187,57 @@ private Path getPath(int curId, int limitPerDir, Type type) {
* @return path
*/
Path getFile() {
  // Backward-compatible entry point: no operation type supplied, so the
  // overload falls through to the original selection algorithm.
  return getFile(null);
}

/**
* Gets a file path based on operation type and configuration
*
* @param operationType the type of operation (can be null for backward compatibility)
* @return path
*/
Path getFile(String operationType) {
boolean useNewAlgorithm = config.shouldUseNewAlgorithm();

// Handle operations that need existing files
if (isExistingFileOperation(operationType)) {
if (useNewAlgorithm) {
LOG.info("Use new algorithm mode: scanning base_dir for " + operationType + " operation");
scanBaseDirectory();
Copy link

@NorthCedar NorthCedar Oct 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A full path scan for each getFile operation will slow down the test

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I also tried the following methods:
Short-term caching of directory scan results: for example, using the previous scan results for 5 seconds + error-driven refresh: if a file_not_found error is encountered three times in a row, a refresh is forced; however, this method still has a high failure rate.

Using the new algorithm, 5000 operations took approximately 16 minutes, with most operations successfully executed. Using the original algorithm, 5000 operations took 3 minutes, with approximately 50% of the operations failing. This seems acceptable if accuracy is the only concern, not performance.

Do you have any suggestions for avoiding this problem?

return getExistingFile();
}
// Fall through to original algorithm for normal mode
}

// Handle CREATE operation
if ("CREATE".equals(operationType)) {
if (useNewAlgorithm) {
LOG.info("Generating unique path for CREATE operation");
return generateUniquePath();
}
// Fall through to original algorithm for normal mode
}

// Use original algorithm for all other cases
LOG.info("Using original algorithm for " + (operationType != null ? operationType : "default") + " operation");
return generateOriginalPath();
}

/**
 * Tells whether the given operation consumes an already-existing file
 * (READ, DELETE, TRUNCATE, APPEND or RENAME_SRC).
 *
 * @param operationType operation name, may be null
 * @return true if the operation requires an existing file
 */
private boolean isExistingFileOperation(String operationType) {
  if (operationType == null) {
    return false;
  }
  switch (operationType) {
    case "READ":
    case "DELETE":
    case "TRUNCATE":
    case "APPEND":
    case "RENAME_SRC":
      return true;
    default:
      return false;
  }
}

/**
 * Builds a file path that is new with overwhelming probability, by appending
 * a random 10-character suffix derived from a UUID to a randomly chosen slot.
 *
 * @return a unique file path under the base directory
 */
private Path generateUniquePath() {
  final int start = 1 + rnd.nextInt(config.getTotalFiles());
  final String suffix =
      UUID.randomUUID().toString().replace("-", "").substring(0, 10);
  return getPath(start, config.getDirSize(), Type.FILE, suffix);
}

private Path generateOriginalPath() {
int fileLimit = config.getTotalFiles();
int dirLimit = config.getDirSize();
int startPoint = 1 + rnd.nextInt(fileLimit);
Expand All @@ -98,6 +251,30 @@ Path getFile() {
* @return path
*/
Path getDirectory() {
  // Backward-compatible entry point: no operation type supplied, so the
  // overload falls through to the original selection algorithm.
  return getDirectory(null);
}

/**
* Gets a directory path based on operation type
* For CREATE/MKDIR operations: use original algorithm (write to base_dir)
* For LS operations: scan base_dir and select from existing directories
*
* @param operationType the type of operation (can be null for backward compatibility)
* @return path
*/
Path getDirectory(String operationType) {
boolean useNewAlgorithm = config.shouldUseNewAlgorithm();
// For LS operation, scan base_dir and select existing directories each time
if ("LS".equals(operationType)) {
if (useNewAlgorithm) {
LOG.info("Starting to scan base_dir and select existing directories for LS operation");
scanBaseDirectory();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

return getExistingDirectory();
}
// Fall through to original algorithm for normal mode
}

// Use original algorithm by default
int fileLimit = config.getTotalFiles();
int dirLimit = config.getDirSize();
int startPoint = rnd.nextInt(fileLimit);
Expand Down
Loading