Skip to content

Commit

Permalink
Optimize deleteTreesBelow.
Browse files Browse the repository at this point in the history
Make deleteTreesBelow faster by assuming that the directories to be deleted
are readable and writable.  We create most of the trees we delete via this
function anyway, so we know that they are accessible for the most part.

The previous code was blindly resetting read/write/execute permissions for
each traversed directory, and was doing so individually, which means we
issued 3 extra syscalls per directory.

And on Unix file systems, go even further by taking advantage of the fact
that readdir returns the type of each entry: there is no need to issue a
separate stat for each entry to determine if it is a subdirectory or not.
Do this from our JNI code because there really is no reason to pay the
cost of going in an out of Java for each file: we are traversing very
large directory trees, so every bit helps.

A fully-local build of a large iOS app on a Mac Pro 2013 shows that this
reduces build times from about 7300s to 5100s.  A build of a similar app
on a MacBook Pro 2015 shows a reduction from 7500s to 5400s.  The impact
on these builds using dynamic execution is much smaller, and there is
no observable improvement in smaller builds.

Addresses #7527.

RELNOTES: None.
PiperOrigin-RevId: 239594433
  • Loading branch information
jmmv authored and copybara-github committed Mar 21, 2019
1 parent 074c9c4 commit fac322b
Show file tree
Hide file tree
Showing 4 changed files with 274 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -453,18 +453,12 @@ public static HashCode md5sum(String path) throws IOException {
}

/**
* Removes entire directory tree. Doesn't follow symlinks.
* Deletes all directory trees recursively beneath the given path, which is expected to be a
* directory. Does not remove the top directory.
*
* @param path the file or directory to remove.
* @throws IOException if the remove failed.
* @param dir the directory hierarchy to remove
* @throws IOException if the hierarchy cannot be removed successfully or if the given path is not
* a directory
*/
public static void rmTree(String path) throws IOException {
if (isDirectory(path)) {
String[] contents = readdir(path);
for (String entry : contents) {
rmTree(path + "/" + entry);
}
}
remove(path.toString());
}
public static native void deleteTreesBelow(String dir) throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import com.google.devtools.build.lib.vfs.FileStatus;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.Symlinks;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
Expand Down Expand Up @@ -419,4 +420,16 @@ protected void createFSDependentHardLink(Path linkPath, Path originalPath)
throws IOException {
NativePosixFiles.link(originalPath.toString(), linkPath.toString());
}

@Override
public void deleteTreesBelow(Path dir) throws IOException {
if (dir.isDirectory(Symlinks.NOFOLLOW)) {
long startTime = Profiler.nanoTimeMaybe();
try {
NativePosixFiles.deleteTreesBelow(dir.toString());
} finally {
profiler.logSimpleTask(startTime, ProfilerTask.VFS_DELETE, dir.toString());
}
}
}
}
43 changes: 37 additions & 6 deletions src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.io.OutputStream;
import java.nio.file.FileAlreadyExistsException;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

/**
Expand Down Expand Up @@ -214,17 +215,47 @@ public void deleteTree(Path path) throws IOException {
* Deletes all directory trees recursively beneath the given path. Does nothing if the given path
* is not a directory.
*
* <p>This generic implementation is not as efficient as it could be: for example, we issue
* separate stats for each directory entry to determine if they are directories or not (instead of
* reusing the information that readdir returns), and we issue separate operations to toggle
* different permissions while they could be done at once via chmod. Subclasses can optimize this
* by taking advantage of platform-specific features.
*
* @param dir the directory hierarchy to remove
* @throws IOException if the hierarchy cannot be removed successfully
*/
public void deleteTreesBelow(Path dir) throws IOException {
if (dir.isDirectory(Symlinks.NOFOLLOW)) {
dir.setReadable(true);
dir.setWritable(true);
dir.setExecutable(true);
for (Path child : dir.getDirectoryEntries()) {
deleteTreesBelow(child);
child.delete();
Collection<Path> entries;
try {
entries = dir.getDirectoryEntries();
} catch (IOException e) {
// If we couldn't read the directory, it may be because it's not readable. Try granting this
// permission and retry. If the retry fails, give up.
dir.setReadable(true);
dir.setExecutable(true);
entries = dir.getDirectoryEntries();
}

Iterator<Path> iterator = entries.iterator();
if (iterator.hasNext()) {
Path first = iterator.next();
deleteTreesBelow(first);
try {
first.delete();
} catch (IOException e) {
// If we couldn't delete the first entry in a directory, it may be because the directory
// (not the entry!) is not writable. Try granting this permission and retry. If the retry
// fails, give up.
dir.setWritable(true);
first.delete();
}
}
while (iterator.hasNext()) {
Path path = iterator.next();
deleteTreesBelow(path);
// No need to retry here: if needed, we already unprotected the directory earlier.
path.delete();
}
}
}
Expand Down
218 changes: 218 additions & 0 deletions src/main/native/unix_jni.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "src/main/native/unix_jni.h"

#include <assert.h>
#include <jni.h>
#include <dirent.h>
#include <errno.h>
Expand All @@ -36,6 +37,12 @@
#include "src/main/cpp/util/md5.h"
#include "src/main/cpp/util/port.h"

#if defined(O_DIRECTORY)
#define PORTABLE_O_DIRECTORY O_DIRECTORY
#else
#define PORTABLE_O_DIRECTORY 0
#endif

using blaze_util::Md5Digest;

////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -863,6 +870,217 @@ Java_com_google_devtools_build_lib_unix_NativePosixFiles_mkfifo(JNIEnv *env,
ReleaseStringLatin1Chars(path_chars);
}

// Posts an exception generated by the DeleteTreesBelow algorithm and its helper
// functions.
//
// This is just a convenience wrapper over PostSystemException to format the
// path that caused an error only when necessary, as we keep that path tokenized
// throughout the deletion process.
//
// env is the JNI environment in which to post the exception. error and function
// capture the errno value and the name of the system function that triggered
// it. The faulty path is specified by all the components of dir_path and the
// optional entry subcomponent, which may be NULL.
static void PostDeleteTreesBelowException(
JNIEnv* env, int error, const char* function,
const std::vector<std::string>& dir_path, const char* entry) {
std::vector<std::string>::const_iterator iter = dir_path.begin();
assert(iter != dir_path.end());
std::string path = *iter;
while (++iter != dir_path.end()) {
path += "/";
path += *iter;
}
if (entry != NULL) {
path += "/";
path += entry;
}
assert(!env->ExceptionOccurred());
PostSystemException(env, errno, function, path.c_str());
}

// Tries to open a directory and, if the first attempt fails, retries after
// granting extra permissions to the directory.
//
// The directory to open is identified by the open descriptor of the parent
// directory (dir_fd) and the subpath to resolve within that directory (entry).
// dir_path contains the path components that were used when opening dir_fd and
// is only used for error reporting purposes.
//
// Returns a directory on success. Returns NULL on error and posts an
// exception.
static DIR* ForceOpendir(JNIEnv* env, const std::vector<std::string>& dir_path,
const int dir_fd, const char* entry) {
static const int flags = O_RDONLY | O_NOFOLLOW | PORTABLE_O_DIRECTORY;
int fd = openat(dir_fd, entry, flags);
if (fd == -1) {
if (fchmodat(dir_fd, entry, 0700, 0) == -1) {
PostDeleteTreesBelowException(env, errno, "fchmodat", dir_path, entry);
return NULL;
}
fd = openat(dir_fd, entry, flags);
if (fd == -1) {
PostDeleteTreesBelowException(env, errno, "opendir", dir_path, entry);
return NULL;
}
}
DIR* dir = fdopendir(fd);
if (dir == NULL) {
PostDeleteTreesBelowException(env, errno, "fdopendir", dir_path, entry);
close(fd);
return NULL;
}
return dir;
}

// Tries to delete a file within a directory and, if the first attempt fails,
// retries after granting extra write permissions to the directory.
//
// The file to delete is identified by the open descriptor of the parent
// directory (dir_fd) and the subpath to resolve within that directory (entry).
// dir_path contains the path components that were used when opening dir_fd and
// is only used for error reporting purposes.
//
// is_dir indicates whether the entry to delete is a directory or not.
//
// Returns 0 on success. Returns -1 on error and posts an exception.
static int ForceDelete(JNIEnv* env, const std::vector<std::string>& dir_path,
const int dir_fd, const char* entry,
const bool is_dir) {
const int flags = is_dir ? AT_REMOVEDIR : 0;
if (unlinkat(dir_fd, entry, flags) == -1) {
if (fchmod(dir_fd, 0700) == -1) {
PostDeleteTreesBelowException(env, errno, "fchmod", dir_path, NULL);
return -1;
}
if (unlinkat(dir_fd, entry, flags) == -1) {
PostDeleteTreesBelowException(env, errno, "unlinkat", dir_path, entry);
return -1;
}
}
return 0;
}

// Returns true if the given directory entry represents a subdirectory of dir.
//
// The file to check is identified by the open descriptor of the parent
// directory (dir_fd) and the directory entry within that directory (de).
// dir_path contains the path components that were used when opening dir_fd and
// is only used for error reporting purposes.
//
// This function prefers to extract the type information from the directory
// entry itself if available. If not available, issues a stat starting from
// dir_fd.
//
// Returns 0 on success and updates is_dir accordingly. Returns -1 on error and
// posts an exception.
static int IsSubdir(JNIEnv* env, const std::vector<std::string>& dir_path,
const int dir_fd, const struct dirent* de, bool* is_dir) {
switch (de->d_type) {
case DT_DIR:
*is_dir = true;
return 0;

case DT_UNKNOWN: {
struct stat st;
if (fstatat(dir_fd, de->d_name, &st, AT_SYMLINK_NOFOLLOW) == -1) {
PostDeleteTreesBelowException(env, errno, "fstatat", dir_path,
de->d_name);
return -1;
}
*is_dir = st.st_mode & S_IFDIR;
return 0;
}

default:
*is_dir = false;
return 0;
}
}

// Recursively deletes all trees under the given path.
//
// The directory to delete is identified by the open descriptor of the parent
// directory (dir_fd) and the subpath to resolve within that directory (entry).
// dir_path contains the path components that were used when opening dir_fd and
// is only used for error reporting purposes.
//
// dir_path is an in/out parameter updated with the path to the directory being
// processed. This avoids the need to construct unnecessary intermediate paths,
// as this algorithm works purely on file descriptors: the paths are only used
// for error reporting purposes, and therefore are only formatted at that
// point.
//
// Returns 0 on success. Returns -1 on error and posts an exception.
static int DeleteTreesBelow(JNIEnv* env, std::vector<std::string>* dir_path,
const int dir_fd, const char* entry) {
DIR *dir = ForceOpendir(env, *dir_path, dir_fd, entry);
if (dir == NULL) {
assert(env->ExceptionOccurred() != NULL);
return -1;
}

dir_path->push_back(entry);
for (;;) {
errno = 0;
struct dirent* de = readdir(dir);
if (de == NULL) {
if (errno != 0) {
PostDeleteTreesBelowException(env, errno, "readdir", *dir_path, NULL);
}
break;
}

if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) {
continue;
}

bool is_dir;
if (IsSubdir(env, *dir_path, dirfd(dir), de, &is_dir) == -1) {
assert(env->ExceptionOccurred() != NULL);
break;
}
if (is_dir) {
if (DeleteTreesBelow(env, dir_path, dirfd(dir), de->d_name) == -1) {
assert(env->ExceptionOccurred() != NULL);
break;
}
}

if (ForceDelete(env, *dir_path, dirfd(dir), de->d_name, is_dir) == -1) {
assert(env->ExceptionOccurred() != NULL);
break;
}
}
if (closedir(dir) == -1) {
// Prefer reporting the error encountered while processing entries,
// not the (unlikely) error on close.
if (env->ExceptionOccurred() == NULL) {
PostDeleteTreesBelowException(env, errno, "closedir", *dir_path, NULL);
}
}
dir_path->pop_back();
return env->ExceptionOccurred() == NULL ? 0 : -1;
}

/*
* Class: com.google.devtools.build.lib.unix.NativePosixFiles
* Method: deleteTreesBelow
* Signature: (Ljava/lang/String;)V
* Throws: java.io.IOException
*/
extern "C" JNIEXPORT void JNICALL
Java_com_google_devtools_build_lib_unix_NativePosixFiles_deleteTreesBelow(
JNIEnv *env, jclass clazz, jstring path) {
const char *path_chars = GetStringLatin1Chars(env, path);
std::vector<std::string> dir_path;
if (DeleteTreesBelow(env, &dir_path, AT_FDCWD, path_chars) == -1) {
assert(env->ExceptionOccurred() != NULL);
}
assert(dir_path.empty());
ReleaseStringLatin1Chars(path_chars);
}

////////////////////////////////////////////////////////////////////////
// Linux extended file attributes

Expand Down

0 comments on commit fac322b

Please sign in to comment.