From fac322b1ef7a9acbe71f3718143c0dae43fb5833 Mon Sep 17 00:00:00 2001 From: jmmv Date: Thu, 21 Mar 2019 07:53:51 -0700 Subject: [PATCH] Optimize deleteTreesBelow. Make deleteTreesBelow faster by assuming that the directories to be deleted are readable and writable. We create most of the trees we delete via this function anyway, so we know that they are accessible for the most part. The previous code was blindly resetting read/write/execute permissions for each traversed directory, and was doing so individually, which means we issued 3 extra syscalls per directory. And on Unix file systems, go even further by taking advantage of the fact that readdir returns the type of each entry: there is no need to issue a separate stat for each entry to determine if it is a subdirectory or not. Do this from our JNI code because there really is no reason to pay the cost of going in an out of Java for each file: we are traversing very large directory trees, so every bit helps. A fully-local build of a large iOS app on a Mac Pro 2013 shows that this reduces build times from about 7300s to 5100s. A build of a similar app on a MacBook Pro 2015 shows a reduction from 7500s to 5400s. The impact on these builds using dynamic execution is much smaller, and there is no observable improvement in smaller builds. Addresses https://github.com/bazelbuild/bazel/issues/7527. RELNOTES: None. PiperOrigin-RevId: 239594433 --- .../build/lib/unix/NativePosixFiles.java | 18 +- .../build/lib/unix/UnixFileSystem.java | 13 ++ .../devtools/build/lib/vfs/FileSystem.java | 43 +++- src/main/native/unix_jni.cc | 218 ++++++++++++++++++ 4 files changed, 274 insertions(+), 18 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/unix/NativePosixFiles.java b/src/main/java/com/google/devtools/build/lib/unix/NativePosixFiles.java index 96697eea225da6..dba8d2c16cd111 100644 --- a/src/main/java/com/google/devtools/build/lib/unix/NativePosixFiles.java +++ b/src/main/java/com/google/devtools/build/lib/unix/NativePosixFiles.java @@ -453,18 +453,12 @@ public static HashCode md5sum(String path) throws IOException { } /** - * Removes entire directory tree. Doesn't follow symlinks. + * Deletes all directory trees recursively beneath the given path, which is expected to be a + * directory. Does not remove the top directory. * - * @param path the file or directory to remove. - * @throws IOException if the remove failed. + * @param dir the directory hierarchy to remove + * @throws IOException if the hierarchy cannot be removed successfully or if the given path is not + * a directory */ - public static void rmTree(String path) throws IOException { - if (isDirectory(path)) { - String[] contents = readdir(path); - for (String entry : contents) { - rmTree(path + "/" + entry); - } - } - remove(path.toString()); - } + public static native void deleteTreesBelow(String dir) throws IOException; } diff --git a/src/main/java/com/google/devtools/build/lib/unix/UnixFileSystem.java b/src/main/java/com/google/devtools/build/lib/unix/UnixFileSystem.java index c68cad13248400..7e043f987a3d99 100644 --- a/src/main/java/com/google/devtools/build/lib/unix/UnixFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/unix/UnixFileSystem.java @@ -28,6 +28,7 @@ import com.google.devtools.build.lib.vfs.FileStatus; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; +import com.google.devtools.build.lib.vfs.Symlinks; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -419,4 +420,16 @@ protected void createFSDependentHardLink(Path linkPath, Path originalPath) throws IOException { NativePosixFiles.link(originalPath.toString(), linkPath.toString()); } + + @Override + public void deleteTreesBelow(Path dir) throws IOException { + if (dir.isDirectory(Symlinks.NOFOLLOW)) { + long startTime = Profiler.nanoTimeMaybe(); + try { + NativePosixFiles.deleteTreesBelow(dir.toString()); + } finally { + profiler.logSimpleTask(startTime, ProfilerTask.VFS_DELETE, dir.toString()); + } + } + } } diff --git a/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java b/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java index ad38ee57a0a382..97e1933f3e4c50 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java @@ -29,6 +29,7 @@ import java.io.OutputStream; import java.nio.file.FileAlreadyExistsException; import java.util.Collection; +import java.util.Iterator; import java.util.List; /** @@ -214,17 +215,47 @@ public void deleteTree(Path path) throws IOException { * Deletes all directory trees recursively beneath the given path. Does nothing if the given path * is not a directory. * + *

This generic implementation is not as efficient as it could be: for example, we issue + * separate stats for each directory entry to determine if they are directories or not (instead of + * reusing the information that readdir returns), and we issue separate operations to toggle + * different permissions while they could be done at once via chmod. Subclasses can optimize this + * by taking advantage of platform-specific features. + * * @param dir the directory hierarchy to remove * @throws IOException if the hierarchy cannot be removed successfully */ public void deleteTreesBelow(Path dir) throws IOException { if (dir.isDirectory(Symlinks.NOFOLLOW)) { - dir.setReadable(true); - dir.setWritable(true); - dir.setExecutable(true); - for (Path child : dir.getDirectoryEntries()) { - deleteTreesBelow(child); - child.delete(); + Collection entries; + try { + entries = dir.getDirectoryEntries(); + } catch (IOException e) { + // If we couldn't read the directory, it may be because it's not readable. Try granting this + // permission and retry. If the retry fails, give up. + dir.setReadable(true); + dir.setExecutable(true); + entries = dir.getDirectoryEntries(); + } + + Iterator iterator = entries.iterator(); + if (iterator.hasNext()) { + Path first = iterator.next(); + deleteTreesBelow(first); + try { + first.delete(); + } catch (IOException e) { + // If we couldn't delete the first entry in a directory, it may be because the directory + // (not the entry!) is not writable. Try granting this permission and retry. If the retry + // fails, give up. + dir.setWritable(true); + first.delete(); + } + } + while (iterator.hasNext()) { + Path path = iterator.next(); + deleteTreesBelow(path); + // No need to retry here: if needed, we already unprotected the directory earlier. + path.delete(); } } } diff --git a/src/main/native/unix_jni.cc b/src/main/native/unix_jni.cc index c9ec3dd5d656e5..eb9930e9713df2 100644 --- a/src/main/native/unix_jni.cc +++ b/src/main/native/unix_jni.cc @@ -14,6 +14,7 @@ #include "src/main/native/unix_jni.h" +#include #include #include #include @@ -36,6 +37,12 @@ #include "src/main/cpp/util/md5.h" #include "src/main/cpp/util/port.h" +#if defined(O_DIRECTORY) +#define PORTABLE_O_DIRECTORY O_DIRECTORY +#else +#define PORTABLE_O_DIRECTORY 0 +#endif + using blaze_util::Md5Digest; //////////////////////////////////////////////////////////////////////// @@ -863,6 +870,217 @@ Java_com_google_devtools_build_lib_unix_NativePosixFiles_mkfifo(JNIEnv *env, ReleaseStringLatin1Chars(path_chars); } +// Posts an exception generated by the DeleteTreesBelow algorithm and its helper +// functions. +// +// This is just a convenience wrapper over PostSystemException to format the +// path that caused an error only when necessary, as we keep that path tokenized +// throughout the deletion process. +// +// env is the JNI environment in which to post the exception. error and function +// capture the errno value and the name of the system function that triggered +// it. The faulty path is specified by all the components of dir_path and the +// optional entry subcomponent, which may be NULL. +static void PostDeleteTreesBelowException( + JNIEnv* env, int error, const char* function, + const std::vector& dir_path, const char* entry) { + std::vector::const_iterator iter = dir_path.begin(); + assert(iter != dir_path.end()); + std::string path = *iter; + while (++iter != dir_path.end()) { + path += "/"; + path += *iter; + } + if (entry != NULL) { + path += "/"; + path += entry; + } + assert(!env->ExceptionOccurred()); + PostSystemException(env, errno, function, path.c_str()); +} + +// Tries to open a directory and, if the first attempt fails, retries after +// granting extra permissions to the directory. +// +// The directory to open is identified by the open descriptor of the parent +// directory (dir_fd) and the subpath to resolve within that directory (entry). +// dir_path contains the path components that were used when opening dir_fd and +// is only used for error reporting purposes. +// +// Returns a directory on success. Returns NULL on error and posts an +// exception. +static DIR* ForceOpendir(JNIEnv* env, const std::vector& dir_path, + const int dir_fd, const char* entry) { + static const int flags = O_RDONLY | O_NOFOLLOW | PORTABLE_O_DIRECTORY; + int fd = openat(dir_fd, entry, flags); + if (fd == -1) { + if (fchmodat(dir_fd, entry, 0700, 0) == -1) { + PostDeleteTreesBelowException(env, errno, "fchmodat", dir_path, entry); + return NULL; + } + fd = openat(dir_fd, entry, flags); + if (fd == -1) { + PostDeleteTreesBelowException(env, errno, "opendir", dir_path, entry); + return NULL; + } + } + DIR* dir = fdopendir(fd); + if (dir == NULL) { + PostDeleteTreesBelowException(env, errno, "fdopendir", dir_path, entry); + close(fd); + return NULL; + } + return dir; +} + +// Tries to delete a file within a directory and, if the first attempt fails, +// retries after granting extra write permissions to the directory. +// +// The file to delete is identified by the open descriptor of the parent +// directory (dir_fd) and the subpath to resolve within that directory (entry). +// dir_path contains the path components that were used when opening dir_fd and +// is only used for error reporting purposes. +// +// is_dir indicates whether the entry to delete is a directory or not. +// +// Returns 0 on success. Returns -1 on error and posts an exception. +static int ForceDelete(JNIEnv* env, const std::vector& dir_path, + const int dir_fd, const char* entry, + const bool is_dir) { + const int flags = is_dir ? AT_REMOVEDIR : 0; + if (unlinkat(dir_fd, entry, flags) == -1) { + if (fchmod(dir_fd, 0700) == -1) { + PostDeleteTreesBelowException(env, errno, "fchmod", dir_path, NULL); + return -1; + } + if (unlinkat(dir_fd, entry, flags) == -1) { + PostDeleteTreesBelowException(env, errno, "unlinkat", dir_path, entry); + return -1; + } + } + return 0; +} + +// Returns true if the given directory entry represents a subdirectory of dir. +// +// The file to check is identified by the open descriptor of the parent +// directory (dir_fd) and the directory entry within that directory (de). +// dir_path contains the path components that were used when opening dir_fd and +// is only used for error reporting purposes. +// +// This function prefers to extract the type information from the directory +// entry itself if available. If not available, issues a stat starting from +// dir_fd. +// +// Returns 0 on success and updates is_dir accordingly. Returns -1 on error and +// posts an exception. +static int IsSubdir(JNIEnv* env, const std::vector& dir_path, + const int dir_fd, const struct dirent* de, bool* is_dir) { + switch (de->d_type) { + case DT_DIR: + *is_dir = true; + return 0; + + case DT_UNKNOWN: { + struct stat st; + if (fstatat(dir_fd, de->d_name, &st, AT_SYMLINK_NOFOLLOW) == -1) { + PostDeleteTreesBelowException(env, errno, "fstatat", dir_path, + de->d_name); + return -1; + } + *is_dir = st.st_mode & S_IFDIR; + return 0; + } + + default: + *is_dir = false; + return 0; + } +} + +// Recursively deletes all trees under the given path. +// +// The directory to delete is identified by the open descriptor of the parent +// directory (dir_fd) and the subpath to resolve within that directory (entry). +// dir_path contains the path components that were used when opening dir_fd and +// is only used for error reporting purposes. +// +// dir_path is an in/out parameter updated with the path to the directory being +// processed. This avoids the need to construct unnecessary intermediate paths, +// as this algorithm works purely on file descriptors: the paths are only used +// for error reporting purposes, and therefore are only formatted at that +// point. +// +// Returns 0 on success. Returns -1 on error and posts an exception. +static int DeleteTreesBelow(JNIEnv* env, std::vector* dir_path, + const int dir_fd, const char* entry) { + DIR *dir = ForceOpendir(env, *dir_path, dir_fd, entry); + if (dir == NULL) { + assert(env->ExceptionOccurred() != NULL); + return -1; + } + + dir_path->push_back(entry); + for (;;) { + errno = 0; + struct dirent* de = readdir(dir); + if (de == NULL) { + if (errno != 0) { + PostDeleteTreesBelowException(env, errno, "readdir", *dir_path, NULL); + } + break; + } + + if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) { + continue; + } + + bool is_dir; + if (IsSubdir(env, *dir_path, dirfd(dir), de, &is_dir) == -1) { + assert(env->ExceptionOccurred() != NULL); + break; + } + if (is_dir) { + if (DeleteTreesBelow(env, dir_path, dirfd(dir), de->d_name) == -1) { + assert(env->ExceptionOccurred() != NULL); + break; + } + } + + if (ForceDelete(env, *dir_path, dirfd(dir), de->d_name, is_dir) == -1) { + assert(env->ExceptionOccurred() != NULL); + break; + } + } + if (closedir(dir) == -1) { + // Prefer reporting the error encountered while processing entries, + // not the (unlikely) error on close. + if (env->ExceptionOccurred() == NULL) { + PostDeleteTreesBelowException(env, errno, "closedir", *dir_path, NULL); + } + } + dir_path->pop_back(); + return env->ExceptionOccurred() == NULL ? 0 : -1; +} + +/* + * Class: com.google.devtools.build.lib.unix.NativePosixFiles + * Method: deleteTreesBelow + * Signature: (Ljava/lang/String;)V + * Throws: java.io.IOException + */ +extern "C" JNIEXPORT void JNICALL +Java_com_google_devtools_build_lib_unix_NativePosixFiles_deleteTreesBelow( + JNIEnv *env, jclass clazz, jstring path) { + const char *path_chars = GetStringLatin1Chars(env, path); + std::vector dir_path; + if (DeleteTreesBelow(env, &dir_path, AT_FDCWD, path_chars) == -1) { + assert(env->ExceptionOccurred() != NULL); + } + assert(dir_path.empty()); + ReleaseStringLatin1Chars(path_chars); +} + //////////////////////////////////////////////////////////////////////// // Linux extended file attributes