diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 0a8625d6252f6b..795d3942062b47 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -3797,4 +3797,8 @@ public static int metaServiceRpcRetryTimes() { + "by default" }) public static boolean calc_delete_bitmap_get_versions_waiting_for_pending_txns = true; + + @ConfField(mutable = true, description = {"是否允许获取hdfs的文件元数据信息时忽略隐藏目录", + "whether to enable list hdfs files ignore hidden directory"}) + public static boolean enable_list_hdfs_files_ignore_hidden_directory = true; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java index 2ee3c15693130a..4bb238c07fb206 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java @@ -19,6 +19,7 @@ import org.apache.doris.analysis.StorageBackend; import org.apache.doris.backup.Status; +import org.apache.doris.common.Config; import org.apache.doris.common.UserException; import org.apache.doris.common.security.authentication.HadoopAuthenticator; import org.apache.doris.common.util.S3Util; @@ -60,6 +61,7 @@ import java.nio.file.FileVisitOption; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.ArrayDeque; import java.util.Arrays; import java.util.Comparator; import java.util.List; @@ -89,6 +91,9 @@ public Status listFiles(String remotePath, boolean recursive, List r try { Path locatedPath = new Path(remotePath); org.apache.hadoop.fs.FileSystem fileSystem = nativeFileSystem(locatedPath); + if (recursive && Config.enable_list_hdfs_files_ignore_hidden_directory) { + return listFilesIgnoreHiddenDirectory(fileSystem, locatedPath, result); + } RemoteIterator 
locatedFiles = getLocatedFiles(recursive, fileSystem, locatedPath); while (locatedFiles.hasNext()) { LocatedFileStatus fileStatus = locatedFiles.next(); @@ -105,6 +110,36 @@ public Status listFiles(String remotePath, boolean recursive, List r return Status.OK; } + /** + * Lists files under locatedPath breadth-first, skipping sub-directories whose name starts with '.' or '_'. + * Hidden directories are never listed (the optimization); files are added regardless of their own name. + */ + private Status listFilesIgnoreHiddenDirectory(FileSystem fileSystem, Path locatedPath, + List result) throws IOException { + ArrayDeque pathQueue = new ArrayDeque<>(); + pathQueue.add(locatedPath); + while (!pathQueue.isEmpty()) { + Path currentPath = pathQueue.poll(); + FileStatus[] fileStatusList = hdfsProperties.getHadoopAuthenticator() + .doAs(() -> fileSystem.listStatus(currentPath)); + for (FileStatus fileStatus : fileStatusList) { + if (fileStatus.isDirectory()) { + String fileName = fileStatus.getPath().getName(); + if (fileName.startsWith(".") || fileName.startsWith("_")) { + continue; + } + pathQueue.add(fileStatus.getPath()); + } else { + RemoteFile location = new RemoteFile( + fileStatus.getPath(), fileStatus.isDirectory(), fileStatus.getLen(), + fileStatus.getBlockSize(), fileStatus.getModificationTime(), null); + result.add(location); + } + } + } + return Status.OK; + } + @Override public Status listDirectories(String remotePath, Set result) {