From 49e5930cc5a82f11614fc5ec6659e04042ac5bf3 Mon Sep 17 00:00:00 2001 From: Peter Somogyi Date: Tue, 7 Feb 2023 18:04:14 +0100 Subject: [PATCH 1/3] HBASE-27590 Change Iterable to List in SnapshotFileCache --- .../hadoop/hbase/master/snapshot/SnapshotFileCache.java | 8 ++------ .../hbase/master/snapshot/SnapshotHFileCleaner.java | 7 ++++++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java index e9a33f6cb28d..da3da42f35a4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java @@ -178,15 +178,11 @@ public synchronized void triggerCacheRefreshForTesting() { * at that point, cache will still think the file system contains that file and return * true, even if it is no longer present (false positive). However, if the file never was * on the filesystem, we will never find it and always return false. - * @param files file to check, NOTE: Relies that files are loaded from hdfs before method is - * called (NOT LAZY) + * @param files file to check * @return unReferencedFiles the collection of files that do not have snapshot references * @throws IOException if there is an unexpected error reaching the filesystem. */ - // XXX this is inefficient to synchronize on the method, when what we really need to guard against - // is an illegal access to the cache. Really we could do a mutex-guarded pointer swap on the - // cache, but that seems overkill at the moment and isn't necessarily a bottleneck. - public Iterable getUnreferencedFiles(Iterable files, + public Iterable getUnreferencedFiles(List files, final SnapshotManager snapshotManager) throws IOException { List unReferencedFiles = Lists.newArrayList(); List snapshotsInProgress = null; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java index 200c88798906..0f1e2793ea1d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java @@ -20,7 +20,10 @@ import java.io.IOException; import java.util.Collection; import java.util.Collections; +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -64,8 +67,10 @@ public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate { @Override public Iterable getDeletableFiles(Iterable files) { + List filesList = + StreamSupport.stream(files.spliterator(), false).collect(Collectors.toList()); try { - return cache.getUnreferencedFiles(files, master.getSnapshotManager()); + return cache.getUnreferencedFiles(filesList, master.getSnapshotManager()); } catch (CorruptedSnapshotException cse) { LOG.debug("Corrupted in-progress snapshot file exception, ignored ", cse); } catch (IOException e) { From 0b8bf615fccd7a27c3e144e26a333acec3a4d6af Mon Sep 17 00:00:00 2001 From: Peter Somogyi Date: Wed, 8 Feb 2023 09:25:18 +0100 Subject: [PATCH 2/3] Add comment for conversion --- .../hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java index 0f1e2793ea1d..6ecac13a6892 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java @@ -67,6 +67,7 @@ public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate { @Override public Iterable getDeletableFiles(Iterable files) { + // Converting to List to initialization cost in the snapshot lock in SnapshotFileCache List filesList = StreamSupport.stream(files.spliterator(), false).collect(Collectors.toList()); try { From 7015d1a53ae456676ea6860f1b8bdb67f0bd8e99 Mon Sep 17 00:00:00 2001 From: Peter Somogyi Date: Wed, 8 Feb 2023 12:35:32 +0100 Subject: [PATCH 3/3] Update comment --- .../hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java index 6ecac13a6892..596334c8242b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java @@ -67,7 +67,11 @@ public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate { @Override public Iterable getDeletableFiles(Iterable files) { - // Converting to List to initialization cost in the snapshot lock in SnapshotFileCache + // The Iterable is lazy evaluated, so if we just pass this Iterable in, we will access the HFile + // storage inside the snapshot lock, which could take a lot of time (for example, several + // seconds), and block all other operations, especially other cleaners. + // So here we convert it to List first, to force it evaluated before calling + // getUnreferencedFiles, so we will not hold snapshot lock for a long time. List filesList = StreamSupport.stream(files.spliterator(), false).collect(Collectors.toList()); try {