From a5a5b5ac4d523710bdb1765228d971a14a51a605 Mon Sep 17 00:00:00 2001 From: wangtaohz <103108928+wangtaohz@users.noreply.github.com> Date: Thu, 29 Jun 2023 09:46:40 +0800 Subject: [PATCH] [ARCTIC-1620] Check there are no orphan files in the directory when Mixed Hive Full Optimizing committing (#1621) * set delete-untracked-hive-file=true when Mixed Hive Table commit * not check orphan files when move files to old hive location --- .../netease/arctic/server/optimizing/KeyedTableCommit.java | 5 +++++ .../netease/arctic/server/optimizing/UnKeyedTableCommit.java | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/ams/server/src/main/java/com/netease/arctic/server/optimizing/KeyedTableCommit.java b/ams/server/src/main/java/com/netease/arctic/server/optimizing/KeyedTableCommit.java index 06fe9d35dc..bd924e88e1 100644 --- a/ams/server/src/main/java/com/netease/arctic/server/optimizing/KeyedTableCommit.java +++ b/ams/server/src/main/java/com/netease/arctic/server/optimizing/KeyedTableCommit.java @@ -4,6 +4,7 @@ import com.netease.arctic.data.DataFileType; import com.netease.arctic.data.IcebergContentFile; import com.netease.arctic.data.PrimaryKeyedFile; +import com.netease.arctic.hive.utils.TableTypeUtil; import com.netease.arctic.op.OverwriteBaseFiles; import com.netease.arctic.optimizing.RewriteFilesInput; import com.netease.arctic.optimizing.RewriteFilesOutput; @@ -29,6 +30,7 @@ import java.util.List; import java.util.Set; +import static com.netease.arctic.hive.op.UpdateHiveFiles.DELETE_UNTRACKED_HIVE_FILE; import static com.netease.arctic.server.ArcticServiceConstants.INVALID_SNAPSHOT_ID; public class KeyedTableCommit extends UnKeyedTableCommit { @@ -139,6 +141,9 @@ private void executeCommit( addedDataFiles.forEach(overwriteBaseFiles::addFile); addedDeleteFiles.forEach(overwriteBaseFiles::addFile); removedDataFiles.forEach(overwriteBaseFiles::deleteFile); + if (TableTypeUtil.isHive(table) && !needMoveFile2Hive()) { + overwriteBaseFiles.set(DELETE_UNTRACKED_HIVE_FILE, "true"); + } overwriteBaseFiles.skipEmptyCommit().commit(); //remove delete files diff --git a/ams/server/src/main/java/com/netease/arctic/server/optimizing/UnKeyedTableCommit.java b/ams/server/src/main/java/com/netease/arctic/server/optimizing/UnKeyedTableCommit.java index 214476613d..0c1bf5860a 100644 --- a/ams/server/src/main/java/com/netease/arctic/server/optimizing/UnKeyedTableCommit.java +++ b/ams/server/src/main/java/com/netease/arctic/server/optimizing/UnKeyedTableCommit.java @@ -25,6 +25,7 @@ import com.netease.arctic.hive.table.SupportHive; import com.netease.arctic.hive.utils.HivePartitionUtil; import com.netease.arctic.hive.utils.HiveTableUtil; +import com.netease.arctic.hive.utils.TableTypeUtil; import com.netease.arctic.optimizing.OptimizingInputProperties; import com.netease.arctic.optimizing.RewriteFilesOutput; import com.netease.arctic.server.ArcticServiceConstants; @@ -63,6 +64,7 @@ import java.util.Set; import java.util.stream.Collectors; +import static com.netease.arctic.hive.op.UpdateHiveFiles.DELETE_UNTRACKED_HIVE_FILE; import static com.netease.arctic.server.ArcticServiceConstants.INVALID_SNAPSHOT_ID; public class UnKeyedTableCommit { @@ -192,6 +194,9 @@ protected void replaceFiles( dataFileRewrite.rewriteFiles(removedDataFiles, addedDataFiles); } dataFileRewrite.set(SnapshotSummary.SNAPSHOT_PRODUCER, CommitMetaProducer.OPTIMIZE.name()); + if (TableTypeUtil.isHive(table) && !needMoveFile2Hive()) { + dataFileRewrite.set(DELETE_UNTRACKED_HIVE_FILE, "true"); + } dataFileRewrite.commit(); } if (CollectionUtils.isNotEmpty(addDeleteFiles)) {