Skip to content

Commit

Permalink
[ARCTIC-1620] Check there are no orphan files in the directory when Mixed Hive Full Optimizing committing (apache#1621)
Browse files (browse the repository at this point in the history)

* Set delete-untracked-hive-file=true when committing to a Mixed Hive table

* Do not check for orphan files when moving files to the old Hive location
  • Loading branch information
wangtaohz authored Jun 29, 2023
1 parent d7c64e9 commit a5a5b5a
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.netease.arctic.data.DataFileType;
import com.netease.arctic.data.IcebergContentFile;
import com.netease.arctic.data.PrimaryKeyedFile;
import com.netease.arctic.hive.utils.TableTypeUtil;
import com.netease.arctic.op.OverwriteBaseFiles;
import com.netease.arctic.optimizing.RewriteFilesInput;
import com.netease.arctic.optimizing.RewriteFilesOutput;
Expand All @@ -29,6 +30,7 @@
import java.util.List;
import java.util.Set;

import static com.netease.arctic.hive.op.UpdateHiveFiles.DELETE_UNTRACKED_HIVE_FILE;
import static com.netease.arctic.server.ArcticServiceConstants.INVALID_SNAPSHOT_ID;

public class KeyedTableCommit extends UnKeyedTableCommit {
Expand Down Expand Up @@ -139,6 +141,9 @@ private void executeCommit(
addedDataFiles.forEach(overwriteBaseFiles::addFile);
addedDeleteFiles.forEach(overwriteBaseFiles::addFile);
removedDataFiles.forEach(overwriteBaseFiles::deleteFile);
if (TableTypeUtil.isHive(table) && !needMoveFile2Hive()) {
overwriteBaseFiles.set(DELETE_UNTRACKED_HIVE_FILE, "true");
}
overwriteBaseFiles.skipEmptyCommit().commit();

//remove delete files
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import com.netease.arctic.hive.table.SupportHive;
import com.netease.arctic.hive.utils.HivePartitionUtil;
import com.netease.arctic.hive.utils.HiveTableUtil;
import com.netease.arctic.hive.utils.TableTypeUtil;
import com.netease.arctic.optimizing.OptimizingInputProperties;
import com.netease.arctic.optimizing.RewriteFilesOutput;
import com.netease.arctic.server.ArcticServiceConstants;
Expand Down Expand Up @@ -63,6 +64,7 @@
import java.util.Set;
import java.util.stream.Collectors;

import static com.netease.arctic.hive.op.UpdateHiveFiles.DELETE_UNTRACKED_HIVE_FILE;
import static com.netease.arctic.server.ArcticServiceConstants.INVALID_SNAPSHOT_ID;

public class UnKeyedTableCommit {
Expand Down Expand Up @@ -192,6 +194,9 @@ protected void replaceFiles(
dataFileRewrite.rewriteFiles(removedDataFiles, addedDataFiles);
}
dataFileRewrite.set(SnapshotSummary.SNAPSHOT_PRODUCER, CommitMetaProducer.OPTIMIZE.name());
if (TableTypeUtil.isHive(table) && !needMoveFile2Hive()) {
dataFileRewrite.set(DELETE_UNTRACKED_HIVE_FILE, "true");
}
dataFileRewrite.commit();
}
if (CollectionUtils.isNotEmpty(addDeleteFiles)) {
Expand Down

0 comments on commit a5a5b5a

Please sign in to comment.