Skip to content

Commit

Permalink
[ARCTIC-1167][core][hive] fix Trash for restoring deleted files (apache#1223)

Browse files Browse the repository at this point in the history

* fix ArcticHadoopFileIO cast error

* overwrite file in trash when move

* [ARCTIC-1213] Optimizing of Mixed Format Table supports optimizing a part of partitions at a time (apache#1220)

* support partition ordered by PartitionWeight for OptimizePlan

* if not all partitions are optimized, current change snapshot id should set to -1

* fix checkstyle

* TableTrashManager should extends Serializable
  • Loading branch information
wangtaohz authored and ShawHee committed Dec 29, 2023
1 parent a1f8d99 commit 0e4a2c6
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 103 deletions.
3 changes: 2 additions & 1 deletion core/src/main/java/com/netease/arctic/io/ArcticFileIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package com.netease.arctic.io;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.fs.FileStatus;
import org.apache.iceberg.io.FileIO;

Expand All @@ -27,7 +28,7 @@
/**
* Arctic extension from {@link FileIO}, adding more operations.
*/
public interface ArcticFileIO extends FileIO {
public interface ArcticFileIO extends FileIO, Configurable {

/**
* Run the given action with login user.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.UncheckedIOException;
Expand All @@ -43,7 +41,6 @@
* Implementation of {@link ArcticFileIO} for hadoop file system with authentication.
*/
public class ArcticHadoopFileIO extends HadoopFileIO implements ArcticFileIO {
private static final Logger LOG = LoggerFactory.getLogger(ArcticHadoopFileIO.class);

private final TableMetaStore tableMetaStore;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ public void moveFileToTrash(String path) {
if (!arcticFileIO.exists(targetFileDir)) {
arcticFileIO.mkdirs(targetFileDir);
}
if (arcticFileIO.exists(targetFileLocation)) {
arcticFileIO.deleteFile(targetFileLocation);
}
arcticFileIO.rename(path, targetFileLocation);
} catch (Exception e) {
LOG.error("{} failed to move file to trash, {}", tableIdentifier, path, e);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.netease.arctic.io;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.OutputFile;
Expand Down Expand Up @@ -144,4 +145,14 @@ private void moveToTrash(String filePath) {
trashManager.moveFileToTrash(filePath);
LOG.debug("Move file:{} to table trash", filePath);
}

// Configurable contract: pushes the Hadoop configuration down to the wrapped fileIO,
// which performs the actual filesystem access.
@Override
public void setConf(Configuration conf) {
fileIO.setConf(conf);
}

// Configurable contract: exposes the Hadoop configuration of the wrapped fileIO,
// so callers no longer need to cast to a concrete implementation to reach it.
@Override
public Configuration getConf() {
return fileIO.getConf();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@

import com.netease.arctic.table.TableIdentifier;

import java.io.Serializable;
import java.time.LocalDate;

/**
* Trash Manager for a table.
*/
public interface TableTrashManager {
public interface TableTrashManager extends Serializable {
/**
* Table identifier.
* A TableTrashManager only handle files in this table's location.
Expand Down
95 changes: 0 additions & 95 deletions core/src/test/java/com/netease/arctic/io/ArcticFileIoDummy.java

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,20 @@ public void testDeleteAndRestore() {
Assert.assertFalse(getArcticTable().io().exists(fileLocationInTrash));
}

@Test
public void testMoveAndOverwrite() {
TableTrashManager trashManager = TableTrashManagers.build(getArcticTable());

String relativeFilePath = "base/test/test1.parquet";
String fileLocation = getArcticTable().location() + File.separator + relativeFilePath;
String path = createFile(getArcticTable().io(), fileLocation);

// First move: the file ends up in the table trash.
trashManager.moveFileToTrash(path);
Assert.assertTrue(trashManager.fileExistInTrash(path));

// Recreate the same file and move it again: the existing trash copy must be overwritten,
// not cause the move to fail.
createFile(getArcticTable().io(), fileLocation);
trashManager.moveFileToTrash(path);
Assert.assertTrue(trashManager.fileExistInTrash(path));
}

@Test
public void testDeleteDirectory() {
TableTrashManager tableTrashManager = TableTrashManagers.build(getArcticTable());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import com.netease.arctic.hive.HMSClientPool;
import com.netease.arctic.hive.HiveTableProperties;
import com.netease.arctic.hive.op.OverwriteHiveFiles;
import com.netease.arctic.io.ArcticHadoopFileIO;
import com.netease.arctic.op.OverwriteBaseFiles;
import com.netease.arctic.table.ArcticTable;
import com.netease.arctic.table.TableIdentifier;
Expand Down Expand Up @@ -219,11 +218,11 @@ public static void syncHiveDataToArctic(ArcticTable table, HMSClientPool hiveCli
}

private static List<DataFile> listHivePartitionFiles(ArcticTable arcticTable, Map<String, String> partitionValueMap,
String partitionLocation) {
String partitionLocation) {
return arcticTable.io().doAs(() -> TableMigrationUtil.listPartition(partitionValueMap, partitionLocation,
arcticTable.properties().getOrDefault(TableProperties.DEFAULT_FILE_FORMAT,
TableProperties.DEFAULT_FILE_FORMAT_DEFAULT),
arcticTable.spec(), ((ArcticHadoopFileIO)arcticTable.io()).getTableMetaStore().getConfiguration(),
arcticTable.spec(), arcticTable.io().getConf(),
MetricsConfig.fromProperties(arcticTable.properties()), NameMappingParser.fromJson(
arcticTable.properties().get(org.apache.iceberg.TableProperties.DEFAULT_NAME_MAPPING))));
}
Expand Down

0 comments on commit 0e4a2c6

Please sign in to comment.