diff --git a/ams/server/src/main/java/com/netease/arctic/server/table/TableConfiguration.java b/ams/server/src/main/java/com/netease/arctic/server/table/TableConfiguration.java index a81518aa99..122fa22f39 100644 --- a/ams/server/src/main/java/com/netease/arctic/server/table/TableConfiguration.java +++ b/ams/server/src/main/java/com/netease/arctic/server/table/TableConfiguration.java @@ -1,5 +1,6 @@ package com.netease.arctic.server.table; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.google.common.base.Objects; import com.netease.arctic.server.optimizing.OptimizingConfig; import com.netease.arctic.table.TableProperties; @@ -7,6 +8,7 @@ import java.util.Map; +@JsonIgnoreProperties(ignoreUnknown = true) public class TableConfiguration { private boolean expireSnapshotEnabled; private long snapshotTTLMinutes; diff --git a/ams/server/src/test/java/TestFindDuplicateRecords.java b/ams/server/src/test/java/TestFindDuplicateRecords.java new file mode 100644 index 0000000000..624d3d0187 --- /dev/null +++ b/ams/server/src/test/java/TestFindDuplicateRecords.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.netease.arctic.catalog.ArcticCatalog; +import com.netease.arctic.catalog.CatalogLoader; +import com.netease.arctic.io.DataTestHelpers; +import com.netease.arctic.table.ArcticTable; +import com.netease.arctic.table.TableIdentifier; +import org.apache.iceberg.ContentFile; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.Table; +import org.apache.iceberg.data.GenericRecord; +import org.apache.iceberg.data.Record; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class TestFindDuplicateRecords { + private static final Logger LOG = LoggerFactory.getLogger(TestFindDuplicateRecords.class); + private final String thriftUrl; + private final String CID_COLUMN = "id"; + private final String HID_COLUMN = "name"; + + private final Object CID_VALUE = 2003; + private final Object HID_VALUE = "hhh"; + + public static void main(String[] args) throws IOException { + TestFindDuplicateRecords scan = new TestFindDuplicateRecords("thrift://localhost:1260"); + scan.test(TableIdentifier.of("local_iceberg", "db", "user10")); + } + + public TestFindDuplicateRecords(String thriftUrl) { + this.thriftUrl = thriftUrl; + } + + public void test(TableIdentifier tableIdentifier) throws IOException { + ArcticTable arcticTable = loadTable(tableIdentifier); + Table table = arcticTable.asUnkeyedTable(); + List> relatedFiles = findRelatedFiles(table, table.currentSnapshot().snapshotId()); + + System.out.println("===== related data files ====="); + relatedFiles.forEach(f -> System.out.println(f.path())); + } + + public List> findRelatedFiles(Table table, long snapshotId) throws IOException { + List> files = new ArrayList<>(); + for (FileScanTask task : table.newScan().useSnapshot(snapshotId).planFiles()) { + List records = + DataTestHelpers.readDataFile(FileFormat.PARQUET, table.schema(), task.file().path().toString()); + for (Record record : records) { + if (equals((GenericRecord) record)) { + files.add(task.file()); + break; + } + } + } + return files; + } + + public static void planFiles(Table table, long snapshotId) { + String file1 = "xxx"; + String file2 = "yyy"; + for (FileScanTask task : table.newScan().useSnapshot(snapshotId).planFiles()) { + String path = task.file().path().toString(); + if (path.equals(file1) || path.equals(file2)) { + System.out.println("===== related data files ====="); + System.out.println(path); + for (DeleteFile delete : task.deletes()) { + System.out.println(delete.path()); + } + } + } + } + + private boolean equals(GenericRecord record) { + return record.getField(CID_COLUMN).equals(CID_VALUE) && record.getField(HID_COLUMN).equals(HID_VALUE); + } + + private ArcticTable loadTable(TableIdentifier tableIdentifier) { + String catalogUrl = thriftUrl + "/" + tableIdentifier.getCatalog(); + + // 1.scan files + ArcticCatalog load = CatalogLoader.load(catalogUrl); + return load.loadTable(tableIdentifier); + } +}