diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java index a7aa0f607ac504..1c1d6ac67204b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java @@ -54,8 +54,10 @@ import java.util.Collections; import java.util.Comparator; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; /** @@ -99,6 +101,17 @@ public long getTotalFileSize() { return totalFileSize; } + /** + * Get all delete files for the given file range. + * @param rangeDesc the file range descriptor + * @return list of delete file paths (formatted strings) + */ + protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) { + // Default implementation: return empty list + // Subclasses should override this method + return Collections.emptyList(); + } + @Override public String getNodeExplainString(String prefix, TExplainLevel detailLevel) { StringBuilder output = new StringBuilder(); @@ -139,6 +152,21 @@ public int compare(TFileRangeDesc o1, TFileRangeDesc o2) { return Long.compare(o1.getStartOffset(), o2.getStartOffset()); } }); + + // A Data file may be divided into different splits, so a set is used to remove duplicates. + Set<String> dataFilesSet = new HashSet<>(); + // A delete file might be used by multiple data files, so use set to remove duplicates. + Set<String> deleteFilesSet = new HashSet<>(); + // You can estimate how many delete splits need to be read for a data split + // using deleteSplitNum / dataSplitNum(fileRangeDescs.size()) split. + long deleteSplitNum = 0; + for (TFileRangeDesc fileRangeDesc : fileRangeDescs) { + dataFilesSet.add(fileRangeDesc.getPath()); + List<String> deletefiles = getDeleteFiles(fileRangeDesc); + deleteFilesSet.addAll(deletefiles); + deleteSplitNum += deletefiles.size(); + } + // 3. if size <= 4, print all. 
if size > 4, print first 3 and last 1 int size = fileRangeDescs.size(); if (size <= 4) { @@ -164,6 +192,10 @@ public int compare(TFileRangeDesc o1, TFileRangeDesc o2) { .append(" length: ").append(file.getSize()) .append("\n"); } + output.append(prefix).append(" ").append("dataFileNum=").append(dataFilesSet.size()) + .append(", deleteFileNum=").append(deleteFilesSet.size()) + .append(", deleteSplitNum=").append(deleteSplitNum) + .append("\n"); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java index 5bcf2f5546a51f..3ea25a845ec8d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java @@ -486,6 +486,37 @@ protected void setScanParams(TFileRangeDesc rangeDesc, Split split) { } } + @Override + protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) { + List<String> deleteFiles = new ArrayList<>(); + if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) { + return deleteFiles; + } + TTableFormatFileDesc tableFormatParams = rangeDesc.getTableFormatParams(); + if (tableFormatParams == null || !tableFormatParams.isSetTransactionalHiveParams()) { + return deleteFiles; + } + TTransactionalHiveDesc hiveParams = tableFormatParams.getTransactionalHiveParams(); + if (hiveParams == null || !hiveParams.isSetDeleteDeltas()) { + return deleteFiles; + } + List<TTransactionalHiveDeleteDeltaDesc> deleteDeltas = hiveParams.getDeleteDeltas(); + if (deleteDeltas == null) { + return deleteFiles; + } + // Format: {directory_location}/{file_name} + for (TTransactionalHiveDeleteDeltaDesc deleteDelta : deleteDeltas) { + if (deleteDelta != null && deleteDelta.isSetDirectoryLocation() + && deleteDelta.isSetFileNames() && deleteDelta.getFileNames() != null) { + String directoryLocation = deleteDelta.getDirectoryLocation(); + for (String fileName : 
deleteDelta.getFileNames()) { + deleteFiles.add(directoryLocation + "/" + fileName); + } + } + } + return deleteFiles; + } + @Override protected Map<String, String> getLocationProperties() { return hmsTable.getBackendStorageProperties(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java index f1dc6cd4eb9fd2..7980259f55af4a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java @@ -277,6 +277,46 @@ private void setIcebergParams(TFileRangeDesc rangeDesc, IcebergSplit icebergSpli rangeDesc.setTableFormatParams(tableFormatFileDesc); } + @Override + protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) { + List<String> deleteFiles = new ArrayList<>(); + if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) { + return deleteFiles; + } + TTableFormatFileDesc tableFormatParams = rangeDesc.getTableFormatParams(); + if (tableFormatParams == null || !tableFormatParams.isSetIcebergParams()) { + return deleteFiles; + } + TIcebergFileDesc icebergParams = tableFormatParams.getIcebergParams(); + if (icebergParams == null || !icebergParams.isSetDeleteFiles()) { + return deleteFiles; + } + List<TIcebergDeleteFileDesc> icebergDeleteFiles = icebergParams.getDeleteFiles(); + if (icebergDeleteFiles == null) { + return deleteFiles; + } + for (TIcebergDeleteFileDesc deleteFile : icebergDeleteFiles) { + if (deleteFile != null && deleteFile.isSetPath()) { + deleteFiles.add(deleteFile.getPath()); + } + } + return deleteFiles; + } + + private String getDeleteFileContentType(int content) { + // Iceberg file type: 0: data, 1: position delete, 2: equality delete, 3: deletion vector + switch (content) { + case 1: + return "position_delete"; + case 2: + return "equality_delete"; + case 3: + return "deletion_vector"; + default: + return "unknown"; + } + } 
+ @Override public List<Split> getSplits(int numBackends) throws UserException { try { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java index eecddd31b446c5..8a97bc6f9203d0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java @@ -274,6 +274,28 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit) rangeDesc.setTableFormatParams(tableFormatFileDesc); } + @Override + protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) { + List<String> deleteFiles = new ArrayList<>(); + if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) { + return deleteFiles; + } + TTableFormatFileDesc tableFormatParams = rangeDesc.getTableFormatParams(); + if (tableFormatParams == null || !tableFormatParams.isSetPaimonParams()) { + return deleteFiles; + } + TPaimonFileDesc paimonParams = tableFormatParams.getPaimonParams(); + if (paimonParams == null || !paimonParams.isSetDeletionFile()) { + return deleteFiles; + } + TPaimonDeletionFileDesc deletionFile = paimonParams.getDeletionFile(); + if (deletionFile != null && deletionFile.isSetPath()) { + // Format: path [offset: offset, length: length] + deleteFiles.add(deletionFile.getPath()); + } + return deleteFiles; + } + @Override public List<Split> getSplits(int numBackends) throws UserException { boolean forceJniScanner = sessionVariable.isForceJniScanner(); diff --git a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy index 568bb632decbe9..adc97540665c37 100644 --- a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy +++ b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy @@ -142,6 +142,13 @@ 
suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock qt_count_5 """ select count(*) from orc_acid_major; """ //3 } + def test_explain_verbose = { + explain { + sql ("select count(*) from orc_full_acid") + verbose (true) + contains "deleteFileNum" + } + } String enabled = context.config.otherConfigs.get("enableHiveTest") if (enabled == null || !enabled.equalsIgnoreCase("true")) { @@ -177,6 +184,7 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock test_acid_count() + test_explain_verbose() q01_par_limit() diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy index d793cef3568ddb..91bc48a6758025 100644 --- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy +++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy @@ -165,6 +165,16 @@ suite("test_iceberg_position_delete", "p0,external,doris,external_docker,externa assertTrue(iceberg_position_gen_7.size() == 5632) // sql """drop catalog ${catalog_name}""" + + def test_explain_verbose = { + explain { + sql ("select name from iceberg_position_gen_data where id != 5;") + verbose (true) + contains "deleteFileNum" + } + } + test_explain_verbose() + } /* diff --git a/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy b/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy index 71a4d971169ea8..76574aae528474 100644 --- a/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy +++ b/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy @@ -53,8 +53,17 @@ suite("test_paimon_deletion_vector_oss", "p0,external,doris,external_docker,exte qt_6 """select * from deletion_vector_parquet where id > 2 order by id;""" } + def test_explain_verbose = { 
+ explain { + sql ("select * from deletion_vector_orc;") + verbose (true) + contains "deleteFileNum" + } + } + test_cases("false") test_cases("true") + test_explain_verbose() } finally { sql """set force_jni_scanner=false"""