Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@

import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
Expand Down Expand Up @@ -99,6 +101,17 @@ public long getTotalFileSize() {
return totalFileSize;
}

/**
* Get all delete files for the given file range.
* @param rangeDesc the file range descriptor
* @return list of delete file paths (formatted strings)
*/
protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
// Default implementation: return empty list
// Subclasses should override this method
return Collections.emptyList();
}

@Override
public String getNodeExplainString(String prefix, TExplainLevel detailLevel) {
StringBuilder output = new StringBuilder();
Expand Down Expand Up @@ -139,6 +152,21 @@ public int compare(TFileRangeDesc o1, TFileRangeDesc o2) {
return Long.compare(o1.getStartOffset(), o2.getStartOffset());
}
});

// A Data file may be divided into different splits, so a set is used to remove duplicates.
Set<String> dataFilesSet = new HashSet<>();
// A delete file might be used by multiple data files, so use set to remove duplicates.
Set<String> deleteFilesSet = new HashSet<>();
// You can estimate how many delete splits need to be read for a data split
// using deleteSplitNum / dataSplitNum(fileRangeDescs.size()) split.
long deleteSplitNum = 0;
for (TFileRangeDesc fileRangeDesc : fileRangeDescs) {
dataFilesSet.add(fileRangeDesc.getPath());
List<String> deletefiles = getDeleteFiles(fileRangeDesc);
deleteFilesSet.addAll(deletefiles);
deleteSplitNum += deletefiles.size();
}

// 3. if size <= 4, print all. if size > 4, print first 3 and last 1
int size = fileRangeDescs.size();
if (size <= 4) {
Expand All @@ -164,6 +192,10 @@ public int compare(TFileRangeDesc o1, TFileRangeDesc o2) {
.append(" length: ").append(file.getSize())
.append("\n");
}
output.append(prefix).append(" ").append("dataFileNum=").append(dataFilesSet.size())
.append(", deleteFileNum=").append(deleteFilesSet.size())
.append(", deleteSplitNum=").append(deleteSplitNum)
.append("\n");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,37 @@ protected void setScanParams(TFileRangeDesc rangeDesc, Split split) {
}
}

@Override
protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
List<String> deleteFiles = new ArrayList<>();
if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
return deleteFiles;
}
TTableFormatFileDesc tableFormatParams = rangeDesc.getTableFormatParams();
if (tableFormatParams == null || !tableFormatParams.isSetTransactionalHiveParams()) {
return deleteFiles;
}
TTransactionalHiveDesc hiveParams = tableFormatParams.getTransactionalHiveParams();
if (hiveParams == null || !hiveParams.isSetDeleteDeltas()) {
return deleteFiles;
}
List<TTransactionalHiveDeleteDeltaDesc> deleteDeltas = hiveParams.getDeleteDeltas();
if (deleteDeltas == null) {
return deleteFiles;
}
// Format: {directory_location}/{file_name}
for (TTransactionalHiveDeleteDeltaDesc deleteDelta : deleteDeltas) {
if (deleteDelta != null && deleteDelta.isSetDirectoryLocation()
&& deleteDelta.isSetFileNames() && deleteDelta.getFileNames() != null) {
String directoryLocation = deleteDelta.getDirectoryLocation();
for (String fileName : deleteDelta.getFileNames()) {
deleteFiles.add(directoryLocation + "/" + fileName);
}
}
}
return deleteFiles;
}

@Override
protected Map<String, String> getLocationProperties() {
return hmsTable.getBackendStorageProperties();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,46 @@ private void setIcebergParams(TFileRangeDesc rangeDesc, IcebergSplit icebergSpli
rangeDesc.setTableFormatParams(tableFormatFileDesc);
}

@Override
protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
List<String> deleteFiles = new ArrayList<>();
if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
return deleteFiles;
}
TTableFormatFileDesc tableFormatParams = rangeDesc.getTableFormatParams();
if (tableFormatParams == null || !tableFormatParams.isSetIcebergParams()) {
return deleteFiles;
}
TIcebergFileDesc icebergParams = tableFormatParams.getIcebergParams();
if (icebergParams == null || !icebergParams.isSetDeleteFiles()) {
return deleteFiles;
}
List<TIcebergDeleteFileDesc> icebergDeleteFiles = icebergParams.getDeleteFiles();
if (icebergDeleteFiles == null) {
return deleteFiles;
}
for (TIcebergDeleteFileDesc deleteFile : icebergDeleteFiles) {
if (deleteFile != null && deleteFile.isSetPath()) {
deleteFiles.add(deleteFile.getPath());
}
}
return deleteFiles;
}

private String getDeleteFileContentType(int content) {
// Iceberg file type: 0: data, 1: position delete, 2: equality delete, 3: deletion vector
switch (content) {
case 1:
return "position_delete";
case 2:
return "equality_delete";
case 3:
return "deletion_vector";
default:
return "unknown";
}
}

@Override
public List<Split> getSplits(int numBackends) throws UserException {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,28 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit)
rangeDesc.setTableFormatParams(tableFormatFileDesc);
}

@Override
protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
List<String> deleteFiles = new ArrayList<>();
if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
return deleteFiles;
}
TTableFormatFileDesc tableFormatParams = rangeDesc.getTableFormatParams();
if (tableFormatParams == null || !tableFormatParams.isSetPaimonParams()) {
return deleteFiles;
}
TPaimonFileDesc paimonParams = tableFormatParams.getPaimonParams();
if (paimonParams == null || !paimonParams.isSetDeletionFile()) {
return deleteFiles;
}
TPaimonDeletionFileDesc deletionFile = paimonParams.getDeletionFile();
if (deletionFile != null && deletionFile.isSetPath()) {
// Format: path [offset: offset, length: length]
deleteFiles.add(deletionFile.getPath());
}
return deleteFiles;
}

@Override
public List<Split> getSplits(int numBackends) throws UserException {
boolean forceJniScanner = sessionVariable.isForceJniScanner();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock
qt_count_5 """ select count(*) from orc_acid_major; """ //3
}

def test_explain_verbose = {
explain {
sql ("select count(*) from orc_full_acid")
verbose (true)
contains "deleteFileNum"
}
}

String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
Expand Down Expand Up @@ -177,6 +184,7 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock


test_acid_count()
test_explain_verbose()

q01_par_limit()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,16 @@ suite("test_iceberg_position_delete", "p0,external,doris,external_docker,externa
assertTrue(iceberg_position_gen_7.size() == 5632)

// sql """drop catalog ${catalog_name}"""

def test_explain_verbose = {
explain {
sql ("select name from iceberg_position_gen_data where id != 5;")
verbose (true)
contains "deleteFileNum"
}
}
test_explain_verbose()

}
/*

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,17 @@ suite("test_paimon_deletion_vector_oss", "p0,external,doris,external_docker,exte
qt_6 """select * from deletion_vector_parquet where id > 2 order by id;"""
}

def test_explain_verbose = {
explain {
sql ("select * from deletion_vector_orc;")
verbose (true)
contains "deleteFileNum"
}
}

test_cases("false")
test_cases("true")
test_explain_verbose()

} finally {
sql """set force_jni_scanner=false"""
Expand Down
Loading