From 423ea89dceb86fa6f45896b61f92af95e442f4ea Mon Sep 17 00:00:00 2001 From: Socrates Date: Thu, 4 Sep 2025 14:02:34 +0800 Subject: [PATCH] [fix](hudi) Fix querying hudi jni table where only partition columns (and no data fields) are required (#55466) ### What problem does this PR solve? This pull request introduces support for "virtual tables" in the Hudi JNI scanner, allowing the system to handle cases where only partition columns (and no data fields) are required from a query. The implementation ensures correct handling of empty field lists throughout the scanner and vector table logic, and adds regression tests for this scenario. --- .../doris/hudi/HadoopHudiJniScanner.java | 27 +++++++++++++------ .../doris/common/jni/vec/VectorTable.java | 16 +++++++++++ .../hudi/test_hudi_catalog.out | 4 +++ .../hudi/test_hudi_catalog.groovy | 10 +++++++ 4 files changed, 49 insertions(+), 8 deletions(-) create mode 100644 regression-test/data/external_table_p2/hudi/test_hudi_catalog.out diff --git a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java index be7e31a115e9aa..5fa4c72951b634 100644 --- a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java +++ b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java @@ -111,8 +111,13 @@ public HadoopHudiJniScanner(int fetchSize, Map params) { this.hudiColumnNames = params.get("hudi_column_names"); this.hudiColumnTypes = params.get("hudi_column_types").split("#"); - this.requiredFields = params.get("required_fields").split(","); - + // Required fields will be empty when only partition fields are selected + // This is because partition fields are not stored in the data files + if (!params.get("required_fields").equals("")) { + this.requiredFields = params.get("required_fields").split(","); + } else { + this.requiredFields = new String[0]; + } this.fieldInspectors = new ObjectInspector[requiredFields.length]; this.structFields = new StructField[requiredFields.length]; this.fsOptionsProps = Maps.newHashMap(); @@ -166,14 +171,20 @@ public int getNext() throws IOException { if (!reader.next(key, value)) { break; } - Object rowData = deserializer.deserialize(value); - for (int i = 0; i < fields.length; i++) { - Object fieldData = rowInspector.getStructFieldData(rowData, structFields[i]); - columnValue.setRow(fieldData); - columnValue.setField(types[i], fieldInspectors[i]); - appendData(i, columnValue); + if (fields.length > 0) { + Object rowData = deserializer.deserialize(value); + for (int i = 0; i < fields.length; i++) { + Object fieldData = rowInspector.getStructFieldData(rowData, structFields[i]); + columnValue.setRow(fieldData); + columnValue.setField(types[i], fieldInspectors[i]); + appendData(i, columnValue); + } } } + // vectorTable is virtual + if (fields.length == 0) { + vectorTable.appendVirtualData(numRows); + } return numRows; }); } catch (Exception e) { diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java index a8849d1fa2044d..08fb2bc8b009c5 100644 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java +++ b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java @@ -34,6 +34,10 @@ public class VectorTable { private final VectorColumn meta; private final boolean onlyReadable; private final int numRowsOfReadable; + // this will be true if fields is empty + private final boolean isVirtual; + // count rows when isVirtual is true + private int numVirtualRows = 0; // Create writable vector table private VectorTable(ColumnType[] types, String[] fields, int capacity) { @@ -48,6 +52,7 @@ private VectorTable(ColumnType[] types, String[] fields, int capacity) { this.meta = VectorColumn.createWritableColumn(new ColumnType("#meta", Type.BIGINT), metaSize); this.onlyReadable = false; numRowsOfReadable = -1; + this.isVirtual = columns.length == 0; } // Create readable vector table @@ -69,6 +74,7 @@ private VectorTable(ColumnType[] types, String[] fields, long metaAddress) { this.meta = VectorColumn.createReadableColumn(metaAddress, metaSize, new ColumnType("#meta", Type.BIGINT)); this.onlyReadable = true; numRowsOfReadable = numRows; + this.isVirtual = columns.length == 0; } public static VectorTable createWritableTable(ColumnType[] types, String[] fields, int capacity) { @@ -111,16 +117,24 @@ public static VectorTable createReadableTable(Map params) { public void appendNativeData(int fieldId, NativeColumnValue o) { assert (!onlyReadable); + assert (!isVirtual); columns[fieldId].appendNativeValue(o); } public void appendData(int fieldId, ColumnValue o) { assert (!onlyReadable); + assert (!isVirtual); columns[fieldId].appendValue(o); } + public void appendVirtualData(int numRows) { + assert (isVirtual); + numVirtualRows += numRows; + } + public void appendData(int fieldId, Object[] batch, ColumnValueConverter converter, boolean isNullable) { assert (!onlyReadable); + assert (!isVirtual); if (converter != null) { columns[fieldId].appendObjectColumn(converter.convert(batch), isNullable); } else { @@ -190,6 +204,8 @@ public void releaseColumn(int fieldId) { public int getNumRows() { if (onlyReadable) { return numRowsOfReadable; + } else if (isVirtual) { + return numVirtualRows; } else { return columns[0].numRows(); } diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_catalog.out b/regression-test/data/external_table_p2/hudi/test_hudi_catalog.out new file mode 100644 index 00000000000000..3f97939043f321 --- /dev/null +++ b/regression-test/data/external_table_p2/hudi/test_hudi_catalog.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !test_only_partition_columns -- +2023-11-15 + diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy index ad0f8d25d63665..858c064c48a8f5 100644 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy @@ -36,5 +36,15 @@ suite("test_hudi_catalog", "p2,external,hudi,external_remote,external_remote_hud sql """ set enable_fallback_to_original_planner=false """ def tables = sql """ show tables; """ assertTrue(tables.size() > 0) + try { + sql """ set force_jni_scanner = true; """ + qt_test_only_partition_columns """ + select signup_date from user_activity_log_mor_partition order by signup_date limit 1; + """ + } catch (Exception e) { + logger.error("Error occurred while executing query", e) + } finally { + sql """ set force_jni_scanner = false; """ + } sql """drop catalog if exists ${catalog_name};""" }