From e15819d244f8c15f768f76de51310371816a514a Mon Sep 17 00:00:00 2001 From: Socrates Date: Fri, 29 Aug 2025 13:53:01 +0800 Subject: [PATCH 1/3] fix --- .../java/org/apache/doris/hudi/HadoopHudiJniScanner.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java index be7e31a115e9aa..911cf3f76ed23b 100644 --- a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java +++ b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java @@ -111,8 +111,11 @@ public HadoopHudiJniScanner(int fetchSize, Map params) { this.hudiColumnNames = params.get("hudi_column_names"); this.hudiColumnTypes = params.get("hudi_column_types").split("#"); - this.requiredFields = params.get("required_fields").split(","); - + if (!params.get("required_fields").equals("")) { + this.requiredFields = params.get("required_fields").split(","); + } else { + this.requiredFields = new String[0]; + } this.fieldInspectors = new ObjectInspector[requiredFields.length]; this.structFields = new StructField[requiredFields.length]; this.fsOptionsProps = Maps.newHashMap(); From f64be866876b1177dee5f635a3a59ed68bd5fc30 Mon Sep 17 00:00:00 2001 From: Socrates Date: Fri, 29 Aug 2025 15:06:04 +0800 Subject: [PATCH 2/3] add cases --- .../doris/hudi/HadoopHudiJniScanner.java | 18 ++++++++++++------ .../doris/common/jni/vec/VectorTable.java | 16 ++++++++++++++++ .../hudi/test_hudi_catalog.out | 4 ++++ .../hudi/test_hudi_catalog.groovy | 10 ++++++++++ 4 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 regression-test/data/external_table_p2/hudi/test_hudi_catalog.out diff --git a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java index 911cf3f76ed23b..599356cf6780af 100644 --- a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java +++ b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java @@ -169,14 +169,20 @@ public int getNext() throws IOException { if (!reader.next(key, value)) { break; } - Object rowData = deserializer.deserialize(value); - for (int i = 0; i < fields.length; i++) { - Object fieldData = rowInspector.getStructFieldData(rowData, structFields[i]); - columnValue.setRow(fieldData); - columnValue.setField(types[i], fieldInspectors[i]); - appendData(i, columnValue); + if (fields.length > 0) { + Object rowData = deserializer.deserialize(value); + for (int i = 0; i < fields.length; i++) { + Object fieldData = rowInspector.getStructFieldData(rowData, structFields[i]); + columnValue.setRow(fieldData); + columnValue.setField(types[i], fieldInspectors[i]); + appendData(i, columnValue); + } } } + // vectorTable is virtual + if (fields.length == 0) { + vectorTable.appendVirtualData(numRows); + } return numRows; }); } catch (Exception e) { diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java index c94c95b3e8be31..fa8fa1908f348d 100644 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java +++ b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java @@ -37,6 +37,10 @@ public class VectorTable { private final VectorColumn meta; private final boolean onlyReadable; private final int numRowsOfReadable; + // this will be true if fields is empty + private final boolean isVirtual; + // count rows when isVirtual is true + private int numVirtualRows = 0; // Create writable vector table private VectorTable(ColumnType[] types, String[] fields, int capacity) { @@ -51,6 +55,7 @@ private VectorTable(ColumnType[] types, String[] fields, int capacity) { this.meta = VectorColumn.createWritableColumn(new ColumnType("#meta", Type.BIGINT), metaSize); this.onlyReadable = false; numRowsOfReadable = -1; + this.isVirtual = columns.length == 0; } // Create readable vector table @@ -72,6 +77,7 @@ private VectorTable(ColumnType[] types, String[] fields, long metaAddress) { this.meta = VectorColumn.createReadableColumn(metaAddress, metaSize, new ColumnType("#meta", Type.BIGINT)); this.onlyReadable = true; numRowsOfReadable = numRows; + this.isVirtual = columns.length == 0; } public static VectorTable createWritableTable(ColumnType[] types, String[] fields, int capacity) { @@ -114,16 +120,24 @@ public static VectorTable createReadableTable(Map params) { public void appendNativeData(int fieldId, NativeColumnValue o) { assert (!onlyReadable); + assert (!isVirtual); columns[fieldId].appendNativeValue(o); } public void appendData(int fieldId, ColumnValue o) { assert (!onlyReadable); + assert (!isVirtual); columns[fieldId].appendValue(o); } + public void appendVirtualData(int numRows) { + assert (isVirtual); + numVirtualRows += numRows; + } + public void appendData(int fieldId, Object[] batch, ColumnValueConverter converter, boolean isNullable) { assert (!onlyReadable); + assert (!isVirtual); if (converter != null) { columns[fieldId].appendObjectColumn(converter.convert(batch), isNullable); } else { @@ -193,6 +207,8 @@ public void releaseColumn(int fieldId) { public int getNumRows() { if (onlyReadable) { return numRowsOfReadable; + } else if (isVirtual) { + return numVirtualRows; } else { return columns[0].numRows(); } diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_catalog.out b/regression-test/data/external_table_p2/hudi/test_hudi_catalog.out new file mode 100644 index 00000000000000..3f97939043f321 --- /dev/null +++ b/regression-test/data/external_table_p2/hudi/test_hudi_catalog.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !test_only_partition_columns -- +2023-11-15 + diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy index ad0f8d25d63665..858c064c48a8f5 100644 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy @@ -36,5 +36,15 @@ suite("test_hudi_catalog", "p2,external,hudi,external_remote,external_remote_hud sql """ set enable_fallback_to_original_planner=false """ def tables = sql """ show tables; """ assertTrue(tables.size() > 0) + try { + sql """ set force_jni_scanner = true; """ + qt_test_only_partition_columns """ + select signup_date from user_activity_log_mor_partition order by signup_date limit 1; + """ + } catch (Exception e) { + logger.error("Error occurred while executing query", e) + } finally { + sql """ set force_jni_scanner = false; """ + } sql """drop catalog if exists ${catalog_name};""" } From 15f001b0c610bcb467980b09086a2d9306241d2a Mon Sep 17 00:00:00 2001 From: Socrates Date: Tue, 2 Sep 2025 10:27:58 +0800 Subject: [PATCH 3/3] add comment --- .../main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java index 599356cf6780af..5fa4c72951b634 100644 --- a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java +++ b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java @@ -111,6 +111,8 @@ public HadoopHudiJniScanner(int fetchSize, Map params) { this.hudiColumnNames = params.get("hudi_column_names"); this.hudiColumnTypes = params.get("hudi_column_types").split("#"); + // Required fields will be empty when only partition fields are selected + // This is because partition fields are not stored in the data files if (!params.get("required_fields").equals("")) { this.requiredFields = params.get("required_fields").split(","); } else {