From 3f5ab02f7c3e972de0b58e8f703d91b74b716ec9 Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Thu, 25 Jan 2024 11:02:52 +0800 Subject: [PATCH 1/3] fix date --- .../external/iceberg/IcebergScanNode.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanNode.java index ae3efccadc5ff1..a335ccfa021477 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanNode.java @@ -69,6 +69,9 @@ import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.TableScanUtil; import java.io.IOException; @@ -220,11 +223,23 @@ private List doGetSplits() throws UserException { List partitionValues = new ArrayList<>(); if (isPartitionedTable) { StructLike structLike = splitTask.file().partition(); + List fields = splitTask.spec().fields(); + Types.StructType structType = icebergTable.schema().asStruct(); // set partitionValue for this IcebergSplit for (int i = 0; i < structLike.size(); i++) { - String partition = String.valueOf(structLike.get(i, Object.class)); - partitionValues.add(partition); + Object obj = structLike.get(i, Object.class); + String value = String.valueOf(obj); + PartitionField partitionField = fields.get(i); + if (partitionField.transform().isIdentity()) { + Type type = structType.fieldType(partitionField.name()); + if (type != null && type.typeId().equals(Type.TypeID.DATE)) { + // iceberg use integer to store date, + // we need transform it to string + value = DateTimeUtil.daysToIsoDate((Integer) obj); + } + } + 
partitionValues.add(value); } // Counts the number of partitions read From 6173b0a8a3c95e0970070f65dadc178681ae0a2b Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Thu, 25 Jan 2024 11:02:52 +0800 Subject: [PATCH 2/3] fix date --- .../external/iceberg/IcebergScanNode.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanNode.java index ae3efccadc5ff1..a335ccfa021477 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanNode.java @@ -69,6 +69,9 @@ import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.TableScanUtil; import java.io.IOException; @@ -220,11 +223,23 @@ private List doGetSplits() throws UserException { List partitionValues = new ArrayList<>(); if (isPartitionedTable) { StructLike structLike = splitTask.file().partition(); + List fields = splitTask.spec().fields(); + Types.StructType structType = icebergTable.schema().asStruct(); // set partitionValue for this IcebergSplit for (int i = 0; i < structLike.size(); i++) { - String partition = String.valueOf(structLike.get(i, Object.class)); - partitionValues.add(partition); + Object obj = structLike.get(i, Object.class); + String value = String.valueOf(obj); + PartitionField partitionField = fields.get(i); + if (partitionField.transform().isIdentity()) { + Type type = structType.fieldType(partitionField.name()); + if (type != null && type.typeId().equals(Type.TypeID.DATE)) { + // iceberg use integer to store date, + // we need transform it to 
string + value = DateTimeUtil.daysToIsoDate((Integer) obj); + } + } + partitionValues.add(value); } // Counts the number of partitions read From 197ee0e8fac02d48ef2bda25b70c28224b63a3b0 Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Sun, 28 Jan 2024 22:23:04 +0800 Subject: [PATCH 3/3] add case --- ...est_external_catalog_iceberg_partition.out | 33 +++++++++++++++++++ ..._external_catalog_iceberg_partition.groovy | 21 ++++++++++++ 2 files changed, 54 insertions(+) diff --git a/regression-test/data/external_table_p2/iceberg/test_external_catalog_iceberg_partition.out b/regression-test/data/external_table_p2/iceberg/test_external_catalog_iceberg_partition.out index c2582691ccb29a..a0fd39be5f2945 100644 --- a/regression-test/data/external_table_p2/iceberg/test_external_catalog_iceberg_partition.out +++ b/regression-test/data/external_table_p2/iceberg/test_external_catalog_iceberg_partition.out @@ -71,3 +71,36 @@ -- !q09 -- 100 0.3 test3 2023-01-03T00:00 +-- !q01 -- +1 true 2020-01-02 2020-01-02T01:02:03.123456 + +-- !q02 -- +1 true 2020-01-02 2020-01-02T01:02:03.123456 + +-- !q03 -- +1 true 2020-01-02 2020-01-02T01:02:03.123456 + +-- !q04 -- +1 true 2020-01-02 2020-01-02T01:02:03.123456 + +-- !q05 -- +1 true 2020-01-02 2020-01-02T01:02:03.123456 + +-- !q06 -- +1 2020-01-02 + +-- !q07 -- +1 2020-01-02 + +-- !q08 -- +1 2020-01-02 + +-- !q09 -- +1 2020-01-02 + +-- !q10 -- +1 2020-01-02 + +-- !q11 -- +1 2020-01-02 + diff --git a/regression-test/suites/external_table_p2/iceberg/test_external_catalog_iceberg_partition.groovy b/regression-test/suites/external_table_p2/iceberg/test_external_catalog_iceberg_partition.groovy index 9429887e8c020d..dfdd923bcc4b01 100644 --- a/regression-test/suites/external_table_p2/iceberg/test_external_catalog_iceberg_partition.groovy +++ b/regression-test/suites/external_table_p2/iceberg/test_external_catalog_iceberg_partition.groovy @@ -55,9 +55,30 @@ suite("test_external_catalog_iceberg_partition", "p2,external,iceberg,external_r qt_q08 """ 
select * from iceberg_catalog.orc_partitioned_truncate_and_fields where t_int is null order by t_float """ qt_q09 """ select * from iceberg_catalog.orc_partitioned_truncate_and_fields where t_int is not null order by t_float """ } + + // test date values in partitions and predicates + def q01_date = { + + qt_q01 """ select * from user_case_date_without_partition where d = '2020-01-02' """ + qt_q02 """ select * from user_case_date_without_partition where d > '2020-01-01' """ + qt_q03 """ select * from user_case_date_without_partition where d < '2020-01-03' """ + qt_q04 """ select * from user_case_date_without_partition where ts < '2020-01-03' """ + qt_q05 """ select * from user_case_date_without_partition where ts > '2020-01-01' """ + + qt_q06 """ select * from user_case_date_with_date_partition where d = '2020-01-02' """ + qt_q07 """ select * from user_case_date_with_date_partition where d < '2020-01-03' """ + qt_q08 """ select * from user_case_date_with_date_partition where d > '2020-01-01' """ + + qt_q09 """ select * from user_case_date_with_days_date_partition where d = '2020-01-02' """ + qt_q10 """ select * from user_case_date_with_days_date_partition where d < '2020-01-03' """ + qt_q11 """ select * from user_case_date_with_days_date_partition where d > '2020-01-01' """ + + } + sql """ use `iceberg_catalog`; """ q01_parquet() q01_orc() + q01_date() } }