diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 8e51fac6df1628..b878770d25d87e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -37,7 +37,6 @@ import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.SchemaCacheValue; import org.apache.doris.datasource.TablePartitionValues; -import org.apache.doris.datasource.hudi.HudiMvccSnapshot; import org.apache.doris.datasource.hudi.HudiSchemaCacheKey; import org.apache.doris.datasource.hudi.HudiSchemaCacheValue; import org.apache.doris.datasource.hudi.HudiUtils; @@ -1143,7 +1142,7 @@ public void beforeMTMVRefresh(MTMV mtmv) throws DdlException { @Override public MvccSnapshot loadSnapshot(Optional tableSnapshot, Optional scanParams) { if (getDlaType() == DLAType.HUDI) { - return new HudiMvccSnapshot(HudiUtils.getPartitionValues(tableSnapshot, this)); + return HudiUtils.getHudiMvccSnapshot(tableSnapshot, this); } else if (getDlaType() == DLAType.ICEBERG) { return new IcebergMvccSnapshot( IcebergUtils.getIcebergSnapshotCacheValue( diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HudiDlaTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HudiDlaTable.java index fe1cbf5c0cecb4..9a2a99093b3f1a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HudiDlaTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HudiDlaTable.java @@ -102,11 +102,6 @@ public boolean isPartitionColumnAllowNull() { return true; } - public HMSSchemaCacheValue getHudiSchemaCacheValue(Optional snapshot) { - TablePartitionValues snapshotCacheValue = getOrFetchHudiSnapshotCacheValue(snapshot); - return getHudiSchemaCacheValue(snapshotCacheValue.getLastUpdateTimestamp()); - } - private TablePartitionValues getOrFetchHudiSnapshotCacheValue(Optional snapshot) { if (snapshot.isPresent()) { return ((HudiMvccSnapshot) snapshot.get()).getTablePartitionValues(); @@ -115,6 +110,16 @@ private TablePartitionValues getOrFetchHudiSnapshotCacheValue(Optional snapshot) { + long timestamp = 0L; + if (snapshot.isPresent()) { + timestamp = ((HudiMvccSnapshot) snapshot.get()).getTimestamp(); + } else { + timestamp = HudiUtils.getLastTimeStamp(hmsTable); + } + return getHudiSchemaCacheValue(timestamp); + } + private HMSSchemaCacheValue getHudiSchemaCacheValue(long timestamp) { ExternalSchemaCache cache = Env.getCurrentEnv().getExtMetaCacheMgr().getSchemaCache(hmsTable.getCatalog()); Optional schemaCacheValue = cache.getSchemaValue( diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiMvccSnapshot.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiMvccSnapshot.java index 0f01291e54c374..8821d113a7f5ea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiMvccSnapshot.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiMvccSnapshot.java @@ -27,6 +27,7 @@ */ public class HudiMvccSnapshot implements MvccSnapshot { private final TablePartitionValues tablePartitionValues; + private final long timestamp; /** * Creates a new HudiMvccSnapshot with the specified partition values. @@ -34,13 +35,18 @@ public class HudiMvccSnapshot implements MvccSnapshot { * @param tablePartitionValues The partition values for the snapshot * @throws IllegalArgumentException if tablePartitionValues is null */ - public HudiMvccSnapshot(TablePartitionValues tablePartitionValues) { + public HudiMvccSnapshot(TablePartitionValues tablePartitionValues, Long timeStamp) { if (tablePartitionValues == null) { throw new IllegalArgumentException("TablePartitionValues cannot be null"); } + this.timestamp = timeStamp; this.tablePartitionValues = tablePartitionValues; } + public long getTimestamp() { + return timestamp; + } + /** * Gets the table partition values associated with this snapshot. * diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java index d34ea7fd0a23d4..1455c23b44c833 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java @@ -255,6 +255,29 @@ private static Type handleUnionType(Schema avroSchema) { return Type.UNSUPPORTED; } + public static HudiMvccSnapshot getHudiMvccSnapshot(Optional tableSnapshot, + HMSExternalTable hmsTable) { + long timestamp = 0L; + if (tableSnapshot.isPresent()) { + String queryInstant = tableSnapshot.get().getValue().replaceAll("[-: ]", ""); + timestamp = Long.parseLong(queryInstant); + } else { + timestamp = getLastTimeStamp(hmsTable); + } + + return new HudiMvccSnapshot(HudiUtils.getPartitionValues(tableSnapshot, hmsTable), timestamp); + } + + public static long getLastTimeStamp(HMSExternalTable hmsTable) { + HoodieTableMetaClient hudiClient = hmsTable.getHudiClient(); + HoodieTimeline timeline = hudiClient.getCommitsAndCompactionTimeline().filterCompletedInstants(); + Option snapshotInstant = timeline.lastInstant(); + if (!snapshotInstant.isPresent()) { + return 0L; + } + return Long.parseLong(snapshotInstant.get().getTimestamp()); + } + public static TablePartitionValues getPartitionValues(Optional tableSnapshot, HMSExternalTable hmsTable) { TablePartitionValues partitionValues = new TablePartitionValues(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java index ef921cbfa47a3a..f302458f8cae37 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java @@ -143,10 +143,6 @@ public TablePartitionValues getPartitionValues(HMSExternalTable table, HoodieTab partitionValues.writeLock().lock(); try { - long lastUpdateTimestamp = partitionValues.getLastUpdateTimestamp(); - if (lastTimestamp <= lastUpdateTimestamp) { - return partitionValues; - } HMSExternalCatalog catalog = (HMSExternalCatalog) table.getCatalog(); List partitionNames; if (useHiveSyncPartition) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java index ba8421f9a160ab..0d537dfea17170 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java @@ -56,11 +56,13 @@ import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.util.Option; import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter; import org.apache.hudi.storage.StoragePath; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -277,8 +279,20 @@ private void setHudiParams(TFileRangeDesc rangeDesc, HudiSplit hudiSplit) { .getCommitInstantInternalSchema(hudiClient, commitInstantTime); params.history_schema_info.computeIfAbsent( internalSchema.schemaId(), - k -> HudiUtils.getSchemaInfo(internalSchema)); - fileDesc.setSchemaId(internalSchema.schemaId()); //for schema change. (native reader) + k -> HudiUtils.getSchemaInfo(internalSchema)); //for schema change. (native reader) + fileDesc.setSchemaId(internalSchema.schemaId()); + } else { + try { + TableSchemaResolver schemaUtil = new TableSchemaResolver(hudiClient); + InternalSchema internalSchema = + AvroInternalSchemaConverter.convert(schemaUtil.getTableAvroSchema(true)); + params.history_schema_info.computeIfAbsent( + internalSchema.schemaId(), + k -> HudiUtils.getSchemaInfo(internalSchema)); // Handle column name case for BE. + fileDesc.setSchemaId(internalSchema.schemaId()); + } catch (Exception e) { + throw new RuntimeException("Cannot get hudi table schema.", e); + } } } tableFormatFileDesc.setHudiParams(fileDesc); diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_schema_change.out b/regression-test/data/external_table_p2/hudi/test_hudi_schema_change.out index 13c9f5535646f9..50ae6ba756a3ee 100644 --- a/regression-test/data/external_table_p2/hudi/test_hudi_schema_change.out +++ b/regression-test/data/external_table_p2/hudi/test_hudi_schema_change.out @@ -214,22 +214,22 @@ 10 101.1 Judy Austin \N -- !orc_time_travel -- -20250314162744620 20250314162744620_0_0 1 84ab609d-947a-4a24-af9a-6360348cf977-0_0-80-105_20250314162744620.parquet 1 \N Alice \N \N -20250314162747350 20250314162747350_0_0 2 07b6bd0b-0f2c-4500-a8c8-75d3cd90e85e-0_0-88-112_20250314162747350.parquet 2 \N Bob \N \N -20250314162759470 20250314162759470_0_0 3 58382f07-0dca-431d-ad4d-d5d94140d60f-0_0-96-119_20250314162759470.parquet 3 \N Charlie New York \N -20250314162804702 20250314162804702_0_0 4 05d28f5c-acc5-4530-8163-c82bdf96b720-0_0-104-126_20250314162804702.parquet 4 \N David Los Angeles \N -20250314162809486 20250314162809486_0_0 5 9164c294-2606-4537-bb84-e7ba4dbb98e5-0_0-112-133_20250314162809486.parquet 5 \N Eve Chicago \N -20250314162813019 20250314162813019_0_0 6 43b432a3-3581-439b-83b6-6c171bd6492a-0_0-120-140_20250314162813019.parquet 6 85.5 Frank San Francisco \N -20250314162814849 20250314162814849_0_0 7 28ad4dfc-07ae-4108-926e-7ba35b1ac5ce-0_0-130-149_20250314162814849.parquet 7 90.0 Grace Seattle \N -20250314162817433 20250314162817433_0_0 8 a07d9dfb-791a-4cdc-bc7c-5f0d0d0d6a77-0_0-142-160_20250314162817433.parquet 8 95.5 Heidi Portland \N +20250314162744620 20250314162744620_0_0 1 84ab609d-947a-4a24-af9a-6360348cf977-0_0-80-105_20250314162744620.parquet 1 \N \N Alice +20250314162747350 20250314162747350_0_0 2 07b6bd0b-0f2c-4500-a8c8-75d3cd90e85e-0_0-88-112_20250314162747350.parquet 2 \N \N Bob +20250314162759470 20250314162759470_0_0 3 58382f07-0dca-431d-ad4d-d5d94140d60f-0_0-96-119_20250314162759470.parquet 3 New York \N Charlie +20250314162804702 20250314162804702_0_0 4 05d28f5c-acc5-4530-8163-c82bdf96b720-0_0-104-126_20250314162804702.parquet 4 Los Angeles \N David +20250314162809486 20250314162809486_0_0 5 9164c294-2606-4537-bb84-e7ba4dbb98e5-0_0-112-133_20250314162809486.parquet 5 Chicago \N Eve +20250314162813019 20250314162813019_0_0 6 43b432a3-3581-439b-83b6-6c171bd6492a-0_0-120-140_20250314162813019.parquet 6 San Francisco 85.5 Frank +20250314162814849 20250314162814849_0_0 7 28ad4dfc-07ae-4108-926e-7ba35b1ac5ce-0_0-130-149_20250314162814849.parquet 7 Seattle 90.0 Grace +20250314162817433 20250314162817433_0_0 8 a07d9dfb-791a-4cdc-bc7c-5f0d0d0d6a77-0_0-142-160_20250314162817433.parquet 8 Portland 95.5 Heidi -- !parquet_time_travel -- -20250314163405965 20250314163405965_0_0 1 193e809e-9620-412e-ab3f-c408a84129ca-0_0-191-205_20250314163405965.parquet 1 \N Alice \N \N -20250314163409045 20250314163409045_0_0 2 d47ed400-2407-4ec3-a3ae-1bb8251edba1-0_0-199-212_20250314163409045.parquet 2 \N Bob \N \N -20250314163412409 20250314163412409_0_0 3 d82c289c-ffcb-4806-b893-d10d4ffe185e-0_0-207-219_20250314163412409.parquet 3 \N Charlie New York \N -20250314163416966 20250314163416966_0_0 4 b0c5e6d8-b9fd-4532-9a55-b65185719b84-0_0-215-226_20250314163416966.parquet 4 \N David Los Angeles \N -20250314163421827 20250314163421827_0_0 5 33648978-cbee-455a-a382-f40744a11509-0_0-223-233_20250314163421827.parquet 5 \N Eve Chicago \N -20250314163425482 20250314163425482_0_0 6 ce12666a-5f10-488c-a143-069d2b478922-0_0-231-240_20250314163425482.parquet 6 85.5 Frank San Francisco \N +20250314163405965 20250314163405965_0_0 1 193e809e-9620-412e-ab3f-c408a84129ca-0_0-191-205_20250314163405965.parquet 1 \N Alice \N +20250314163409045 20250314163409045_0_0 2 d47ed400-2407-4ec3-a3ae-1bb8251edba1-0_0-199-212_20250314163409045.parquet 2 \N Bob \N +20250314163412409 20250314163412409_0_0 3 d82c289c-ffcb-4806-b893-d10d4ffe185e-0_0-207-219_20250314163412409.parquet 3 \N Charlie New York +20250314163416966 20250314163416966_0_0 4 b0c5e6d8-b9fd-4532-9a55-b65185719b84-0_0-215-226_20250314163416966.parquet 4 \N David Los Angeles +20250314163421827 20250314163421827_0_0 5 33648978-cbee-455a-a382-f40744a11509-0_0-223-233_20250314163421827.parquet 5 \N Eve Chicago +20250314163425482 20250314163425482_0_0 6 ce12666a-5f10-488c-a143-069d2b478922-0_0-231-240_20250314163425482.parquet 6 85.5 Frank San Francisco -- !parquet_inc_1 -- 20250314163425482 20250314163425482_0_0 6 ce12666a-5f10-488c-a143-069d2b478922-0_0-231-240_20250314163425482.parquet 6 85.5 Frank San Francisco \N diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_schema_change.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_schema_change.groovy index 648a4079a6eaed..3b9b7a2dde9691 100644 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_schema_change.groovy +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_schema_change.groovy @@ -63,7 +63,7 @@ suite("test_hudi_schema_change", "p2,external,hudi,external_remote,external_remo } - qt_orc_time_travel """ select * from hudi_sc_orc_cow FOR TIME AS OF "20250314162817433_0_0" order by id; """ //1-8 + qt_orc_time_travel """ select * from hudi_sc_orc_cow FOR TIME AS OF "20250314162817433" order by id; """ //1-8 qt_parquet_time_travel """ select * from hudi_sc_parquet_cow FOR TIME AS OF "20250314163425482" order by id; """//1-6 qt_parquet_inc_1 """ SELECT * from hudi_sc_parquet_cow@incr('beginTime'='20250314163421827') order by id; """