Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.SchemaCacheValue;
import org.apache.doris.datasource.TablePartitionValues;
import org.apache.doris.datasource.hudi.HudiMvccSnapshot;
import org.apache.doris.datasource.hudi.HudiSchemaCacheKey;
import org.apache.doris.datasource.hudi.HudiSchemaCacheValue;
import org.apache.doris.datasource.hudi.HudiUtils;
Expand Down Expand Up @@ -1143,7 +1142,7 @@ public void beforeMTMVRefresh(MTMV mtmv) throws DdlException {
@Override
public MvccSnapshot loadSnapshot(Optional<TableSnapshot> tableSnapshot, Optional<TableScanParams> scanParams) {
if (getDlaType() == DLAType.HUDI) {
return new HudiMvccSnapshot(HudiUtils.getPartitionValues(tableSnapshot, this));
return HudiUtils.getHudiMvccSnapshot(tableSnapshot, this);
} else if (getDlaType() == DLAType.ICEBERG) {
return new IcebergMvccSnapshot(
IcebergUtils.getIcebergSnapshotCacheValue(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,6 @@ public boolean isPartitionColumnAllowNull() {
return true;
}

public HMSSchemaCacheValue getHudiSchemaCacheValue(Optional<MvccSnapshot> snapshot) {
TablePartitionValues snapshotCacheValue = getOrFetchHudiSnapshotCacheValue(snapshot);
return getHudiSchemaCacheValue(snapshotCacheValue.getLastUpdateTimestamp());
}

private TablePartitionValues getOrFetchHudiSnapshotCacheValue(Optional<MvccSnapshot> snapshot) {
if (snapshot.isPresent()) {
return ((HudiMvccSnapshot) snapshot.get()).getTablePartitionValues();
Expand All @@ -115,6 +110,16 @@ private TablePartitionValues getOrFetchHudiSnapshotCacheValue(Optional<MvccSnaps
}
}

public HMSSchemaCacheValue getHudiSchemaCacheValue(Optional<MvccSnapshot> snapshot) {
long timestamp = 0L;
if (snapshot.isPresent()) {
timestamp = ((HudiMvccSnapshot) snapshot.get()).getTimestamp();
} else {
timestamp = HudiUtils.getLastTimeStamp(hmsTable);
}
return getHudiSchemaCacheValue(timestamp);
}

private HMSSchemaCacheValue getHudiSchemaCacheValue(long timestamp) {
ExternalSchemaCache cache = Env.getCurrentEnv().getExtMetaCacheMgr().getSchemaCache(hmsTable.getCatalog());
Optional<SchemaCacheValue> schemaCacheValue = cache.getSchemaValue(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,26 @@
*/
public class HudiMvccSnapshot implements MvccSnapshot {
private final TablePartitionValues tablePartitionValues;
private final long timestamp;

/**
* Creates a new HudiMvccSnapshot with the specified partition values.
*
* @param tablePartitionValues The partition values for the snapshot
* @throws IllegalArgumentException if tablePartitionValues is null
*/
public HudiMvccSnapshot(TablePartitionValues tablePartitionValues) {
public HudiMvccSnapshot(TablePartitionValues tablePartitionValues, Long timeStamp) {
if (tablePartitionValues == null) {
throw new IllegalArgumentException("TablePartitionValues cannot be null");
}
this.timestamp = timeStamp;
this.tablePartitionValues = tablePartitionValues;
}

public long getTimestamp() {
return timestamp;
}

/**
* Gets the table partition values associated with this snapshot.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,29 @@ private static Type handleUnionType(Schema avroSchema) {
return Type.UNSUPPORTED;
}

public static HudiMvccSnapshot getHudiMvccSnapshot(Optional<TableSnapshot> tableSnapshot,
HMSExternalTable hmsTable) {
long timestamp = 0L;
if (tableSnapshot.isPresent()) {
String queryInstant = tableSnapshot.get().getValue().replaceAll("[-: ]", "");
timestamp = Long.parseLong(queryInstant);
} else {
timestamp = getLastTimeStamp(hmsTable);
}

return new HudiMvccSnapshot(HudiUtils.getPartitionValues(tableSnapshot, hmsTable), timestamp);
}

public static long getLastTimeStamp(HMSExternalTable hmsTable) {
HoodieTableMetaClient hudiClient = hmsTable.getHudiClient();
HoodieTimeline timeline = hudiClient.getCommitsAndCompactionTimeline().filterCompletedInstants();
Option<HoodieInstant> snapshotInstant = timeline.lastInstant();
if (!snapshotInstant.isPresent()) {
return 0L;
}
return Long.parseLong(snapshotInstant.get().getTimestamp());
}

public static TablePartitionValues getPartitionValues(Optional<TableSnapshot> tableSnapshot,
HMSExternalTable hmsTable) {
TablePartitionValues partitionValues = new TablePartitionValues();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,6 @@ public TablePartitionValues getPartitionValues(HMSExternalTable table, HoodieTab

partitionValues.writeLock().lock();
try {
long lastUpdateTimestamp = partitionValues.getLastUpdateTimestamp();
if (lastTimestamp <= lastUpdateTimestamp) {
return partitionValues;
}
HMSExternalCatalog catalog = (HMSExternalCatalog) table.getCatalog();
List<String> partitionNames;
if (useHiveSyncPartition) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,13 @@
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.internal.schema.InternalSchema;
import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter;
import org.apache.hudi.storage.StoragePath;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -277,8 +279,20 @@ private void setHudiParams(TFileRangeDesc rangeDesc, HudiSplit hudiSplit) {
.getCommitInstantInternalSchema(hudiClient, commitInstantTime);
params.history_schema_info.computeIfAbsent(
internalSchema.schemaId(),
k -> HudiUtils.getSchemaInfo(internalSchema));
fileDesc.setSchemaId(internalSchema.schemaId()); //for schema change. (native reader)
k -> HudiUtils.getSchemaInfo(internalSchema)); //for schema change. (native reader)
fileDesc.setSchemaId(internalSchema.schemaId());
} else {
try {
TableSchemaResolver schemaUtil = new TableSchemaResolver(hudiClient);
InternalSchema internalSchema =
AvroInternalSchemaConverter.convert(schemaUtil.getTableAvroSchema(true));
params.history_schema_info.computeIfAbsent(
internalSchema.schemaId(),
k -> HudiUtils.getSchemaInfo(internalSchema)); // Handle column name case for BE.
fileDesc.setSchemaId(internalSchema.schemaId());
} catch (Exception e) {
throw new RuntimeException("Cannot get hudi table schema.", e);
}
}
}
tableFormatFileDesc.setHudiParams(fileDesc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,22 +214,22 @@
10 101.1 Judy Austin \N

-- !orc_time_travel --
20250314162744620 20250314162744620_0_0 1 84ab609d-947a-4a24-af9a-6360348cf977-0_0-80-105_20250314162744620.parquet 1 \N Alice \N \N
20250314162747350 20250314162747350_0_0 2 07b6bd0b-0f2c-4500-a8c8-75d3cd90e85e-0_0-88-112_20250314162747350.parquet 2 \N Bob \N \N
20250314162759470 20250314162759470_0_0 3 58382f07-0dca-431d-ad4d-d5d94140d60f-0_0-96-119_20250314162759470.parquet 3 \N Charlie New York \N
20250314162804702 20250314162804702_0_0 4 05d28f5c-acc5-4530-8163-c82bdf96b720-0_0-104-126_20250314162804702.parquet 4 \N David Los Angeles \N
20250314162809486 20250314162809486_0_0 5 9164c294-2606-4537-bb84-e7ba4dbb98e5-0_0-112-133_20250314162809486.parquet 5 \N Eve Chicago \N
20250314162813019 20250314162813019_0_0 6 43b432a3-3581-439b-83b6-6c171bd6492a-0_0-120-140_20250314162813019.parquet 6 85.5 Frank San Francisco \N
20250314162814849 20250314162814849_0_0 7 28ad4dfc-07ae-4108-926e-7ba35b1ac5ce-0_0-130-149_20250314162814849.parquet 7 90.0 Grace Seattle \N
20250314162817433 20250314162817433_0_0 8 a07d9dfb-791a-4cdc-bc7c-5f0d0d0d6a77-0_0-142-160_20250314162817433.parquet 8 95.5 Heidi Portland \N
20250314162744620 20250314162744620_0_0 1 84ab609d-947a-4a24-af9a-6360348cf977-0_0-80-105_20250314162744620.parquet 1 \N \N Alice
20250314162747350 20250314162747350_0_0 2 07b6bd0b-0f2c-4500-a8c8-75d3cd90e85e-0_0-88-112_20250314162747350.parquet 2 \N \N Bob
20250314162759470 20250314162759470_0_0 3 58382f07-0dca-431d-ad4d-d5d94140d60f-0_0-96-119_20250314162759470.parquet 3 New York \N Charlie
20250314162804702 20250314162804702_0_0 4 05d28f5c-acc5-4530-8163-c82bdf96b720-0_0-104-126_20250314162804702.parquet 4 Los Angeles \N David
20250314162809486 20250314162809486_0_0 5 9164c294-2606-4537-bb84-e7ba4dbb98e5-0_0-112-133_20250314162809486.parquet 5 Chicago \N Eve
20250314162813019 20250314162813019_0_0 6 43b432a3-3581-439b-83b6-6c171bd6492a-0_0-120-140_20250314162813019.parquet 6 San Francisco 85.5 Frank
20250314162814849 20250314162814849_0_0 7 28ad4dfc-07ae-4108-926e-7ba35b1ac5ce-0_0-130-149_20250314162814849.parquet 7 Seattle 90.0 Grace
20250314162817433 20250314162817433_0_0 8 a07d9dfb-791a-4cdc-bc7c-5f0d0d0d6a77-0_0-142-160_20250314162817433.parquet 8 Portland 95.5 Heidi

-- !parquet_time_travel --
20250314163405965 20250314163405965_0_0 1 193e809e-9620-412e-ab3f-c408a84129ca-0_0-191-205_20250314163405965.parquet 1 \N Alice \N \N
20250314163409045 20250314163409045_0_0 2 d47ed400-2407-4ec3-a3ae-1bb8251edba1-0_0-199-212_20250314163409045.parquet 2 \N Bob \N \N
20250314163412409 20250314163412409_0_0 3 d82c289c-ffcb-4806-b893-d10d4ffe185e-0_0-207-219_20250314163412409.parquet 3 \N Charlie New York \N
20250314163416966 20250314163416966_0_0 4 b0c5e6d8-b9fd-4532-9a55-b65185719b84-0_0-215-226_20250314163416966.parquet 4 \N David Los Angeles \N
20250314163421827 20250314163421827_0_0 5 33648978-cbee-455a-a382-f40744a11509-0_0-223-233_20250314163421827.parquet 5 \N Eve Chicago \N
20250314163425482 20250314163425482_0_0 6 ce12666a-5f10-488c-a143-069d2b478922-0_0-231-240_20250314163425482.parquet 6 85.5 Frank San Francisco \N
20250314163405965 20250314163405965_0_0 1 193e809e-9620-412e-ab3f-c408a84129ca-0_0-191-205_20250314163405965.parquet 1 \N Alice \N
20250314163409045 20250314163409045_0_0 2 d47ed400-2407-4ec3-a3ae-1bb8251edba1-0_0-199-212_20250314163409045.parquet 2 \N Bob \N
20250314163412409 20250314163412409_0_0 3 d82c289c-ffcb-4806-b893-d10d4ffe185e-0_0-207-219_20250314163412409.parquet 3 \N Charlie New York
20250314163416966 20250314163416966_0_0 4 b0c5e6d8-b9fd-4532-9a55-b65185719b84-0_0-215-226_20250314163416966.parquet 4 \N David Los Angeles
20250314163421827 20250314163421827_0_0 5 33648978-cbee-455a-a382-f40744a11509-0_0-223-233_20250314163421827.parquet 5 \N Eve Chicago
20250314163425482 20250314163425482_0_0 6 ce12666a-5f10-488c-a143-069d2b478922-0_0-231-240_20250314163425482.parquet 6 85.5 Frank San Francisco

-- !parquet_inc_1 --
20250314163425482 20250314163425482_0_0 6 ce12666a-5f10-488c-a143-069d2b478922-0_0-231-240_20250314163425482.parquet 6 85.5 Frank San Francisco \N
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ suite("test_hudi_schema_change", "p2,external,hudi,external_remote,external_remo


}
qt_orc_time_travel """ select * from hudi_sc_orc_cow FOR TIME AS OF "20250314162817433_0_0" order by id; """ //1-8
qt_orc_time_travel """ select * from hudi_sc_orc_cow FOR TIME AS OF "20250314162817433" order by id; """ //1-8
qt_parquet_time_travel """ select * from hudi_sc_parquet_cow FOR TIME AS OF "20250314163425482" order by id; """//1-6

qt_parquet_inc_1 """ SELECT * from hudi_sc_parquet_cow@incr('beginTime'='20250314163421827') order by id; """
Expand Down