Skip to content

Commit d8d3657

Browse files
branch-3.1: [fix](hive) Fix partition path scheme mismatch when inserting into Hive partitioned tables on object storage #57973 (#58027)
Cherry-picked from #57973

Co-authored-by: zy-kkk <zhongyk10@gmail.com>
1 parent 37247e6 commit d8d3657

File tree

2 files changed, with 119 additions and 1 deletion.

fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -270,11 +270,16 @@ public void finishInsertTable(NameMapping nameMapping) {
270270
case NEW:
271271
case OVERWRITE:
272272
StorageDescriptor sd = table.getSd();
273+
// For object storage (FILE_S3), use writePath to keep original scheme (oss://, cos://)
274+
// For HDFS, use targetPath which is the final path after rename
275+
String pathForHMS = this.fileType == TFileType.FILE_S3
276+
? writePath
277+
: pu.getLocation().getTargetPath();
273278
HivePartition hivePartition = new HivePartition(
274279
nameMapping,
275280
false,
276281
sd.getInputFormat(),
277-
pu.getLocation().getTargetPath(),
282+
pathForHMS,
278283
HiveUtil.toPartitionValues(pu.getName()),
279284
Maps.newHashMap(),
280285
sd.getOutputFormat(),

regression-test/suites/external_table_p2/refactor_catalog_param/hive_on_hms_and_dlf.groovy

Lines changed: 113 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,106 @@ suite("hive_on_hms_and_dlf", "p2,external,new_catalog_property") {
8383
assert dropResult.size() == 0
8484
}
8585

86+
/*--------test partition table insert---------*/
87+
def testPartitionTableInsert = { String catalogProperties, String prefix, String dbLocation ->
88+
def catalog_name = "${prefix}_catalog"
89+
sql """
90+
DROP CATALOG IF EXISTS ${catalog_name};
91+
"""
92+
sql """
93+
CREATE CATALOG IF NOT EXISTS ${catalog_name} PROPERTIES (
94+
${catalogProperties}
95+
);
96+
"""
97+
sql """
98+
switch ${catalog_name};
99+
"""
100+
101+
def db_name = prefix + "_db" + System.currentTimeMillis() + ThreadLocalRandom.current().nextInt(1000)
102+
sql """
103+
DROP DATABASE IF EXISTS ${db_name} FORCE;
104+
"""
105+
sql """
106+
CREATE DATABASE IF NOT EXISTS ${db_name}
107+
PROPERTIES ('location'='${dbLocation}');
108+
"""
109+
110+
def dbResult = sql """
111+
show databases like "${db_name}";
112+
"""
113+
assert dbResult.size() == 1
114+
115+
sql """
116+
use ${db_name};
117+
"""
118+
119+
def table_name = prefix + ThreadLocalRandom.current().nextInt(1000) + "_partition_table"
120+
121+
// Create partitioned table
122+
sql """
123+
CREATE TABLE ${table_name} (
124+
id INT COMMENT 'id',
125+
name VARCHAR(20) COMMENT 'name',
126+
age INT COMMENT 'age',
127+
pt1 VARCHAR(20) COMMENT 'partition key'
128+
) ENGINE=hive
129+
PARTITION BY LIST (pt1) ()
130+
PROPERTIES (
131+
'file_format'='orc',
132+
'compression'='zlib'
133+
);
134+
"""
135+
136+
// Test 1: Insert into new partition
137+
sql """
138+
insert into ${table_name} values (1, 'alice', 20, 'p1');
139+
"""
140+
def result1 = sql """
141+
SELECT * FROM ${table_name} ORDER BY id;
142+
"""
143+
assert result1.size() == 1
144+
assert result1[0][0] == 1
145+
146+
// Test 2: Insert into existing partition (APPEND mode)
147+
sql """
148+
insert into ${table_name} values (2, 'bob', 25, 'p1');
149+
"""
150+
def result2 = sql """
151+
SELECT * FROM ${table_name} WHERE pt1='p1' ORDER BY id;
152+
"""
153+
assert result2.size() == 2
154+
155+
// Test 3: Insert into another new partition
156+
sql """
157+
insert into ${table_name} values (3, 'charlie', 30, 'p2');
158+
"""
159+
def result3 = sql """
160+
SELECT * FROM ${table_name} ORDER BY id;
161+
"""
162+
assert result3.size() == 3
163+
164+
// Test 4: Multiple inserts to verify scheme consistency
165+
sql """
166+
insert into ${table_name} values (4, 'david', 35, 'p1'), (5, 'eve', 28, 'p2');
167+
"""
168+
def result4 = sql """
169+
SELECT COUNT(*) FROM ${table_name};
170+
"""
171+
assert result4[0][0] == 5
172+
173+
sql """
174+
DROP TABLE ${table_name};
175+
"""
176+
sql """
177+
DROP DATABASE ${db_name} FORCE;
178+
"""
179+
180+
def dropResult = sql """
181+
show databases like "${db_name}";
182+
"""
183+
assert dropResult.size() == 0
184+
}
185+
86186
/*--------only execute query---------*/
87187
def testQuery = { String catalog_properties, String prefix, String db_name, String table_name, int data_count ->
88188

@@ -267,6 +367,10 @@ suite("hive_on_hms_and_dlf", "p2,external,new_catalog_property") {
267367
testQueryAndInsert(hms_properties + obs_region_param + obs_storage_properties, "hive_hms_obs_test_region", db_location)
268368
testQueryAndInsert(hms_type_properties + hms_kerberos_old_prop + obs_storage_properties, "hive_hms_on_obs_kerberos_old", db_location)
269369
testQueryAndInsert(hms_type_properties + hms_kerberos_new_prop + obs_storage_properties, "hive_hms_on_obs_kerberos_new", db_location)
370+
371+
//OBS - Partition table tests
372+
db_location = "obs://${obs_parent_path}/hive/hms/partition/" + System.currentTimeMillis()
373+
testPartitionTableInsert(hms_properties + obs_storage_properties, "hive_hms_obs_partition_test", db_location)
270374
//GCS
271375
if(context.config.otherConfigs.get("enableGCS")){
272376
db_location = "gs://${gcs_parent_path}/hive/hms/" + System.currentTimeMillis()
@@ -283,6 +387,10 @@ suite("hive_on_hms_and_dlf", "p2,external,new_catalog_property") {
283387
testQueryAndInsert(hms_type_properties + hms_kerberos_old_prop + cos_storage_properties, "hive_hms_on_cos_kerberos_old", db_location)
284388
testQueryAndInsert(hms_type_properties + hms_kerberos_new_prop + cos_storage_properties, "hive_hms_on_cos_kerberos_new", db_location)
285389

390+
//COS - Partition table tests
391+
db_location = "cosn://${cos_parent_path}/hive/hms/partition/" + System.currentTimeMillis()
392+
testPartitionTableInsert(hms_properties + cos_storage_properties, "hive_hms_cos_partition_test", db_location)
393+
286394
db_location = "cos://${cos_parent_path}/hive/hms/" + System.currentTimeMillis()
287395
testQueryAndInsert(hms_properties + cos_storage_properties, "hive_hms_cos_test", db_location)
288396

@@ -293,6 +401,11 @@ suite("hive_on_hms_and_dlf", "p2,external,new_catalog_property") {
293401
testQueryAndInsert(hms_type_properties + hms_kerberos_old_prop + oss_storage_properties, "hive_hms_on_oss_kerberos_old", db_location)
294402
testQueryAndInsert(hms_type_properties + hms_kerberos_new_prop + oss_storage_properties, "hive_hms_on_oss_kerberos_new", db_location)
295403

404+
//OSS - Partition table tests (fix for partition path scheme mismatch)
405+
db_location = "oss://${oss_parent_path}/hive/hms/partition/" + System.currentTimeMillis()
406+
testPartitionTableInsert(hms_properties + oss_storage_properties, "hive_hms_oss_partition_test", db_location)
407+
testPartitionTableInsert(hms_properties + oss_region_param + oss_storage_properties, "hive_hms_oss_partition_test_region", db_location)
408+
296409
//s3
297410
db_location = "s3a://${s3_parent_path}/hive/hms/"+System.currentTimeMillis()
298411
testQueryAndInsert(hms_properties + s3_storage_properties, "hive_hms_s3_test", db_location)

0 commit comments

Comments
 (0)