Skip to content

Commit 3b6c98b

Browse files
committed
Add logic to derive partition column id from partition.column.ids pro… (linkedin#122)
* Add logic to derive partition column id from partition.column.ids property
1 parent 26b7e11 commit 3b6c98b

File tree

1 file changed

+35
-3
lines changed

1 file changed

+35
-3
lines changed

hivelink-core/src/main/java/org/apache/iceberg/hivelink/core/LegacyHiveTableUtils.java

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.apache.iceberg.hivelink.core;
2121

22+
import java.util.Arrays;
2223
import java.util.HashMap;
2324
import java.util.List;
2425
import java.util.Map;
@@ -42,6 +43,7 @@
4243
import org.apache.iceberg.hivelink.core.utils.HiveTypeUtil;
4344
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
4445
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
46+
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
4547
import org.apache.iceberg.types.Conversions;
4648
import org.apache.iceberg.types.Type;
4749
import org.apache.iceberg.types.Types;
@@ -88,7 +90,8 @@ static Schema getSchema(org.apache.hadoop.hive.metastore.api.Table table) {
8890
Types.StructType dataStructType = schema.asStruct();
8991
List<Types.NestedField> fields = Lists.newArrayList(dataStructType.fields());
9092

91-
Schema partitionSchema = partitionSchema(table.getPartitionKeys(), schema);
93+
String partitionColumnIdMappingString = props.get("partition.column.ids");
94+
Schema partitionSchema = partitionSchema(table.getPartitionKeys(), schema, partitionColumnIdMappingString);
9295
Types.StructType partitionStructType = partitionSchema.asStruct();
9396
fields.addAll(partitionStructType.fields());
9497
return new Schema(fields);
@@ -107,7 +110,8 @@ static StructTypeInfo structTypeInfoFromCols(List<FieldSchema> cols) {
107110
return (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
108111
}
109112

110-
private static Schema partitionSchema(List<FieldSchema> partitionKeys, Schema dataSchema) {
113+
private static Schema partitionSchema(List<FieldSchema> partitionKeys, Schema dataSchema, String idMapping) {
114+
Map<String, Integer> nameToId = parsePartitionColId(idMapping);
111115
AtomicInteger fieldId = new AtomicInteger(10000);
112116
List<Types.NestedField> partitionFields = Lists.newArrayList();
113117
partitionKeys.forEach(f -> {
@@ -117,11 +121,39 @@ private static Schema partitionSchema(List<FieldSchema> partitionKeys, Schema da
117121
}
118122
partitionFields.add(
119123
Types.NestedField.optional(
120-
fieldId.incrementAndGet(), f.getName(), primitiveIcebergType(f.getType()), f.getComment()));
124+
nameToId.containsKey(f.getName()) ? nameToId.get(f.getName()) : fieldId.incrementAndGet(),
125+
f.getName(), primitiveIcebergType(f.getType()), f.getComment()));
121126
});
122127
return new Schema(partitionFields);
123128
}
124129

130+
/**
131+
*
132+
* @param idMapping A comma separated string representation of column name
133+
* and its id, e.g. partitionCol1:10,partitionCol2:11, no
134+
* whitespace is allowed in the middle
135+
* @return The parsed in-mem Map representation of the name to
136+
* id mapping
137+
*/
138+
private static Map<String, Integer> parsePartitionColId(String idMapping) {
139+
Map<String, Integer> nameToId = Maps.newHashMap();
140+
if (idMapping != null) {
141+
// parse idMapping string
142+
Arrays.stream(idMapping.split(",")).forEach(kv -> {
143+
String[] split = kv.split(":");
144+
if (split.length != 2) {
145+
throw new IllegalStateException(String.format(
146+
"partition.column.ids property is invalid format: %s",
147+
idMapping));
148+
}
149+
String name = split[0];
150+
Integer id = Integer.parseInt(split[1]);
151+
nameToId.put(name, id);
152+
});
153+
}
154+
return nameToId;
155+
}
156+
125157
private static Type primitiveIcebergType(String hiveTypeString) {
126158
PrimitiveTypeInfo primitiveTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(hiveTypeString);
127159
return HiveTypeUtil.convert(primitiveTypeInfo);

0 commit comments

Comments
 (0)