1919
2020package org .apache .iceberg .hivelink .core ;
2121
22+ import java .util .Arrays ;
2223import java .util .HashMap ;
2324import java .util .List ;
2425import java .util .Map ;
4243import org .apache .iceberg .hivelink .core .utils .HiveTypeUtil ;
4344import org .apache .iceberg .relocated .com .google .common .base .Preconditions ;
4445import org .apache .iceberg .relocated .com .google .common .collect .Lists ;
46+ import org .apache .iceberg .relocated .com .google .common .collect .Maps ;
4547import org .apache .iceberg .types .Conversions ;
4648import org .apache .iceberg .types .Type ;
4749import org .apache .iceberg .types .Types ;
@@ -88,7 +90,8 @@ static Schema getSchema(org.apache.hadoop.hive.metastore.api.Table table) {
8890 Types .StructType dataStructType = schema .asStruct ();
8991 List <Types .NestedField > fields = Lists .newArrayList (dataStructType .fields ());
9092
91- Schema partitionSchema = partitionSchema (table .getPartitionKeys (), schema );
93+ String partitionColumnIdMappingString = props .get ("partition.column.ids" );
94+ Schema partitionSchema = partitionSchema (table .getPartitionKeys (), schema , partitionColumnIdMappingString );
9295 Types .StructType partitionStructType = partitionSchema .asStruct ();
9396 fields .addAll (partitionStructType .fields ());
9497 return new Schema (fields );
@@ -107,7 +110,8 @@ static StructTypeInfo structTypeInfoFromCols(List<FieldSchema> cols) {
107110 return (StructTypeInfo ) TypeInfoFactory .getStructTypeInfo (fieldNames , fieldTypeInfos );
108111 }
109112
110- private static Schema partitionSchema (List <FieldSchema > partitionKeys , Schema dataSchema ) {
113+ private static Schema partitionSchema (List <FieldSchema > partitionKeys , Schema dataSchema , String idMapping ) {
114+ Map <String , Integer > nameToId = parsePartitionColId (idMapping );
111115 AtomicInteger fieldId = new AtomicInteger (10000 );
112116 List <Types .NestedField > partitionFields = Lists .newArrayList ();
113117 partitionKeys .forEach (f -> {
@@ -117,11 +121,39 @@ private static Schema partitionSchema(List<FieldSchema> partitionKeys, Schema da
117121 }
118122 partitionFields .add (
119123 Types .NestedField .optional (
120- fieldId .incrementAndGet (), f .getName (), primitiveIcebergType (f .getType ()), f .getComment ()));
124+ nameToId .containsKey (f .getName ()) ? nameToId .get (f .getName ()) : fieldId .incrementAndGet (),
125+ f .getName (), primitiveIcebergType (f .getType ()), f .getComment ()));
121126 });
122127 return new Schema (partitionFields );
123128 }
124129
130+ /**
131+ *
132+ * @param idMapping A comma separated string representation of column name
133+ * and its id, e.g. partitionCol1:10,partitionCol2:11, no
134+ * whitespace is allowed in the middle
135+ * @return The parsed in-mem Map representation of the name to
136+ * id mapping
137+ */
138+ private static Map <String , Integer > parsePartitionColId (String idMapping ) {
139+ Map <String , Integer > nameToId = Maps .newHashMap ();
140+ if (idMapping != null ) {
141+ // parse idMapping string
142+ Arrays .stream (idMapping .split ("," )).forEach (kv -> {
143+ String [] split = kv .split (":" );
144+ if (split .length != 2 ) {
145+ throw new IllegalStateException (String .format (
146+ "partition.column.ids property is invalid format: %s" ,
147+ idMapping ));
148+ }
149+ String name = split [0 ];
150+ Integer id = Integer .parseInt (split [1 ]);
151+ nameToId .put (name , id );
152+ });
153+ }
154+ return nameToId ;
155+ }
156+
125157 private static Type primitiveIcebergType (String hiveTypeString ) {
126158 PrimitiveTypeInfo primitiveTypeInfo = TypeInfoFactory .getPrimitiveTypeInfo (hiveTypeString );
127159 return HiveTypeUtil .convert (primitiveTypeInfo );
0 commit comments