Skip to content

Commit 78a1208

Browse files
committed
Hive Metadata Scan: Support case insensitive name mapping (#52)
1 parent a01b227 commit 78a1208

File tree

6 files changed

+423
-5
lines changed

6 files changed

+423
-5
lines changed

core/src/main/java/org/apache/iceberg/TableMetadata.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,18 @@ static TableMetadata newTableMetadata(Schema schema,
122122
ImmutableList.of(), ImmutableList.of());
123123
}
124124

125+
public static TableMetadata newTableMetadataWithoutFreshIds(Schema schema,
126+
PartitionSpec spec,
127+
String location,
128+
Map<String, String> properties) {
129+
return new TableMetadata(null, DEFAULT_TABLE_FORMAT_VERSION, UUID.randomUUID().toString(), location,
130+
INITIAL_SEQUENCE_NUMBER, System.currentTimeMillis(),
131+
-1, schema, INITIAL_SPEC_ID, ImmutableList.of(spec),
132+
SortOrder.unsorted().orderId(), ImmutableList.of(SortOrder.unsorted()),
133+
ImmutableMap.copyOf(properties), -1, ImmutableList.of(),
134+
ImmutableList.of(), ImmutableList.of());
135+
}
136+
125137
public static class SnapshotLogEntry implements HistoryEntry {
126138
private final long timestampMillis;
127139
private final long snapshotId;

core/src/main/java/org/apache/iceberg/mapping/MappingUtil.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,19 @@ public static NameMapping create(Schema schema) {
5454
return new NameMapping(TypeUtil.visit(schema, CreateMapping.INSTANCE));
5555
}
5656

57+
/**
58+
* Create a name-based mapping for a schema.
59+
* <p>
60+
* The mapping returned by this method will use the schema's name for each field.
61+
*
62+
* @param schema a {@link Schema}
63+
* @param caseSensitive whether names should be matched case sensitively
64+
* @return a {@link NameMapping} initialized with the schema's fields and names
65+
*/
66+
public static NameMapping create(Schema schema, boolean caseSensitive) {
67+
return new NameMapping(TypeUtil.visit(schema, CreateMapping.INSTANCE), caseSensitive);
68+
}
69+
5770
/**
5871
* Update a name-based mapping using changes to a schema.
5972
*

core/src/main/java/org/apache/iceberg/mapping/NameMapping.java

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.io.Serializable;
2323
import java.util.List;
2424
import java.util.Map;
25+
import java.util.stream.Collectors;
2526
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
2627
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
2728

@@ -44,25 +45,47 @@ public static NameMapping of(MappedFields fields) {
4445
}
4546

4647
private final MappedFields mapping;
48+
private final boolean caseSensitive;
4749
private transient Map<Integer, MappedField> fieldsById;
4850
private transient Map<String, MappedField> fieldsByName;
51+
private transient Map<String, MappedField> fieldsByNameLowercase;
4952

5053
/**
 * Creates a case-sensitive mapping by delegating to
 * {@link #NameMapping(MappedFields, boolean)} with {@code true}.
 *
 * @param mapping the mapped fields backing this name mapping
 */
NameMapping(MappedFields mapping) {
  this(mapping, true);
}

/**
 * Creates a mapping with the given case sensitivity and invokes the lazy index accessors
 * up front, so the lookup maps are requested once at construction time.
 *
 * @param mapping the mapped fields backing this name mapping
 * @param caseSensitive when {@code false}, the lower-cased name index is requested as well
 */
NameMapping(MappedFields mapping, boolean caseSensitive) {
  this.mapping = mapping;
  this.caseSensitive = caseSensitive;

  lazyFieldsById();
  lazyFieldsByName();

  if (caseSensitive) {
    // The lower-cased index is only consulted for case-insensitive lookups.
    return;
  }
  lazyFieldsByNameLowercase();
}
5566

5667
public MappedField find(int id) {
5768
return lazyFieldsById().get(id);
5869
}
5970

6071
public MappedField find(String... names) {
61-
return lazyFieldsByName().get(DOT.join(names));
72+
return find(DOT.join(names));
6273
}
6374

6475
public MappedField find(List<String> names) {
65-
return lazyFieldsByName().get(DOT.join(names));
76+
return find(DOT.join(names));
77+
}
78+
79+
private MappedField find(String qualifiedName) {
80+
MappedField field = lazyFieldsByName().get(qualifiedName);
81+
if (field == null && !caseSensitive) {
82+
field = lazyFieldsByNameLowercase().get(qualifiedName.toLowerCase());
83+
}
84+
return field;
85+
}
86+
87+
public boolean isCaseSensitive() {
88+
return caseSensitive;
6689
}
6790

6891
public MappedFields asMappedFields() {
@@ -83,6 +106,14 @@ private Map<String, MappedField> lazyFieldsByName() {
83106
return fieldsByName;
84107
}
85108

109+
private Map<String, MappedField> lazyFieldsByNameLowercase() {
110+
if (fieldsByNameLowercase == null) {
111+
this.fieldsByNameLowercase = lazyFieldsByName().entrySet().stream()
112+
.collect(Collectors.toMap(x -> x.getKey().toLowerCase(), Map.Entry::getValue, (u, v) -> u));
113+
}
114+
return fieldsByNameLowercase;
115+
}
116+
86117
@Override
87118
public String toString() {
88119
if (mapping.fields().isEmpty()) {

core/src/main/java/org/apache/iceberg/mapping/NameMappingParser.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,17 @@
4141
* { "field-id": 5, "names": ["longitude", "long"] }
4242
* ] } ]
4343
* </pre>
44+
* or
45+
* <pre>
46+
* { "case-sensitive": false,
47+
* "mapping": [ { "field-id": 1, "names": ["id", "record_id"] },
48+
* { "field-id": 2, "names": ["data"] },
49+
* { "field-id": 3, "names": ["location"], "fields": [
50+
* { "field-id": 4, "names": ["latitude", "lat"] },
51+
* { "field-id": 5, "names": ["longitude", "long"] }
52+
* ] } ]
53+
* }
54+
* </pre>
4455
*/
4556
public class NameMappingParser {
4657

@@ -50,6 +61,8 @@ private NameMappingParser() {
5061
private static final String FIELD_ID = "field-id";
5162
private static final String NAMES = "names";
5263
private static final String FIELDS = "fields";
64+
private static final String CASE_SENSITIVE = "case-sensitive";
65+
private static final String MAPPING = "mapping";
5366

5467
public static String toJson(NameMapping mapping) {
5568
try {
@@ -65,7 +78,11 @@ public static String toJson(NameMapping mapping) {
6578
}
6679

6780
static void toJson(NameMapping nameMapping, JsonGenerator generator) throws IOException {
81+
generator.writeStartObject();
82+
generator.writeBooleanField(CASE_SENSITIVE, nameMapping.isCaseSensitive());
83+
generator.writeFieldName(MAPPING);
6884
toJson(nameMapping.asMappedFields(), generator);
85+
generator.writeEndObject();
6986
}
7087

7188
private static void toJson(MappedFields mapping, JsonGenerator generator) throws IOException {
@@ -107,7 +124,14 @@ public static NameMapping fromJson(String json) {
107124
}
108125

109126
static NameMapping fromJson(JsonNode node) {
110-
return new NameMapping(fieldsFromJson(node));
127+
Preconditions.checkArgument(node.isObject() || node.isArray(),
128+
"Cannot parse non-object or non-array name mapping: %s", node);
129+
if (node.isObject()) {
130+
boolean caseSensitive = JsonUtil.getBool(CASE_SENSITIVE, node);
131+
return new NameMapping(fieldsFromJson(node.get(MAPPING)), caseSensitive);
132+
} else {
133+
return new NameMapping(fieldsFromJson(node));
134+
}
111135
}
112136

113137
private static MappedFields fieldsFromJson(JsonNode node) {

0 commit comments

Comments
 (0)