Skip to content

Commit c18f4c4

Browse files
author
Shenoda Guirguis
authored
[#2039] Support default value semantics: API changes (#63)
1 parent 766407e commit c18f4c4

File tree

4 files changed

+255
-12
lines changed

4 files changed

+255
-12
lines changed

api/src/main/java/org/apache/iceberg/types/Types.java

Lines changed: 89 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.apache.iceberg.types;
2121

2222
import java.io.Serializable;
23+
import java.util.ArrayList;
2324
import java.util.Arrays;
2425
import java.util.List;
2526
import java.util.Locale;
@@ -414,42 +415,108 @@ public int hashCode() {
414415

415416
public static class NestedField implements Serializable {
416417
public static NestedField optional(int id, String name, Type type) {
417-
return new NestedField(true, id, name, type, null);
418+
return new NestedField(true, id, name, type, null, null);
418419
}
419420

420421
public static NestedField optional(int id, String name, Type type, String doc) {
421-
return new NestedField(true, id, name, type, doc);
422+
return new NestedField(true, id, name, type, null, doc);
423+
}
424+
425+
public static NestedField optional(int id, String name, Type type, Object defaultValue, String doc) {
426+
return new NestedField(true, id, name, type, defaultValue, doc);
422427
}
423428

424429
public static NestedField required(int id, String name, Type type) {
425-
return new NestedField(false, id, name, type, null);
430+
return new NestedField(false, id, name, type, null, null);
426431
}
427432

428433
public static NestedField required(int id, String name, Type type, String doc) {
429-
return new NestedField(false, id, name, type, doc);
434+
return new NestedField(false, id, name, type, null, doc);
435+
}
436+
437+
public static NestedField required(int id, String name, Type type, Object defaultValue, String doc) {
438+
validateDefaultValueForRequiredField(defaultValue, name);
439+
return new NestedField(false, id, name, type, defaultValue, doc);
430440
}
431441

432442
public static NestedField of(int id, boolean isOptional, String name, Type type) {
433-
return new NestedField(isOptional, id, name, type, null);
443+
return new NestedField(isOptional, id, name, type, null, null);
434444
}
435445

436446
public static NestedField of(int id, boolean isOptional, String name, Type type, String doc) {
437-
return new NestedField(isOptional, id, name, type, doc);
447+
return new NestedField(isOptional, id, name, type, null, doc);
448+
}
449+
450+
public static NestedField of(int id, boolean isOptional, String name, Type type, Object defaultValue, String doc) {
451+
return new NestedField(isOptional, id, name, type, defaultValue, doc);
452+
}
453+
454+
private static void validateDefaultValueForRequiredField(Object defaultValue, String fieldName) {
455+
Preconditions.checkArgument(defaultValue != null,
456+
"Cannot create NestedField with a null default for the required field: " + fieldName);
457+
}
458+
459+
private static void validateDefaultValue(Object defaultValue, Type type) {
460+
if (defaultValue == null) {
461+
return;
462+
}
463+
switch (type.typeId()) {
464+
case STRUCT:
465+
Preconditions.checkArgument(List.class.isInstance(defaultValue),
466+
"defaultValue should be a List of Objects for StructType");
467+
if (defaultValue == null) {
468+
return;
469+
}
470+
List<Object> defaultList = (List) defaultValue;
471+
Preconditions.checkArgument(defaultList.size() == type.asStructType().fields().size());
472+
for (int i = 0; i < defaultList.size(); i++) {
473+
NestedField.validateDefaultValue(defaultList.get(i), type.asStructType().fields().get(i).type);
474+
}
475+
break;
476+
case LIST:
477+
Preconditions.checkArgument(defaultValue instanceof ArrayList,
478+
"defaultValue should be an ArrayList of Objects, for ListType");
479+
List<Object> defaultArrayList = (ArrayList<Object>) defaultValue;
480+
if (defaultArrayList == null || defaultArrayList.size() == 0) {
481+
return;
482+
}
483+
defaultArrayList.forEach(dv -> NestedField.validateDefaultValue(dv, type.asListType().elementField.type));
484+
break;
485+
case MAP:
486+
Preconditions.checkArgument(Map.class.isInstance(defaultValue),
487+
"defaultValue should be an instance of Map for MapType");
488+
Map<Object, Object> defaultMap = (Map<Object, Object>) defaultValue;
489+
if (defaultMap == null || defaultMap.isEmpty()) {
490+
return;
491+
}
492+
for (Map.Entry e : defaultMap.entrySet()) {
493+
NestedField.validateDefaultValue(e.getKey(), type.asMapType().keyField.type);
494+
NestedField.validateDefaultValue(e.getValue(), type.asMapType().valueField.type);
495+
}
496+
break;
497+
default:
498+
Preconditions.checkArgument(type.typeId().javaClass().isInstance(defaultValue),
499+
"defaultValue should be of same java class of the type, defaultValue class: " + defaultValue.getClass() +
500+
", type class: " + type.typeId().javaClass());
501+
}
438502
}
439503

440504
private final boolean isOptional;
441505
private final int id;
442506
private final String name;
443507
private final Type type;
508+
private final Object defaultValue;
444509
private final String doc;
445510

446-
private NestedField(boolean isOptional, int id, String name, Type type, String doc) {
511+
private NestedField(boolean isOptional, int id, String name, Type type, Object defaultValue, String doc) {
447512
Preconditions.checkNotNull(name, "Name cannot be null");
448513
Preconditions.checkNotNull(type, "Type cannot be null");
514+
validateDefaultValue(defaultValue, type);
449515
this.isOptional = isOptional;
450516
this.id = id;
451517
this.name = name;
452518
this.type = type;
519+
this.defaultValue = defaultValue;
453520
this.doc = doc;
454521
}
455522

@@ -461,7 +528,7 @@ public NestedField asOptional() {
461528
if (isOptional) {
462529
return this;
463530
}
464-
return new NestedField(true, id, name, type, doc);
531+
return new NestedField(true, id, name, type, defaultValue, doc);
465532
}
466533

467534
public boolean isRequired() {
@@ -472,7 +539,15 @@ public NestedField asRequired() {
472539
if (!isOptional) {
473540
return this;
474541
}
475-
return new NestedField(false, id, name, type, doc);
542+
return new NestedField(false, id, name, type, defaultValue, doc);
543+
}
544+
545+
public boolean hasDefaultValue() {
546+
return defaultValue != null;
547+
}
548+
549+
public Object getDefaultValue() {
550+
return defaultValue;
476551
}
477552

478553
public int fieldId() {
@@ -495,6 +570,7 @@ public String doc() {
495570
public String toString() {
496571
return String.format("%d: %s: %s %s",
497572
id, name, isOptional ? "optional" : "required", type) +
573+
(hasDefaultValue() ? ", default value: " + defaultValue + ", " : "") +
498574
(doc != null ? " (" + doc + ")" : "");
499575
}
500576

@@ -513,6 +589,8 @@ public boolean equals(Object o) {
513589
return false;
514590
} else if (!name.equals(that.name)) {
515591
return false;
592+
} else if (!Objects.equals(defaultValue, that.defaultValue)) {
593+
return false;
516594
} else if (!Objects.equals(doc, that.doc)) {
517595
return false;
518596
}
@@ -521,7 +599,8 @@ public boolean equals(Object o) {
521599

522600
@Override
523601
public int hashCode() {
524-
return Objects.hash(NestedField.class, id, isOptional, name, type);
602+
return hasDefaultValue() ? Objects.hash(NestedField.class, id, isOptional, name, type, defaultValue) :
603+
Objects.hash(NestedField.class, id, isOptional, name, type);
525604
}
526605
}
527606

@@ -739,7 +818,6 @@ public boolean equals(Object o) {
739818
} else if (!(o instanceof ListType)) {
740819
return false;
741820
}
742-
743821
ListType listType = (ListType) o;
744822
return elementField.equals(listType.elementField);
745823
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.iceberg.types;
21+
22+
import java.util.ArrayList;
23+
import java.util.Arrays;
24+
import java.util.List;
25+
import org.junit.Assert;
26+
import org.junit.BeforeClass;
27+
import org.junit.Test;
28+
29+
import static org.apache.iceberg.types.Types.NestedField;
30+
import static org.apache.iceberg.types.Types.StructType;
31+
32+
public class TestDefaultValuesForContainerTypes {
33+
34+
static NestedField intFieldType;
35+
static NestedField stringFieldType;
36+
static StructType structType;
37+
38+
@BeforeClass
39+
public static void beforeClass() {
40+
intFieldType = NestedField.optional(0, "optionalIntField", Types.IntegerType.get());
41+
stringFieldType = NestedField.required(1, "requiredStringField", Types.StringType.get());
42+
structType = StructType.of(Arrays.asList(intFieldType, stringFieldType));
43+
}
44+
45+
@Test
46+
public void testStructTypeDefault() {
47+
List<Object> structDefaultvalue = new ArrayList<>();
48+
structDefaultvalue.add(Integer.valueOf(1));
49+
structDefaultvalue.add("two");
50+
NestedField structField = NestedField.optional(2, "optionalStructField", structType, structDefaultvalue, "doc");
51+
Assert.assertTrue(structField.hasDefaultValue());
52+
Assert.assertEquals(structDefaultvalue, structField.getDefaultValue());
53+
}
54+
55+
@Test (expected = IllegalArgumentException.class)
56+
public void testStructTypeDefaultInvalidFieldsTypes() {
57+
List<Object> structDefaultvalue = new ArrayList<>();
58+
structDefaultvalue.add("one");
59+
structDefaultvalue.add("two");
60+
NestedField.optional(2, "optionalStructField", structType, structDefaultvalue, "doc");
61+
}
62+
63+
@Test (expected = IllegalArgumentException.class)
64+
public void testStructTypeDefaultInvalidNumberFields() {
65+
List<Object> structDefaultvalue = new ArrayList<>();
66+
structDefaultvalue.add(Integer.valueOf(1));
67+
structDefaultvalue.add("two");
68+
structDefaultvalue.add("three");
69+
NestedField.optional(2, "optionalStructField", structType, structDefaultvalue, "doc");
70+
}
71+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.iceberg.types;
21+
22+
import org.apache.iceberg.types.Types.NestedField;
23+
import org.junit.Assert;
24+
import org.junit.Test;
25+
26+
import static org.apache.iceberg.types.Types.NestedField.optional;
27+
import static org.apache.iceberg.types.Types.NestedField.required;
28+
29+
30+
public class TestNestedFieldDefaultValues {
31+
32+
private final int id = 1;
33+
private final String fieldName = "fieldName";
34+
private final Type fieldType = Types.IntegerType.get();
35+
private final String doc = "field doc";
36+
private final Integer defaultValue = 100;
37+
38+
@Test
39+
public void testConstructorsValidCases() {
40+
// optional constructors
41+
Assert.assertFalse(optional(id, fieldName, fieldType).hasDefaultValue());
42+
Assert.assertFalse(optional(id, fieldName, fieldType, doc).hasDefaultValue());
43+
NestedField nestedFieldWithDefault = optional(id, fieldName, fieldType, defaultValue, doc);
44+
Assert.assertTrue(nestedFieldWithDefault.hasDefaultValue());
45+
Assert.assertEquals(defaultValue, nestedFieldWithDefault.getDefaultValue());
46+
nestedFieldWithDefault = optional(id, fieldName, fieldType, defaultValue, null);
47+
Assert.assertTrue(nestedFieldWithDefault.hasDefaultValue());
48+
Assert.assertEquals(defaultValue, nestedFieldWithDefault.getDefaultValue());
49+
50+
// required constructors
51+
Assert.assertFalse(required(id, fieldName, fieldType).hasDefaultValue());
52+
Assert.assertFalse(required(id, fieldName, fieldType, doc).hasDefaultValue());
53+
nestedFieldWithDefault = required(id, fieldName, fieldType, defaultValue, doc);
54+
Assert.assertTrue(nestedFieldWithDefault.hasDefaultValue());
55+
Assert.assertEquals(defaultValue, nestedFieldWithDefault.getDefaultValue());
56+
nestedFieldWithDefault = required(id, fieldName, fieldType, defaultValue, null);
57+
Assert.assertTrue(nestedFieldWithDefault.hasDefaultValue());
58+
Assert.assertEquals(defaultValue, nestedFieldWithDefault.getDefaultValue());
59+
60+
// of constructors
61+
Assert.assertFalse(NestedField.of(id, true, fieldName, fieldType).hasDefaultValue());
62+
Assert.assertFalse(NestedField.of(id, true, fieldName, fieldType, doc).hasDefaultValue());
63+
nestedFieldWithDefault = NestedField.of(id, true, fieldName, fieldType, defaultValue, doc);
64+
Assert.assertTrue(nestedFieldWithDefault.hasDefaultValue());
65+
Assert.assertEquals(defaultValue, nestedFieldWithDefault.getDefaultValue());
66+
}
67+
68+
@Test (expected = IllegalArgumentException.class)
69+
public void testRequiredNullDefault() {
70+
// illegal case (required with null defaultValue)
71+
required(id, fieldName, fieldType, null, doc);
72+
}
73+
74+
@Test (expected = IllegalArgumentException.class)
75+
public void testRequiredWithDefaultNullDefault() {
76+
// illegal case (required with null defaultValue)
77+
required(id, fieldName, fieldType, null, null);
78+
}
79+
80+
@Test (expected = IllegalArgumentException.class)
81+
public void testOptionalWithInvalidDefaultValueClass() {
82+
// class of default value does not match class of type
83+
Long wrongClassDefaultValue = 100L;
84+
optional(id, fieldName, fieldType, wrongClassDefaultValue, doc);
85+
}
86+
}

site/docs/spec.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ A table's **schema** is a list of named columns. All data types are either primi
9090

9191
For the representations of these types in Avro, ORC, and Parquet file formats, see Appendix A.
9292

93+
Default values for fields are supported; see Nested Types below.
94+
9395
#### Nested Types
9496

9597
A **`struct`** is a tuple of typed values. Each field in the tuple is named and has an integer id that is unique in the table schema. Each field can be either optional or required, meaning that values can (or cannot) be null. Fields may be any type. Fields may have an optional comment or doc string.
@@ -98,6 +100,13 @@ A **`list`** is a collection of values with some element type. The element field
98100

99101
A **`map`** is a collection of key-value pairs with a key type and a value type. Both the key field and value field each have an integer id that is unique in the table schema. Map keys are required and map values can be either optional or required. Both map keys and map values may be any type, including nested types.
100102

103+
Iceberg supports default-value semantics for fields of nested types (i.e., struct, list and map). Specifically, a field
104+
of a nested type can have a default value that will be returned upon reading this field if it is not manifested.
105+
A default value can be defined for both required and optional fields. Null default values are allowed for optional
106+
fields only, and their behavior is identical to that of optional fields with no default value; that is, null is returned upon
107+
reading this field when it is not manifested.
108+
109+
101110
#### Primitive Types
102111

103112
| Primitive type | Description | Requirements |
@@ -692,7 +701,6 @@ This serialization scheme is for storing single values as individual binary valu
692701
| **`list`** | Not supported |
693702
| **`map`** | Not supported |
694703

695-
696704
## Format version changes
697705

698706
### Version 2

0 commit comments

Comments
 (0)