Skip to content

Commit f68559d

Browse files
authored
Merge branch 'main' into spec-v3-type-promotion
2 parents 37ebab2 + 0747b60 commit f68559d

File tree

398 files changed

+28859
-2734
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

398 files changed

+28859
-2734
lines changed

.baseline/checkstyle/checkstyle.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,11 @@
427427
<property name="illegalPkgs" value="org.hamcrest"/>
428428
<message key="import.illegal" value="Prefer using org.assertj.core.api.Assertions instead."/>
429429
</module>
430+
<module name="IllegalImport">
431+
<property name="id" value="BanJUnit5Assertions"/>
432+
<property name="illegalPkgs" value="org.junit.jupiter.api.Assertions"/>
433+
<message key="import.illegal" value="Prefer using org.assertj.core.api.Assertions instead."/>
434+
</module>
430435
<module name="RegexpSinglelineJava">
431436
<property name="ignoreComments" value="true"/>
432437
<property name="format" value="@Json(S|Des)erialize"/>

.github/ISSUE_TEMPLATE/iceberg_bug_report.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ body:
2828
description: What Apache Iceberg version are you using?
2929
multiple: false
3030
options:
31-
- "1.6.0 (latest release)"
31+
- "1.6.1 (latest release)"
32+
- "1.6.0"
3233
- "1.5.2"
3334
- "1.5.1"
3435
- "1.5.0"

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ spark/v3.4/spark/benchmark/*
3737
spark/v3.4/spark-extensions/benchmark/*
3838
spark/v3.5/spark/benchmark/*
3939
spark/v3.5/spark-extensions/benchmark/*
40-
data/benchmark/*
40+
*/benchmark/*
4141

4242
__pycache__/
4343
*.py[cod]

.palantir/revapi.yml

Lines changed: 85 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,14 +1025,11 @@ acceptedBreaks:
10251025
new: "class org.apache.iceberg.types.Types.NestedField"
10261026
justification: "new Constructor added"
10271027
org.apache.iceberg:iceberg-core:
1028-
- code: "java.method.visibilityReduced"
1029-
old: "method void org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
1030-
new: "method void org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
1031-
justification: "Deprecations for 1.6.0 release"
10321028
- code: "java.element.noLongerDeprecated"
10331029
old: "method void org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
10341030
new: "method void org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
1035-
justification: "Constructor became private as part of deprecations cleanup for 1.6.0 release"
1031+
justification: "Constructor became private as part of deprecations cleanup for\
1032+
\ 1.6.0 release"
10361033
- code: "java.element.noLongerDeprecated"
10371034
old: "method void org.apache.iceberg.rest.auth.OAuth2Util.AuthSession::<init>(java.util.Map<java.lang.String,\
10381035
\ java.lang.String>, java.lang.String, java.lang.String, java.lang.String,\
@@ -1056,6 +1053,89 @@ acceptedBreaks:
10561053
- code: "java.method.removed"
10571054
old: "method org.apache.iceberg.DataFiles.Builder org.apache.iceberg.DataFiles.Builder::withEqualityFieldIds(java.util.List<java.lang.Integer>)"
10581055
justification: "Deprecations for 1.6.0 release"
1056+
- code: "java.method.visibilityReduced"
1057+
old: "method void org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
1058+
new: "method void org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
1059+
justification: "Deprecations for 1.6.0 release"
1060+
"1.6.0":
1061+
org.apache.iceberg:iceberg-common:
1062+
- code: "java.method.removed"
1063+
old: "method <T> org.apache.iceberg.common.DynFields.StaticField<T> org.apache.iceberg.common.DynFields.Builder::buildStaticChecked()\
1064+
\ throws java.lang.NoSuchFieldException"
1065+
justification: "Removing deprecated code"
1066+
- code: "java.method.removed"
1067+
old: "method java.lang.Class<? extends C> org.apache.iceberg.common.DynConstructors.Ctor<C>::getConstructedClass()"
1068+
justification: "Removing deprecated code"
1069+
- code: "java.method.removed"
1070+
old: "method org.apache.iceberg.common.DynConstructors.Builder org.apache.iceberg.common.DynConstructors.Builder::hiddenImpl(java.lang.Class<?>[])"
1071+
justification: "Removing deprecated code"
1072+
- code: "java.method.removed"
1073+
old: "method org.apache.iceberg.common.DynMethods.Builder org.apache.iceberg.common.DynMethods.Builder::ctorImpl(java.lang.Class<?>,\
1074+
\ java.lang.Class<?>[])"
1075+
justification: "Removing deprecated code"
1076+
- code: "java.method.removed"
1077+
old: "method org.apache.iceberg.common.DynMethods.Builder org.apache.iceberg.common.DynMethods.Builder::ctorImpl(java.lang.String,\
1078+
\ java.lang.Class<?>[])"
1079+
justification: "Removing deprecated code"
1080+
- code: "java.method.visibilityReduced"
1081+
old: "method <R> R org.apache.iceberg.common.DynMethods.UnboundMethod::invokeChecked(java.lang.Object,\
1082+
\ java.lang.Object[]) throws java.lang.Exception"
1083+
new: "method <R> R org.apache.iceberg.common.DynMethods.UnboundMethod::invokeChecked(java.lang.Object,\
1084+
\ java.lang.Object[]) throws java.lang.Exception"
1085+
justification: "Reduced visibility and scoped to package"
1086+
org.apache.iceberg:iceberg-core:
1087+
- code: "java.class.defaultSerializationChanged"
1088+
old: "class org.apache.iceberg.GenericManifestFile"
1089+
new: "class org.apache.iceberg.GenericManifestFile"
1090+
justification: "Serialization across versions is not supported"
1091+
- code: "java.class.removed"
1092+
old: "enum org.apache.iceberg.BaseMetastoreTableOperations.CommitStatus"
1093+
justification: "Removing deprecated code"
1094+
- code: "java.method.removed"
1095+
old: "method java.lang.String org.apache.iceberg.FileScanTaskParser::toJson(org.apache.iceberg.FileScanTask)"
1096+
justification: "Removing deprecated code"
1097+
- code: "java.method.removed"
1098+
old: "method org.apache.iceberg.FileScanTask org.apache.iceberg.FileScanTaskParser::fromJson(java.lang.String,\
1099+
\ boolean)"
1100+
justification: "Removing deprecated code"
1101+
- code: "java.method.removed"
1102+
old: "method org.apache.iceberg.io.ContentCache.CacheEntry org.apache.iceberg.io.ContentCache::get(java.lang.String,\
1103+
\ java.util.function.Function<java.lang.String, org.apache.iceberg.io.ContentCache.FileContent>)"
1104+
justification: "Removing deprecated code"
1105+
- code: "java.method.removed"
1106+
old: "method org.apache.iceberg.io.ContentCache.CacheEntry org.apache.iceberg.io.ContentCache::getIfPresent(java.lang.String)"
1107+
justification: "Removing deprecated code"
1108+
- code: "java.method.removed"
1109+
old: "method org.apache.iceberg.io.InputFile org.apache.iceberg.io.ContentCache::tryCache(org.apache.iceberg.io.FileIO,\
1110+
\ java.lang.String, long)"
1111+
justification: "Removing deprecated code"
1112+
- code: "java.method.removed"
1113+
old: "method org.apache.iceberg.io.OutputFile org.apache.iceberg.SnapshotProducer<ThisT>::newManifestOutput()\
1114+
\ @ org.apache.iceberg.BaseOverwriteFiles"
1115+
justification: "Removing deprecated code"
1116+
- code: "java.method.removed"
1117+
old: "method org.apache.iceberg.io.OutputFile org.apache.iceberg.SnapshotProducer<ThisT>::newManifestOutput()\
1118+
\ @ org.apache.iceberg.BaseReplacePartitions"
1119+
justification: "Removing deprecated code"
1120+
- code: "java.method.removed"
1121+
old: "method org.apache.iceberg.io.OutputFile org.apache.iceberg.SnapshotProducer<ThisT>::newManifestOutput()\
1122+
\ @ org.apache.iceberg.BaseRewriteManifests"
1123+
justification: "Removing deprecated code"
1124+
- code: "java.method.removed"
1125+
old: "method org.apache.iceberg.io.OutputFile org.apache.iceberg.SnapshotProducer<ThisT>::newManifestOutput()\
1126+
\ @ org.apache.iceberg.StreamingDelete"
1127+
justification: "Removing deprecated code"
1128+
- code: "java.method.removed"
1129+
old: "method void org.apache.iceberg.rest.auth.OAuth2Util.AuthSession::<init>(java.util.Map<java.lang.String,\
1130+
\ java.lang.String>, java.lang.String, java.lang.String, java.lang.String,\
1131+
\ java.lang.String, java.lang.String)"
1132+
justification: "Removing deprecated code"
1133+
- code: "java.method.returnTypeChanged"
1134+
old: "method org.apache.iceberg.BaseMetastoreTableOperations.CommitStatus org.apache.iceberg.BaseMetastoreTableOperations::checkCommitStatus(java.lang.String,\
1135+
\ org.apache.iceberg.TableMetadata)"
1136+
new: "method org.apache.iceberg.BaseMetastoreOperations.CommitStatus org.apache.iceberg.BaseMetastoreTableOperations::checkCommitStatus(java.lang.String,\
1137+
\ org.apache.iceberg.TableMetadata)"
1138+
justification: "Removing deprecated code"
10591139
apache-iceberg-0.14.0:
10601140
org.apache.iceberg:iceberg-api:
10611141
- code: "java.class.defaultSerializationChanged"

aliyun/src/main/java/org/apache/iceberg/aliyun/oss/OSSInputStream.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ private void closeStream() throws IOException {
157157
}
158158
}
159159

160-
@SuppressWarnings("checkstyle:NoFinalizer")
160+
@SuppressWarnings({"checkstyle:NoFinalizer", "Finalize"})
161161
@Override
162162
protected void finalize() throws Throwable {
163163
super.finalize();

aliyun/src/main/java/org/apache/iceberg/aliyun/oss/OSSOutputStream.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ private void cleanUpStagingFiles() {
165165
}
166166
}
167167

168-
@SuppressWarnings("checkstyle:NoFinalizer")
168+
@SuppressWarnings({"checkstyle:NoFinalizer", "Finalize"})
169169
@Override
170170
protected void finalize() throws Throwable {
171171
super.finalize();

api/src/main/java/org/apache/iceberg/Metrics.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ private static void writeByteBufferMap(
179179
* @throws IOException On serialization error
180180
* @throws ClassNotFoundException If the class is not found
181181
*/
182+
@SuppressWarnings("DangerousJavaDeserialization")
182183
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
183184
rowCount = (Long) in.readObject();
184185
columnSizes = (Map<Integer, Long>) in.readObject();
@@ -190,6 +191,7 @@ private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundE
190191
upperBounds = readByteBufferMap(in);
191192
}
192193

194+
@SuppressWarnings("DangerousJavaDeserialization")
193195
private static Map<Integer, ByteBuffer> readByteBufferMap(ObjectInputStream in)
194196
throws IOException, ClassNotFoundException {
195197
int size = in.readInt();

api/src/main/java/org/apache/iceberg/PartitionSpec.java

Lines changed: 67 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ public static class Builder {
371371
new AtomicInteger(unpartitionedLastAssignedId());
372372
// check if there are conflicts between partition and schema field name
373373
private boolean checkConflicts = true;
374+
private boolean caseSensitive = true;
374375

375376
private Builder(Schema schema) {
376377
this.schema = schema;
@@ -390,7 +391,8 @@ Builder checkConflicts(boolean check) {
390391
}
391392

392393
private void checkAndAddPartitionName(String name, Integer sourceColumnId) {
393-
Types.NestedField schemaField = schema.findField(name);
394+
Types.NestedField schemaField =
395+
this.caseSensitive ? schema.findField(name) : schema.caseInsensitiveFindField(name);
394396
if (checkConflicts) {
395397
if (sourceColumnId != null) {
396398
// for identity transform case we allow conflicts between partition and schema field name
@@ -427,20 +429,31 @@ private void checkForRedundantPartitions(PartitionField field) {
427429
dedupFields.put(dedupKey, field);
428430
}
429431

432+
public Builder caseSensitive(boolean sensitive) {
433+
this.caseSensitive = sensitive;
434+
return this;
435+
}
436+
430437
public Builder withSpecId(int newSpecId) {
431438
this.specId = newSpecId;
432439
return this;
433440
}
434441

435442
private Types.NestedField findSourceColumn(String sourceName) {
436-
Types.NestedField sourceColumn = schema.findField(sourceName);
443+
Types.NestedField sourceColumn =
444+
this.caseSensitive
445+
? schema.findField(sourceName)
446+
: schema.caseInsensitiveFindField(sourceName);
437447
Preconditions.checkArgument(
438448
sourceColumn != null, "Cannot find source column: %s", sourceName);
439449
return sourceColumn;
440450
}
441451

442452
Builder identity(String sourceName, String targetName) {
443-
Types.NestedField sourceColumn = findSourceColumn(sourceName);
453+
return identity(findSourceColumn(sourceName), targetName);
454+
}
455+
456+
private Builder identity(Types.NestedField sourceColumn, String targetName) {
444457
checkAndAddPartitionName(targetName, sourceColumn.fieldId());
445458
PartitionField field =
446459
new PartitionField(
@@ -451,12 +464,16 @@ Builder identity(String sourceName, String targetName) {
451464
}
452465

453466
public Builder identity(String sourceName) {
454-
return identity(sourceName, sourceName);
467+
Types.NestedField sourceColumn = findSourceColumn(sourceName);
468+
return identity(sourceColumn, schema.findColumnName(sourceColumn.fieldId()));
455469
}
456470

457471
public Builder year(String sourceName, String targetName) {
472+
return year(findSourceColumn(sourceName), targetName);
473+
}
474+
475+
private Builder year(Types.NestedField sourceColumn, String targetName) {
458476
checkAndAddPartitionName(targetName);
459-
Types.NestedField sourceColumn = findSourceColumn(sourceName);
460477
PartitionField field =
461478
new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.year());
462479
checkForRedundantPartitions(field);
@@ -465,12 +482,17 @@ public Builder year(String sourceName, String targetName) {
465482
}
466483

467484
public Builder year(String sourceName) {
468-
return year(sourceName, sourceName + "_year");
485+
Types.NestedField sourceColumn = findSourceColumn(sourceName);
486+
String columnName = schema.findColumnName(sourceColumn.fieldId());
487+
return year(sourceColumn, columnName + "_year");
469488
}
470489

471490
public Builder month(String sourceName, String targetName) {
491+
return month(findSourceColumn(sourceName), targetName);
492+
}
493+
494+
private Builder month(Types.NestedField sourceColumn, String targetName) {
472495
checkAndAddPartitionName(targetName);
473-
Types.NestedField sourceColumn = findSourceColumn(sourceName);
474496
PartitionField field =
475497
new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.month());
476498
checkForRedundantPartitions(field);
@@ -479,12 +501,17 @@ public Builder month(String sourceName, String targetName) {
479501
}
480502

481503
public Builder month(String sourceName) {
482-
return month(sourceName, sourceName + "_month");
504+
Types.NestedField sourceColumn = findSourceColumn(sourceName);
505+
String columnName = schema.findColumnName(sourceColumn.fieldId());
506+
return month(sourceColumn, columnName + "_month");
483507
}
484508

485509
public Builder day(String sourceName, String targetName) {
510+
return day(findSourceColumn(sourceName), targetName);
511+
}
512+
513+
private Builder day(Types.NestedField sourceColumn, String targetName) {
486514
checkAndAddPartitionName(targetName);
487-
Types.NestedField sourceColumn = findSourceColumn(sourceName);
488515
PartitionField field =
489516
new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.day());
490517
checkForRedundantPartitions(field);
@@ -493,12 +520,17 @@ public Builder day(String sourceName, String targetName) {
493520
}
494521

495522
public Builder day(String sourceName) {
496-
return day(sourceName, sourceName + "_day");
523+
Types.NestedField sourceColumn = findSourceColumn(sourceName);
524+
String columnName = schema.findColumnName(sourceColumn.fieldId());
525+
return day(sourceColumn, columnName + "_day");
497526
}
498527

499528
public Builder hour(String sourceName, String targetName) {
529+
return hour(findSourceColumn(sourceName), targetName);
530+
}
531+
532+
private Builder hour(Types.NestedField sourceColumn, String targetName) {
500533
checkAndAddPartitionName(targetName);
501-
Types.NestedField sourceColumn = findSourceColumn(sourceName);
502534
PartitionField field =
503535
new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.hour());
504536
checkForRedundantPartitions(field);
@@ -507,37 +539,52 @@ public Builder hour(String sourceName, String targetName) {
507539
}
508540

509541
public Builder hour(String sourceName) {
510-
return hour(sourceName, sourceName + "_hour");
542+
Types.NestedField sourceColumn = findSourceColumn(sourceName);
543+
String columnName = schema.findColumnName(sourceColumn.fieldId());
544+
return hour(sourceColumn, columnName + "_hour");
511545
}
512546

513547
public Builder bucket(String sourceName, int numBuckets, String targetName) {
548+
return bucket(findSourceColumn(sourceName), numBuckets, targetName);
549+
}
550+
551+
private Builder bucket(Types.NestedField sourceColumn, int numBuckets, String targetName) {
514552
checkAndAddPartitionName(targetName);
515-
Types.NestedField sourceColumn = findSourceColumn(sourceName);
516553
fields.add(
517554
new PartitionField(
518555
sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.bucket(numBuckets)));
519556
return this;
520557
}
521558

522559
public Builder bucket(String sourceName, int numBuckets) {
523-
return bucket(sourceName, numBuckets, sourceName + "_bucket");
560+
Types.NestedField sourceColumn = findSourceColumn(sourceName);
561+
String columnName = schema.findColumnName(sourceColumn.fieldId());
562+
return bucket(sourceColumn, numBuckets, columnName + "_bucket");
524563
}
525564

526565
public Builder truncate(String sourceName, int width, String targetName) {
566+
return truncate(findSourceColumn(sourceName), width, targetName);
567+
}
568+
569+
private Builder truncate(Types.NestedField sourceColumn, int width, String targetName) {
527570
checkAndAddPartitionName(targetName);
528-
Types.NestedField sourceColumn = findSourceColumn(sourceName);
529571
fields.add(
530572
new PartitionField(
531573
sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.truncate(width)));
532574
return this;
533575
}
534576

535577
public Builder truncate(String sourceName, int width) {
536-
return truncate(sourceName, width, sourceName + "_trunc");
578+
Types.NestedField sourceColumn = findSourceColumn(sourceName);
579+
String columnName = schema.findColumnName(sourceColumn.fieldId());
580+
return truncate(sourceColumn, width, columnName + "_trunc");
537581
}
538582

539583
public Builder alwaysNull(String sourceName, String targetName) {
540-
Types.NestedField sourceColumn = findSourceColumn(sourceName);
584+
return alwaysNull(findSourceColumn(sourceName), targetName);
585+
}
586+
587+
private Builder alwaysNull(Types.NestedField sourceColumn, String targetName) {
541588
checkAndAddPartitionName(
542589
targetName, sourceColumn.fieldId()); // can duplicate a source column name
543590
fields.add(
@@ -547,7 +594,9 @@ public Builder alwaysNull(String sourceName, String targetName) {
547594
}
548595

549596
public Builder alwaysNull(String sourceName) {
550-
return alwaysNull(sourceName, sourceName + "_null");
597+
Types.NestedField sourceColumn = findSourceColumn(sourceName);
598+
String columnName = schema.findColumnName(sourceColumn.fieldId());
599+
return alwaysNull(sourceColumn, columnName + "_null");
551600
}
552601

553602
// add a partition field with an auto-increment partition field id starting from

0 commit comments

Comments
 (0)