streaming update #2

Closed · wants to merge 24 commits

Commits (24)
b309d9b
Nessie: Support views for NessieCatalog (#8909)
ajantha-bhat Dec 12, 2023
0c5b87a
Data: Add GenericFileWriterFactory (#9267)
aokolnychyi Dec 12, 2023
09b44bb
Hive: Introduce HiveMetastoreExtension for Hive tests (#9282)
nk1506 Dec 12, 2023
36ecab4
Core: Add StandardEncryptionManager (#9277)
rdblue Dec 12, 2023
d631e2c
Core, Spark: Avoid manifest copies when importing data to V2 tables (…
aokolnychyi Dec 13, 2023
3112ec9
Build: Bump org.apache.httpcomponents.client5:httpclient5 (#9260)
dependabot[bot] Dec 13, 2023
60876e4
Hive: Make HiveMetastoreExtension configurable (#9288)
nastra Dec 13, 2023
7240752
Docs: Add spec-id for rewrite manifests (#9253)
puchengy Dec 13, 2023
11608e1
JDBC Catalog: Fix namespaceExists check with special characters (#8340)
ismailsimsek Dec 13, 2023
46df2ce
API: Restore RuntimeIOException for use (#5640)
danielcweeks Dec 14, 2023
c6bbbdb
Spark: Remove support for Spark 3.2 (#9295)
ajantha-bhat Dec 14, 2023
5e62e47
Spark: Fix flaky tests which concurrently modify HashSet (#9294)
manuzhang Dec 14, 2023
7a42120
Docs: Update readme status paragraph (#9272)
mt-ronkorving Dec 14, 2023
09290c5
Core: Remove deprecated classes related to rewrite data files (#9296)
ajantha-bhat Dec 14, 2023
5487c17
Hive: Refactor TestHiveCatalog tests to use CatalogTests (#8918)
nk1506 Dec 14, 2023
8572c56
API, Core: Move SQLViewRepresentation to API (#9302)
nastra Dec 14, 2023
d56dd63
Doc: Adding documentation for flink iceberg connector for version 1.1…
rodmeneses Dec 14, 2023
8181f84
API, Core: Add sqlFor API to views to handle resolving a representati…
amogh-jahagirdar Dec 15, 2023
d6a4ca7
API: Fix equals and hashCode in CharSequenceSet (#9245)
aokolnychyi Dec 16, 2023
395e01e
Core: Make sqlFor case insensitive for dialect check (#9311)
amogh-jahagirdar Dec 16, 2023
24578a2
Core: Fix metadata table uuid to return a consistent UUID for the sam…
ajantha-bhat Dec 16, 2023
901f124
MergingSnapshotProducer: Change file holder to be generic
jasonf20 Dec 11, 2023
5f8d40e
MergingSnapshotProducer: Support adding data files at a specific sequ…
jasonf20 Dec 11, 2023
3e67c95
Table Operations: Added streaming update operation
jasonf20 Dec 11, 2023
2 changes: 1 addition & 1 deletion .github/workflows/publish-snapshot.yml
@@ -41,4 +41,4 @@ jobs:
       - run: |
           ./gradlew printVersion
           ./gradlew -DallVersions publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
-          ./gradlew -DflinkVersions= -DsparkVersions=3.2,3.3,3.4,3.5 -DscalaVersion=2.13 -DhiveVersions= publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
+          ./gradlew -DflinkVersions= -DsparkVersions=3.3,3.4,3.5 -DscalaVersion=2.13 -DhiveVersions= publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
4 changes: 2 additions & 2 deletions .github/workflows/spark-ci.yml
@@ -58,7 +58,7 @@ jobs:
     strategy:
       matrix:
         jvm: [8, 11]
-        spark: ['3.2', '3.3', '3.4', '3.5']
+        spark: ['3.3', '3.4', '3.5']
     env:
       SPARK_LOCAL_IP: localhost
     steps:
@@ -88,7 +88,7 @@ jobs:
     strategy:
       matrix:
         jvm: [8, 11]
-        spark: ['3.2','3.3','3.4','3.5']
+        spark: ['3.3','3.4','3.5']
     env:
       SPARK_LOCAL_IP: localhost
     steps:
1 change: 0 additions & 1 deletion .gitignore
@@ -27,7 +27,6 @@ lib/
 site/site
 
 # benchmark output
-spark/v3.2/spark/benchmark/*
 spark/v3.3/spark/benchmark/*
 spark/v3.3/spark-extensions/benchmark/*
 spark/v3.4/spark/benchmark/*
9 changes: 9 additions & 0 deletions .palantir/revapi.yml
@@ -874,6 +874,15 @@ acceptedBreaks:
       justification: "Static utility class - should not have public constructor"
   "1.4.0":
     org.apache.iceberg:iceberg-core:
+    - code: "java.class.removed"
+      old: "class org.apache.iceberg.actions.BinPackStrategy"
+      justification: "Removing deprecated code"
+    - code: "java.class.removed"
+      old: "class org.apache.iceberg.actions.SortStrategy"
+      justification: "Removing deprecated code"
+    - code: "java.class.removed"
+      old: "interface org.apache.iceberg.actions.RewriteStrategy"
+      justification: "Removing deprecated code"
     - code: "java.field.serialVersionUIDChanged"
       new: "field org.apache.iceberg.util.SerializableMap<K, V>.serialVersionUID"
       justification: "Serialization is not be used"
12 changes: 7 additions & 5 deletions README.md
@@ -31,15 +31,17 @@ Background and documentation is available at <https://iceberg.apache.org>
 
 Iceberg is under active development at the Apache Software Foundation.
 
-The core Java library that tracks table snapshots and metadata is complete, but still evolving. Current work is focused on adding row-level deletes and upserts, and integration work with new engines like Flink and Hive.
+The [Iceberg format specification][iceberg-spec] is stable and new features are added with each version.
 
-The [Iceberg format specification][iceberg-spec] is being actively updated and is open for comment. Until the specification is complete and released, it carries no compatibility guarantees. The spec is currently evolving as the Java reference implementation changes.
+The core Java library is located in this repository and is the reference implementation for other libraries.
 
-[Java API javadocs][iceberg-javadocs] are available for the main.
+[Documentation][iceberg-docs] is available for all libraries and integrations.
 
-[iceberg-javadocs]: https://iceberg.apache.org/javadoc/latest
-[iceberg-spec]: https://iceberg.apache.org/spec
+Current work is tracked in the [roadmap][roadmap].
+
+[iceberg-docs]: https://iceberg.apache.org/docs/latest/
+[iceberg-spec]: https://iceberg.apache.org/spec
+[roadmap]: https://iceberg.apache.org/roadmap/
 
 ## Collaboration
18 changes: 9 additions & 9 deletions api/src/main/java/org/apache/iceberg/AppendFiles.java
@@ -42,16 +42,16 @@ public interface AppendFiles extends SnapshotUpdate<AppendFiles> {
    * <p>The manifest must contain only appended files. All files in the manifest will be appended to
    * the table in the snapshot created by this update.
    *
-   * <p>By default, the manifest will be rewritten to assign all entries this update's snapshot ID.
-   * In that case, it is always the responsibility of the caller to manage the lifecycle of the
-   * original manifest.
+   * <p>The manifest will be used directly if snapshot ID inheritance is enabled (all tables with
+   * the format version &gt; 1 or if the inheritance is enabled explicitly via table properties).
+   * Otherwise, the manifest will be rewritten to assign all entries this update's snapshot ID.
    *
-   * <p>If manifest entries are allowed to inherit the snapshot ID assigned on commit, the manifest
-   * should never be deleted manually if the commit succeeds as it will become part of the table
-   * metadata and will be cleaned up on expiry. If the manifest gets merged with others while
-   * preparing a new snapshot, it will be deleted automatically if this operation is successful. If
-   * the commit fails, the manifest will never be deleted and it is up to the caller whether to
-   * delete or reuse it.
+   * <p>If the manifest is rewritten, it is always the responsibility of the caller to manage the
+   * lifecycle of the original manifest. If the manifest is used directly, it should never be
+   * deleted manually if the commit succeeds as it will become part of the table metadata and will
+   * be cleaned upon expiry. If the manifest gets merged with others while preparing a new snapshot,
+   * it will be deleted automatically if this operation is successful. If the commit fails, the
+   * manifest will never be deleted, and it is up to the caller whether to delete or reuse it.
    *
    * @param file a manifest file
    * @return this for method chaining
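For context, a minimal sketch of how appendManifest is typically used — the helper class and its inputs are assumptions for illustration, not part of this diff. With snapshot ID inheritance (format v2+), the committed manifest is adopted as-is and becomes part of table metadata.

import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.Table;

class AppendManifestExample {
  // `table` and `manifest` are assumed inputs; the manifest must contain only appended files.
  static void appendExistingManifest(Table table, ManifestFile manifest) {
    AppendFiles append = table.newAppend();
    append.appendManifest(manifest);
    // On success with snapshot ID inheritance, the manifest is owned by the table;
    // on failure, the caller decides whether to delete or reuse it.
    append.commit();
  }
}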
129 changes: 129 additions & 0 deletions api/src/main/java/org/apache/iceberg/StreamingUpdate.java
@@ -0,0 +1,129 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg;

import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Expression;

/**
 * API for appending sequential updates to a table
 *
 * <p>This API accumulates batches of file additions and deletions by order, produces a new {@link
 * Snapshot} of the changes where each batch is added to a new data sequence number, and commits
 * that snapshot as the current.
 *
 * <p>When committing, these changes will be applied to the latest table snapshot. Commit conflicts
 * will be resolved by applying the changes to the new latest snapshot and reattempting the commit.
 * If any of the deleted files are no longer in the latest snapshot when reattempting, the commit
 * will throw a {@link ValidationException}.
 */
public interface StreamingUpdate extends SnapshotUpdate<StreamingUpdate> {

  /**
   * Start a new batch of changes. The changes in this batch will have a sequence number larger than
   * the changes in the previous batches.
   *
   * @return this for method chaining
   */
  default StreamingUpdate newBatch() {
    throw new UnsupportedOperationException(
        this.getClass().getName() + " does not implement newBatch");
  }

  /**
   * Add a new data file to the current batch. All files in this batch will receive the same data
   * sequence number.
   *
   * @param dataFile a new data file
   * @return this for method chaining
   */
  default StreamingUpdate addFile(DataFile dataFile) {
    throw new UnsupportedOperationException(
        this.getClass().getName() + " does not implement addFile");
  }

  /**
   * Add a new delete file to the current batch. All files in this batch will receive the same data
   * sequence number.
   *
   * @param deleteFile a new delete file
   * @return this for method chaining
   */
  default StreamingUpdate addFile(DeleteFile deleteFile) {
    throw new UnsupportedOperationException(
        this.getClass().getName() + " does not implement addFile");
  }

  /**
   * Set the snapshot ID used in any reads for this operation.
   *
   * <p>Validations will check changes after this snapshot ID. If the from snapshot is not set, all
   * ancestor snapshots through the table's initial snapshot are validated.
   *
   * @param snapshotId a snapshot ID
   * @return this for method chaining
   */
  StreamingUpdate validateFromSnapshot(long snapshotId);

  /**
   * Sets a conflict detection filter used to validate concurrently added data and delete files.
   *
   * <p>If not called, a true literal will be used as the conflict detection filter.
   *
   * @param conflictDetectionFilter an expression on rows in the table
   * @return this for method chaining
   */
  StreamingUpdate conflictDetectionFilter(Expression conflictDetectionFilter);

  /**
   * Enables validation that data files added concurrently do not conflict with this commit's
   * operation.
   *
   * <p>This method should be called when the table is queried to determine which files to
   * delete/append. If a concurrent operation commits a new file after the data was read and that
   * file might contain rows matching the specified conflict detection filter, this operation will
   * detect this during retries and fail.
   *
   * <p>Calling this method is required to maintain serializable isolation for update/delete
   * operations. Otherwise, the isolation level will be snapshot isolation.
   *
   * <p>Validation uses the conflict detection filter passed to {@link
   * #conflictDetectionFilter(Expression)} and applies to operations that happened after the
   * snapshot passed to {@link #validateFromSnapshot(long)}.
   *
   * @return this for method chaining
   */
  StreamingUpdate validateNoConflictingDataFiles();

  /**
   * Enables validation that delete files added concurrently do not conflict with this commit's
   * operation.
   *
   * <p>This method must be called when the table is queried to produce a row delta for UPDATE and
   * MERGE operations independently of the isolation level. Calling this method isn't required for
   * DELETE operations as it is OK to delete a record that is also deleted concurrently.
   *
   * <p>Validation uses the conflict detection filter passed to {@link
   * #conflictDetectionFilter(Expression)} and applies to operations that happened after the
   * snapshot passed to {@link #validateFromSnapshot(long)}.
   *
   * @return this for method chaining
   */
  StreamingUpdate validateNoConflictingDeleteFiles();
}
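To make the batch semantics concrete, here is a hedged usage sketch (not from the PR): it assumes a Table whose implementation overrides newStreamingUpdate(), plus hypothetical batch1Data, batch1Deletes, and batch2Data files. Each newBatch() call raises the data sequence number assigned to subsequently added files.

import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.StreamingUpdate;
import org.apache.iceberg.Table;
import org.apache.iceberg.expressions.Expressions;

class StreamingUpdateExample {
  static void commitTwoBatches(
      Table table,
      long lastReadSnapshotId,
      DataFile batch1Data,
      DeleteFile batch1Deletes,
      DataFile batch2Data) {
    StreamingUpdate update =
        table
            .newStreamingUpdate()
            .validateFromSnapshot(lastReadSnapshotId) // validate changes after this snapshot
            .conflictDetectionFilter(Expressions.alwaysTrue())
            .validateNoConflictingDataFiles()
            .validateNoConflictingDeleteFiles();

    update.newBatch().addFile(batch1Data).addFile(batch1Deletes); // sequence number N
    update.newBatch().addFile(batch2Data); // sequence number N + 1

    update.commit(); // retried against the latest snapshot on conflict
  }
}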
11 changes: 11 additions & 0 deletions api/src/main/java/org/apache/iceberg/Table.java
@@ -233,6 +233,17 @@ default AppendFiles newFastAppend() {
    */
   RewriteFiles newRewrite();
 
+  /**
+   * Create a new {@link StreamingUpdate streaming update API} to append sequential upserts to the
+   * table.
+   *
+   * @return a new {@link StreamingUpdate}
+   */
+  default StreamingUpdate newStreamingUpdate() {
+    throw new UnsupportedOperationException(
+        this.getClass().getName() + " doesn't implement newStreamingUpdate()");
+  }
+
   /**
    * Create a new {@link RewriteManifests rewrite manifests API} to replace manifests for this table
    * and commit.
11 changes: 11 additions & 0 deletions api/src/main/java/org/apache/iceberg/Transaction.java
@@ -95,6 +95,17 @@ default AppendFiles newFastAppend() {
    */
   RewriteFiles newRewrite();
 
+  /**
+   * Create a new {@link StreamingUpdate streaming update API} to append sequential upserts to the
+   * table.
+   *
+   * @return a new {@link StreamingUpdate}
+   */
+  default StreamingUpdate newStreamingUpdate() {
+    throw new UnsupportedOperationException(
+        this.getClass().getName() + " doesn't implement newStreamingUpdate()");
+  }
+
   /**
    * Create a new {@link RewriteManifests rewrite manifests API} to replace manifests for this
    * table.
api/src/main/java/org/apache/iceberg/encryption/EncryptedOutputFile.java
@@ -37,4 +37,9 @@ public interface EncryptedOutputFile {
    * #encryptingOutputFile()}.
    */
   EncryptionKeyMetadata keyMetadata();
+
+  /** Underlying output file for native encryption. */
+  default OutputFile plainOutputFile() {
+    throw new UnsupportedOperationException("Not implemented");
+  }
 }
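A small sketch of how a writer might pick its output stream — the format-based dispatch here is an assumption for illustration, not part of this diff. Formats with native, in-format encryption (such as Parquet modular encryption) write through plainOutputFile() and encrypt internally; others write through the stream-encrypting encryptingOutputFile().

import org.apache.iceberg.FileFormat;
import org.apache.iceberg.encryption.EncryptedOutputFile;
import org.apache.iceberg.io.OutputFile;

class EncryptionTargetExample {
  // Hypothetical helper: choose the underlying file a writer should target.
  static OutputFile writeTarget(EncryptedOutputFile file, FileFormat format) {
    return format == FileFormat.PARQUET ? file.plainOutputFile() : file.encryptingOutputFile();
  }
}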
api/src/main/java/org/apache/iceberg/exceptions/RuntimeIOException.java
@@ -22,11 +22,7 @@
 import java.io.IOException;
 import java.io.UncheckedIOException;
 
-/**
- * @deprecated Use java.io.UncheckedIOException directly instead.
- * <p>Exception used to wrap {@link IOException} as a {@link RuntimeException} and add context.
- */
-@Deprecated
+/** Exception used to wrap {@link IOException} as a {@link RuntimeException} and add context. */
 public class RuntimeIOException extends UncheckedIOException {
 
   public RuntimeIOException(IOException cause) {
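A minimal sketch of the restored class in use (an assumed call site, not from the PR), wrapping a checked IOException with the constructor shown in the diff above:

import java.io.IOException;
import java.io.InputStream;
import org.apache.iceberg.exceptions.RuntimeIOException;

class RuntimeIOExceptionExample {
  static int readByte(InputStream in) {
    try {
      return in.read();
    } catch (IOException e) {
      // RuntimeIOException extends java.io.UncheckedIOException, so existing
      // catch blocks for either type still match.
      throw new RuntimeIOException(e);
    }
  }
}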
20 changes: 13 additions & 7 deletions api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
@@ -21,7 +21,6 @@
 import java.io.Serializable;
 import java.util.Collection;
 import java.util.Iterator;
-import java.util.Objects;
 import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
@@ -168,22 +167,29 @@ public void clear() {
   }
 
   @Override
-  public boolean equals(Object o) {
-    if (this == o) {
+  public boolean equals(Object other) {
+    if (this == other) {
       return true;
+    } else if (!(other instanceof Set)) {
+      return false;
     }
 
-    if (o == null || getClass() != o.getClass()) {
+    Set<?> that = (Set<?>) other;
+
+    if (size() != that.size()) {
       return false;
     }
 
-    CharSequenceSet that = (CharSequenceSet) o;
-    return wrapperSet.equals(that.wrapperSet);
+    try {
+      return containsAll(that);
+    } catch (ClassCastException | NullPointerException unused) {
+      return false;
+    }
   }
 
   @Override
   public int hashCode() {
-    return Objects.hashCode(wrapperSet);
+    return wrapperSet.stream().mapToInt(CharSequenceWrapper::hashCode).sum();
   }
 
   @Override
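The practical effect of the new equals/hashCode, shown as a hedged sketch (the example class is an illustration, not from the PR): a CharSequenceSet now compares equal to any java.util.Set holding the same elements, per the Set contract, and hashCode() is the sum of element hash codes.

import java.util.Set;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.util.CharSequenceSet;

class CharSequenceSetEqualityExample {
  public static void main(String[] args) {
    CharSequenceSet paths = CharSequenceSet.of(ImmutableSet.of("a.parquet", "b.parquet"));
    Set<String> plain = ImmutableSet.of("a.parquet", "b.parquet");
    // Before this change, equals() required the other object to be a CharSequenceSet.
    System.out.println(paths.equals(plain)); // true
  }
}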
api/src/main/java/org/apache/iceberg/view/SQLViewRepresentation.java
@@ -18,9 +18,7 @@
  */
 package org.apache.iceberg.view;
 
-import org.immutables.value.Value;
-
-@Value.Immutable
+/** SQLViewRepresentation represents views in SQL with a given dialect */
 public interface SQLViewRepresentation extends ViewRepresentation {
 
   @Override
11 changes: 11 additions & 0 deletions api/src/main/java/org/apache/iceberg/view/View.java
@@ -121,4 +121,15 @@ default UpdateLocation updateLocation() {
   default UUID uuid() {
     throw new UnsupportedOperationException("Retrieving a view's uuid is not supported");
   }
+
+  /**
+   * Returns the view representation for the given SQL dialect
+   *
+   * @return the view representation for the given SQL dialect, or null if no representation could
+   *     be resolved
+   */
+  default SQLViewRepresentation sqlFor(String dialect) {
+    throw new UnsupportedOperationException(
+        "Resolving a sql with a given dialect is not supported");
+  }
 }
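A hedged sketch of resolving a representation by dialect with the new API — the helper and its fallback behavior are assumptions, not part of this diff. Note that #9311 in the commit list above makes the dialect check case-insensitive.

import org.apache.iceberg.view.SQLViewRepresentation;
import org.apache.iceberg.view.View;

class SqlForExample {
  // Hypothetical helper: resolve the SQL for a dialect, or fail clearly.
  static String sqlFor(View view, String dialect) {
    SQLViewRepresentation representation = view.sqlFor(dialect);
    if (representation == null) {
      throw new IllegalArgumentException("No SQL representation for dialect: " + dialect);
    }
    return representation.sql();
  }
}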