@@ -21,6 +21,7 @@
import java.util.Map;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.spark.CommitMetadata;
import org.apache.spark.sql.SparkSession;

abstract class BaseSnapshotUpdateSparkAction<ThisT> extends BaseSparkAction<ThisT> {
@@ -37,11 +38,17 @@ public ThisT snapshotProperty(String property, String value) {
}

protected void commit(org.apache.iceberg.SnapshotUpdate<?> update) {
if (!CommitMetadata.commitProperties().isEmpty()) {
summary.putAll(CommitMetadata.commitProperties());
}
summary.forEach(update::set);
update.commit();
}

protected Map<String, String> commitSummary() {
if (!CommitMetadata.commitProperties().isEmpty()) {
summary.putAll(CommitMetadata.commitProperties());
}
return ImmutableMap.copyOf(summary);
}
}
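
Note: CommitMetadata tracks commit properties per thread, so they apply only to commits made from the calling thread; that is why the tests below run each action in a dedicated, named thread and assert on that thread's name. A minimal usage sketch of the resulting behavior (the table and property keys are illustrative; the call pattern mirrors the tests in this PR):

import java.util.Map;
import org.apache.iceberg.SnapshotSummary;
import org.apache.iceberg.Table;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.spark.CommitMetadata;
import org.apache.iceberg.spark.actions.SparkActions;

class CommitPropertiesExample {
  static void rewriteWithCommitProperties(Table table) {
    Map<String, String> properties =
        ImmutableMap.of(
            // Plain keys are written into the snapshot summary verbatim.
            "writer-thread", Thread.currentThread().getName(),
            // Keys carrying EXTRA_METADATA_PREFIX end up in the summary with the
            // prefix stripped, as the test assertions below demonstrate.
            SnapshotSummary.EXTRA_METADATA_PREFIX + "extra-key", "someValue");

    CommitMetadata.withCommitProperties(
        properties,
        () -> {
          // Any snapshot this action commits picks up the properties above.
          SparkActions.get().rewriteDataFiles(table).execute();
          return 0;
        },
        RuntimeException.class);
  }
}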
@@ -24,6 +24,7 @@
import java.io.File;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
@@ -37,12 +38,16 @@
import org.apache.iceberg.Parameters;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.SnapshotSummary;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.actions.RemoveDanglingDeleteFiles;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.spark.CommitMetadata;
import org.apache.iceberg.spark.TestBase;
import org.apache.iceberg.types.Types;
import org.apache.spark.sql.Encoders;
@@ -434,4 +439,57 @@ public void testUnpartitionedTable() {
SparkActions.get().removeDanglingDeleteFiles(table).execute();
assertThat(result.removedDeleteFiles()).as("No-op for unpartitioned tables").isEmpty();
}

@TestTemplate
public void testRemoveDanglingDeleteCommitProperties() throws InterruptedException {
setupPartitionedTable();
// Add Data Files
table.newAppend().appendFile(FILE_A).appendFile(FILE_C).appendFile(FILE_D).commit();
// Add Data Files with EQ and POS deletes
table
.newRowDelta()
.addRows(FILE_A2)
.addRows(FILE_B2)
.addRows(FILE_C2)
.addRows(FILE_D2)
.addDeletes(FILE_A_POS_DELETES)
.addDeletes(FILE_A2_POS_DELETES)
.addDeletes(FILE_A_EQ_DELETES)
.addDeletes(FILE_A2_EQ_DELETES)
.addDeletes(FILE_B_POS_DELETES)
.addDeletes(FILE_B2_POS_DELETES)
.addDeletes(FILE_B_EQ_DELETES)
.addDeletes(FILE_B2_EQ_DELETES)
.commit();
Thread removeDanglingDeletesThread =
new Thread(
() -> {
Map<String, String> properties =
ImmutableMap.of(
"writer-thread",
String.valueOf(Thread.currentThread().getName()),
SnapshotSummary.EXTRA_METADATA_PREFIX + "extra-key",
"someValue",
SnapshotSummary.EXTRA_METADATA_PREFIX + "another-key",
"anotherValue");
CommitMetadata.withCommitProperties(
properties,
() -> {
SparkActions.get().removeDanglingDeleteFiles(table).execute();
return 0;
},
RuntimeException.class);
});
removeDanglingDeletesThread.setName("test-extra-commit-message-remove-dangling-delete");
removeDanglingDeletesThread.start();
removeDanglingDeletesThread.join();

table.refresh();
List<Snapshot> snapshots = Lists.newArrayList(table.snapshots());
assertThat(snapshots.get(1).summary()).doesNotContainKey("writer-thread");
assertThat(snapshots.get(2).summary())
.containsEntry("writer-thread", "test-extra-commit-message-remove-dangling-delete")
.containsEntry("extra-key", "someValue")
.containsEntry("another-key", "anotherValue");
}
}
@@ -102,6 +102,7 @@
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Streams;
import org.apache.iceberg.spark.CommitMetadata;
import org.apache.iceberg.spark.FileRewriteCoordinator;
import org.apache.iceberg.spark.ScanTaskSetManager;
import org.apache.iceberg.spark.SparkTableUtil;
@@ -1888,6 +1889,41 @@ public void testZOrderRewriteWithSpecificOutputSpecId() {
shouldRewriteDataFilesWithPartitionSpec(table, outputSpecId);
}

@TestTemplate
public void testRewriteDataFilesCommitProperties() throws InterruptedException {
Table table = createTablePartitioned(4, 2);
Thread rewriteDataFilesThread =
new Thread(
() -> {
Map<String, String> properties =
ImmutableMap.of(
"writer-thread",
String.valueOf(Thread.currentThread().getName()),
SnapshotSummary.EXTRA_METADATA_PREFIX + "extra-key",
"someValue",
SnapshotSummary.EXTRA_METADATA_PREFIX + "another-key",
"anotherValue");
CommitMetadata.withCommitProperties(
properties,
() -> {
basicRewrite(table).execute();
return 0;
},
RuntimeException.class);
});
rewriteDataFilesThread.setName("test-extra-commit-message-rewrite-data-files");
rewriteDataFilesThread.start();
rewriteDataFilesThread.join();

table.refresh();
List<Snapshot> snapshots = Lists.newArrayList(table.snapshots());
assertThat(snapshots.get(0).summary()).doesNotContainKey("writer-thread");
assertThat(snapshots.get(1).summary())
.containsEntry("writer-thread", "test-extra-commit-message-rewrite-data-files")
.containsEntry("extra-key", "someValue")
.containsEntry("another-key", "anotherValue");
}

protected void shouldRewriteDataFilesWithPartitionSpec(Table table, int outputSpecId) {
List<DataFile> rewrittenFiles = currentDataFiles(table);
assertThat(rewrittenFiles).allMatch(file -> file.specId() == outputSpecId);
@@ -57,6 +57,7 @@
import org.apache.iceberg.RowDelta;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.SnapshotSummary;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
@@ -73,6 +74,7 @@
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.spark.CommitMetadata;
import org.apache.iceberg.spark.SparkTableUtil;
import org.apache.iceberg.spark.SparkWriteOptions;
import org.apache.iceberg.spark.TestBase;
@@ -1013,6 +1015,63 @@ public void testRewriteManifestsAfterUpgradeToV3() throws IOException {
}
}

@TestTemplate
public void testRewriteManifestsActionCommitProperties() throws InterruptedException {
PartitionSpec spec = PartitionSpec.unpartitioned();
Map<String, String> options = Maps.newHashMap();
options.put(TableProperties.FORMAT_VERSION, String.valueOf(formatVersion));
options.put(TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED, snapshotIdInheritanceEnabled);
Table table = TABLES.create(SCHEMA, spec, options, tableLocation);

List<ThreeColumnRecord> records1 =
Lists.newArrayList(
new ThreeColumnRecord(1, null, "AAAA"), new ThreeColumnRecord(1, "BBBBBBBBBB", "BBBB"));
writeRecords(records1);

List<ThreeColumnRecord> records2 =
Lists.newArrayList(
new ThreeColumnRecord(2, "CCCCCCCCCC", "CCCC"),
new ThreeColumnRecord(2, "DDDDDDDDDD", "DDDD"));
writeRecords(records2);

Thread rewriteManifestThread =
new Thread(
() -> {
Map<String, String> properties =
ImmutableMap.of(
"writer-thread",
String.valueOf(Thread.currentThread().getName()),
SnapshotSummary.EXTRA_METADATA_PREFIX + "extra-key",
"someValue",
SnapshotSummary.EXTRA_METADATA_PREFIX + "another-key",
"anotherValue");
CommitMetadata.withCommitProperties(
properties,
() -> {
SparkActions actions = SparkActions.get();

actions
.rewriteManifests(table)
.rewriteIf(manifest -> true)
.option(RewriteManifestsSparkAction.USE_CACHING, useCaching)
.execute();
return 0;
},
RuntimeException.class);
});
rewriteManifestThread.setName("test-extra-commit-message-rewrite-manifest");
rewriteManifestThread.start();
rewriteManifestThread.join();

table.refresh();
List<Snapshot> snapshots = Lists.newArrayList(table.snapshots());
assertThat(snapshots.get(1).summary()).doesNotContainKey("writer-thread");
assertThat(snapshots.get(2).summary())
.containsEntry("writer-thread", "test-extra-commit-message-rewrite-manifest")
.containsEntry("extra-key", "someValue")
.containsEntry("another-key", "anotherValue");
}

private List<ThreeColumnRecord> actualRecords() {
return spark
.read()
@@ -21,6 +21,7 @@
import java.util.Map;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.spark.CommitMetadata;
import org.apache.spark.sql.SparkSession;

abstract class BaseSnapshotUpdateSparkAction<ThisT> extends BaseSparkAction<ThisT> {
@@ -37,11 +38,17 @@ public ThisT snapshotProperty(String property, String value) {
}

protected void commit(org.apache.iceberg.SnapshotUpdate<?> update) {
if (!CommitMetadata.commitProperties().isEmpty()) {
summary.putAll(CommitMetadata.commitProperties());
}
summary.forEach(update::set);
update.commit();
}

protected Map<String, String> commitSummary() {
if (!CommitMetadata.commitProperties().isEmpty()) {
summary.putAll(CommitMetadata.commitProperties());
}
return ImmutableMap.copyOf(summary);
}
}
@@ -25,6 +25,7 @@
import java.io.File;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
@@ -39,13 +40,16 @@
import org.apache.iceberg.Parameters;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.SnapshotSummary;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.actions.RemoveDanglingDeleteFiles;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.spark.CommitMetadata;
import org.apache.iceberg.spark.TestBase;
import org.apache.iceberg.types.Types;
import org.apache.spark.sql.Encoders;
@@ -515,6 +519,91 @@ public void testPartitionedDeletesWithDanglingDvs() {
assertThat(actualAfter).containsExactlyInAnyOrderElementsOf(expectedAfter);
}

@TestTemplate
public void testRemoveDanglingDeleteCommitProperties() throws InterruptedException {
setupPartitionedTable();

// Add Data Files
table.newAppend().appendFile(FILE_B).appendFile(FILE_C).appendFile(FILE_D).commit();

// Add Delete Files
DeleteFile fileADeletes = fileADeletes();
DeleteFile fileA2Deletes = fileA2Deletes();
DeleteFile fileBDeletes = fileBDeletes();
DeleteFile fileB2Deletes = fileB2Deletes();
table
.newRowDelta()
.addDeletes(fileADeletes)
.addDeletes(fileA2Deletes)
.addDeletes(fileBDeletes)
.addDeletes(fileB2Deletes)
.addDeletes(FILE_A_EQ_DELETES)
.addDeletes(FILE_A2_EQ_DELETES)
.addDeletes(FILE_B_EQ_DELETES)
.addDeletes(FILE_B2_EQ_DELETES)
.commit();

// Add More Data Files
table
.newAppend()
.appendFile(FILE_A2)
.appendFile(FILE_B2)
.appendFile(FILE_C2)
.appendFile(FILE_D2)
.commit();

List<Tuple2<Long, String>> actual = allEntries();
List<Tuple2<Long, String>> expected =
ImmutableList.of(
Tuple2.apply(1L, FILE_B.location()),
Tuple2.apply(1L, FILE_C.location()),
Tuple2.apply(1L, FILE_D.location()),
Tuple2.apply(2L, FILE_A_EQ_DELETES.location()),
Tuple2.apply(2L, fileADeletes.location()),
Tuple2.apply(2L, FILE_A2_EQ_DELETES.location()),
Tuple2.apply(2L, fileA2Deletes.location()),
Tuple2.apply(2L, FILE_B_EQ_DELETES.location()),
Tuple2.apply(2L, fileBDeletes.location()),
Tuple2.apply(2L, FILE_B2_EQ_DELETES.location()),
Tuple2.apply(2L, fileB2Deletes.location()),
Tuple2.apply(3L, FILE_A2.location()),
Tuple2.apply(3L, FILE_B2.location()),
Tuple2.apply(3L, FILE_C2.location()),
Tuple2.apply(3L, FILE_D2.location()));
assertThat(actual).containsExactlyInAnyOrderElementsOf(expected);

Thread removeDanglingDeletesThread =
new Thread(
() -> {
Map<String, String> properties =
ImmutableMap.of(
"writer-thread",
String.valueOf(Thread.currentThread().getName()),
SnapshotSummary.EXTRA_METADATA_PREFIX + "extra-key",
"someValue",
SnapshotSummary.EXTRA_METADATA_PREFIX + "another-key",
"anotherValue");
CommitMetadata.withCommitProperties(
properties,
() -> {
SparkActions.get().removeDanglingDeleteFiles(table).execute();
return 0;
},
RuntimeException.class);
});
removeDanglingDeletesThread.setName("test-extra-commit-message-remove-dangling-delete");
removeDanglingDeletesThread.start();
removeDanglingDeletesThread.join();

table.refresh();
List<Snapshot> snapshots = Lists.newArrayList(table.snapshots());
assertThat(snapshots.get(2).summary()).doesNotContainKey("writer-thread");
assertThat(snapshots.get(3).summary())
.containsEntry("writer-thread", "test-extra-commit-message-remove-dangling-delete")
.containsEntry("extra-key", "someValue")
.containsEntry("another-key", "anotherValue");
}

private List<Tuple2<Long, String>> liveEntries() {
return spark
.read()