4141import org .apache .iceberg .relocated .com .google .common .collect .Lists ;
4242import org .apache .iceberg .relocated .com .google .common .collect .Sets ;
4343import org .apache .iceberg .util .CharSequenceSet ;
44+ import org .apache .iceberg .util .Pair ;
4445import org .slf4j .Logger ;
4546import org .slf4j .LoggerFactory ;
4647
@@ -62,6 +63,9 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
6263 ImmutableSet .of (DataOperations .OVERWRITE , DataOperations .REPLACE , DataOperations .DELETE );
6364 private static final Set <String > VALIDATE_DATA_FILES_EXIST_SKIP_DELETE_OPERATIONS =
6465 ImmutableSet .of (DataOperations .OVERWRITE , DataOperations .REPLACE );
66+ // delete files can be added in "overwrite" or "delete" operations, so only snapshots produced by these
// operations are scanned for new delete manifests when validating replaced data files
67+ private static final Set <String > VALIDATE_REPLACED_DATA_FILES_OPERATIONS =
68+ ImmutableSet .of (DataOperations .OVERWRITE , DataOperations .DELETE );
6569
6670 private final String tableName ;
6771 private final TableOperations ops ;
@@ -253,28 +257,10 @@ protected void validateAddedDataFiles(TableMetadata base, Long startingSnapshotI
253257 return ;
254258 }
255259
256- List <ManifestFile > manifests = Lists .newArrayList ();
257- Set <Long > newSnapshots = Sets .newHashSet ();
258-
259- Long currentSnapshotId = base .currentSnapshot ().snapshotId ();
260- while (currentSnapshotId != null && !currentSnapshotId .equals (startingSnapshotId )) {
261- Snapshot currentSnapshot = ops .current ().snapshot (currentSnapshotId );
262-
263- ValidationException .check (currentSnapshot != null ,
264- "Cannot determine history between starting snapshot %s and current %s" ,
265- startingSnapshotId , currentSnapshotId );
266-
267- if (VALIDATE_ADDED_FILES_OPERATIONS .contains (currentSnapshot .operation ())) {
268- newSnapshots .add (currentSnapshotId );
269- for (ManifestFile manifest : currentSnapshot .dataManifests ()) {
270- if (manifest .snapshotId () == (long ) currentSnapshotId ) {
271- manifests .add (manifest );
272- }
273- }
274- }
275-
276- currentSnapshotId = currentSnapshot .parentId ();
277- }
260+ Pair <List <ManifestFile >, Set <Long >> history =
261+ validationHistory (base , startingSnapshotId , VALIDATE_ADDED_FILES_OPERATIONS , ManifestContent .DATA );
262+ List <ManifestFile > manifests = history .first ();
263+ Set <Long > newSnapshots = history .second ();
278264
279265 ManifestGroup conflictGroup = new ManifestGroup (ops .io (), manifests , ImmutableList .of ())
280266 .caseSensitive (caseSensitive )
@@ -297,6 +283,39 @@ protected void validateAddedDataFiles(TableMetadata base, Long startingSnapshotI
297283 }
298284 }
299285
286+ /**
287+ * Validates that no new delete files that must be applied to the given data files have been added to the table since
288+ * a starting snapshot.
289+ *
290+ * @param base table metadata to validate
291+ * @param startingSnapshotId id of the snapshot current at the start of the operation
292+ * @param dataFiles data files to validate have no new row deletes
293+ */
294+ protected void validateNoNewDeletesForDataFiles (TableMetadata base , Long startingSnapshotId ,
295+ Iterable <DataFile > dataFiles ) {
296+ // if there is no current table state, no files have been added
297+ if (base .currentSnapshot () == null ) {
298+ return ;
299+ }
300+
301+ Pair <List <ManifestFile >, Set <Long >> history =
302+ validationHistory (base , startingSnapshotId , VALIDATE_REPLACED_DATA_FILES_OPERATIONS , ManifestContent .DELETES );
303+ List <ManifestFile > deleteManifests = history .first ();
304+
305+ long startingSequenceNumber = startingSnapshotId == null ? 0 : base .snapshot (startingSnapshotId ).sequenceNumber ();
306+ DeleteFileIndex deletes = DeleteFileIndex .builderFor (ops .io (), deleteManifests )
307+ .afterSequenceNumber (startingSequenceNumber )
308+ .specsById (ops .current ().specsById ())
309+ .build ();
310+
311+ for (DataFile dataFile : dataFiles ) {
312+ // if any delete is found that applies to files written in or before the starting snapshot, fail
313+ if (deletes .forDataFile (startingSequenceNumber , dataFile ).length > 0 ) {
314+ throw new ValidationException ("Cannot commit, found new delete for replaced data file: %s" , dataFile );
315+ }
316+ }
317+ }
318+
300319 @ SuppressWarnings ("CollectionUndefinedEquality" )
301320 protected void validateDataFilesExist (TableMetadata base , Long startingSnapshotId ,
302321 CharSequenceSet requiredDataFiles , boolean skipDeletes ) {
@@ -309,6 +328,31 @@ protected void validateDataFilesExist(TableMetadata base, Long startingSnapshotI
309328 VALIDATE_DATA_FILES_EXIST_SKIP_DELETE_OPERATIONS :
310329 VALIDATE_DATA_FILES_EXIST_OPERATIONS ;
311330
331+ Pair <List <ManifestFile >, Set <Long >> history =
332+ validationHistory (base , startingSnapshotId , matchingOperations , ManifestContent .DATA );
333+ List <ManifestFile > manifests = history .first ();
334+ Set <Long > newSnapshots = history .second ();
335+
336+ ManifestGroup matchingDeletesGroup = new ManifestGroup (ops .io (), manifests , ImmutableList .of ())
337+ .filterManifestEntries (entry -> entry .status () != ManifestEntry .Status .ADDED &&
338+ newSnapshots .contains (entry .snapshotId ()) && requiredDataFiles .contains (entry .file ().path ()))
339+ .specsById (base .specsById ())
340+ .ignoreExisting ();
341+
342+ try (CloseableIterator <ManifestEntry <DataFile >> deletes = matchingDeletesGroup .entries ().iterator ()) {
343+ if (deletes .hasNext ()) {
344+ throw new ValidationException ("Cannot commit, missing data files: %s" ,
345+ Iterators .toString (Iterators .transform (deletes , entry -> entry .file ().path ().toString ())));
346+ }
347+
348+ } catch (IOException e ) {
349+ throw new UncheckedIOException ("Failed to validate required files exist" , e );
350+ }
351+ }
352+
353+ private Pair <List <ManifestFile >, Set <Long >> validationHistory (TableMetadata base , Long startingSnapshotId ,
354+ Set <String > matchingOperations ,
355+ ManifestContent content ) {
312356 List <ManifestFile > manifests = Lists .newArrayList ();
313357 Set <Long > newSnapshots = Sets .newHashSet ();
314358
@@ -322,31 +366,25 @@ protected void validateDataFilesExist(TableMetadata base, Long startingSnapshotI
322366
323367 if (matchingOperations .contains (currentSnapshot .operation ())) {
324368 newSnapshots .add (currentSnapshotId );
325- for (ManifestFile manifest : currentSnapshot .dataManifests ()) {
326- if (manifest .snapshotId () == (long ) currentSnapshotId ) {
327- manifests .add (manifest );
369+ if (content == ManifestContent .DATA ) {
370+ for (ManifestFile manifest : currentSnapshot .dataManifests ()) {
371+ if (manifest .snapshotId () == (long ) currentSnapshotId ) {
372+ manifests .add (manifest );
373+ }
374+ }
375+ } else {
376+ for (ManifestFile manifest : currentSnapshot .deleteManifests ()) {
377+ if (manifest .snapshotId () == (long ) currentSnapshotId ) {
378+ manifests .add (manifest );
379+ }
328380 }
329381 }
330382 }
331383
332384 currentSnapshotId = currentSnapshot .parentId ();
333385 }
334386
335- ManifestGroup matchingDeletesGroup = new ManifestGroup (ops .io (), manifests , ImmutableList .of ())
336- .filterManifestEntries (entry -> entry .status () != ManifestEntry .Status .ADDED &&
337- newSnapshots .contains (entry .snapshotId ()) && requiredDataFiles .contains (entry .file ().path ()))
338- .specsById (base .specsById ())
339- .ignoreExisting ();
340-
341- try (CloseableIterator <ManifestEntry <DataFile >> deletes = matchingDeletesGroup .entries ().iterator ()) {
342- if (deletes .hasNext ()) {
343- throw new ValidationException ("Cannot commit, missing data files: %s" ,
344- Iterators .toString (Iterators .transform (deletes , entry -> entry .file ().path ().toString ())));
345- }
346-
347- } catch (IOException e ) {
348- throw new UncheckedIOException ("Failed to validate required files exist" , e );
349- }
387+ return Pair .of (manifests , newSnapshots );
350388 }
351389
352390 @ Override
0 commit comments