diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 8de7507f5205..de95b569232e 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -674,7 +674,6 @@ The following table describes error codes you may encounter in the `multiStageQu | `InsertCannotAllocateSegment` | The controller task could not allocate a new segment ID due to conflict with existing segments or pending segments. Common reasons for such conflicts: attempting to mix different segment granularities in the same intervals of the same datasource, or prior ingestions that used non-extendable shard specs.

| `dataSource`

`interval`: The interval for the attempted new segment allocation. | | `InsertCannotBeEmpty` | An INSERT or REPLACE query did not generate any output rows in a situation where output rows are required for success. This can happen for INSERT or REPLACE queries with `PARTITIONED BY` set to something other than `ALL` or `ALL TIME`. | `dataSource` | | `InsertCannotOrderByDescending` | An INSERT query contained a `CLUSTERED BY` expression in descending order. Druid's segment generation code only supports ascending order. | `columnName` | -| `InsertCannotReplaceExistingSegment` | A REPLACE query cannot proceed because an existing segment partially overlaps the bounds of your OVERWRITE WHERE clause, and the portion within the bounds is not fully overshadowed by query results.

There are two ways to address this without modifying your query: shrink the OVERWRITE WHERE bounds to match the query results, or expand them to fully contain the existing segment. | `segmentId`: The existing segment |
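For readers tracking why this row is being removed: the fault came from a simple interval-containment check in `ControllerImpl` (deleted further down in this patch). Below is a minimal sketch of that check; the datasource, interval, and version are invented, mirroring the removed `MSQFaultsTest` case.

```java
// Sketch: REPLACE bounds 2000-2002, but an existing segment extends to 2003.
// The old validation threw InsertCannotReplaceExistingSegmentFault whenever no
// replace time chunk fully contained an existing segment's interval.
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.timeline.DataSegment;
import org.joda.time.Interval;

public class PartialOverlapCheckSketch
{
  public static void main(String[] args)
  {
    Interval replaceChunk = Intervals.of("2000-01-01/2002-01-01");
    DataSegment existing = DataSegment.builder()
                                      .dataSource("foo1")
                                      .interval(Intervals.of("2001-01-01/2003-01-04"))
                                      .version("1")
                                      .size(50)
                                      .build();
    // Prints false: the segment only partially overlaps the replace bounds, which
    // the old code rejected. With this patch, empty chunks get tombstones instead.
    System.out.println(replaceChunk.contains(existing.getInterval()));
  }
}
```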
| `InsertLockPreempted` | An INSERT or REPLACE query was canceled by a higher-priority ingestion job, such as a real-time ingestion task. | | | `InsertTimeNull` | An INSERT or REPLACE query encountered a null timestamp in the `__time` field.

This can happen due to using an expression like `TIME_PARSE(timestamp) AS __time` with a timestamp that cannot be parsed. (TIME_PARSE returns null when it cannot parse a timestamp.) In this case, try parsing your timestamps using a different function or pattern.

If your timestamps may genuinely be null, consider using COALESCE to provide a default value. One option is CURRENT_TIMESTAMP, which represents the start time of the job. | | `InsertTimeOutOfBounds` | A REPLACE query generated a timestamp outside the bounds of the TIMESTAMP parameter for your OVERWRITE WHERE clause.
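Returning to the `InsertTimeNull` row above: the COALESCE guidance is a plain fallback. A minimal sketch of the same idea in Java, assuming Joda-Time (already on Druid's classpath); the pattern string and helper name are illustrative only, not part of this patch.

```java
// Sketch: the Java analogue of COALESCE(TIME_PARSE(ts), CURRENT_TIMESTAMP), i.e.
// fall back to the job start time when the timestamp cannot be parsed
// (TIME_PARSE yields null in that case).
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

public class TimeParseFallbackSketch
{
  private static final DateTimeFormatter FORMAT =
      DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").withZoneUTC();

  static DateTime parseOrDefault(String ts, DateTime fallback)
  {
    try {
      return FORMAT.parseDateTime(ts);
    }
    catch (IllegalArgumentException | NullPointerException e) {
      return fallback; // what COALESCE supplies on the SQL side
    }
  }

  public static void main(String[] args)
  {
    DateTime jobStart = DateTime.now(DateTimeZone.UTC); // stands in for CURRENT_TIMESTAMP
    System.out.println(parseOrDefault("2000-01-01 00:00:00", jobStart)); // parses
    System.out.println(parseOrDefault("not-a-timestamp", jobStart));     // falls back
  }
}
```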

To avoid this error, verify that the interval you specified is valid. | `interval`: time chunk interval corresponding to the out-of-bounds timestamp | diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index e190edae9e67..180323caab02 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -24,7 +24,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; import com.google.common.util.concurrent.FutureCallback; @@ -62,12 +61,11 @@ import org.apache.druid.indexing.common.TaskReport; import org.apache.druid.indexing.common.actions.LockListAction; import org.apache.druid.indexing.common.actions.MarkSegmentsAsUnusedAction; -import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction; import org.apache.druid.indexing.common.actions.SegmentAllocateAction; import org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction; import org.apache.druid.indexing.common.actions.TaskActionClient; +import org.apache.druid.indexing.common.task.batch.parallel.TombstoneHelper; import org.apache.druid.indexing.overlord.SegmentPublishResult; -import org.apache.druid.indexing.overlord.Segments; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; @@ -103,7 +101,6 @@ import org.apache.druid.msq.indexing.error.InsertCannotAllocateSegmentFault; import org.apache.druid.msq.indexing.error.InsertCannotBeEmptyFault; import org.apache.druid.msq.indexing.error.InsertCannotOrderByDescendingFault; -import org.apache.druid.msq.indexing.error.InsertCannotReplaceExistingSegmentFault; import org.apache.druid.msq.indexing.error.InsertLockPreemptedFault; import org.apache.druid.msq.indexing.error.InsertTimeOutOfBoundsFault; import org.apache.druid.msq.indexing.error.MSQErrorReport; @@ -1247,48 +1244,33 @@ private void postResultPartitionBoundariesForStage( /** * Publish the list of segments. Additionally, if {@link DataSourceMSQDestination#isReplaceTimeChunks()}, * also drop all other segments within the replacement intervals. - *

- * If any existing segments cannot be dropped because their intervals are not wholly contained within the - * replacement parameter, throws a {@link MSQException} with {@link InsertCannotReplaceExistingSegmentFault}. */ private void publishAllSegments(final Set segments) throws IOException { final DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); - final Set segmentsToDrop; + final Set segmentsWithTombstones = new HashSet<>(segments); if (destination.isReplaceTimeChunks()) { final List intervalsToDrop = findIntervalsToDrop(Preconditions.checkNotNull(segments, "segments")); - if (intervalsToDrop.isEmpty()) { - segmentsToDrop = null; - } else { - // Determine which segments to drop as part of the replace operation. This is safe because, in the case where we - // are doing a replace, the isReady method (which runs prior to the task starting) acquires an exclusive lock. - segmentsToDrop = - ImmutableSet.copyOf( - context.taskActionClient().submit( - new RetrieveUsedSegmentsAction( - task.getDataSource(), - null, - intervalsToDrop, - Segments.ONLY_VISIBLE - ) - ) - ); - - // Validate that there are no segments that partially overlap the intervals-to-drop. Otherwise, the replace - // may be incomplete. - for (final DataSegment segmentToDrop : segmentsToDrop) { - if (destination.getReplaceTimeChunks() - .stream() - .noneMatch(interval -> interval.contains(segmentToDrop.getInterval()))) { - throw new MSQException(new InsertCannotReplaceExistingSegmentFault(segmentToDrop.getId())); - } + if (!intervalsToDrop.isEmpty()) { + TombstoneHelper tombstoneHelper = new TombstoneHelper(context.taskActionClient()); + try { + Set tombstones = tombstoneHelper.computeTombstoneSegmentsForReplace( + intervalsToDrop, + destination.getReplaceTimeChunks(), + task.getDataSource(), + destination.getSegmentGranularity() + ); + segmentsWithTombstones.addAll(tombstones); + } + catch (IllegalStateException e) { + throw new MSQException(e, InsertLockPreemptedFault.instance()); } } - if (segments.isEmpty()) { + if (segmentsWithTombstones.isEmpty()) { // Nothing to publish, only drop. We already validated that the intervalsToDrop do not have any // partially-overlapping segments, so it's safe to drop them as intervals instead of as specific segments. for (final Interval interval : intervalsToDrop) { @@ -1298,7 +1280,7 @@ private void publishAllSegments(final Set segments) throws IOExcept } else { performSegmentPublish( context.taskActionClient(), - SegmentTransactionalInsertAction.overwriteAction(null, segmentsToDrop, segments) + SegmentTransactionalInsertAction.overwriteAction(null, null, segmentsWithTombstones) ); } } else if (!segments.isEmpty()) { @@ -2590,6 +2572,7 @@ static ClusterStatisticsMergeMode finalizeClusterStatisticsMergeMode( return mergeMode; } + /** * Interface used by {@link #contactWorkersForStage}. 
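The hunk above replaces the fail-fast validation with tombstone computation. To see the delegated interval math in isolation, here is a self-contained sketch against the new `TombstoneHelper` API, mocking the task action client the same way the tests in this patch do; the datasource and intervals are invented.

```java
// Sketch: the empty chunk 2001-01/2001-02 inside REPLACE bounds 2000-01/2002-01
// becomes a MONTH-granular tombstone interval wherever used segments overlap it.
import com.google.common.collect.ImmutableList;
import org.apache.druid.indexing.common.actions.TaskAction;
import org.apache.druid.indexing.common.actions.TaskActionClient;
import org.apache.druid.indexing.common.task.batch.parallel.TombstoneHelper;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.timeline.DataSegment;
import org.joda.time.Interval;
import org.mockito.Mockito;

import java.util.Collections;
import java.util.Set;

import static org.mockito.ArgumentMatchers.any;

public class ReplaceTombstoneSketch
{
  public static void main(String[] args) throws Exception
  {
    TaskActionClient taskActionClient = Mockito.mock(TaskActionClient.class);
    DataSegment used = DataSegment.builder()
                                  .dataSource("foo")
                                  .interval(Intervals.of("2000-01-01/2002-01-01"))
                                  .version("v0")
                                  .size(100)
                                  .build();
    Mockito.when(taskActionClient.submit(any(TaskAction.class)))
           .thenReturn(Collections.singletonList(used));

    TombstoneHelper helper = new TombstoneHelper(taskActionClient);
    Set<Interval> tombstoneIntervals = helper.computeTombstoneIntervalsForReplace(
        ImmutableList.of(Intervals.of("2001-01-01/2001-02-01")), // chunks that got no data
        ImmutableList.of(Intervals.of("2000-01-01/2002-01-01")), // OVERWRITE WHERE bounds
        "foo",
        Granularities.MONTH
    );
    System.out.println(tombstoneIntervals); // [2001-01-01/2001-02-01]
  }
}
```

An `IllegalStateException` from the helper (no task lock covering a tombstone interval) is what `publishAllSegments` above rethrows as `InsertLockPreemptedFault`.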
*/ diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java index 73e87b5e6657..dccd42bad94f 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java @@ -45,7 +45,6 @@ import org.apache.druid.msq.indexing.error.InsertCannotAllocateSegmentFault; import org.apache.druid.msq.indexing.error.InsertCannotBeEmptyFault; import org.apache.druid.msq.indexing.error.InsertCannotOrderByDescendingFault; -import org.apache.druid.msq.indexing.error.InsertCannotReplaceExistingSegmentFault; import org.apache.druid.msq.indexing.error.InsertLockPreemptedFault; import org.apache.druid.msq.indexing.error.InsertTimeNullFault; import org.apache.druid.msq.indexing.error.InsertTimeOutOfBoundsFault; @@ -106,7 +105,6 @@ public class MSQIndexingModule implements DruidModule InsertCannotAllocateSegmentFault.class, InsertCannotBeEmptyFault.class, InsertCannotOrderByDescendingFault.class, - InsertCannotReplaceExistingSegmentFault.class, InsertLockPreemptedFault.class, InsertTimeNullFault.class, InsertTimeOutOfBoundsFault.class, diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertCannotReplaceExistingSegmentFault.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertCannotReplaceExistingSegmentFault.java deleted file mode 100644 index ed1d14fcc005..000000000000 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertCannotReplaceExistingSegmentFault.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.msq.indexing.error; - -import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.druid.timeline.SegmentId; - -import java.util.Objects; - -public class InsertCannotReplaceExistingSegmentFault extends BaseMSQFault -{ - static final String CODE = "InsertCannotReplaceExistingSegment"; - - private final String segmentId; - - public InsertCannotReplaceExistingSegmentFault(@JsonProperty("segmentId") String segmentId) - { - super( - CODE, - "Cannot replace existing segment [%s] because it is not within the " - + "bounds specified by replaceExistingTimeChunks", - segmentId - ); - this.segmentId = segmentId; - } - - public InsertCannotReplaceExistingSegmentFault(final SegmentId segmentId) - { - this(segmentId.toString()); - } - - @JsonProperty - public String getSegmentId() - { - return segmentId; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - if (!super.equals(o)) { - return false; - } - InsertCannotReplaceExistingSegmentFault that = (InsertCannotReplaceExistingSegmentFault) o; - return Objects.equals(segmentId, that.segmentId); - } - - @Override - public int hashCode() - { - return Objects.hash(super.hashCode(), segmentId); - } -} diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQFaultsTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQFaultsTest.java index f7028f57aad2..4a9c34bc79aa 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQFaultsTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQFaultsTest.java @@ -21,14 +21,12 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction; import org.apache.druid.indexing.common.actions.SegmentAllocateAction; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.msq.indexing.error.InsertCannotAllocateSegmentFault; import org.apache.druid.msq.indexing.error.InsertCannotBeEmptyFault; import org.apache.druid.msq.indexing.error.InsertCannotOrderByDescendingFault; -import org.apache.druid.msq.indexing.error.InsertCannotReplaceExistingSegmentFault; import org.apache.druid.msq.indexing.error.InsertTimeNullFault; import org.apache.druid.msq.indexing.error.InsertTimeOutOfBoundsFault; import org.apache.druid.msq.indexing.error.TooManyClusteredByColumnsFault; @@ -40,7 +38,6 @@ import org.apache.druid.msq.test.MSQTestFileUtils; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; -import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; import org.junit.Test; import org.mockito.Mockito; @@ -114,30 +111,6 @@ public void testInsertCannotOrderByDescendingFault() .verifyResults(); } - @Test - public void testInsertCannotReplaceExistingSegmentFault() - { - RowSignature rowSignature = RowSignature.builder() - .add("__time", ColumnType.LONG) - .add("dim1", ColumnType.STRING) - .add("cnt", ColumnType.LONG).build(); - - // Create a datasegment which lies partially outside the generated segment - DataSegment existingDataSegment = DataSegment.builder() - .interval(Intervals.of("2001-01-01T/2003-01-04T")) - .size(50) - .version("1").dataSource("foo1") - .build(); - 
Mockito.doReturn(ImmutableSet.of(existingDataSegment)).when(testTaskActionClient).submit(isA(RetrieveUsedSegmentsAction.class)); - - testIngestQuery().setSql( - "replace into foo1 overwrite where __time >= TIMESTAMP '2000-01-01 00:00:00' and __time < TIMESTAMP '2002-01-03 00:00:00' select __time, dim1 , count(*) as cnt from foo where dim1 is not null group by 1, 2 PARTITIONED by day clustered by dim1") - .setExpectedDataSource("foo1") - .setExpectedRowSignature(rowSignature) - .setExpectedMSQFault(new InsertCannotReplaceExistingSegmentFault(existingDataSegment.getId())) - .verifyResults(); - } - @Test public void testInsertTimeOutOfBoundsFault() { diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 56ccb0560d0a..02c559ee257a 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -21,13 +21,17 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.msq.test.CounterSnapshotMatcher; import org.apache.druid.msq.test.MSQTestBase; import org.apache.druid.msq.test.MSQTestFileUtils; +import org.apache.druid.msq.test.MSQTestTaskActionClient; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.SqlPlanningException; +import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.DimensionRangeShardSpec; import org.hamcrest.CoreMatchers; @@ -35,6 +39,8 @@ import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; import javax.annotation.Nonnull; import java.io.File; @@ -499,6 +505,7 @@ public void testReplaceWhereClauseLargerThanData() .setExpectedRowSignature(rowSignature) .setQueryContext(context) .setExpectedDestinationIntervals(Collections.singletonList(Intervals.of("2000-01-01T/2002-01-01T"))) + .setExpectedTombstoneIntervals(ImmutableSet.of(Intervals.of("2001-01-01T/2001-02-01T"))) .setExpectedSegment(ImmutableSet.of(SegmentId.of( "foo", Intervals.of("2000-01-01T/P1M"), @@ -511,12 +518,6 @@ public void testReplaceWhereClauseLargerThanData() new Object[]{946771200000L, 2.0f} ) ) - .setExpectedSegment(ImmutableSet.of(SegmentId.of( - "foo", - Intervals.of("2000-01-01T/P1M"), - "test", - 0 - ))) .setExpectedCountersForStageWorkerChannel( CounterSnapshotMatcher .with().totalFiles(1), @@ -625,6 +626,7 @@ public void testReplaceTimeChunksLargerThanData() .setExpectedRowSignature(rowSignature) .setQueryContext(context) .setExpectedDestinationIntervals(Collections.singletonList(Intervals.of("2000-01-01T/2002-01-01T"))) + .setExpectedTombstoneIntervals(ImmutableSet.of(Intervals.of("2001-01-01T/2001-02-01T"))) .setExpectedSegment(ImmutableSet.of(SegmentId.of( "foo", Intervals.of("2000-01-01T/P1M"), @@ -694,6 +696,67 @@ public void testReplaceSegmentsInsertIntoNewTable() .verifyResults(); } + @Test + public void testReplaceTombstonesOverPartiallyOverlappingSegments() + { + 
RowSignature rowSignature = RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("dim1", ColumnType.STRING) + .add("cnt", ColumnType.LONG).build(); + + // Create a datasegment which lies partially outside the generated segment + DataSegment existingDataSegment = DataSegment.builder() + .interval(Intervals.of("2001-01-01T/2003-01-04T")) + .size(50) + .version(MSQTestTaskActionClient.VERSION) + .dataSource("foo1") + .build(); + + Mockito.doReturn(ImmutableSet.of(existingDataSegment)).when(testTaskActionClient).submit(ArgumentMatchers.isA(RetrieveUsedSegmentsAction.class)); + + List expectedResults; + if (NullHandling.sqlCompatible()) { + expectedResults = ImmutableList.of( + new Object[]{946684800000L, "", 1L}, + new Object[]{946771200000L, "10.1", 1L}, + new Object[]{946857600000L, "2", 1L}, + new Object[]{978307200000L, "1", 1L}, + new Object[]{978393600000L, "def", 1L}, + new Object[]{978480000000L, "abc", 1L} + ); + } else { + expectedResults = ImmutableList.of( + new Object[]{946771200000L, "10.1", 1L}, + new Object[]{946857600000L, "2", 1L}, + new Object[]{978307200000L, "1", 1L}, + new Object[]{978393600000L, "def", 1L}, + new Object[]{978480000000L, "abc", 1L} + ); + } + + testIngestQuery().setSql( + "REPLACE INTO foo1 " + + "OVERWRITE WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' and __time < TIMESTAMP '2002-01-01 00:00:00'" + + "SELECT __time, dim1 , count(*) as cnt " + + "FROM foo " + + "WHERE dim1 IS NOT NULL " + + "GROUP BY 1, 2 " + + "PARTITIONED by TIME_FLOOR(__time, 'P3M') " + + "CLUSTERED by dim1") + .setExpectedDataSource("foo1") + .setExpectedRowSignature(rowSignature) + .setExpectedShardSpec(DimensionRangeShardSpec.class) + .setExpectedTombstoneIntervals( + ImmutableSet.of( + Intervals.of("2001-04-01/P3M"), + Intervals.of("2001-07-01/P3M"), + Intervals.of("2001-10-01/P3M") + ) + ) + .setExpectedResultRows(expectedResults) + .verifyResults(); + } + @Nonnull private Set expectedFooSegments() { diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java index cffbd5e1c313..7a0ee66ea6cc 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java @@ -26,7 +26,6 @@ import org.apache.druid.msq.guice.MSQIndexingModule; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.timeline.SegmentId; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -57,9 +56,6 @@ public void testFaultSerde() throws IOException assertFaultSerde(new InsertCannotAllocateSegmentFault("the datasource", Intervals.ETERNITY)); assertFaultSerde(new InsertCannotBeEmptyFault("the datasource")); assertFaultSerde(new InsertCannotOrderByDescendingFault("the column")); - assertFaultSerde( - new InsertCannotReplaceExistingSegmentFault(SegmentId.of("the datasource", Intervals.ETERNITY, "v1", 1)) - ); assertFaultSerde(InsertLockPreemptedFault.INSTANCE); assertFaultSerde(InsertTimeNullFault.INSTANCE); assertFaultSerde(new InsertTimeOutOfBoundsFault(Intervals.ETERNITY)); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index 
5ff019e67f31..8842e16958f3 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -28,6 +28,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; import com.google.inject.Binder; import com.google.inject.Injector; import com.google.inject.Key; @@ -162,6 +163,7 @@ import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.apache.druid.timeline.partition.ShardSpec; +import org.apache.druid.timeline.partition.TombstoneShardSpec; import org.easymock.EasyMock; import org.hamcrest.Matcher; import org.hamcrest.MatcherAssert; @@ -184,6 +186,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; @@ -799,6 +802,7 @@ public abstract class MSQTester> protected MSQSpec expectedMSQSpec = null; protected MSQTuningConfig expectedTuningConfig = null; protected Set expectedSegments = null; + protected Set expectedTombstoneIntervals = null; protected List expectedResultRows = null; protected Matcher expectedValidationErrorMatcher = null; protected Matcher expectedExecutionErrorMatcher = null; @@ -835,6 +839,13 @@ public Builder setExpectedSegment(Set expectedSegments) return asBuilder(); } + public Builder setExpectedTombstoneIntervals(Set tombstoneIntervals) + { + Preconditions.checkArgument(!tombstoneIntervals.isEmpty(), "Tombstone intervals cannot be empty"); + this.expectedTombstoneIntervals = tombstoneIntervals; + return asBuilder(); + } + public Builder setExpectedResultRows(List expectedResultRows) { Preconditions.checkArgument(expectedResultRows.size() > 0, "Results rows cannot be empty"); @@ -1143,6 +1154,41 @@ public void verifyResults() Assert.assertTrue(segmentIdVsOutputRowsMap.get(diskSegment).contains(Arrays.asList(row))); } } + + // Assert on the tombstone intervals + // Tombstone segments are only published, but since they do not have any data, they are not pushed by the + // SegmentGeneratorFrameProcessorFactory.
We can get the published tombstone segment ids by taking the set + difference between all the published segments and the segments that are created by the SegmentGeneratorFrameProcessorFactory + if (!testTaskActionClient.getPublishedSegments().isEmpty()) { + Set publishedSegmentIds = testTaskActionClient.getPublishedSegments() + .stream() + .map(DataSegment::getId) + .collect(Collectors.toSet()); + Set nonEmptySegmentIds = segmentIdVsOutputRowsMap.keySet(); + Set tombstoneSegmentIds = Sets.difference(publishedSegmentIds, nonEmptySegmentIds); + + // Generate the expected tombstone segment ids + Map tombstoneLoadSpec = new HashMap<>(); + tombstoneLoadSpec.put("type", DataSegment.TOMBSTONE_LOADSPEC_TYPE); + tombstoneLoadSpec.put("path", null); // tombstones do not have any backing file + Set expectedTombstoneSegmentIds = new HashSet<>(); + if (expectedTombstoneIntervals != null) { + expectedTombstoneSegmentIds.addAll( + expectedTombstoneIntervals.stream() + .map(interval -> DataSegment.builder() + .dataSource(expectedDataSource) + .interval(interval) + .version(MSQTestTaskActionClient.VERSION) + .shardSpec(new TombstoneShardSpec()) + .loadSpec(tombstoneLoadSpec) + .size(1) + .build()) + .map(DataSegment::getId) + .collect(Collectors.toSet()) + ); + } + Assert.assertEquals(expectedTombstoneSegmentIds, tombstoneSegmentIds); + } // assert results assertResultsEquals(sql, expectedResultRows, transformedOutputRows); } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestTaskActionClient.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestTaskActionClient.java index 2e6ee4a9bc6f..897e57c93bc8 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestTaskActionClient.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestTaskActionClient.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.druid.indexing.common.TaskLockType; import org.apache.druid.indexing.common.TimeChunkLock; @@ -40,18 +41,29 @@ import org.apache.druid.timeline.SegmentId; import org.joda.time.Interval; +import java.util.HashSet; +import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; public class MSQTestTaskActionClient implements TaskActionClient { - private static final String VERSION = "test"; + public static final String VERSION = "test"; private final ObjectMapper mapper; private final ConcurrentHashMap segmentIdPartitionIdMap = new ConcurrentHashMap<>(); + private final Map> usedIntervals = ImmutableMap.of( + "foo", ImmutableList.of(Intervals.of("2001-01-01/2001-01-04"), Intervals.of("2000-01-01/2000-01-04")), + "foo2", ImmutableList.of(Intervals.of("2000-01-01/P1D")) + ); + private final Set publishedSegments = new HashSet<>(); - public MSQTestTaskActionClient(ObjectMapper mapper) + public MSQTestTaskActionClient( + ObjectMapper mapper + ) { this.mapper = mapper; } @@ -94,13 +106,32 @@ public RetType submit(TaskAction taskAction) 0 )); } else if (taskAction instanceof RetrieveUsedSegmentsAction) { - return (RetType) ImmutableSet.of(); + String dataSource = ((RetrieveUsedSegmentsAction) taskAction).getDataSource(); + if
(!usedIntervals.containsKey(dataSource)) { + return (RetType) ImmutableSet.of(); + } else { + return (RetType) usedIntervals.get(dataSource) + .stream() + .map(interval -> DataSegment.builder() + .dataSource(dataSource) + .interval(interval) + .version(VERSION) + .size(1) + .build() + ).collect(Collectors.toSet()); + } } else if (taskAction instanceof SegmentTransactionalInsertAction) { // Always OK. final Set segments = ((SegmentTransactionalInsertAction) taskAction).getSegments(); + publishedSegments.addAll(segments); return (RetType) SegmentPublishResult.ok(segments); } else { return null; } } + + public Set getPublishedSegments() + { + return publishedSegments; + } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 288107d0b944..d4b04b824724 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -952,13 +952,12 @@ private TaskStatus generateAndPublishSegments( Set tombStones = Collections.emptySet(); if (getIngestionMode() == IngestionMode.REPLACE) { // check whether to generate tombstones... - TombstoneHelper tombstoneHelper = new TombstoneHelper( + TombstoneHelper tombstoneHelper = new TombstoneHelper(toolbox.getTaskActionClient()); + + List tombstoneIntervals = tombstoneHelper.computeTombstoneIntervals( pushed.getSegments(), - ingestionSchema.getDataSchema(), - toolbox.getTaskActionClient() + ingestionSchema.getDataSchema() ); - - List tombstoneIntervals = tombstoneHelper.computeTombstoneIntervals(); // now find the versions for the tombstone intervals Map tombstonesAndVersions = new HashMap<>(); for (Interval interval : tombstoneIntervals) { @@ -970,7 +969,7 @@ private TaskStatus generateAndPublishSegments( tombstonesAndVersions.put(interval, segmentIdWithShardSpec); } - tombStones = tombstoneHelper.computeTombstones(tombstonesAndVersions); + tombStones = tombstoneHelper.computeTombstones(ingestionSchema.getDataSchema(), tombstonesAndVersions); log.debugSegments(tombStones, "To publish tombstones"); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index ae2de54c3f19..85b3442b3151 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -1128,12 +1128,8 @@ private void publishSegments( Set tombStones = Collections.emptySet(); if (getIngestionMode() == IngestionMode.REPLACE) { - TombstoneHelper tombstoneHelper = new TombstoneHelper( - newSegments, - ingestionSchema.getDataSchema(), - toolbox.getTaskActionClient() - ); - List tombstoneIntervals = tombstoneHelper.computeTombstoneIntervals(); + TombstoneHelper tombstoneHelper = new TombstoneHelper(toolbox.getTaskActionClient()); + List tombstoneIntervals = tombstoneHelper.computeTombstoneIntervals(newSegments, ingestionSchema.getDataSchema()); if (!tombstoneIntervals.isEmpty()) { Map tombstonesAnShards = new HashMap<>(); @@ -1146,7 +1142,7 @@ private void publishSegments( tombstonesAnShards.put(interval, segmentIdWithShardSpec); } - tombStones = 
tombstoneHelper.computeTombstones(tombstonesAnShards); + tombStones = tombstoneHelper.computeTombstones(ingestionSchema.getDataSchema(), tombstonesAnShards); // add tombstones newSegments.addAll(tombStones); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/TombstoneHelper.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/TombstoneHelper.java index ee9ae03311c0..6ad3b22accbb 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/TombstoneHelper.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/TombstoneHelper.java @@ -20,15 +20,23 @@ package org.apache.druid.indexing.common.task.batch.parallel; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; +import org.apache.druid.indexing.common.TaskLock; +import org.apache.druid.indexing.common.actions.LockListAction; import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction; import org.apache.druid.indexing.common.actions.TaskActionClient; import org.apache.druid.indexing.overlord.Segments; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.JodaUtils; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.java.util.common.granularity.IntervalsByGranularity; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.granularity.GranularitySpec; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.ShardSpec; +import org.apache.druid.timeline.partition.TombstoneShardSpec; import org.joda.time.Interval; import java.io.IOException; @@ -43,28 +51,15 @@ public class TombstoneHelper { - private final DataSchema dataSchema; private final TaskActionClient taskActionClient; - private final Collection pushedSegments; - public TombstoneHelper( - Collection pushedSegments, - DataSchema dataSchema, - TaskActionClient taskActionClient - ) + public TombstoneHelper(TaskActionClient taskActionClient) { - Preconditions.checkNotNull(pushedSegments, "pushedSegments"); - Preconditions.checkNotNull(dataSchema, "dataSchema"); - Preconditions.checkNotNull(taskActionClient, "taskActionClient"); - - this.dataSchema = dataSchema; - this.taskActionClient = taskActionClient; - this.pushedSegments = pushedSegments; + this.taskActionClient = Preconditions.checkNotNull(taskActionClient, "taskActionClient"); } - - private List getCondensedPushedSegmentsIntervals() + private List getCondensedPushedSegmentsIntervals(Collection pushedSegments) { List pushedSegmentsIntervals = new ArrayList<>(); for (DataSegment pushedSegment : pushedSegments) { @@ -73,7 +68,10 @@ private List getCondensedPushedSegmentsIntervals() return JodaUtils.condenseIntervals(pushedSegmentsIntervals); } - public Set computeTombstones(Map tombstoneIntervalsAndVersions) + public Set computeTombstones( + DataSchema dataSchema, + Map tombstoneIntervalsAndVersions + ) { Set retVal = new HashSet<>(); String dataSource = dataSchema.getDataSource(); @@ -91,12 +89,16 @@ public Set computeTombstones(Map return retVal; } - public List computeTombstoneIntervals() throws IOException + public List computeTombstoneIntervals(Collection pushedSegments, DataSchema dataSchema) + throws IOException { List retVal = new ArrayList<>(); 
GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); - List pushedSegmentsIntervals = getCondensedPushedSegmentsIntervals(); - List intervalsForUsedSegments = getCondensedUsedIntervals(); + List pushedSegmentsIntervals = getCondensedPushedSegmentsIntervals(pushedSegments); + List intervalsForUsedSegments = getCondensedUsedIntervals( + dataSchema.getGranularitySpec().inputIntervals(), + dataSchema.getDataSource() + ); for (Interval timeChunkInterval : granularitySpec.sortedBucketIntervals()) { // is it an empty time chunk? boolean isEmpty = true; @@ -122,10 +124,113 @@ public List computeTombstoneIntervals() throws IOException return retVal; } - private DataSegment createTombstoneForTimeChunkInterval(String dataSource, String version, ShardSpec shardSpec, Interval timeChunkInterval) + public Set computeTombstoneSegmentsForReplace( + List intervalsToDrop, + List intervalsToReplace, + String dataSource, + Granularity replaceGranularity + ) throws IOException + { + Set tombstoneIntervals = computeTombstoneIntervalsForReplace( + intervalsToDrop, + intervalsToReplace, + dataSource, + replaceGranularity + ); + + final List locks = taskActionClient.submit(new LockListAction()); + + Set tombstones = new HashSet<>(); + for (Interval tombstoneInterval : tombstoneIntervals) { + String version = null; + for (final TaskLock lock : locks) { + if (lock.getInterval().contains(tombstoneInterval)) { + version = lock.getVersion(); + } + } + + if (version == null) { + // Unable to fetch the version number of the segment + throw new ISE("Unable to fetch the version of the segments in use. The lock for the task might " + + "have been revoked"); + } + + DataSegment tombstone = createTombstoneForTimeChunkInterval( + dataSource, + version, + new TombstoneShardSpec(), + tombstoneInterval + ); + tombstones.add(tombstone); + } + return tombstones; + } + + /** + * @param intervalsToDrop Empty intervals in the query that need to be dropped. They should be aligned with the + * replaceGranularity + * @param intervalsToReplace Intervals in the query which are eligible for replacement with new data. 
+ * They should be aligned with the replaceGranularity + * @param dataSource Datasource on which the replace is to be performed + * @param replaceGranularity Granularity of the replace query + * @return Intervals computed for the tombstones + * @throws IOException + */ + public Set computeTombstoneIntervalsForReplace( + List intervalsToDrop, + List intervalsToReplace, + String dataSource, + Granularity replaceGranularity + ) throws IOException { + Set retVal = new HashSet<>(); + List usedIntervals = getCondensedUsedIntervals(intervalsToReplace, dataSource); + + for (Interval intervalToDrop : intervalsToDrop) { + for (Interval usedInterval : usedIntervals) { + + // Overlap will always be finite (not starting from -Inf or ending at +Inf) and less than or + // equal to the size of the usedInterval + Interval overlap = intervalToDrop.overlap(usedInterval); + + // No overlap of the dropped segment with the used interval, so we do not need to generate any tombstone + if (overlap == null) { + continue; + } + + // Overlap might not be aligned with the granularity if the used interval is not aligned with the granularity + // However, when fetching from the iterator, the first interval is found using the bucketStart, which + // ensures that the interval is "rounded down" to the first timestamp that aligns with the granularity + // Also, the interval would always be contained inside the "intervalToDrop" because the original REPLACE + // is aligned by the granularity, and by extension all the elements inside the intervals to drop would + // also be aligned by the same granularity (since intervalsToDrop = replaceIntervals - publishIntervals, and + // the right-hand side is always aligned) + // + // For example, if the replace granularity is DAY, intervalsToReplace are 20/02/2023 - 24/02/2023 (always + // aligned with the replaceGranularity), intervalsToDrop are 22/02/2023 - 24/02/2023 (they must also be aligned with the replaceGranularity) + // If the relevant usedIntervals for the datasource are from 22/02/2023 01:00:00 - 23/02/2023 02:00:00, then + // the overlap would be 22/02/2023 01:00:00 - 23/02/2023 02:00:00. When iterating over the overlap we will get + // the intervals from 22/02/2023 - 23/02/2023, and 23/02/2023 - 24/02/2023 + IntervalsByGranularity intervalsToDropByGranularity = new IntervalsByGranularity( + ImmutableList.of(overlap), + replaceGranularity + ); + // Helps in deduplication if required. Since all the intervals are uniformly granular, there should be no + // overlap post deduplication + retVal.addAll(Sets.newHashSet(intervalsToDropByGranularity.granularityIntervalsIterator())); + } + } + return retVal; + } + public DataSegment createTombstoneForTimeChunkInterval( + String dataSource, + String version, + ShardSpec shardSpec, + Interval timeChunkInterval + ) + { // and the loadSpec is different too: Map tombstoneLoadSpec = new HashMap<>(); // since loadspec comes from prototype it is guaranteed to be non-null @@ -147,22 +252,28 @@ private DataSegment createTombstoneForTimeChunkInterval(String dataSource, Strin } - /** * Helper method to prune required tombstones. Only tombstones that cover used intervals will be created * since those that do not cover used intervals will be redundant.
+ * + * @param inputIntervals Intervals corresponding to the task + * @param dataSource Datasource corresponding to the task * @return Intervals corresponding to used segments that overlap with any of the spec's input intervals * @throws IOException If used segments cannot be retrieved */ - public List getCondensedUsedIntervals() throws IOException + private List getCondensedUsedIntervals( + List inputIntervals, + String dataSource + ) throws IOException { List retVal = new ArrayList<>(); - List condensedInputIntervals = JodaUtils.condenseIntervals(dataSchema.getGranularitySpec().inputIntervals()); + List condensedInputIntervals = JodaUtils.condenseIntervals(inputIntervals); if (!condensedInputIntervals.isEmpty()) { Collection usedSegmentsInInputInterval = taskActionClient.submit(new RetrieveUsedSegmentsAction( - dataSchema.getDataSource(), null, + dataSource, + null, condensedInputIntervals, Segments.ONLY_VISIBLE )); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/TombstoneHelperTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/TombstoneHelperTest.java index bab7e5749fb0..3450bf270337 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/TombstoneHelperTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/TombstoneHelperTest.java @@ -19,10 +19,16 @@ package org.apache.druid.indexing.common.task.batch.parallel; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.apache.druid.indexing.common.actions.LockListAction; +import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction; import org.apache.druid.indexing.common.actions.TaskAction; import org.apache.druid.indexing.common.actions.TaskActionClient; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.granularity.GranularitySpec; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; @@ -34,11 +40,13 @@ import org.junit.Test; import org.mockito.Mockito; +import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import static org.mockito.ArgumentMatchers.any; @@ -62,16 +70,12 @@ public void noTombstonesWhenNoDataInInputIntervalAndNoExistingSegments() throws // Assume no used segments : Mockito.when(taskActionClient.submit(any(TaskAction.class))).thenReturn(Collections.emptyList()); - TombstoneHelper tombstoneHelper = new TombstoneHelper( - pushedSegments, - dataSchema, - taskActionClient - ); - List tombstoneIntervals = tombstoneHelper.computeTombstoneIntervals(); + TombstoneHelper tombstoneHelper = new TombstoneHelper(taskActionClient); + List tombstoneIntervals = tombstoneHelper.computeTombstoneIntervals(pushedSegments, dataSchema); Assert.assertTrue(tombstoneIntervals.isEmpty()); Map intervalToLockVersion = Collections.emptyMap(); - Set tombstones = tombstoneHelper.computeTombstones(intervalToLockVersion); + Set tombstones = tombstoneHelper.computeTombstones(dataSchema, intervalToLockVersion); Assert.assertEquals(0, tombstones.size()); @@ -100,13 +104,9 @@ public void 
tombstonesCreatedWhenNoDataInInputIntervalAndExistingSegments() thro Assert.assertFalse(existingUsedSegment.isTombstone()); Mockito.when(taskActionClient.submit(any(TaskAction.class))) .thenReturn(Collections.singletonList(existingUsedSegment)); - TombstoneHelper tombstoneHelper = new TombstoneHelper( - pushedSegments, - dataSchema, - taskActionClient - ); + TombstoneHelper tombstoneHelper = new TombstoneHelper(taskActionClient); - List tombstoneIntervals = tombstoneHelper.computeTombstoneIntervals(); + List tombstoneIntervals = tombstoneHelper.computeTombstoneIntervals(pushedSegments, dataSchema); Assert.assertEquals(3, tombstoneIntervals.size()); Map intervalToVersion = new HashMap<>(); for (Interval ti : tombstoneIntervals) { @@ -115,9 +115,172 @@ public void tombstonesCreatedWhenNoDataInInputIntervalAndExistingSegments() thro new SegmentIdWithShardSpec("test", ti, "newVersion", new TombstoneShardSpec()) ); } - Set tombstones = tombstoneHelper.computeTombstones(intervalToVersion); + Set tombstones = tombstoneHelper.computeTombstones(dataSchema, intervalToVersion); Assert.assertEquals(3, tombstones.size()); tombstones.forEach(ts -> Assert.assertTrue(ts.isTombstone())); } + @Test + public void tombstoneIntervalsCreatedForReplaceWhenReplaceIsContainedInUsedIntervals() throws Exception + { + Interval usedInterval = Intervals.of("2020-02-01/2020-04-01"); + Interval replaceInterval = Intervals.of("2020-03-01/2020-03-31"); + Interval intervalToDrop = Intervals.of("2020-03-05/2020-03-07"); + Granularity replaceGranularity = Granularities.DAY; + + DataSegment existingUsedSegment = + DataSegment.builder() + .dataSource("test") + .interval(usedInterval) + .version("oldVersion") + .size(100) + .build(); + Assert.assertFalse(existingUsedSegment.isTombstone()); + Mockito.when(taskActionClient.submit(any(TaskAction.class))) + .thenReturn(Collections.singletonList(existingUsedSegment)); + TombstoneHelper tombstoneHelper = new TombstoneHelper(taskActionClient); + + Set tombstoneIntervals = tombstoneHelper.computeTombstoneIntervalsForReplace( + ImmutableList.of(intervalToDrop), + ImmutableList.of(replaceInterval), + "test", + replaceGranularity + ); + Assert.assertEquals( + ImmutableSet.of(Intervals.of("2020-03-05/2020-03-06"), Intervals.of("2020-03-06/2020-03-07")), + tombstoneIntervals + ); + } + + @Test + public void tombstoneIntervalsCreatedForReplaceWhenThereIsAGapInUsedIntervals() throws Exception + { + List usedIntervals = ImmutableList.of( + Intervals.of("2020-02-01/2020-04-01"), + Intervals.of("2020-07-01/2020-11-01") + ); + Interval replaceInterval = Intervals.of("2020-01-01/2020-12-01"); + Interval intervalToDrop = Intervals.of("2020-03-01/2020-09-01"); + Granularity replaceGranularity = Granularities.MONTH; + + List existingUsedSegments = usedIntervals.stream().map( + usedInterval -> DataSegment.builder() + .dataSource("test") + .interval(usedInterval) + .version("oldVersion") + .size(100) + .build() + ).collect(Collectors.toList()); + Mockito.when(taskActionClient.submit(any(TaskAction.class))).thenReturn(existingUsedSegments); + TombstoneHelper tombstoneHelper = new TombstoneHelper(taskActionClient); + + Set tombstoneIntervals = tombstoneHelper.computeTombstoneIntervalsForReplace( + ImmutableList.of(intervalToDrop), + ImmutableList.of(replaceInterval), + "test", + replaceGranularity + ); + Assert.assertEquals( + ImmutableSet.of( + Intervals.of("2020-03-01/2020-04-01"), + Intervals.of("2020-07-01/2020-08-01"), + Intervals.of("2020-08-01/2020-09-01") + ), + tombstoneIntervals + ); + } + + 
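The bucketing behavior described in the long comment inside `computeTombstoneIntervalsForReplace` (the 22/02/2023 example) can be reproduced directly with `IntervalsByGranularity`; a small standalone sketch using the dates from that comment:

```java
// Sketch: an overlap that is not DAY-aligned (01:00 to 02:00 the next day) is
// bucketed into full-day intervals, exactly as the TombstoneHelper comment says.
import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.IntervalsByGranularity;

public class GranularityBucketingSketch
{
  public static void main(String[] args)
  {
    IntervalsByGranularity byDay = new IntervalsByGranularity(
        ImmutableList.of(Intervals.of("2023-02-22T01:00:00/2023-02-23T02:00:00")),
        Granularities.DAY
    );
    // Prints 2023-02-22/2023-02-23 then 2023-02-23/2023-02-24.
    byDay.granularityIntervalsIterator().forEachRemaining(System.out::println);
  }
}
```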
@Test + public void tombstoneIntervalsCreatedForReplaceWhenUsedIntervalsDonotAlign() throws Exception + { + Interval usedInterval = Intervals.of("2020-02-01T12:12:12.121/2020-04-01T00:00:00.000"); + Interval replaceInterval = Intervals.of("2020-01-30/2020-03-31"); + Interval intervalToDrop = Intervals.of("2020-01-30/2020-02-02"); + Granularity replaceGranularity = Granularities.DAY; + + DataSegment existingUsedSegment = + DataSegment.builder() + .dataSource("test") + .interval(usedInterval) + .version("oldVersion") + .size(100) + .build(); + Assert.assertFalse(existingUsedSegment.isTombstone()); + Mockito.when(taskActionClient.submit(any(TaskAction.class))) + .thenReturn(Collections.singletonList(existingUsedSegment)); + TombstoneHelper tombstoneHelper = new TombstoneHelper(taskActionClient); + + Set tombstoneIntervals = tombstoneHelper.computeTombstoneIntervalsForReplace( + ImmutableList.of(intervalToDrop), + ImmutableList.of(replaceInterval), + "test", + replaceGranularity + ); + Assert.assertEquals(ImmutableSet.of(Intervals.of("2020-02-01/2020-02-02")), tombstoneIntervals); + } + + @Test + public void tombstoneIntervalsCreatedForReplaceWhenUsedIntervalsAreCompletelyDisjoint() throws Exception + { + Interval usedInterval = Intervals.of("2020-02-01T12:12:12.121/2020-04-01T00:00:00.000"); + Interval replaceInterval = Intervals.of("2023-01-30/2023-03-31"); + Interval intervalToDrop = Intervals.of("2023-01-30/2023-03-31"); + Granularity replaceGranularity = Granularities.DAY; + + DataSegment existingUsedSegment = + DataSegment.builder() + .dataSource("test") + .interval(usedInterval) + .version("oldVersion") + .size(100) + .build(); + Assert.assertFalse(existingUsedSegment.isTombstone()); + Mockito.when(taskActionClient.submit(any(TaskAction.class))) + .thenReturn(Collections.singletonList(existingUsedSegment)); + TombstoneHelper tombstoneHelper = new TombstoneHelper(taskActionClient); + + Set tombstoneIntervals = tombstoneHelper.computeTombstoneIntervalsForReplace( + ImmutableList.of(intervalToDrop), + ImmutableList.of(replaceInterval), + "test", + replaceGranularity + ); + Assert.assertEquals(ImmutableSet.of(), tombstoneIntervals); + } + + @Test + public void testTombstoneSegmentsForReplaceWhenLockRevoked() throws IOException + { + Interval usedInterval = Intervals.of("2020-02-01/2020-04-01"); + Interval replaceInterval = Intervals.of("2020-03-01/2020-03-31"); + Interval intervalToDrop = Intervals.of("2020-03-05/2020-03-07"); + Granularity replaceGranularity = Granularities.DAY; + + DataSegment existingUsedSegment = + DataSegment.builder() + .dataSource("test") + .interval(usedInterval) + .version("oldVersion") + .size(100) + .build(); + Assert.assertFalse(existingUsedSegment.isTombstone()); + Mockito.when(taskActionClient.submit(any(RetrieveUsedSegmentsAction.class))) + .thenReturn(Collections.singletonList(existingUsedSegment)); + Mockito.when(taskActionClient.submit(any(LockListAction.class))) + .thenReturn(ImmutableList.of()); + + TombstoneHelper tombstoneHelper = new TombstoneHelper(taskActionClient); + + Assert.assertThrows( + ISE.class, + () -> { + tombstoneHelper.computeTombstoneSegmentsForReplace( + ImmutableList.of(intervalToDrop), + ImmutableList.of(replaceInterval), + "test", + replaceGranularity + ); + } + ); + } }
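Finally, a recap of the refactored call sequence on the native batch side (`IndexTask` and `ParallelIndexSupervisorTask` above). This is a sketch under assumptions: `allocateTombstoneSegment` is a hypothetical stand-in for the `SegmentAllocateAction` round-trip those tasks already perform, and is not part of this patch.

```java
// Sketch of the refactored TombstoneHelper usage in the native batch tasks:
// the helper is now constructed from the TaskActionClient alone, and the pushed
// segments / data schema travel as method arguments instead of constructor state.
import org.apache.druid.indexing.common.actions.TaskActionClient;
import org.apache.druid.indexing.common.task.batch.parallel.TombstoneHelper;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
import org.apache.druid.timeline.DataSegment;
import org.joda.time.Interval;

import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

abstract class TombstonePublishSketch
{
  // Placeholder for the task's SegmentAllocateAction round-trip (hypothetical).
  abstract SegmentIdWithShardSpec allocateTombstoneSegment(Interval interval) throws IOException;

  Set<DataSegment> computeTombstonesForPublish(
      TaskActionClient taskActionClient,
      Collection<DataSegment> pushedSegments,
      DataSchema dataSchema
  ) throws IOException
  {
    TombstoneHelper helper = new TombstoneHelper(taskActionClient);
    List<Interval> tombstoneIntervals = helper.computeTombstoneIntervals(pushedSegments, dataSchema);

    Map<Interval, SegmentIdWithShardSpec> tombstonesAndVersions = new HashMap<>();
    for (Interval interval : tombstoneIntervals) {
      tombstonesAndVersions.put(interval, allocateTombstoneSegment(interval));
    }
    // DataSchema is now an argument here too, mirroring the signature change above.
    return helper.computeTombstones(dataSchema, tombstonesAndVersions);
  }
}
```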