Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix SLM check for restore in progress #50868

Merged
merged 2 commits into from
Jan 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ public static boolean okayToDeleteSnapshots(ClusterState state) {

// Cannot delete during a restore
final RestoreInProgress restoreInProgress = state.custom(RestoreInProgress.TYPE);
if (restoreInProgress != null) {
if (restoreInProgress != null && restoreInProgress.isEmpty() == false) {
return false;
}

Expand Down Expand Up @@ -480,6 +480,7 @@ public void onNewClusterState(ClusterState state) {
logger.debug("retrying SLM snapshot retention deletion after snapshot operation has completed");
reRun.accept(state);
} else {
logger.trace("received new cluster state but a snapshot operation is still running");
observer.waitForNextChange(this);
}
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

import org.elasticsearch.action.ActionFuture;
import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse;
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotAction;
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotRequest;
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus;
import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
Expand All @@ -22,6 +25,7 @@
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.repositories.RepositoriesService;
import org.elasticsearch.repositories.RepositoryException;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.snapshots.ConcurrentSnapshotExecutionException;
import org.elasticsearch.snapshots.SnapshotInfo;
import org.elasticsearch.snapshots.SnapshotMissingException;
Expand All @@ -30,6 +34,7 @@
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
import org.elasticsearch.xpack.core.ilm.LifecycleSettings;
import org.elasticsearch.xpack.core.slm.SnapshotInvocationRecord;
import org.elasticsearch.xpack.core.slm.SnapshotLifecyclePolicy;
import org.elasticsearch.xpack.core.slm.SnapshotLifecyclePolicyItem;
import org.elasticsearch.xpack.core.slm.SnapshotRetentionConfiguration;
Expand Down Expand Up @@ -385,6 +390,74 @@ private void testUnsuccessfulSnapshotRetention(boolean partialSuccess) throws Ex
}
}

public void testSLMRetentionAfterRestore() throws Exception {
final String indexName = "test";
final String policyName = "test-policy";
int docCount = 20;
for (int i = 0; i < docCount; i++) {
index(indexName, i + "", Collections.singletonMap("foo", "bar"));
}

// Create a snapshot repo
initializeRepo(REPO);

logger.info("--> creating policy {}", policyName);
createSnapshotPolicy(policyName, "snap", NEVER_EXECUTE_CRON_SCHEDULE, REPO, indexName, true, false,
new SnapshotRetentionConfiguration(TimeValue.ZERO, null, null));

logger.info("--> executing snapshot lifecycle");
final String snapshotName = executePolicy(policyName);

// Check that the executed snapshot shows up in the SLM output
assertBusy(() -> {
GetSnapshotLifecycleAction.Response getResp =
client().execute(GetSnapshotLifecycleAction.INSTANCE, new GetSnapshotLifecycleAction.Request(policyName)).get();
logger.info("--> checking for in progress snapshot...");

assertThat(getResp.getPolicies().size(), greaterThan(0));
SnapshotLifecyclePolicyItem item = getResp.getPolicies().get(0);
SnapshotInvocationRecord lastSuccess = item.getLastSuccess();
assertNotNull(lastSuccess);
assertThat(lastSuccess.getSnapshotName(), equalTo(snapshotName));
});

logger.info("--> restoring index");
RestoreSnapshotRequest restoreReq = new RestoreSnapshotRequest(REPO, snapshotName);
restoreReq.indices(indexName);
restoreReq.renamePattern("(.+)");
restoreReq.renameReplacement("restored_$1");
restoreReq.waitForCompletion(true);
RestoreSnapshotResponse resp = client().execute(RestoreSnapshotAction.INSTANCE, restoreReq).get();
assertThat(resp.status(), equalTo(RestStatus.OK));

logger.info("--> executing SLM retention");
assertAcked(client().execute(ExecuteSnapshotRetentionAction.INSTANCE, new ExecuteSnapshotRetentionAction.Request()).get());
logger.info("--> waiting for {} snapshot to be deleted", snapshotName);
assertBusy(() -> {
try {
try {
GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster()
.prepareGetSnapshots(REPO).setSnapshots(snapshotName).get();
assertThat(snapshotsStatusResponse.getSnapshots(REPO), empty());
} catch (SnapshotMissingException e) {
// This is what we want to happen
}
logger.info("--> snapshot [{}] has been deleted", snapshotName);
} catch (RepositoryException re) {
// Concurrent status calls and write operations may lead to failures in determining the current repository generation
// TODO: Remove this hack once tracking the current repository generation has been made consistent
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need to block this PR on it, but we should check if this hack is still necessary.

throw new AssertionError(re);
}
});

// Cancel/delete the snapshot
try {
client().admin().cluster().prepareDeleteSnapshot(REPO, snapshotName).get();
} catch (SnapshotMissingException e) {
// ignore
}
}

private SnapshotsStatusResponse getSnapshotStatus(String snapshotName) {
try {
return client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(snapshotName).get();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.not;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class SnapshotRetentionTaskTests extends ESTestCase {

Expand Down Expand Up @@ -363,6 +364,14 @@ public void testOkToDeleteSnapshots() {
.build();

assertThat(SnapshotRetentionTask.okayToDeleteSnapshots(state), equalTo(false));

restoreInProgress = mock(RestoreInProgress.class);
when(restoreInProgress.isEmpty()).thenReturn(true);
state = ClusterState.builder(new ClusterName("cluster"))
.putCustom(RestoreInProgress.TYPE, restoreInProgress)
.build();

assertThat(SnapshotRetentionTask.okayToDeleteSnapshots(state), equalTo(true));
}

public void testSkipWhileStopping() throws Exception {
Expand Down Expand Up @@ -420,10 +429,10 @@ private void doTestRunManuallyDuringMode(OperationMode mode) throws Exception {
final String repoId = "repo";
SnapshotLifecyclePolicy policy = new SnapshotLifecyclePolicy(policyId, "snap", "1 * * * * ?",
repoId, null, new SnapshotRetentionConfiguration(TimeValue.timeValueDays(30), null, null));

ClusterState state = createState(mode, policy);
ClusterServiceUtils.setState(clusterService, state);

AtomicBoolean retentionWasRun = new AtomicBoolean(false);
MockSnapshotRetentionTask task = new MockSnapshotRetentionTask(noOpClient, clusterService,
new SnapshotLifecycleTaskTests.VerifyingHistoryStore(noOpClient, ZoneOffset.UTC, (historyItem) -> {
Expand All @@ -436,10 +445,10 @@ private void doTestRunManuallyDuringMode(OperationMode mode) throws Exception {
(deletionPolicyId, repo, snapId, slmStats, listener) -> {
},
System::nanoTime);

long time = System.currentTimeMillis();
task.triggered(new SchedulerEngine.Event(SnapshotRetentionService.SLM_RETENTION_MANUAL_JOB_ID, time, time));

assertTrue("retention should be run manually even if SLM is disabled", retentionWasRun.get());
} finally {
threadPool.shutdownNow();
Expand Down