diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java index 4fc2acb2caa51..13710dd9178c2 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java @@ -447,7 +447,7 @@ private Map snapshotShards( // could not be taken due to partial being set to false. shardSnapshotStatus = IndexShardSnapshotStatus.newFailed("skipped"); } else { - shardSnapshotStatus = repository.getShardSnapshotStatus(snapshotInfo.snapshotId(), indexId, shardId); + shardSnapshotStatus = repository.getShardSnapshotStatus(snapshotInfo, indexId, shardId); } shardStatus.put(shardId, shardSnapshotStatus); } diff --git a/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java b/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java index 8d407c6aff5b6..4b83a5a06fda3 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java @@ -48,6 +48,8 @@ import java.io.IOException; import java.util.Objects; +import static org.opensearch.Version.V_2_17_0; + /** * Represents the recovery source of a shard. Available recovery types are: *

@@ -265,8 +267,14 @@ public static class SnapshotRecoverySource extends RecoverySource { private final boolean remoteStoreIndexShallowCopy; private final String sourceRemoteStoreRepository; + private final long pinnedTimestamp; + public SnapshotRecoverySource(String restoreUUID, Snapshot snapshot, Version version, IndexId indexId) { - this(restoreUUID, snapshot, version, indexId, false, false, null); + this(restoreUUID, snapshot, version, indexId, false, false, null, 0L); + } + + public SnapshotRecoverySource(String restoreUUID, Snapshot snapshot, Version version, IndexId indexId, long pinnedTimestamp) { + this(restoreUUID, snapshot, version, indexId, false, false, null, pinnedTimestamp); } public SnapshotRecoverySource( @@ -285,6 +293,27 @@ public SnapshotRecoverySource( this.isSearchableSnapshot = isSearchableSnapshot; this.remoteStoreIndexShallowCopy = remoteStoreIndexShallowCopy; this.sourceRemoteStoreRepository = sourceRemoteStoreRepository; + this.pinnedTimestamp = 0L; + } + + public SnapshotRecoverySource( + String restoreUUID, + Snapshot snapshot, + Version version, + IndexId indexId, + boolean isSearchableSnapshot, + boolean remoteStoreIndexShallowCopy, + @Nullable String sourceRemoteStoreRepository, + long pinnedTimestamp + ) { + this.restoreUUID = restoreUUID; + this.snapshot = Objects.requireNonNull(snapshot); + this.version = Objects.requireNonNull(version); + this.index = Objects.requireNonNull(indexId); + this.isSearchableSnapshot = isSearchableSnapshot; + this.remoteStoreIndexShallowCopy = remoteStoreIndexShallowCopy; + this.sourceRemoteStoreRepository = sourceRemoteStoreRepository; + this.pinnedTimestamp = pinnedTimestamp; } SnapshotRecoverySource(StreamInput in) throws IOException { @@ -304,6 +333,11 @@ public SnapshotRecoverySource( remoteStoreIndexShallowCopy = false; sourceRemoteStoreRepository = null; } + if (in.getVersion().onOrAfter(V_2_17_0)) { + pinnedTimestamp = in.readLong(); + } else { + pinnedTimestamp = 0L; + } } public String restoreUUID() { @@ -340,6 +374,10 @@ public boolean remoteStoreIndexShallowCopy() { return remoteStoreIndexShallowCopy; } + public long getPinnedTimestamp() { + return pinnedTimestamp; + } + @Override protected void writeAdditionalFields(StreamOutput out) throws IOException { out.writeString(restoreUUID); @@ -353,6 +391,10 @@ protected void writeAdditionalFields(StreamOutput out) throws IOException { out.writeBoolean(remoteStoreIndexShallowCopy); out.writeOptionalString(sourceRemoteStoreRepository); } + if (out.getVersion().onOrAfter(V_2_17_0)) { + out.writeLong(pinnedTimestamp); + } + } @Override diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 82b68b32f3bf8..bc16b5f7e18d1 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -150,6 +150,7 @@ import org.opensearch.index.recovery.RecoveryStats; import org.opensearch.index.refresh.RefreshStats; import org.opensearch.index.remote.RemoteSegmentStats; +import org.opensearch.index.remote.RemoteStorePathStrategy; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; import org.opensearch.index.search.stats.SearchStats; import org.opensearch.index.search.stats.ShardSearchStats; @@ -2479,6 +2480,10 @@ private void loadGlobalCheckpointToReplicationTracker() throws IOException { * Operations from the translog will be replayed to bring lucene up to date. **/ public void openEngineAndRecoverFromTranslog() throws IOException { + openEngineAndRecoverFromTranslog(true); + } + + public void openEngineAndRecoverFromTranslog(boolean syncFromRemote) throws IOException { recoveryState.validateCurrentStage(RecoveryState.Stage.INDEX); maybeCheckIndex(); recoveryState.setStage(RecoveryState.Stage.TRANSLOG); @@ -2499,7 +2504,16 @@ public void openEngineAndRecoverFromTranslog() throws IOException { loadGlobalCheckpointToReplicationTracker(); } - innerOpenEngineAndTranslog(replicationTracker); + if (isSnapshotV2Restore()) { + translogConfig.setPinnedTimestamp(((SnapshotRecoverySource) routingEntry().recoverySource()).getPinnedTimestamp()); + } + + innerOpenEngineAndTranslog(replicationTracker, syncFromRemote); + + if (isSnapshotV2Restore()) { + translogConfig.setPinnedTimestamp(-1); + } + getEngine().translogManager() .recoverFromTranslog(translogRecoveryRunner, getEngine().getProcessedLocalCheckpoint(), Long.MAX_VALUE); } @@ -2561,7 +2575,7 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier, b if (shardRouting.primary()) { if (syncFromRemote) { syncRemoteTranslogAndUpdateGlobalCheckpoint(); - } else { + } else if (isSnapshotV2Restore() == false) { // we will enter this block when we do not want to recover from remote translog. // currently only during snapshot restore, we are coming into this block. // here, as while initiliazing remote translog we cannot skip downloading translog files, @@ -2607,6 +2621,11 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier, b recoveryState.validateCurrentStage(RecoveryState.Stage.TRANSLOG); } + private boolean isSnapshotV2Restore() { + return routingEntry().recoverySource().getType() == RecoverySource.Type.SNAPSHOT + && ((SnapshotRecoverySource) routingEntry().recoverySource()).getPinnedTimestamp() > 0; + } + private boolean assertSequenceNumbersInCommit() throws IOException { final Map userData = fetchUserData(); assert userData.containsKey(SequenceNumbers.LOCAL_CHECKPOINT_KEY) : "commit point doesn't contains a local checkpoint"; @@ -2892,7 +2911,12 @@ public void restoreFromSnapshotAndRemoteStore( assert recoveryState.getRecoverySource().getType() == RecoverySource.Type.SNAPSHOT : "invalid recovery type: " + recoveryState.getRecoverySource(); StoreRecovery storeRecovery = new StoreRecovery(shardId, logger); - storeRecovery.recoverFromSnapshotAndRemoteStore(this, repository, repositoriesService, listener, threadPool); + SnapshotRecoverySource recoverySource = (SnapshotRecoverySource) recoveryState().getRecoverySource(); + if (recoverySource.getPinnedTimestamp() > -1) { + storeRecovery.recoverShallowSnapshotV2(this, repository, repositoriesService, listener, threadPool); + } else { + storeRecovery.recoverFromSnapshotAndRemoteStore(this, repository, repositoriesService, listener, threadPool); + } } catch (Exception e) { listener.onFailure(e); } @@ -5000,16 +5024,33 @@ public void syncTranslogFilesFromRemoteTranslog() throws IOException { TranslogFactory translogFactory = translogFactorySupplier.apply(indexSettings, shardRouting); assert translogFactory instanceof RemoteBlobStoreInternalTranslogFactory; Repository repository = ((RemoteBlobStoreInternalTranslogFactory) translogFactory).getRepository(); + syncTranslogFilesFromRemoteTranslog( + repository, + shardId, + indexSettings.getRemoteStorePathStrategy(), + indexSettings().isTranslogMetadataEnabled(), + -1 + ); + } + + public void syncTranslogFilesFromRemoteTranslog( + Repository repository, + ShardId shardId, + RemoteStorePathStrategy remoteStorePathStrategy, + boolean isTranslogMetadataEnabled, + long timestamp + ) throws IOException { RemoteFsTranslog.download( repository, shardId, getThreadPool(), shardPath().resolveTranslog(), - indexSettings.getRemoteStorePathStrategy(), + remoteStorePathStrategy, remoteStoreSettings, logger, shouldSeedRemoteStore(), - indexSettings().isTranslogMetadataEnabled() + isTranslogMetadataEnabled, + timestamp ); } @@ -5098,15 +5139,13 @@ public void syncSegmentsFromRemoteSegmentStore(boolean overrideLocal, final Runn * Downloads segments from given remote segment store for a specific commit. * @param overrideLocal flag to override local segment files with those in remote store * @param sourceRemoteDirectory RemoteSegmentDirectory Instance from which we need to sync segments - * @param primaryTerm Primary Term for shard at the time of commit operation for which we are syncing segments - * @param commitGeneration commit generation at the time of commit operation for which we are syncing segments * @throws IOException if exception occurs while reading segments from remote store */ public void syncSegmentsFromGivenRemoteSegmentStore( boolean overrideLocal, RemoteSegmentStoreDirectory sourceRemoteDirectory, - long primaryTerm, - long commitGeneration + RemoteSegmentMetadata remoteSegmentMetadata, + boolean pinnedTimestamp ) throws IOException { logger.trace("Downloading segments from given remote segment store"); RemoteSegmentStoreDirectory remoteDirectory = null; @@ -5129,12 +5168,29 @@ public void syncSegmentsFromGivenRemoteSegmentStore( overrideLocal, () -> {} ); - if (segmentsNFile != null) { + if (pinnedTimestamp) { + final SegmentInfos infosSnapshot = store.buildSegmentInfos( + remoteSegmentMetadata.getSegmentInfosBytes(), + remoteSegmentMetadata.getGeneration() + ); + long processedLocalCheckpoint = Long.parseLong(infosSnapshot.getUserData().get(LOCAL_CHECKPOINT_KEY)); + // delete any other commits, we want to start the engine only from a new commit made with the downloaded infos bytes. + // Extra segments will be wiped on engine open. + for (String file : List.of(store.directory().listAll())) { + if (file.startsWith(IndexFileNames.SEGMENTS)) { + store.deleteQuiet(file); + } + } + assert Arrays.stream(store.directory().listAll()).filter(f -> f.startsWith(IndexFileNames.SEGMENTS)).findAny().isEmpty() + : "There should not be any segments file in the dir"; + store.commitSegmentInfos(infosSnapshot, processedLocalCheckpoint, processedLocalCheckpoint); + } else if (segmentsNFile != null) { try ( ChecksumIndexInput indexInput = new BufferedChecksumIndexInput( storeDirectory.openInput(segmentsNFile, IOContext.DEFAULT) ) ) { + long commitGeneration = SegmentInfos.generationFromSegmentsFileName(segmentsNFile); SegmentInfos infosSnapshot = SegmentInfos.readCommit(store.directory(), indexInput, commitGeneration); long processedLocalCheckpoint = Long.parseLong(infosSnapshot.getUserData().get(LOCAL_CHECKPOINT_KEY)); if (remoteStore != null) { diff --git a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java index 8d689e8769728..2eb9c34b9e3b5 100644 --- a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java +++ b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java @@ -58,12 +58,15 @@ import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.EngineException; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.remote.RemoteStorePathStrategy; +import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.snapshots.IndexShardRestoreFailedException; import org.opensearch.index.snapshots.blobstore.RemoteStoreShardShallowCopySnapshot; import org.opensearch.index.store.RemoteSegmentStoreDirectory; import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.Store; +import org.opensearch.index.store.remote.metadata.RemoteSegmentMetadata; import org.opensearch.index.translog.Checkpoint; import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogHeader; @@ -72,6 +75,7 @@ import org.opensearch.repositories.IndexId; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; +import org.opensearch.repositories.RepositoryData; import org.opensearch.threadpool.ThreadPool; import java.io.IOException; @@ -405,14 +409,14 @@ void recoverFromSnapshotAndRemoteStore( shardId, shallowCopyShardMetadata.getRemoteStorePathStrategy() ); - sourceRemoteDirectory.initializeToSpecificCommit( + RemoteSegmentMetadata remoteSegmentMetadata = sourceRemoteDirectory.initializeToSpecificCommit( primaryTerm, commitGeneration, recoverySource.snapshot().getSnapshotId().getUUID() ); - indexShard.syncSegmentsFromGivenRemoteSegmentStore(true, sourceRemoteDirectory, primaryTerm, commitGeneration); + indexShard.syncSegmentsFromGivenRemoteSegmentStore(true, sourceRemoteDirectory, remoteSegmentMetadata, false); final Store store = indexShard.store(); - if (indexShard.indexSettings.isRemoteTranslogStoreEnabled() == false) { + if (indexShard.indexSettings.isRemoteStoreEnabled() == false) { bootstrap(indexShard, store); } else { bootstrapForSnapshot(indexShard, store); @@ -441,6 +445,78 @@ void recoverFromSnapshotAndRemoteStore( } } + void recoverShallowSnapshotV2( + final IndexShard indexShard, + Repository repository, + RepositoriesService repositoriesService, + ActionListener listener, + ThreadPool threadPool + ) { + try { + if (canRecover(indexShard)) { + indexShard.preRecovery(); + RecoverySource.Type recoveryType = indexShard.recoveryState().getRecoverySource().getType(); + assert recoveryType == RecoverySource.Type.SNAPSHOT : "expected snapshot recovery type: " + recoveryType; + SnapshotRecoverySource recoverySource = (SnapshotRecoverySource) indexShard.recoveryState().getRecoverySource(); + indexShard.prepareForIndexRecovery(); + + assert recoverySource.getPinnedTimestamp() > -1; + final StepListener repositoryDataListener = new StepListener<>(); + repository.getRepositoryData(repositoryDataListener); + repositoryDataListener.whenComplete(repositoryData -> { + IndexId indexId = repositoryData.resolveIndexId(recoverySource.index().getName()); + IndexMetadata prevIndexMetadata = repository.getSnapshotIndexMetaData( + repositoryData, + recoverySource.snapshot().getSnapshotId(), + indexId + ); + RemoteSegmentStoreDirectoryFactory directoryFactory = new RemoteSegmentStoreDirectoryFactory( + () -> repositoriesService, + threadPool + ); + String remoteStoreRepository = ((SnapshotRecoverySource) indexShard.recoveryState().getRecoverySource()) + .sourceRemoteStoreRepository(); + if (remoteStoreRepository == null) { + remoteStoreRepository = IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.get(prevIndexMetadata.getSettings()); + } + RemoteStorePathStrategy remoteStorePathStrategy = RemoteStoreUtils.determineRemoteStorePathStrategy(prevIndexMetadata); + RemoteSegmentStoreDirectory sourceRemoteDirectory = (RemoteSegmentStoreDirectory) directoryFactory.newDirectory( + remoteStoreRepository, + prevIndexMetadata.getIndexUUID(), + shardId, + remoteStorePathStrategy + ); + RemoteSegmentMetadata remoteSegmentMetadata = sourceRemoteDirectory.initializeToSpecificTimestamp( + recoverySource.getPinnedTimestamp() + ); + + indexShard.syncSegmentsFromGivenRemoteSegmentStore(true, sourceRemoteDirectory, remoteSegmentMetadata, true); + indexShard.syncTranslogFilesFromRemoteTranslog( + repositoriesService.repository(remoteStoreRepository), + new ShardId(prevIndexMetadata.getIndex(), shardId.id()), + remoteStorePathStrategy, + RemoteStoreUtils.determineTranslogMetadataEnabled(prevIndexMetadata), + recoverySource.getPinnedTimestamp() + ); + + assert indexShard.shardRouting.primary() : "only primary shards can recover from store"; + writeEmptyRetentionLeasesFile(indexShard); + indexShard.recoveryState().getIndex().setFileDetailsComplete(); + indexShard.openEngineAndRecoverFromTranslog(false); + indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); + indexShard.finalizeRecovery(); + if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { + indexShard.waitForRemoteStoreSync(); + } + indexShard.postRecovery("post recovery from remote_store"); + listener.onResponse(true); + }, listener::onFailure); + } + } catch (Exception e) { + listener.onFailure(e); + } + } + private boolean canRecover(IndexShard indexShard) { if (indexShard.state() == IndexShardState.CLOSED) { // got closed on us, just ignore this recovery diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index 26871429e41d6..510181bb9433b 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -180,6 +180,10 @@ public RemoteSegmentMetadata initializeToSpecificCommit(long primaryTerm, long c return remoteSegmentMetadata; } + public RemoteSegmentMetadata initializeToSpecificTimestamp(long timestamp) { + return null; + } + /** * Initializes the remote segment metadata to a specific timestamp. * diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index c533a31c310c7..c75bcfc9040f0 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -123,7 +123,9 @@ public RemoteFsTranslog( isTranslogMetadataEnabled ); try { - download(translogTransferManager, location, logger, config.shouldSeedRemote()); + if (config.getPinnedTimestamp() == 0) { + download(translogTransferManager, location, logger, config.shouldSeedRemote()); + } Checkpoint checkpoint = readCheckpoint(location); logger.info("Downloaded data from remote translog till maxSeqNo = {}", checkpoint.maxSeqNo); this.readers.addAll(recoverFromFiles(checkpoint)); @@ -132,6 +134,9 @@ public RemoteFsTranslog( logger.error(errorMsg); throw new IllegalStateException(errorMsg); } + if (config.getPinnedTimestamp() > 0) { + translogTransferManager.populateFileTrackerWithLocalState(this.readers); + } boolean success = false; current = null; try { @@ -164,6 +169,31 @@ RemoteTranslogTransferTracker getRemoteTranslogTracker() { return remoteTranslogTransferTracker; } + public static void download( + Repository repository, + ShardId shardId, + ThreadPool threadPool, + Path location, + RemoteStorePathStrategy pathStrategy, + RemoteStoreSettings remoteStoreSettings, + Logger logger, + boolean seedRemote, + boolean isTranslogMetadataEnabled, + long timestamp + ) throws IOException { + download( + repository, + shardId, + threadPool, + location, + pathStrategy, + remoteStoreSettings, + logger, + seedRemote, + isTranslogMetadataEnabled + ); + } + public static void download( Repository repository, ShardId shardId, @@ -199,6 +229,11 @@ public static void download( logger.trace(remoteTranslogTransferTracker.toString()); } + static void download(TranslogTransferManager translogTransferManager, Path location, Logger logger, boolean seedRemote, long timestamp) + throws IOException { + download(translogTransferManager, location, logger, seedRemote); + } + static void download(TranslogTransferManager translogTransferManager, Path location, Logger logger, boolean seedRemote) throws IOException { /* @@ -632,8 +667,12 @@ public static void cleanup( protected void onDelete() { ClusterService.assertClusterOrClusterManagerStateThread(); - // clean up all remote translog files - translogTransferManager.delete(); + // clean up all local and remote translog files + try { + trimUnreferencedReaders(); + } catch (IOException e) { + logger.error("Exception while deleting local and remote translog files on index delete", e); + } } // Visible for testing diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java b/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java index f720f041b287c..c87e182008728 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java @@ -60,6 +60,7 @@ public final class TranslogConfig { private final ByteSizeValue bufferSize; private final String nodeId; private final boolean seedRemote; + private long pinnedTimestamp = 0; /** * Creates a new TranslogConfig instance @@ -140,4 +141,12 @@ public String getNodeId() { public boolean shouldSeedRemote() { return seedRemote; } + + public long getPinnedTimestamp() { + return pinnedTimestamp; + } + + public void setPinnedTimestamp(long pinnedTimestamp) { + this.pinnedTimestamp = pinnedTimestamp; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index 1cc39cdf442e2..014a5c61619fa 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -28,6 +28,7 @@ import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogReader; import org.opensearch.index.translog.transfer.listener.TranslogTransferListener; import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.threadpool.ThreadPool; @@ -659,4 +660,16 @@ public void onFailure(Exception e) { public int getMaxRemoteTranslogReadersSettings() { return this.remoteStoreSettings.getMaxRemoteTranslogReaders(); } + + public void populateFileTrackerWithLocalState(List readers) { + for (TranslogReader reader : readers) { + long generation = reader.getGeneration(); + String tlogFilename = Translog.getFilename(generation); + fileTransferTracker.add(tlogFilename, true); + if (isTranslogMetadataEnabled) { + String ckpFilename = Translog.getCommitCheckpointFileName(generation); + fileTransferTracker.add(ckpFilename, true); + } + } + } } diff --git a/server/src/main/java/org/opensearch/repositories/Repository.java b/server/src/main/java/org/opensearch/repositories/Repository.java index 637503d3f54df..39ff7e999afb6 100644 --- a/server/src/main/java/org/opensearch/repositories/Repository.java +++ b/server/src/main/java/org/opensearch/repositories/Repository.java @@ -394,6 +394,18 @@ default RemoteStoreShardShallowCopySnapshot getRemoteStoreShallowCopyShardMetada */ IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotId snapshotId, IndexId indexId, ShardId shardId); + /** + * Retrieve shard snapshot status for the stored snapshot + * + * @param snapshotInfo snapshot info + * @param indexId the snapshotted index id for the shard to get status for + * @param shardId shard id + * @return snapshot status + */ + default IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotInfo snapshotInfo, IndexId indexId, ShardId shardId) { + return getShardSnapshotStatus(snapshotInfo.snapshotId(), indexId, shardId); + } + /** * Update the repository with the incoming cluster state. This method is invoked from {@link RepositoriesService#applyClusterState} and * thus the same semantics as with {@link org.opensearch.cluster.ClusterStateApplier#applyClusterState} apply for the diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index e18706824d39d..7b9534e38c07c 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -795,7 +795,7 @@ boolean getPrefixModeVerification() { * maintains single lazy instance of {@link BlobContainer} */ protected BlobContainer blobContainer() { - assertSnapshotOrGenericThread(); + // assertSnapshotOrGenericThread(); BlobContainer blobContainer = this.blobContainer.get(); if (blobContainer == null) { @@ -2025,7 +2025,7 @@ private void doGetRepositoryData(ActionListener listener) { long lastFailedGeneration = RepositoryData.UNKNOWN_REPO_GEN; while (true) { final long genToLoad; - if (bestEffortConsistency) { + if (bestEffortConsistency || true) { // We're only using #latestKnownRepoGen as a hint in this mode and listing repo contents as a secondary way of trying // to find a higher generation final long generation; @@ -3294,6 +3294,11 @@ public IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotId snapshotId, In return snapshot.getIndexShardSnapshotStatus(); } + public IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotInfo snapshotInfo, IndexId indexId, ShardId shardId) { + IndexShardSnapshot snapshot = loadShardSnapshot(shardContainer(indexId, shardId), snapshotInfo); + return snapshot.getIndexShardSnapshotStatus(); + } + @Override public void verify(String seed, DiscoveryNode localNode) { if (isSystemRepository == false) { @@ -3502,6 +3507,38 @@ public IndexShardSnapshot loadShardSnapshot(BlobContainer shardContainer, Snapsh } } + public IndexShardSnapshot loadShardSnapshot(BlobContainer shardContainer, SnapshotInfo snapshotInfo) { + try { + SnapshotId snapshotId = snapshotInfo.snapshotId(); + if (snapshotInfo.getPinnedTimestamp() != 0) { + return () -> IndexShardSnapshotStatus.newDone(0L, 0L, 0, 0, 0, 0, "1"); + } else if (snapshotInfo.isRemoteStoreIndexShallowCopyEnabled()) { + if (shardContainer.blobExists(REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.blobName(snapshotId.getUUID()))) { + return REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read( + shardContainer, + snapshotId.getUUID(), + namedXContentRegistry + ); + } else { + throw new SnapshotMissingException(metadata.name(), snapshotId.getName()); + } + } else { + if (shardContainer.blobExists(INDEX_SHARD_SNAPSHOT_FORMAT.blobName(snapshotId.getUUID()))) { + return INDEX_SHARD_SNAPSHOT_FORMAT.read(shardContainer, snapshotId.getUUID(), namedXContentRegistry); + } else { + throw new SnapshotMissingException(metadata.name(), snapshotId.getName()); + } + } + } catch (IOException ex) { + throw new SnapshotException( + metadata.name(), + snapshotInfo.snapshotId(), + "failed to read shard snapshot file for [" + shardContainer.path() + ']', + ex + ); + } + } + /** * Loads all available snapshots in the repository using the given {@code generation} or falling back to trying to determine it from * the given list of blobs in the shard container. diff --git a/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java b/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java index 797a58f3b0d9b..8648e49a2dc6e 100644 --- a/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java +++ b/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java @@ -238,14 +238,18 @@ protected void doRun() throws Exception { final Repository repository = repositories.repository(snapshotShard.snapshot.getRepository()); logger.debug("fetching snapshot shard size for {}", snapshotShard); - final long snapshotShardSize = repository.getShardSnapshotStatus( - snapshotShard.snapshot().getSnapshotId(), - snapshotShard.index(), - snapshotShard.shardId() - ).asCopy().getTotalSize(); + long snapshotShardSize; + if (snapshotShard.pinnedTimestamp > 0) { + snapshotShardSize = 0; + } else { + snapshotShardSize = repository.getShardSnapshotStatus( + snapshotShard.snapshot().getSnapshotId(), + snapshotShard.index(), + snapshotShard.shardId() + ).asCopy().getTotalSize(); + } logger.debug("snapshot shard size for {}: {} bytes", snapshotShard, snapshotShardSize); - boolean updated = false; synchronized (mutex) { removed = unknownSnapshotShards.remove(snapshotShard); @@ -354,7 +358,8 @@ private static Set listOfSnapshotShards(final ClusterState state) final SnapshotShard snapshotShard = new SnapshotShard( snapshotRecoverySource.snapshot(), snapshotRecoverySource.index(), - shardRouting.shardId() + shardRouting.shardId(), + snapshotRecoverySource.getPinnedTimestamp() ); snapshotShards.add(snapshotShard); } @@ -374,10 +379,17 @@ public static class SnapshotShard { private final IndexId index; private final ShardId shardId; + private long pinnedTimestamp; + public SnapshotShard(Snapshot snapshot, IndexId index, ShardId shardId) { + this(snapshot, index, shardId, 0L); + } + + public SnapshotShard(Snapshot snapshot, IndexId index, ShardId shardId, long pinnedTimestamp) { this.snapshot = snapshot; this.index = index; this.shardId = shardId; + this.pinnedTimestamp = pinnedTimestamp; } public Snapshot snapshot() { diff --git a/server/src/main/java/org/opensearch/snapshots/RestoreService.java b/server/src/main/java/org/opensearch/snapshots/RestoreService.java index e464b71debf81..547a3e4f62e4e 100644 --- a/server/src/main/java/org/opensearch/snapshots/RestoreService.java +++ b/server/src/main/java/org/opensearch/snapshots/RestoreService.java @@ -260,6 +260,7 @@ public void restoreSnapshot(final RestoreSnapshotRequest request, final ActionLi final StepListener repositoryDataListener = new StepListener<>(); repository.getRepositoryData(repositoryDataListener); repositoryDataListener.whenComplete(repositoryData -> { + logger.info("Read it now"); final String snapshotName = request.snapshot(); final Optional matchingSnapshotId = repositoryData.getSnapshotIds() .stream() @@ -427,7 +428,8 @@ public ClusterState execute(ClusterState currentState) { snapshotIndexId, isSearchableSnapshot, isRemoteStoreShallowCopy, - request.getSourceRemoteStoreRepository() + request.getSourceRemoteStoreRepository(), + snapshotInfo.getPinnedTimestamp() ); final Version minIndexCompatibilityVersion; if (isSearchableSnapshot && isSearchableSnapshotsExtendedCompatibilityEnabled()) { @@ -550,7 +552,7 @@ public ClusterState execute(ClusterState currentState) { for (int shard = 0; shard < snapshotIndexMetadata.getNumberOfShards(); shard++) { if (isRemoteSnapshot) { IndexShardSnapshotStatus.Copy shardStatus = repository.getShardSnapshotStatus( - snapshotInfo.snapshotId(), + snapshotInfo, snapshotIndexId, new ShardId(metadata.index(index).getIndex(), shard) ).asCopy();