Skip to content

Commit

Permalink
[fix](restore) Set the storage medium of the bind replicas (apache#40921
Browse files Browse the repository at this point in the history
)

Since the replicas are bound to the same disk, the storage medium must
be the same to avoid media migration.
  • Loading branch information
w41ter authored Sep 19, 2024
1 parent 412ff70 commit ced6ec0
Showing 1 changed file with 45 additions and 13 deletions.
58 changes: 45 additions & 13 deletions fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
Original file line number Diff line number Diff line change
Expand Up @@ -652,8 +652,8 @@ private void checkAndPrepareMeta() {
}
}

// the new tablets -> { local tablet, schema hash }, used in atomic restore.
Map<Long, Pair<Long, Integer>> tabletBases = new HashMap<>();
// the new tablets -> { local tablet, schema hash, storage medium }, used in atomic restore.
Map<Long, TabletRef> tabletBases = new HashMap<>();

// Check and prepare meta objects.
AgentBatchTask batchTask = new AgentBatchTask();
Expand Down Expand Up @@ -1037,14 +1037,30 @@ private void checkAndPrepareMeta() {

private Status bindLocalAndRemoteOlapTableReplicas(
OlapTable localOlapTbl, OlapTable remoteOlapTbl,
Map<Long, Pair<Long, Integer>> tabletBases) {
Map<Long, TabletRef> tabletBases) {
localOlapTbl.readLock();
try {
// The storage medium of the remote olap table's storage is HDD, because we want to
// restore the tables in another cluster might without SSD.
//
// Keep the storage medium of the new olap table the same as the old one, so that
// the replicas in the new olap table will not be migrated to other storage mediums.
remoteOlapTbl.setStorageMedium(localOlapTbl.getStorageMedium());
for (Partition partition : remoteOlapTbl.getPartitions()) {
Partition localPartition = localOlapTbl.getPartition(partition.getName());
if (localPartition == null) {
continue;
}
// Since the replicas are bound to the same disk, the storage medium must be the same
// to avoid media migration.
TStorageMedium storageMedium = localOlapTbl.getPartitionInfo()
.getDataProperty(localPartition.getId()).getStorageMedium();
remoteOlapTbl.getPartitionInfo().getDataProperty(partition.getId())
.setStorageMedium(storageMedium);
if (LOG.isDebugEnabled()) {
LOG.debug("bind local partition {} and remote partition {} with same storage medium {}, name: {}",
localPartition.getId(), partition.getId(), storageMedium, partition.getName());
}
for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
String indexName = remoteOlapTbl.getIndexNameById(index.getId());
Long localIndexId = localOlapTbl.getIndexIdByName(indexName);
Expand Down Expand Up @@ -1092,7 +1108,8 @@ private Status bindLocalAndRemoteOlapTableReplicas(
localOlapTbl.getName());
}
}
tabletBases.put(remoteTablet.getId(), Pair.of(localTablet.getId(), schemaHash));
tabletBases.put(remoteTablet.getId(),
new TabletRef(localTablet.getId(), schemaHash, storageMedium));
}
}
}
Expand Down Expand Up @@ -1223,7 +1240,7 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc
}

private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable localTbl, Partition restorePart,
Map<Long, Pair<Long, Integer>> tabletBases) {
Map<Long, TabletRef> tabletBases) {
Set<String> bfColumns = localTbl.getCopiedBfColumns();
double bfFpp = localTbl.getBfFpp();

Expand All @@ -1245,8 +1262,12 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc
clusterKeyIndexes = OlapTable.getClusterKeyIndexes(indexMeta.getSchema());
}
for (Tablet restoreTablet : restoredIdx.getTablets()) {
TabletRef baseTabletRef = tabletBases == null ? null : tabletBases.get(restoreTablet.getId());
// All restored replicas will be saved to HDD by default.
TStorageMedium storageMedium = baseTabletRef == null
? TStorageMedium.HDD : baseTabletRef.storageMedium;
TabletMeta tabletMeta = new TabletMeta(db.getId(), localTbl.getId(), restorePart.getId(),
restoredIdx.getId(), indexMeta.getSchemaHash(), TStorageMedium.HDD);
restoredIdx.getId(), indexMeta.getSchemaHash(), storageMedium);
Env.getCurrentInvertedIndex().addTablet(restoreTablet.getId(), tabletMeta);
for (Replica restoreReplica : restoreTablet.getReplicas()) {
Env.getCurrentInvertedIndex().addReplica(restoreTablet.getId(), restoreReplica);
Expand All @@ -1255,7 +1276,7 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc
restoreTablet.getId(), restoreReplica.getId(), indexMeta.getShortKeyColumnCount(),
indexMeta.getSchemaHash(), restoreReplica.getVersion(),
indexMeta.getKeysType(), TStorageType.COLUMN,
TStorageMedium.HDD /* all restored replicas will be saved to HDD */,
storageMedium,
indexMeta.getSchema(), bfColumns, bfFpp, null,
indexes,
localTbl.isInMemory(),
Expand All @@ -1280,12 +1301,11 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc
localTbl.variantEnableFlattenNested());
task.setInvertedIndexFileStorageFormat(localTbl.getInvertedIndexFileStorageFormat());
task.setInRestoreMode(true);
if (tabletBases != null && tabletBases.containsKey(restoreTablet.getId())) {
if (baseTabletRef != null) {
// ensure this replica is bound to the same backend disk as the origin table's replica.
Pair<Long, Integer> baseTablet = tabletBases.get(restoreTablet.getId());
task.setBaseTablet(baseTablet.first, baseTablet.second);
task.setBaseTablet(baseTabletRef.tabletId, baseTabletRef.schemaHash);
LOG.info("set base tablet {} for replica {} in restore job {}, tablet id={}",
baseTablet.first, restoreReplica.getId(), jobId, restoreTablet.getId());
baseTabletRef.tabletId, restoreReplica.getId(), jobId, restoreTablet.getId());
}
if (!CollectionUtils.isEmpty(clusterKeyIndexes)) {
task.setClusterKeyIndexes(clusterKeyIndexes);
Expand Down Expand Up @@ -1379,7 +1399,7 @@ private void genFileMapping(OlapTable localTbl, Partition localPartition, Long r
}

private void genFileMapping(OlapTable localTbl, Partition localPartition, Long remoteTblId,
BackupPartitionInfo backupPartInfo, boolean overwrite, Map<Long, Pair<Long, Integer>> tabletBases) {
BackupPartitionInfo backupPartInfo, boolean overwrite, Map<Long, TabletRef> tabletBases) {
for (MaterializedIndex localIdx : localPartition.getMaterializedIndices(IndexExtState.VISIBLE)) {
if (LOG.isDebugEnabled()) {
LOG.debug("get index id: {}, index name: {}", localIdx.getId(),
Expand All @@ -1396,7 +1416,7 @@ private void genFileMapping(OlapTable localTbl, Partition localPartition, Long r
for (Replica localReplica : localTablet.getReplicas()) {
long refTabletId = -1L;
if (tabletBases != null && tabletBases.containsKey(localTablet.getId())) {
refTabletId = tabletBases.get(localTablet.getId()).first;
refTabletId = tabletBases.get(localTablet.getId()).tabletId;
if (LOG.isDebugEnabled()) {
LOG.debug("restored tablet {} is based on exists tablet {}",
localTablet.getId(), refTabletId);
Expand Down Expand Up @@ -2565,4 +2585,16 @@ public String toString() {
public static String tableAliasWithAtomicRestore(String tableName) {
return ATOMIC_RESTORE_TABLE_PREFIX + tableName;
}

private static class TabletRef {
public long tabletId;
public int schemaHash;
public TStorageMedium storageMedium;

TabletRef(long tabletId, int schemaHash, TStorageMedium storageMedium) {
this.tabletId = tabletId;
this.schemaHash = schemaHash;
this.storageMedium = storageMedium;
}
}
}

0 comments on commit ced6ec0

Please sign in to comment.