diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp index 607a3a8f07adad..3dd964733a2b44 100644 --- a/cloud/src/recycler/recycler.cpp +++ b/cloud/src/recycler/recycler.cpp @@ -1003,7 +1003,8 @@ int64_t calculate_restore_job_expired_time( const std::string& instance_id_, const RestoreJobCloudPB& restore_job, int64_t* earlest_ts /* restore job earliest expiration ts */) { if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED || - restore_job.state() == RestoreJobCloudPB::COMPLETED) { + restore_job.state() == RestoreJobCloudPB::COMPLETED || + restore_job.state() == RestoreJobCloudPB::RECYCLING) { // final state, recycle immediately return 0L; } diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index c081cfc3b69028..d3d8d58ded061e 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1715,6 +1715,12 @@ public class Config extends ConfigBase { "Whether to enable cloud restore job."}, varType = VariableAnnotation.EXPERIMENTAL) public static boolean enable_cloud_restore_job = false; + @ConfField(mutable = true, masterOnly = true, description = { + "存算分离恢复过程中,一次 create tablets rpc 创建的 tablet 数量上限,默认值为256个", + "During the cloud restore job, the maximum number of tablets created by one " + + "create tablets RPC, 256 by default."}) + public static int cloud_restore_create_tablet_batch_size = 256; + /** * Control the default max num of the instance for a user. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 4762133d071e47..190306ef1f6af0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -802,7 +802,7 @@ public void setBeingSyncedProperties() { public void resetVersionForRestore() { for (Partition partition : idToPartition.values()) { - partition.setNextVersion(partition.getVisibleVersion() + 1); + partition.setNextVersion(partition.getCachedVisibleVersion() + 1); } } @@ -940,7 +940,7 @@ && getTableProperty().getDynamicPartitionProperty().getBuckets() if (Config.isCloudMode()) { long newReplicaId = Env.getCurrentEnv().getNextId(); Replica replica = new CloudReplica(newReplicaId, null, ReplicaState.NORMAL, - visibleVersion, schemaHash, db.getId(), id, partition.getId(), idx.getId(), i); + visibleVersion, schemaHash, db.getId(), id, entry.getKey(), idx.getId(), i); newTablet.addReplica(replica, true /* is restore */); continue; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/backup/CloudRestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/backup/CloudRestoreJob.java index 26d2f6b750a307..e0adccc1de515a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/backup/CloudRestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/backup/CloudRestoreJob.java @@ -46,6 +46,7 @@ import org.apache.doris.cloud.proto.Cloud; import org.apache.doris.cloud.qe.ComputeGroupException; import org.apache.doris.cloud.system.CloudSystemInfoService; +import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; @@ -85,7 +86,7 @@ public class CloudRestoreJob extends RestoreJob { private String cloudClusterId = null; - private Map tabletsPerTable = new HashMap<>(); + private Map, List> tabletRequests = new HashMap<>(); public enum MetaSeriviceOperation { PREPARE, @@ -190,14 +191,18 @@ public void doCreateReplicas() { handleMetaObject(MetaSeriviceOperation.PREPARE); // send create tablets requests boolean needSetStorageVault = ((CloudEnv) Env.getCurrentEnv()).getEnableStorageVault(); - for (Map.Entry entry : tabletsPerTable.entrySet()) { - OlapTable table = entry.getKey(); - Cloud.CreateTabletsRequest.Builder requestBuilder = entry.getValue(); - Cloud.CreateTabletsResponse resp = sendCreateTabletsRequests(requestBuilder, table, - needSetStorageVault); - if (resp.hasStorageVaultId()) { - storageVaultId = resp.getStorageVaultId(); - needSetStorageVault = false; + for (Map.Entry, List> entry + : tabletRequests.entrySet()) { + Pair tableToPartition = entry.getKey(); + OlapTable table = tableToPartition.first; + List requestBuilders = entry.getValue(); + for (Cloud.CreateTabletsRequest.Builder requestBuilder : requestBuilders) { + Cloud.CreateTabletsResponse resp = sendCreateTabletsRequests(requestBuilder, table, + needSetStorageVault); + if (resp.hasStorageVaultId()) { + storageVaultId = resp.getStorageVaultId(); + needSetStorageVault = false; + } } } // set storage vault for new restoring table @@ -214,7 +219,7 @@ public void doCreateReplicas() { } catch (Exception e) { status = new Status(Status.ErrCode.COMMON_ERROR, e.getMessage()); } finally { - tabletsPerTable.clear(); + tabletRequests.clear(); } } @@ -352,8 +357,8 @@ public void createReplicas(Database db, OlapTable localTbl, Partition restorePar public void createReplicas(Database db, OlapTable localTbl, Partition restorePart, Map tabletBases) { List rowStoreColumns = localTbl.getTableProperty().getCopiedRowStoreColumns(); - Cloud.CreateTabletsRequest.Builder requestBuilder = tabletsPerTable.computeIfAbsent(localTbl, - r -> Cloud.CreateTabletsRequest.newBuilder()); + List requestBuilders = tabletRequests.computeIfAbsent( + Pair.of(localTbl, restorePart), r -> Lists.newArrayList()); for (MaterializedIndex restoredIdx : restorePart.getMaterializedIndices(MaterializedIndex.IndexExtState .VISIBLE)) { @@ -365,42 +370,52 @@ public void createReplicas(Database db, OlapTable localTbl, Partition restorePar // only base and shadow index need cluster key unique column ids clusterKeyUids = OlapTable.getClusterKeyUids(indexMeta.getSchema()); } - for (Tablet restoreTablet : restoredIdx.getTablets()) { - try { - requestBuilder.addTabletMetas(((CloudInternalCatalog) Env.getCurrentInternalCatalog()) - .createTabletMetaBuilder(localTbl.getId(), restoredIdx.getId(), - restorePart.getId(), restoreTablet, - localTbl.getPartitionInfo().getTabletType(restorePart.getId()), - indexMeta.getSchemaHash(), indexMeta.getKeysType(), - indexMeta.getShortKeyColumnCount(), localTbl.getCopiedBfColumns(), - localTbl.getBfFpp(), indexes, indexMeta.getSchema(), localTbl.getDataSortInfo(), - localTbl.getCompressionType(), localTbl.getStoragePolicy(), - localTbl.isInMemory(), false, localTbl.getName(), localTbl.getTTLSeconds(), - localTbl.getEnableUniqueKeyMergeOnWrite(), localTbl.storeRowColumn(), - localTbl.getBaseSchemaVersion(), localTbl.getCompactionPolicy(), - localTbl.getTimeSeriesCompactionGoalSizeMbytes(), - localTbl.getTimeSeriesCompactionFileCountThreshold(), - localTbl.getTimeSeriesCompactionTimeThresholdSeconds(), - localTbl.getTimeSeriesCompactionEmptyRowsetsThreshold(), - localTbl.getTimeSeriesCompactionLevelThreshold(), localTbl.disableAutoCompaction(), - localTbl.getRowStoreColumnsUniqueIds(rowStoreColumns), - localTbl.getInvertedIndexFileStorageFormat(), - localTbl.rowStorePageSize(), - localTbl.variantEnableFlattenNested(), clusterKeyUids, - localTbl.storagePageSize(), localTbl.getTDEAlgorithmPB(), - localTbl.storageDictPageSize(), false)); - // In cloud mode all storage medium will be saved to HDD. - TabletMeta tabletMeta = new TabletMeta(db.getId(), localTbl.getId(), restorePart.getId(), - restoredIdx.getId(), indexMeta.getSchemaHash(), TStorageMedium.HDD); - Env.getCurrentInvertedIndex().addTablet(restoreTablet.getId(), tabletMeta); - Env.getCurrentInvertedIndex().addReplica(restoreTablet.getId(), - restoreTablet.getReplicaByBackendId(-1)); - } catch (Exception e) { - String errMsg = String.format("create tablet meta builder failed, errMsg:%s, local table:%d, " - + "restore partition=%d, restore index=%d, restore tablet=%d", e.getMessage(), - localTbl.getId(), restorePart.getId(), restoredIdx.getId(), restoreTablet.getId()); - status = new Status(Status.ErrCode.COMMON_ERROR, errMsg); + + int maxCreateTabletBatchSize = Config.cloud_restore_create_tablet_batch_size; + List restoreTablets = restoredIdx.getTablets(); + for (int i = 0; i < restoreTablets.size(); i += maxCreateTabletBatchSize) { + int end = Math.min(i + maxCreateTabletBatchSize, restoreTablets.size()); + List subRestoreTablets = restoreTablets.subList(i, end); + Cloud.CreateTabletsRequest.Builder requestBuilder = Cloud.CreateTabletsRequest.newBuilder(); + for (Tablet restoreTablet : subRestoreTablets) { + try { + requestBuilder.addTabletMetas(((CloudInternalCatalog) Env.getCurrentInternalCatalog()) + .createTabletMetaBuilder(localTbl.getId(), restoredIdx.getId(), + restorePart.getId(), restoreTablet, + localTbl.getPartitionInfo().getTabletType(restorePart.getId()), + indexMeta.getSchemaHash(), indexMeta.getKeysType(), + indexMeta.getShortKeyColumnCount(), localTbl.getCopiedBfColumns(), + localTbl.getBfFpp(), indexes, indexMeta.getSchema(), localTbl.getDataSortInfo(), + localTbl.getCompressionType(), localTbl.getStoragePolicy(), + localTbl.isInMemory(), false, localTbl.getName(), localTbl.getTTLSeconds(), + localTbl.getEnableUniqueKeyMergeOnWrite(), localTbl.storeRowColumn(), + localTbl.getBaseSchemaVersion(), localTbl.getCompactionPolicy(), + localTbl.getTimeSeriesCompactionGoalSizeMbytes(), + localTbl.getTimeSeriesCompactionFileCountThreshold(), + localTbl.getTimeSeriesCompactionTimeThresholdSeconds(), + localTbl.getTimeSeriesCompactionEmptyRowsetsThreshold(), + localTbl.getTimeSeriesCompactionLevelThreshold(), localTbl.disableAutoCompaction(), + localTbl.getRowStoreColumnsUniqueIds(rowStoreColumns), + localTbl.getInvertedIndexFileStorageFormat(), + localTbl.rowStorePageSize(), + localTbl.variantEnableFlattenNested(), clusterKeyUids, + localTbl.storagePageSize(), localTbl.getTDEAlgorithmPB(), + localTbl.storageDictPageSize(), false)); + // In cloud mode all storage medium will be saved to HDD. + TabletMeta tabletMeta = new TabletMeta(db.getId(), localTbl.getId(), restorePart.getId(), + restoredIdx.getId(), indexMeta.getSchemaHash(), TStorageMedium.HDD); + Env.getCurrentInvertedIndex().addTablet(restoreTablet.getId(), tabletMeta); + Env.getCurrentInvertedIndex().addReplica(restoreTablet.getId(), + restoreTablet.getReplicaByBackendId(-1)); + } catch (Exception e) { + String errMsg = String.format("create tablet meta builder failed, errMsg:%s, local table:%d, " + + "restore partition=%d, restore index=%d, restore tablet=%d", e.getMessage(), + localTbl.getId(), restorePart.getId(), restoredIdx.getId(), restoreTablet.getId()); + status = new Status(Status.ErrCode.COMMON_ERROR, errMsg); + return; + } } + requestBuilders.add(requestBuilder); } } } @@ -519,8 +534,11 @@ private Cloud.CreateTabletsResponse sendCreateTabletsRequests(Cloud.CreateTablet + "vault name=%s, errMsg=%s", dbId, olapTable.getName(), storageVaultName, e.getMessage()); throw new DdlException(errMsg); } - LOG.info("cloud restore job restore tablets, dbId: {}, tableName: {}, vault name: {}", dbId, - olapTable.getName(), storageVaultName); + if (LOG.isDebugEnabled()) { + LOG.debug("cloud restore job restore tablets, dbId: {}, tableName: {}, vault name: {}," + + "tablet created: {}", dbId, olapTable.getName(), storageVaultName, + requestBuilder.getTabletMetasCount()); + } return resp; }