Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1229,7 +1229,7 @@ public void checkStableAndNormal(String clusterName) throws DdlException {
}

public boolean isStable(SystemInfoService infoService, TabletScheduler tabletScheduler, String clusterName) {
int availableBackendsNum = infoService.getClusterBackendIds(clusterName, true).size();
List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(clusterName, true);
for (Partition partition : idToPartition.values()) {
long visibleVersion = partition.getVisibleVersion();
long visibleVersionHash = partition.getVisibleVersionHash();
Expand All @@ -1242,7 +1242,7 @@ public boolean isStable(SystemInfoService infoService, TabletScheduler tabletSch

Pair<TabletStatus, TabletSchedCtx.Priority> statusPair = tablet.getHealthStatusWithPriority(
infoService, clusterName, visibleVersion, visibleVersionHash, replicationNum,
availableBackendsNum);
aliveBeIdsInCluster);
if (statusPair.first != TabletStatus.HEALTHY) {
LOG.info("table {} is not stable because tablet {} status is {}. replicas: {}",
id, tablet.getId(), statusPair.first, tablet.getReplicas());
Expand Down
33 changes: 24 additions & 9 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
* This class represents the olap tablet related metadata.
Expand Down Expand Up @@ -407,7 +408,7 @@ public long getDataSize(boolean singleReplica) {
public Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriority(
SystemInfoService systemInfoService, String clusterName,
long visibleVersion, long visibleVersionHash, int replicationNum,
int availableBackendsNum) {
List<Long> aliveBeIdsInCluster) {

int alive = 0;
int aliveAndVersionComplete = 0;
Expand Down Expand Up @@ -453,15 +454,16 @@ public Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriority(
}

// 1. alive replicas are not enough
if (alive < replicationNum && replicas.size() >= availableBackendsNum
&& availableBackendsNum >= replicationNum && replicationNum > 1) {
int aliveBackendsNum = aliveBeIdsInCluster.size();
if (alive < replicationNum && replicas.size() >= aliveBackendsNum
&& aliveBackendsNum >= replicationNum && replicationNum > 1) {
// there are not enough backends for us to create a new replica, so we have to delete an existing replica
// first, so that a backend becomes available for creating the new replica.
// And if there is only one replica, we will not handle it (it may need human intervention)
// condition explain:
// 1. alive < replicationNum: replica is missing or bad
// 2. replicas.size() >= availableBackendsNum: the existing replicas occupies all available backends
// 3. availableBackendsNum >= replicationNum: make sure after deleting, there will be at least one backend for new replica.
// 2. replicas.size() >= aliveBackendsNum: the existing replicas occupy all alive backends
// 3. aliveBackendsNum >= replicationNum: make sure after deleting, there will be at least one backend for the new replica.
// 4. replicationNum > 1: if replication num is set to 1, do not delete any replica, for safety reason
return Pair.create(TabletStatus.FORCE_REDUNDANT, TabletSchedCtx.Priority.VERY_HIGH);
} else if (alive < (replicationNum / 2) + 1) {
Expand All @@ -484,10 +486,23 @@ public Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriority(
}

// 3. replica is under relocating
if (stable < (replicationNum / 2) + 1) {
return Pair.create(TabletStatus.REPLICA_RELOCATING, TabletSchedCtx.Priority.NORMAL);
} else if (stable < replicationNum) {
return Pair.create(TabletStatus.REPLICA_RELOCATING, TabletSchedCtx.Priority.LOW);
if (stable < replicationNum) {
List<Long> replicaBeIds = replicas.stream()
.map(Replica::getBackendId).collect(Collectors.toList());
List<Long> availableBeIds = aliveBeIdsInCluster.stream()
.filter(systemInfoService::checkBackendAvailable)
.collect(Collectors.toList());
if (replicaBeIds.containsAll(availableBeIds)
&& availableBeIds.size() >= replicationNum
&& replicationNum > 1) { // No BE can be chosen to create a new replica
return Pair.create(TabletStatus.FORCE_REDUNDANT,
stable < (replicationNum / 2) + 1 ? TabletSchedCtx.Priority.NORMAL : TabletSchedCtx.Priority.LOW);
}
if (stable < (replicationNum / 2) + 1) {
return Pair.create(TabletStatus.REPLICA_RELOCATING, TabletSchedCtx.Priority.NORMAL);
} else if (stable < replicationNum) {
return Pair.create(TabletStatus.REPLICA_RELOCATING, TabletSchedCtx.Priority.LOW);
}
}

// 4. healthy replicas in cluster are not enough
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ private void checkTablets() {

db.readLock();
try {
int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true);
for (Table table : db.getTables()) {
if (!table.needSchedule()) {
continue;
Expand Down Expand Up @@ -239,7 +239,7 @@ private void checkTablets() {
partition.getVisibleVersion(),
partition.getVisibleVersionHash(),
olapTbl.getPartitionInfo().getReplicationNum(partition.getId()),
availableBackendsNum);
aliveBeIdsInCluster);

if (statusWithPrio.first == TabletStatus.HEALTHY) {
// Only set last status check time when status is healthy.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -799,11 +799,11 @@ public void finishCloneTask(CloneTask cloneTask, TFinishTaskRequest request)
throw new SchedException(Status.UNRECOVERABLE, "tablet does not exist");
}

int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true);
short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partitionId);
Pair<TabletStatus, TabletSchedCtx.Priority> pair = tablet.getHealthStatusWithPriority(
infoService, db.getClusterName(), visibleVersion, visibleVersionHash, replicationNum,
availableBackendsNum);
aliveBeIdsInCluster);
if (pair.first == TabletStatus.HEALTHY) {
throw new SchedException(Status.FINISHED, "tablet is healthy");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -495,13 +495,13 @@ private void scheduleTablet(TabletSchedCtx tabletCtx, AgentBatchTask batchTask)
statusPair = Pair.create(st, Priority.HIGH);
tabletCtx.setColocateGroupBackendIds(backendsSet);
} else {
int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true);
statusPair = tablet.getHealthStatusWithPriority(
infoService, tabletCtx.getCluster(),
partition.getVisibleVersion(),
partition.getVisibleVersionHash(),
tbl.getPartitionInfo().getReplicationNum(partition.getId()),
availableBackendsNum);
aliveBeIdsInCluster);
}

if (tabletCtx.getType() == TabletSchedCtx.Type.BALANCE && tableState != OlapTableState.NORMAL) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@

package org.apache.doris.common.proc;

import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Multimap;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.MaterializedIndex;
Expand All @@ -34,19 +38,13 @@
import org.apache.doris.system.SystemInfoService;
import org.apache.doris.task.AgentTaskQueue;
import org.apache.doris.thrift.TTaskType;

import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Multimap;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

public class StatisticProcDir implements ProcDirInterface {
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>()
.add("DbId").add("DbName").add("TableNum").add("PartitionNum")
Expand Down Expand Up @@ -108,7 +106,7 @@ public ProcResult fetchResult() throws AnalysisException {
}

++totalDbNum;
int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true);
db.readLock();
try {
int dbTableNum = 0;
Expand Down Expand Up @@ -137,7 +135,7 @@ public ProcResult fetchResult() throws AnalysisException {
Pair<TabletStatus, Priority> res = tablet.getHealthStatusWithPriority(
infoService, db.getClusterName(),
partition.getVisibleVersion(), partition.getVisibleVersionHash(),
replicationNum, availableBackendsNum);
replicationNum, aliveBeIdsInCluster);

// here we treat REDUNDANT as HEALTHY, for user friendly.
if (res.first != TabletStatus.HEALTHY && res.first != TabletStatus.REDUNDANT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1027,10 +1027,10 @@ private static void addReplica(long tabletId, TTabletInfo backendTabletInfo, lon
return;
}

int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true);
Pair<TabletStatus, TabletSchedCtx.Priority> status = tablet.getHealthStatusWithPriority(infoService,
db.getClusterName(), visibleVersion, visibleVersionHash,
replicationNum, availableBackendsNum);
replicationNum, aliveBeIdsInCluster);

if (status.first == TabletStatus.VERSION_INCOMPLETE || status.first == TabletStatus.REPLICA_MISSING) {
long lastFailedVersion = -1L;
Expand Down