Skip to content

Commit 8ff4e2e

Browse files
author
David Roberts
committed
Periodically try to reassign unassigned persistent tasks
Previously, persistent task assignment was checked in the following situations:

- Persistent tasks are changed
- A node joins or leaves the cluster
- The routing table is changed
- Custom metadata in the cluster state is changed
- A new master node is elected

However, there could be situations where a persistent task that could not be assigned to a node becomes assignable due to some other change, such as memory usage on the nodes. This change adds a timed recheck of persistent task assignment to account for such situations. The timer is suspended while checks triggered by cluster state changes are in flight, to avoid adding burden to an already busy cluster.

Closes #35792
1 parent b6eb73a commit 8ff4e2e

File tree

6 files changed

+262
-30
lines changed

6 files changed

+262
-30
lines changed

docs/reference/modules/cluster/misc.asciidoc

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,10 @@ Plugins can create a kind of tasks called persistent tasks. Those tasks are
130130
usually long-lived tasks and are stored in the cluster state, allowing the
131131
tasks to be revived after a full cluster restart.
132132

133-
Every time a persistent task is created, the master nodes takes care of
133+
Every time a persistent task is created, the master node takes care of
134134
assigning the task to a node of the cluster, and the assigned node will then
135135
pick up the task and execute it locally. The process of assigning persistent
136-
tasks to nodes is controlled by the following property, which can be updated
136+
tasks to nodes is controlled by the following properties, which can be updated
137137
dynamically:
138138

139139
`cluster.persistent_tasks.allocation.enable`::
@@ -148,3 +148,13 @@ This setting does not affect the persistent tasks that are already being execute
148148
Only newly created persistent tasks, or tasks that must be reassigned (after a node
149149
left the cluster, for example), are impacted by this setting.
150150
--
151+
152+
`cluster.persistent_tasks.allocation.recheck_interval`::
153+
154+
The master node will automatically check whether persistent tasks need to
155+
be assigned when the cluster state changes significantly. However, there
156+
may be other factors, such as memory usage, that affect whether persistent
157+
tasks can be assigned to nodes but do not cause the cluster state to change.
158+
This setting controls how often assignment checks are performed to react to
159+
these factors. The default is 30 seconds. The minimum permitted value is 10
160+
seconds.

server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
import org.elasticsearch.monitor.os.OsService;
8181
import org.elasticsearch.monitor.process.ProcessService;
8282
import org.elasticsearch.node.Node;
83+
import org.elasticsearch.persistent.PersistentTasksClusterService;
8384
import org.elasticsearch.persistent.decider.EnableAssignmentDecider;
8485
import org.elasticsearch.plugins.PluginsService;
8586
import org.elasticsearch.repositories.fs.FsRepository;
@@ -443,6 +444,7 @@ public void apply(Settings value, Settings current, Settings previous) {
443444
Node.BREAKER_TYPE_KEY,
444445
OperationRouting.USE_ADAPTIVE_REPLICA_SELECTION_SETTING,
445446
IndexGraveyard.SETTING_MAX_TOMBSTONES,
447+
PersistentTasksClusterService.CLUSTER_TASKS_ALLOCATION_RECHECK_INTERVAL_SETTING,
446448
EnableAssignmentDecider.CLUSTER_TASKS_ALLOCATION_ENABLE_SETTING
447449
)));
448450

server/src/main/java/org/elasticsearch/node/Node.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ protected Node(
499499

500500
final PersistentTasksExecutorRegistry registry = new PersistentTasksExecutorRegistry(tasksExecutors);
501501
final PersistentTasksClusterService persistentTasksClusterService =
502-
new PersistentTasksClusterService(settings, registry, clusterService);
502+
new PersistentTasksClusterService(settings, registry, clusterService, threadPool);
503503
final PersistentTasksService persistentTasksService = new PersistentTasksService(clusterService, threadPool, client);
504504

505505
modules.add(b -> {

server/src/main/java/org/elasticsearch/persistent/PersistentTasksClusterService.java

Lines changed: 111 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,30 +31,54 @@
3131
import org.elasticsearch.cluster.metadata.MetaData;
3232
import org.elasticsearch.cluster.node.DiscoveryNodes;
3333
import org.elasticsearch.cluster.service.ClusterService;
34+
import org.elasticsearch.common.settings.Setting;
3435
import org.elasticsearch.common.settings.Settings;
36+
import org.elasticsearch.common.unit.TimeValue;
37+
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
38+
import org.elasticsearch.common.util.concurrent.FutureUtils;
3539
import org.elasticsearch.persistent.PersistentTasksCustomMetaData.Assignment;
3640
import org.elasticsearch.persistent.PersistentTasksCustomMetaData.PersistentTask;
3741
import org.elasticsearch.persistent.decider.AssignmentDecision;
3842
import org.elasticsearch.persistent.decider.EnableAssignmentDecider;
43+
import org.elasticsearch.threadpool.ThreadPool;
3944

4045
import java.util.Objects;
46+
import java.util.concurrent.Future;
4147

4248
/**
4349
* Component that runs only on the master node and is responsible for assigning running tasks to nodes
4450
*/
4551
public class PersistentTasksClusterService implements ClusterStateListener {
4652

53+
/**
 * Dynamic, node-scoped setting that controls how often the assignment of persistent tasks
 * is rechecked. Defaults to 30 seconds; values below 10 seconds are not permitted.
 */
public static final Setting<TimeValue> CLUSTER_TASKS_ALLOCATION_RECHECK_INTERVAL_SETTING = Setting.timeSetting(
    "cluster.persistent_tasks.allocation.recheck_interval",
    TimeValue.timeValueSeconds(30),   // default
    TimeValue.timeValueSeconds(10),   // minimum
    Setting.Property.Dynamic,
    Setting.Property.NodeScope);
56+
4757
private static final Logger logger = LogManager.getLogger(PersistentTasksClusterService.class);
4858

4959
private final ClusterService clusterService;
5060
private final PersistentTasksExecutorRegistry registry;
5161
private final EnableAssignmentDecider decider;
62+
private final ThreadPool threadPool;
63+
private final PeriodicRechecker periodicRechecker;
64+
private volatile TimeValue recheckInterval;
5265

53-
public PersistentTasksClusterService(Settings settings, PersistentTasksExecutorRegistry registry, ClusterService clusterService) {
66+
/**
 * Creates the service, wires it up as a cluster state listener and subscribes it to dynamic
 * updates of {@link #CLUSTER_TASKS_ALLOCATION_RECHECK_INTERVAL_SETTING}.
 *
 * @param settings       node settings; used for the initial recheck interval and the assignment decider
 * @param registry       registry of persistent task executors
 * @param clusterService cluster service used to listen for and submit cluster state updates
 * @param threadPool     thread pool on which the periodic recheck is scheduled
 */
public PersistentTasksClusterService(Settings settings, PersistentTasksExecutorRegistry registry, ClusterService clusterService,
                                     ThreadPool threadPool) {
    this.clusterService = clusterService;
    this.registry = registry;
    this.decider = new EnableAssignmentDecider(settings, clusterService.getClusterSettings());
    this.threadPool = threadPool;
    this.recheckInterval = CLUSTER_TASKS_ALLOCATION_RECHECK_INTERVAL_SETTING.get(settings);
    this.periodicRechecker = new PeriodicRechecker();

    // Register the callbacks only after every field has been initialised: both clusterChanged()
    // and setRecheckInterval() dereference periodicRechecker, so letting "this" escape earlier
    // could expose a partially constructed instance to a callback.
    clusterService.addListener(this);
    clusterService.getClusterSettings().addSettingsUpdateConsumer(CLUSTER_TASKS_ALLOCATION_RECHECK_INTERVAL_SETTING,
        this::setRecheckInterval);
}
78+
79+
void setRecheckInterval(TimeValue recheckInterval) {
80+
this.recheckInterval = recheckInterval;
81+
periodicRechecker.rescheduleIfScheduled();
5882
}
5983

6084
/**
@@ -241,24 +265,36 @@ private <Params extends PersistentTaskParams> Assignment createAssignment(final
241265

242266
@Override
243267
public void clusterChanged(ClusterChangedEvent event) {
268+
periodicRechecker.cancel();
244269
if (event.localNodeMaster()) {
245270
if (shouldReassignPersistentTasks(event)) {
246-
logger.trace("checking task reassignment for cluster state {}", event.state().getVersion());
247-
clusterService.submitStateUpdateTask("reassign persistent tasks", new ClusterStateUpdateTask() {
248-
@Override
249-
public ClusterState execute(ClusterState currentState) {
250-
return reassignTasks(currentState);
251-
}
252-
253-
@Override
254-
public void onFailure(String source, Exception e) {
255-
logger.warn("failed to reassign persistent tasks", e);
256-
}
257-
});
271+
reassignPersistentTasks(event.state().getVersion());
272+
} else {
273+
periodicRechecker.schedule();
258274
}
259275
}
260276
}
261277

278+
/**
279+
* Submit a cluster state update to reassign any persistent tasks that need reassigning
280+
*/
281+
private void reassignPersistentTasks(long currentStateVersion) {
282+
logger.trace("checking task reassignment for cluster state {}", currentStateVersion);
283+
clusterService.submitStateUpdateTask("reassign persistent tasks", new ClusterStateUpdateTask() {
284+
@Override
285+
public ClusterState execute(ClusterState currentState) {
286+
ClusterState newState = reassignTasks(currentState);
287+
periodicRechecker.schedule();
288+
return newState;
289+
}
290+
291+
@Override
292+
public void onFailure(String source, Exception e) {
293+
logger.warn("failed to reassign persistent tasks", e);
294+
}
295+
});
296+
}
297+
262298
/**
263299
* Returns true if the cluster state change(s) require to reassign some persistent tasks. It can happen in the following
264300
* situations: a node left or is added, the routing table changed, the master node changed, the metadata changed or the
@@ -278,12 +314,21 @@ boolean shouldReassignPersistentTasks(final ClusterChangedEvent event) {
278314
|| event.metaDataChanged()
279315
|| masterChanged) {
280316

281-
for (PersistentTask<?> task : tasks.tasks()) {
282-
if (needsReassignment(task.getAssignment(), event.state().nodes())) {
283-
Assignment assignment = createAssignment(task.getTaskName(), task.getParams(), event.state());
284-
if (Objects.equals(assignment, task.getAssignment()) == false) {
285-
return true;
286-
}
317+
return anyTaskNeedsReassignment(tasks, event.state());
318+
}
319+
return false;
320+
}
321+
322+
/**
323+
* Returns true if any persistent task provided requires reassignment,
324+
* i.e. is not assigned or is assigned to a non-existing node.
325+
*/
326+
private boolean anyTaskNeedsReassignment(final PersistentTasksCustomMetaData tasks, final ClusterState state) {
327+
for (PersistentTask<?> task : tasks.tasks()) {
328+
if (needsReassignment(task.getAssignment(), state.nodes())) {
329+
Assignment assignment = createAssignment(task.getTaskName(), task.getParams(), state);
330+
if (Objects.equals(assignment, task.getAssignment()) == false) {
331+
return true;
287332
}
288333
}
289334
}
@@ -347,4 +392,51 @@ private static ClusterState update(ClusterState currentState, PersistentTasksCus
347392
return currentState;
348393
}
349394
}
395+
396+
/**
397+
* Class to periodically try to reassign unassigned persistent tasks.
398+
*/
399+
private class PeriodicRechecker implements Runnable {
400+
401+
private volatile Future<?> nextRun;
402+
403+
void schedule() {
404+
try {
405+
synchronized (this) {
406+
FutureUtils.cancel(nextRun);
407+
nextRun = threadPool.schedule(recheckInterval, ThreadPool.Names.GENERIC, this);
408+
}
409+
} catch (EsRejectedExecutionException e) {
410+
logger.debug("could not schedule periodic persistent task assignment check", e);
411+
}
412+
}
413+
414+
synchronized void cancel() {
415+
FutureUtils.cancel(nextRun);
416+
nextRun = null;
417+
}
418+
419+
synchronized void rescheduleIfScheduled() {
420+
if (nextRun != null) {
421+
schedule();
422+
}
423+
}
424+
425+
@Override
426+
public void run() {
427+
synchronized (this) {
428+
nextRun = null;
429+
}
430+
if (clusterService.localNode().isMasterNode()) {
431+
logger.trace("periodic persistent task assignment check running");
432+
ClusterState state = clusterService.state();
433+
final PersistentTasksCustomMetaData tasks = state.getMetaData().custom(PersistentTasksCustomMetaData.TYPE);
434+
if (tasks != null && anyTaskNeedsReassignment(tasks, state)) {
435+
reassignPersistentTasks(state.getVersion());
436+
} else {
437+
schedule();
438+
}
439+
}
440+
}
441+
}
350442
}

0 commit comments

Comments
 (0)