Skip to content

Commit

Permalink
Fix opensearch-project#7972 SearchBackpressureIT flaky tests (opensea…
Browse files Browse the repository at this point in the history
…rch-project#8063)

* fix thread issue

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* fix thread issue

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* Fix thresholds

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* Swap to object based

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* Spotless

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* Swap to preserve nulls

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* Spotless

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* Resolve npe

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* remove final declerations

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* spotless

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* add annotations

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* push to rerun tests

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* Fix idea

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* Fix idea

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

---------

Signed-off-by: Stephen Crawford <steecraw@amazon.com>
Signed-off-by: Rishab Nahata <rnnahata@amazon.com>
  • Loading branch information
stephen-crawford authored and imRishN committed Jun 27, 2023
1 parent 77a3d4e commit 14b28b2
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ public void testSearchShardTaskCancellationWithHighElapsedTime() throws Interrup
public void testSearchTaskCancellationWithHighCpu() throws InterruptedException {
Settings request = Settings.builder()
.put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced")
.put(SearchTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 50)
.put(SearchTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 1000)
.build();
assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get());

Expand Down Expand Up @@ -182,7 +182,7 @@ public void testSearchTaskCancellationWithHighCpu() throws InterruptedException
public void testSearchShardTaskCancellationWithHighCpu() throws InterruptedException {
Settings request = Settings.builder()
.put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced")
.put(SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 50)
.put(SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 1000)
.build();
assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get());

Expand Down
85 changes: 48 additions & 37 deletions server/src/main/java/org/opensearch/tasks/CancellableTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@
package org.opensearch.tasks;

import org.opensearch.common.Nullable;
import org.opensearch.common.SetOnce;
import org.opensearch.common.unit.TimeValue;

import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;

import static org.opensearch.search.SearchService.NO_TIMEOUT;

Expand All @@ -47,17 +47,26 @@
*/
public abstract class CancellableTask extends Task {

private volatile String reason;
private final AtomicBoolean cancelled = new AtomicBoolean(false);
private static class CancelledInfo {
String reason;
/**
* The time this task was cancelled as a wall clock time since epoch ({@link System#currentTimeMillis()} style).
*/
Long cancellationStartTime;
/**
* The time this task was cancelled as a relative time ({@link System#nanoTime()} style).
*/
Long cancellationStartTimeNanos;

public CancelledInfo(String reason) {
this.reason = reason;
this.cancellationStartTime = System.currentTimeMillis();
this.cancellationStartTimeNanos = System.nanoTime();
}
}

private final SetOnce<CancelledInfo> cancelledInfo = new SetOnce<>();
private final TimeValue cancelAfterTimeInterval;
/**
* The time this task was cancelled as a wall clock time since epoch ({@link System#currentTimeMillis()} style).
*/
private Long cancellationStartTime = null;
/**
* The time this task was cancelled as a relative time ({@link System#nanoTime()} style).
*/
private Long cancellationStartTimeNanos = null;

public CancellableTask(long id, String type, String action, String description, TaskId parentTaskId, Map<String, String> headers) {
this(id, type, action, description, parentTaskId, headers, NO_TIMEOUT);
Expand All @@ -81,14 +90,29 @@ public CancellableTask(
*/
public void cancel(String reason) {
assert reason != null;
if (cancelled.compareAndSet(false, true)) {
this.cancellationStartTime = System.currentTimeMillis();
this.cancellationStartTimeNanos = System.nanoTime();
this.reason = reason;
if (cancelledInfo.trySet(new CancelledInfo(reason))) {
onCancelled();
}
}

public boolean isCancelled() {
return cancelledInfo.get() != null;
}

/**
* Returns true if this task can potentially have children that need to be cancelled when it parent is cancelled.
*/
public abstract boolean shouldCancelChildrenOnCancellation();

public TimeValue getCancellationTimeout() {
return cancelAfterTimeInterval;
}

/**
* Called after the task is cancelled so that it can take any actions that it has to take.
*/
protected void onCancelled() {}

/**
* Returns true if this task should be automatically cancelled if the coordinating node that
* requested this task left the cluster.
Expand All @@ -97,37 +121,24 @@ public boolean cancelOnParentLeaving() {
return true;
}

@Nullable
public Long getCancellationStartTime() {
return cancellationStartTime;
CancelledInfo info = cancelledInfo.get();
return (info != null) ? info.cancellationStartTime : null;
}

@Nullable
public Long getCancellationStartTimeNanos() {
return cancellationStartTimeNanos;
}

/**
* Returns true if this task can potentially have children that need to be cancelled when it parent is cancelled.
*/
public abstract boolean shouldCancelChildrenOnCancellation();

public boolean isCancelled() {
return cancelled.get();
}

public TimeValue getCancellationTimeout() {
return cancelAfterTimeInterval;
CancelledInfo info = cancelledInfo.get();
return (info != null) ? info.cancellationStartTimeNanos : null;
}

/**
* The reason the task was cancelled or null if it hasn't been cancelled.
*/
@Nullable
public final String getReasonCancelled() {
return reason;
public String getReasonCancelled() {
CancelledInfo info = cancelledInfo.get();
return (info != null) ? info.reason : null;
}

/**
* Called after the task is cancelled so that it can take any actions that it has to take.
*/
protected void onCancelled() {}
}

0 comments on commit 14b28b2

Please sign in to comment.