-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Skip shard refreshes if shard is search idle
#27500
Changes from 3 commits
561546c
a50cd19
6e6edf2
67eea42
52fc286
47dd97f
046c6be
69583a0
c0e4176
ca92314
b2f77a4
20f5c21
9a18fd2
bdf9a7e
d8266d3
e99a136
9b84786
dc52ff5
88a8ec0
1e1d739
dd97797
cba052b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ | |
import org.elasticsearch.common.Nullable; | ||
import org.elasticsearch.common.logging.LoggerMessageFormat; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.common.util.concurrent.AbstractRunnable; | ||
import org.elasticsearch.index.shard.ShardId; | ||
import org.elasticsearch.threadpool.ThreadPool; | ||
import org.elasticsearch.transport.TransportChannel; | ||
|
@@ -47,6 +48,8 @@ | |
import org.elasticsearch.transport.TransportService; | ||
|
||
import java.io.IOException; | ||
import java.io.UncheckedIOException; | ||
import java.util.concurrent.Executor; | ||
import java.util.function.Supplier; | ||
|
||
import static org.elasticsearch.action.support.TransportActions.isShardNotAvailableException; | ||
|
@@ -78,7 +81,7 @@ protected TransportSingleShardAction(Settings settings, String actionName, Threa | |
if (!isSubAction()) { | ||
transportService.registerRequestHandler(actionName, request, ThreadPool.Names.SAME, new TransportHandler()); | ||
} | ||
transportService.registerRequestHandler(transportShardAction, request, executor, new ShardTransportHandler()); | ||
transportService.registerRequestHandler(transportShardAction, request, ThreadPool.Names.SAME, new ShardTransportHandler()); | ||
} | ||
|
||
/** | ||
|
@@ -97,6 +100,19 @@ protected void doExecute(Request request, ActionListener<Response> listener) { | |
|
||
protected abstract Response shardOperation(Request request, ShardId shardId) throws IOException; | ||
|
||
protected void shardOperation(Request request, ShardId shardId, ActionListener<Response> listener) throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe call this asyncShardOperation to avoid confusion? also, can you please java doc the fact that this is still called on the networking thread? |
||
threadPool.executor(this.executor).execute(new AbstractRunnable() { | ||
@Override | ||
public void onFailure(Exception e) { | ||
listener.onFailure(e); | ||
} | ||
|
||
@Override | ||
protected void doRun() throws Exception { | ||
listener.onResponse(shardOperation(request, shardId)); | ||
} | ||
}); | ||
} | ||
protected abstract Response newResponse(); | ||
|
||
protected abstract boolean resolveIndex(Request request); | ||
|
@@ -291,11 +307,27 @@ public void messageReceived(final Request request, final TransportChannel channe | |
if (logger.isTraceEnabled()) { | ||
logger.trace("executing [{}] on shard [{}]", request, request.internalShardId); | ||
} | ||
Response response = shardOperation(request, request.internalShardId); | ||
channel.sendResponse(response); | ||
shardOperation(request, request.internalShardId, new ActionListener<Response>() { | ||
@Override | ||
public void onResponse(Response response) { | ||
try { | ||
channel.sendResponse(response); | ||
} catch (IOException e) { | ||
onFailure(e); | ||
} | ||
} | ||
|
||
@Override | ||
public void onFailure(Exception e) { | ||
try { | ||
channel.sendResponse(e); | ||
} catch (IOException e1) { | ||
throw new UncheckedIOException(e1); | ||
} | ||
} | ||
}); | ||
} | ||
} | ||
|
||
/** | ||
* Internal request class that gets built on each node. Holds the original request plus additional info. | ||
*/ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
|
||
package org.elasticsearch.action.termvectors; | ||
|
||
import org.elasticsearch.action.ActionListener; | ||
import org.elasticsearch.action.RoutingMissingException; | ||
import org.elasticsearch.action.support.ActionFilters; | ||
import org.elasticsearch.action.support.single.shard.TransportSingleShardAction; | ||
|
@@ -37,6 +38,8 @@ | |
import org.elasticsearch.threadpool.ThreadPool; | ||
import org.elasticsearch.transport.TransportService; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* Performs the get operation. | ||
*/ | ||
|
@@ -82,6 +85,19 @@ protected void resolveRequest(ClusterState state, InternalRequest request) { | |
} | ||
} | ||
|
||
@Override | ||
protected void shardOperation(TermVectorsRequest request, ShardId shardId, ActionListener<TermVectorsResponse> listener) throws IOException { | ||
IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex()); | ||
IndexShard indexShard = indexService.getShard(shardId.id()); | ||
indexShard.awaitPendingRefresh(b -> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comment here w.r.t real time requests. |
||
try { | ||
super.shardOperation(request, shardId, listener); | ||
} catch (IOException ex) { | ||
listener.onFailure(ex); | ||
} | ||
}); | ||
} | ||
|
||
@Override | ||
protected TermVectorsResponse shardOperation(TermVectorsRequest request, ShardId shardId) { | ||
IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -234,6 +234,9 @@ Runnable getGlobalCheckpointSyncer() { | |
*/ | ||
private final RefreshListeners refreshListeners; | ||
|
||
private final AtomicLong lastSearcherAccess = new AtomicLong(); | ||
private final AtomicReference<Translog.Location> pendingRefreshLocation = new AtomicReference<>(); | ||
|
||
public IndexShard( | ||
ShardRouting shardRouting, | ||
IndexSettings indexSettings, | ||
|
@@ -298,6 +301,7 @@ public IndexShard( | |
searcherWrapper = indexSearcherWrapper; | ||
primaryTerm = indexSettings.getIndexMetaData().primaryTerm(shardId.id()); | ||
refreshListeners = buildRefreshListeners(); | ||
lastSearcherAccess.set(threadPool.relativeTimeInMillis()); | ||
persistMetadata(path, indexSettings, shardRouting, null, logger); | ||
} | ||
|
||
|
@@ -1120,6 +1124,7 @@ public Engine.Searcher acquireSearcher(String source) { | |
|
||
private Engine.Searcher acquireSearcher(String source, Engine.SearcherScope scope) { | ||
readAllowed(); | ||
lastSearcherAccess.lazySet(threadPool.relativeTimeInMillis()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I spent some cycles reading this and convincing my self that we can do this regardless of the searcher scope - i.e., that if we end up here with an internal searcher it will be OK (it is!). I would personally think it will be simpler to follow if we mark access in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we have to do this in here otherwise we can't make a decision if a shard is idle or not. sorry your comment doesn't make sense with respect to the engine. the engine is not involved here |
||
final Engine engine = getEngine(); | ||
final Engine.Searcher searcher = engine.acquireSearcher(source, scope); | ||
boolean success = false; | ||
|
@@ -2418,15 +2423,69 @@ EngineFactory getEngineFactory() { | |
return engineFactory; | ||
} | ||
|
||
/** | ||
* Executes a scheduled refresh if necessary | ||
*/ | ||
public boolean scheduledRefresh() { | ||
if (isReadAllowed() && isRefreshNeeded()) { | ||
if (refreshListeners.refreshNeeded() == false // if we have a listener that is waiting for a refresh we need to force it | ||
&& isSearchIdle() && indexSettings.isExplicitRefresh() == false) { | ||
// lets skip this refresh since we are search idle and | ||
// don't necessarily need to refresh. the next search execute cause a | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: truncated comment |
||
setRefreshPending(); | ||
return false; | ||
} else { | ||
refresh("schedule"); | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
/** | ||
* Returns <code>true</code> iff one or more changes to the engine are not visible to via the current searcher *or* there are pending | ||
* refresh listeners. | ||
* Otherwise <code>false</code>. | ||
* | ||
* @throws AlreadyClosedException if the engine or internal indexwriter in the engine is already closed | ||
*/ | ||
public boolean isRefreshNeeded() { | ||
return getEngine().refreshNeeded() || (refreshListeners != null && refreshListeners.refreshNeeded()); | ||
final boolean isRefreshNeeded() { | ||
return refreshListeners.refreshNeeded() // lets check the cheaper one first | ||
|| getEngine().refreshNeeded(); | ||
} | ||
|
||
/** | ||
* Returns true if this shards is search idle | ||
*/ | ||
final boolean isSearchIdle() { | ||
return (threadPool.relativeTimeInMillis() - lastSearcherAccess.get()) >= indexSettings.getSearchIdleAfter().getMillis(); | ||
} | ||
|
||
private void setRefreshPending() { | ||
Engine engine = getEngine(); | ||
if (isSearchIdle()) { | ||
acquireSearcher("setRefreshPending").close(); // move the shard into non-search idle | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's a bug I removed it and added a test |
||
} | ||
Translog.Location lastWriteLocation = engine.getTranslog().getLastWriteLocation(); | ||
Translog.Location location; | ||
do { | ||
location = this.pendingRefreshLocation.get(); | ||
if (location != null && lastWriteLocation.compareTo(location) <= 0) { | ||
break; | ||
} | ||
} while (pendingRefreshLocation.compareAndSet(location, lastWriteLocation) == false); | ||
} | ||
|
||
public void awaitPendingRefresh(Consumer<Boolean> listener) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add javadocs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: maybe call this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I disagree actually, we are waiting for the pending refresh so I think it's a good name? |
||
final Translog.Location location = pendingRefreshLocation.get(); | ||
if (location != null) { | ||
addRefreshListener(location, (b) -> { | ||
pendingRefreshLocation.compareAndSet(location, null); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we move this part to the setRefreshPending method? this will come at the expense of a dedicated listener but all the code that changes pendingRefreshLocation will be in one place making it easier figure out. |
||
listener.accept(true); | ||
}); | ||
} else { | ||
listener.accept(false); | ||
} | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm on the fence as to whether we should only do this on non-realtime get. Real time gets don't really relate to refresh cycles (they force a refresh if needed). They are already "efficient" in the sense that they only refresh if they need to (i.e., there's a pending doc change in the version map).