-
Notifications
You must be signed in to change notification settings - Fork 99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Tuning for Solr to improve indexing latency #529
Changes from all commits
fb30621
12db23c
3879321
8d766bf
7e5ecb4
4574e4b
4748453
93f98d5
ca5d87a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,8 +32,9 @@ object SolrConfig extends Configurable { | |
def socketTimeout = getInt("socketTimeout", 1000) | ||
def connectionTimeout = getInt("connectionTimeout", 5000) | ||
def maxTotalConnections = getInt("maxTotalConnections", 100) | ||
def commitWithin = getInt("commitWithinMs", 50) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there any docs explaining what commitWithinMs is about? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep, its discussed in bullet 3 in the description of the PR, but also here |
||
def defaultMaxConnectionsPerHost = getInt("defaultMaxConnectionsPerHost", 100) | ||
def assetBatchUpdateWindow = getInt("assetBatchUpdateWindowMs", 10) milliseconds | ||
def assetBatchUpdateWindow = getInt("assetBatchUpdateWindowMs", 30) milliseconds | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. where did these magic numbers come from? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same place the first numbers came from (the ether). its discussed in the description of the PR why these numbers were chosen as a default |
||
|
||
override protected def validateConfig() { | ||
if (enabled) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -102,7 +102,7 @@ object SolrHelper { | |
logger.debug("Populating Asset Logs") | ||
val num = assets.map { asset => | ||
val logs = AssetLog.findByAsset(asset) | ||
updateAssetLogs(logs, indexTime, false) | ||
updateAssetLogs(logs, indexTime) | ||
logs.size | ||
}.sum | ||
_server.foreach { _.commit() } | ||
|
@@ -111,35 +111,30 @@ object SolrHelper { | |
}.getOrElse(logger.warn("attempted to populate solr when no server was initialized")) | ||
} | ||
|
||
def updateItems[T](items: Seq[T], serializer: SolrSerializer[T], indexTime: Date, commit: Boolean = true) { | ||
def updateItems[T](items: Seq[T], serializer: SolrSerializer[T], indexTime: Date) = { | ||
_server.map { server => | ||
val docs = items.map { item => prepForInsertion(serializer.serialize(item, indexTime)) } | ||
if (docs.size > 0) { | ||
val fuckingJava = new java.util.ArrayList[SolrInputDocument] | ||
docs.foreach { doc => fuckingJava.add(doc) } | ||
server.add(fuckingJava) | ||
if (commit) { | ||
server.commit() | ||
if (items.size == 1) { | ||
logger.debug(("Indexed %s: %s".format(serializer.docType.name.toLowerCase, items.head.toString))) | ||
} else { | ||
logger.info("Indexed %d %ss".format(docs.size, serializer.docType.name.toLowerCase)) | ||
} | ||
} | ||
server.add(fuckingJava, SolrConfig.commitWithin) | ||
logger.debug("Added %d %s documents to be indexed within %d ms".format( | ||
fuckingJava.size, | ||
serializer.docType.name.toLowerCase, | ||
SolrConfig.commitWithin)) | ||
// dont explicitly hard commit, let solr figure it out and make docs available | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are there any downsides to doing this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no, because a hard commit isnt actually synchronous because this is happening via actor in background thread |
||
// to be searched ASAP. commit(boolean waitFlush, boolean waitSearcher, boolean softCommit) | ||
server.commit(false, false, true) | ||
} else { | ||
logger.warn("No items to index!") | ||
} | ||
} | ||
|
||
} | ||
|
||
def updateAssets(assets: Seq[Asset], indexTime: Date, commit: Boolean = true) { | ||
updateItems[Asset](assets, AssetSerializer, indexTime, commit) | ||
} | ||
def updateAssets(assets: Seq[Asset], indexTime: Date) = updateItems[Asset](assets, AssetSerializer, indexTime) | ||
|
||
def updateAssetLogs(logs: Seq[AssetLog], indexTime: Date, commit: Boolean = true) { | ||
updateItems[AssetLog](logs, AssetLogSerializer, indexTime, commit) | ||
} | ||
def updateAssetLogs(logs: Seq[AssetLog], indexTime: Date) = updateItems[AssetLog](logs, AssetLogSerializer, indexTime) | ||
|
||
def terminateSolr() { | ||
_server.foreach { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there an issue/ticket to fix this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#528
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cc: @evanelias
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So this could break things if we create a config asset and then immediately perform sets on it? I think jetpants may do that in some cases but can't recall by memory. What exactly would break / what are the implications / is this a new problem?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@evanelias this isn't new; it's been lurking since inception. Implications are that the tag set will fall (can't find asset to modify in solr index) until solr populates. Not cataclysmic, but definitely not a great consistency model to expose