-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-18700][SQL] Add StripedLock for each table's relation in cache #16135
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8718ec3
a1d9a3c
95aabb8
82cf00e
276656e
16c47c5
5beccaa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| package org.apache.spark.sql.hive | ||
|
|
||
| import java.io.File | ||
| import java.util.concurrent.{Executors, TimeUnit} | ||
|
|
||
| import org.scalatest.BeforeAndAfterEach | ||
|
|
||
|
|
@@ -352,4 +353,34 @@ class PartitionedTablePerfStatsSuite | |
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-18700: table loaded only once even when resolved concurrently") { | ||
| withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "false") { | ||
| withTable("test") { | ||
| withTempDir { dir => | ||
| HiveCatalogMetrics.reset() | ||
| setupPartitionedHiveTable("test", dir, 50) | ||
| // Run the same query against the table from 10 tasks on a 10-thread fixed pool. | ||
| val executorPool = Executors.newFixedThreadPool(10) | ||
| (1 to 10).map(threadId => { | ||
| val runnable = new Runnable { | ||
| override def run(): Unit = { | ||
| spark.sql("select * from test where partCol1 = 999").count() | ||
| } | ||
| } | ||
| executorPool.execute(runnable) | ||
| None | ||
| }) | ||
| executorPool.shutdown() | ||
| executorPool.awaitTermination(30, TimeUnit.SECONDS) | ||
| // Check for cache hits via METRIC_FILES_DISCOVERED and METRIC_PARALLEL_LISTING_JOB_COUNT: | ||
| // when the lock takes effect, only one thread should actually build the relation. The | ||
| // asserts expect 2 listing jobs and 100 files (2 * 50, presumably the partition count). | ||
| // NOTE(review): reviewer ties the extra counts to a save() API scan, not cache.load - confirm. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment is wrong. The extra counts are from the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Working on a fix to avoid the useless filesystem scan caused by the save() API.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gatorsmile Xiao fixed this in #16481 |
||
| assert(HiveCatalogMetrics.METRIC_FILES_DISCOVERED.getCount() == 100) | ||
| assert(HiveCatalogMetrics.METRIC_PARALLEL_LISTING_JOB_COUNT.getCount() == 2) | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: "These locks guard against multiple attempts to instantiate a table, which wastes memory."
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fix done