Commit bb0b2d2
[SPARK-51821][CORE] Call interrupt() without holding uninterruptibleLock to avoid possible deadlock
### What changes were proposed in this pull request?
Do not hold `uninterruptibleLock` monitor while calling `super.interrupt()` in `UninterruptibleThread`, instead use newly introduced `awaitInterruptThread` flag and wait for `super.interrupt()` to be called.
### Why are the changes needed?
There is potential deadlock as `UninterruptibleThread` may be blocked on NIO operation and interrupting channel while holding `uninterruptibleLock` monitor may cause deadlock like in
```
Found one Java-level deadlock:
=============================
"pool-1-thread-1-ScalaTest-running-UninterruptibleThreadSuite":
waiting to lock monitor 0x00006000036ee3c0 (object 0x000000070f3019d0, a java.lang.Object),
which is held by "task thread"
"task thread":
waiting to lock monitor 0x00006000036e75a0 (object 0x000000070f70fe80, a java.lang.Object),
which is held by "pool-1-thread-1-ScalaTest-running-UninterruptibleThreadSuite"
Java stack information for the threads listed above:
===================================================
"pool-1-thread-1-ScalaTest-running-UninterruptibleThreadSuite":
at java.nio.channels.spi.AbstractInterruptibleChannel$1.interrupt(java.base17.0.14/AbstractInterruptibleChannel.java:157)
- waiting to lock <0x000000070f3019d0> (a java.lang.Object)
at java.lang.Thread.interrupt(java.base17.0.14/Thread.java:1004)
- locked <0x000000070f70fc90> (a java.lang.Object)
at org.apache.spark.util.UninterruptibleThread.interrupt(UninterruptibleThread.scala:99)
- locked <0x000000070f70fe80> (a java.lang.Object)
at org.apache.spark.util.UninterruptibleThreadSuite.$anonfun$new$5(UninterruptibleThreadSuite.scala:159)
- locked <0x000000070f70f9f8> (a java.lang.Object)
at org.apache.spark.util.UninterruptibleThreadSuite$$Lambda$216/0x000000700120d6c8.apply$mcV$sp(Unknown Source)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at org.scalatest.enablers.Timed$$anon$1.timeoutAfter(Timed.scala:127)
at org.scalatest.concurrent.TimeLimits$.failAfterImpl(TimeLimits.scala:282)
at org.scalatest.concurrent.TimeLimits.failAfter(TimeLimits.scala:231)
at org.scalatest.concurrent.TimeLimits.failAfter$(TimeLimits.scala:230)
at org.apache.spark.SparkFunSuite.failAfter(SparkFunSuite.scala:69)
at org.apache.spark.SparkFunSuite.$anonfun$test$2(SparkFunSuite.scala:155)
at org.apache.spark.SparkFunSuite$$Lambda$205/0x0000007001207700.apply(Unknown Source)
at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
at org.scalatest.Transformer.apply(Transformer.scala:22)
at org.scalatest.Transformer.apply(Transformer.scala:20)
at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:226)
at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:227)
at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:224)
at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:236)
at org.scalatest.funsuite.AnyFunSuiteLike$$Lambda$343/0x00000070012867b0.apply(Unknown Source)
at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:236)
at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:218)
at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:69)
at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234)
at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227)
at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:69)
at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:269)
at org.scalatest.funsuite.AnyFunSuiteLike$$Lambda$339/0x00000070012833e0.apply(Unknown Source)
at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
at org.scalatest.SuperEngine$$Lambda$340/0x0000007001283998.apply(Unknown Source)
at scala.collection.immutable.List.foreach(List.scala:334)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:269)
at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:268)
at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1564)
at org.scalatest.Suite.run(Suite.scala:1114)
at org.scalatest.Suite.run$(Suite.scala:1096)
at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1564)
at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:273)
at org.scalatest.funsuite.AnyFunSuiteLike$$Lambda$332/0x000000700127b000.apply(Unknown Source)
at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:273)
at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:272)
at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:69)
at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:69)
at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:321)
at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:517)
at sbt.ForkMain$Run.lambda$runTest$1(ForkMain.java:414)
at sbt.ForkMain$Run$$Lambda$107/0x0000007001110000.call(Unknown Source)
at java.util.concurrent.FutureTask.run(java.base17.0.14/FutureTask.java:264)
at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base17.0.14/ThreadPoolExecutor.java:1136)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base17.0.14/ThreadPoolExecutor.java:635)
at java.lang.Thread.run(java.base17.0.14/Thread.java:840)
"task thread":
at org.apache.spark.util.UninterruptibleThread.interrupt(UninterruptibleThread.scala:96)
- waiting to lock <0x000000070f70fe80> (a java.lang.Object)
at org.apache.spark.util.UninterruptibleThreadSuite$InterruptibleChannel.implCloseChannel(UninterruptibleThreadSuite.scala:143)
at java.nio.channels.spi.AbstractInterruptibleChannel.close(java.base17.0.14/AbstractInterruptibleChannel.java:112)
- locked <0x000000070f3019d0> (a java.lang.Object)
at org.apache.spark.util.UninterruptibleThreadSuite$InterruptibleChannel.<init>(UninterruptibleThreadSuite.scala:138)
at org.apache.spark.util.UninterruptibleThreadSuite$$anon$5.run(UninterruptibleThreadSuite.scala:153)
Found 1 deadlock.
```
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Added 2 new test cases to the `UninterruptibleThreadSuite`
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #50594 from vrozov/uninterruptible.
Authored-by: Vlad Rozov <vrozov@amazon.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>1 parent 9178c2c commit bb0b2d2
File tree
2 files changed
+149
-33
lines changed- core/src
- main/scala/org/apache/spark/util
- test/scala/org/apache/spark/util
2 files changed
+149
-33
lines changedLines changed: 95 additions & 30 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
35 | 35 | | |
36 | 36 | | |
37 | 37 | | |
38 | | - | |
39 | | - | |
| 38 | + | |
| 39 | + | |
| 40 | + | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
40 | 45 | | |
41 | | - | |
42 | | - | |
43 | | - | |
44 | | - | |
45 | | - | |
46 | | - | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
47 | 51 | | |
48 | | - | |
49 | | - | |
50 | | - | |
51 | | - | |
52 | | - | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
| 59 | + | |
| 60 | + | |
| 61 | + | |
| 62 | + | |
| 63 | + | |
| 64 | + | |
| 65 | + | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
| 69 | + | |
| 70 | + | |
| 71 | + | |
| 72 | + | |
| 73 | + | |
| 74 | + | |
| 75 | + | |
| 76 | + | |
| 77 | + | |
| 78 | + | |
| 79 | + | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
| 83 | + | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
| 88 | + | |
| 89 | + | |
| 90 | + | |
| 91 | + | |
| 92 | + | |
| 93 | + | |
| 94 | + | |
| 95 | + | |
| 96 | + | |
| 97 | + | |
| 98 | + | |
| 99 | + | |
| 100 | + | |
| 101 | + | |
| 102 | + | |
| 103 | + | |
| 104 | + | |
| 105 | + | |
| 106 | + | |
| 107 | + | |
| 108 | + | |
| 109 | + | |
| 110 | + | |
| 111 | + | |
| 112 | + | |
| 113 | + | |
| 114 | + | |
| 115 | + | |
| 116 | + | |
| 117 | + | |
| 118 | + | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
53 | 122 | | |
54 | 123 | | |
55 | 124 | | |
| |||
63 | 132 | | |
64 | 133 | | |
65 | 134 | | |
66 | | - | |
| 135 | + | |
67 | 136 | | |
68 | 137 | | |
69 | 138 | | |
70 | 139 | | |
71 | | - | |
72 | | - | |
73 | | - | |
74 | | - | |
| 140 | + | |
| 141 | + | |
| 142 | + | |
| 143 | + | |
| 144 | + | |
| 145 | + | |
75 | 146 | | |
| 147 | + | |
76 | 148 | | |
77 | 149 | | |
78 | 150 | | |
79 | | - | |
80 | | - | |
81 | | - | |
82 | | - | |
83 | | - | |
84 | | - | |
85 | | - | |
86 | | - | |
| 151 | + | |
87 | 152 | | |
88 | 153 | | |
89 | 154 | | |
| |||
92 | 157 | | |
93 | 158 | | |
94 | 159 | | |
95 | | - | |
96 | | - | |
97 | | - | |
98 | | - | |
| 160 | + | |
| 161 | + | |
99 | 162 | | |
| 163 | + | |
| 164 | + | |
100 | 165 | | |
101 | 166 | | |
102 | 167 | | |
| |||
Lines changed: 54 additions & 3 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
17 | 17 | | |
18 | 18 | | |
19 | 19 | | |
| 20 | + | |
20 | 21 | | |
21 | 22 | | |
22 | 23 | | |
| |||
115 | 116 | | |
116 | 117 | | |
117 | 118 | | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
| 122 | + | |
| 123 | + | |
| 124 | + | |
| 125 | + | |
| 126 | + | |
| 127 | + | |
| 128 | + | |
| 129 | + | |
| 130 | + | |
| 131 | + | |
| 132 | + | |
| 133 | + | |
| 134 | + | |
| 135 | + | |
| 136 | + | |
| 137 | + | |
| 138 | + | |
| 139 | + | |
| 140 | + | |
| 141 | + | |
| 142 | + | |
| 143 | + | |
| 144 | + | |
| 145 | + | |
| 146 | + | |
| 147 | + | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
| 152 | + | |
| 153 | + | |
| 154 | + | |
| 155 | + | |
| 156 | + | |
| 157 | + | |
118 | 158 | | |
119 | 159 | | |
120 | 160 | | |
| |||
148 | 188 | | |
149 | 189 | | |
150 | 190 | | |
151 | | - | |
152 | | - | |
153 | | - | |
| 191 | + | |
| 192 | + | |
| 193 | + | |
| 194 | + | |
| 195 | + | |
| 196 | + | |
| 197 | + | |
| 198 | + | |
| 199 | + | |
| 200 | + | |
| 201 | + | |
| 202 | + | |
| 203 | + | |
| 204 | + | |
154 | 205 | | |
155 | 206 | | |
156 | 207 | | |
| |||
0 commit comments