
Commit 5d0b2d3

Add task completion callback to avoid leak in limit after sort
1 parent: ea250da · commit: 5d0b2d3

2 files changed: 15 additions & 19 deletions


core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java

Lines changed: 14 additions & 0 deletions
@@ -20,6 +20,9 @@
 import java.io.IOException;
 import java.util.LinkedList;
 
+import scala.runtime.AbstractFunction0;
+import scala.runtime.BoxedUnit;
+
 import com.google.common.annotations.VisibleForTesting;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -90,6 +93,17 @@ public UnsafeExternalSorter(
     this.fileBufferSizeBytes = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024;
     this.pageSizeBytes = conf.getSizeAsBytes("spark.buffer.pageSize", "64m");
     initializeForWriting();
+
+    // Register a cleanup task with TaskContext to ensure that memory is guaranteed to be freed at
+    // the end of the task. This is necessary to avoid memory leaks when the downstream operator
+    // does not fully consume the sorter's output (e.g. sort followed by limit).
+    taskContext.addOnCompleteCallback(new AbstractFunction0<BoxedUnit>() {
+      @Override
+      public BoxedUnit apply() {
+        freeMemory();
+        return null;
+      }
+    });
   }
 
   // TODO: metrics tracking + integration with shuffle write metrics
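
The pattern this diff introduces is worth seeing in isolation: the operator registers a zero-argument cleanup callback when it is constructed, and the task context runs every registered callback once the task ends, whether or not downstream operators consumed the sorter's full output. Below is a minimal, self-contained sketch of that idea. ToyTaskContext, markTaskCompleted, and CompletionCallbackSketch are hypothetical stand-ins invented for illustration; only addOnCompleteCallback mirrors the TaskContext method used in the diff, and the sketch assumes scala-library is on the classpath for AbstractFunction0 and BoxedUnit.

import java.util.ArrayList;
import java.util.List;

import scala.runtime.AbstractFunction0;
import scala.runtime.BoxedUnit;

// Hypothetical stand-in for Spark's TaskContext: collects zero-argument
// callbacks and runs them all when the task finishes.
class ToyTaskContext {
  private final List<AbstractFunction0<BoxedUnit>> callbacks = new ArrayList<>();

  void addOnCompleteCallback(AbstractFunction0<BoxedUnit> callback) {
    callbacks.add(callback);
  }

  // Invoked by the "framework" at end-of-task, even if the task's output
  // iterator was abandoned partway through (e.g. by a limit).
  void markTaskCompleted() {
    for (AbstractFunction0<BoxedUnit> callback : callbacks) {
      callback.apply();
    }
  }
}

public class CompletionCallbackSketch {
  public static void main(String[] args) {
    ToyTaskContext taskContext = new ToyTaskContext();

    // As in the commit: register cleanup at construction time so the
    // sorter's resources are released at end-of-task no matter what.
    taskContext.addOnCompleteCallback(new AbstractFunction0<BoxedUnit>() {
      @Override
      public BoxedUnit apply() {
        System.out.println("freeMemory() called at task completion");
        return BoxedUnit.UNIT;
      }
    });

    // ... task runs; Limit(10, ...) may stop consuming the iterator early ...
    taskContext.markTaskCompleted();  // cleanup still fires
  }
}

In the real change the callback simply calls freeMemory(), which is why the test below, previously ignored pending an iterator interface with a close() method, can be re-enabled as a plain test.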

sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeExternalSortSuite.scala

Lines changed: 1 addition & 19 deletions
@@ -36,10 +36,7 @@ class UnsafeExternalSortSuite extends SparkPlanTest with BeforeAndAfterAll {
     TestSQLContext.conf.setConf(SQLConf.CODEGEN_ENABLED, SQLConf.CODEGEN_ENABLED.defaultValue.get)
   }
 
-  ignore("sort followed by limit should not leak memory") {
-    // TODO: this test is going to fail until we implement a proper iterator interface
-    // with a close() method.
-    TestSQLContext.sparkContext.conf.set("spark.unsafe.exceptionOnMemoryLeak", "false")
+  test("sort followed by limit") {
     checkThatPlansAgree(
       (1 to 100).map(v => Tuple1(v)).toDF("a"),
       (child: SparkPlan) => Limit(10, UnsafeExternalSort('a.asc :: Nil, true, child)),
@@ -48,21 +45,6 @@
     )
   }
 
-  test("sort followed by limit") {
-    TestSQLContext.sparkContext.conf.set("spark.unsafe.exceptionOnMemoryLeak", "false")
-    try {
-      checkThatPlansAgree(
-        (1 to 100).map(v => Tuple1(v)).toDF("a"),
-        (child: SparkPlan) => Limit(10, UnsafeExternalSort('a.asc :: Nil, true, child)),
-        (child: SparkPlan) => Limit(10, Sort('a.asc :: Nil, global = true, child)),
-        sortAnswers = false
-      )
-    } finally {
-      TestSQLContext.sparkContext.conf.set("spark.unsafe.exceptionOnMemoryLeak", "false")
-
-    }
-  }
-
   test("sorting does not crash for large inputs") {
     val sortOrder = 'a.asc :: Nil
     val stringLength = 1024 * 1024 * 2
