Skip to content

Commit fa225da

Browse files
maropu authored and gatorsmile committed
[SPARK-22176][SQL] Fix overflow issue in Dataset.show
## What changes were proposed in this pull request?

This PR fixes the following overflow issue in `Dataset.show`:

```
scala> Seq((1, 2), (3, 4)).toDF("a", "b").show(Int.MaxValue)
org.apache.spark.sql.AnalysisException: The limit expression must be equal to or greater than 0, but got -2147483648;;
GlobalLimit -2147483648
+- LocalLimit -2147483648
   +- Project [_1#27218 AS a#27221, _2#27219 AS b#27222]
      +- LocalRelation [_1#27218, _2#27219]
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:41)
  at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:89)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$$checkLimitClause(CheckAnalysis.scala:70)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:234)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:80)
  at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
```

The failure occurs because `showString` takes `numRows + 1` rows to detect whether more data exists, so passing `Int.MaxValue` wraps the limit around to `-2147483648`; the fix caps `numRows` at `Int.MaxValue - 1`.

## How was this patch tested?

Added tests in `DataFrameSuite`.

Author: Takeshi Yamamuro <yamamuro@apache.org>

Closes #19401 from maropu/MaxValueInShowString.
1 parent 4329eb2 commit fa225da

File tree

2 files changed

+13
-1
lines changed

2 files changed

+13
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ class Dataset[T] private[sql](
237237
*/
238238
private[sql] def showString(
239239
_numRows: Int, truncate: Int = 20, vertical: Boolean = false): String = {
240-
val numRows = _numRows.max(0)
240+
val numRows = _numRows.max(0).min(Int.MaxValue - 1)
241241
val takeResult = toDF().take(numRows + 1)
242242
val hasMoreData = takeResult.length > numRows
243243
val data = takeResult.take(numRows)

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,18 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
10451045
assert(testData.select($"*").showString(0) === expectedAnswer)
10461046
}
10471047

1048+
test("showString(Int.MaxValue)") {
1049+
val df = Seq((1, 2), (3, 4)).toDF("a", "b")
1050+
val expectedAnswer = """+---+---+
1051+
|| a| b|
1052+
|+---+---+
1053+
|| 1| 2|
1054+
|| 3| 4|
1055+
|+---+---+
1056+
|""".stripMargin
1057+
assert(df.showString(Int.MaxValue) === expectedAnswer)
1058+
}
1059+
10481060
test("showString(0), vertical = true") {
10491061
val expectedAnswer = "(0 rows)\n"
10501062
assert(testData.select($"*").showString(0, vertical = true) === expectedAnswer)

0 commit comments

Comments
 (0)