
Commit 192960d

code review

1 parent 8beed08 commit 192960d

File tree

4 files changed, +53 -26 lines changed


sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala

Lines changed: 26 additions & 4 deletions
@@ -71,9 +71,6 @@ trait DataSourceScanExec extends LeafExecNode {
       case (key, _) if (key.equals("DataFilters") || key.equals("Format")) => true
       case (_, _) => false
     }.map {
-      case (key, value) if (key.equals("Location")) =>
-        val abbreviatedLoaction = StringUtils.abbreviate(redact(value), 100)
-        s"$key: $abbreviatedLoaction"
       case (key, value) => s"$key: ${redact(value)}"
     }
 

@@ -87,7 +84,7 @@ trait DataSourceScanExec extends LeafExecNode {
   /**
    * Shorthand for calling redactString() without specifying redacting rules
    */
-  private def redact(text: String): String = {
+  protected def redact(text: String): String = {
     Utils.redact(sqlContext.sessionState.conf.stringRedactionPattern, text)
   }
 

@@ -361,6 +358,31 @@ case class FileSourceScanExec(
     withSelectedBucketsCount
   }
 
+  override def verboseStringWithOperatorId(): String = {
+    val metadataStr = metadata.toSeq.sorted.filterNot {
+      case (_, value) if (value.isEmpty || value.equals("[]")) => true
+      case (key, _) if (key.equals("DataFilters") || key.equals("Format")) => true
+      case (_, _) => false
+    }.map {
+      case (key, _) if (key.equals("Location")) =>
+        val location = relation.location
+        val numPaths = location.rootPaths.length
+        val abbreviatedLocation = if (numPaths <= 1) {
+          location.rootPaths.mkString("[", ", ", "]")
+        } else {
+          "[" + location.rootPaths.head + s", ... ${numPaths - 1} entries]"
+        }
+        s"$key: ${location.getClass.getSimpleName} ${redact(abbreviatedLocation)}"
+      case (key, value) => s"$key: ${redact(value)}"
+    }
+
+    s"""
+       |(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)}
+       |Output: ${producedAttributes.mkString("[", ", ", "]")}
+       |${metadataStr.mkString("\n")}
+     """.stripMargin
+  }
+
   lazy val inputRDD: RDD[InternalRow] = {
     val readFile: (PartitionedFile) => Iterator[InternalRow] =
       relation.fileFormat.buildReaderWithPartitionValues(
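
The Location entry now names the FileIndex implementation and keeps a single root path in full, abbreviating only multi-path locations to the first path plus a count of the rest. A minimal standalone sketch of that abbreviation rule (plain Scala; `rootPaths` stands in for `relation.location.rootPaths`, and the helper name is illustrative, not part of the patch):

def abbreviateLocation(rootPaths: Seq[String]): String =
  if (rootPaths.length <= 1) {
    rootPaths.mkString("[", ", ", "]") // one path (or none): print it in full
  } else {
    // several paths: the first one, then how many were omitted
    s"[${rootPaths.head}, ... ${rootPaths.length - 1} entries]"
  }

// abbreviateLocation(Seq("/data/a"))             == "[/data/a]"
// abbreviateLocation(Seq("/data/a", "/b", "/c")) == "[/data/a, ... 2 entries]"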

sql/core/src/test/resources/sql-tests/results/explain.sql.out

Lines changed: 19 additions & 19 deletions
@@ -59,7 +59,7 @@ struct<plan:string>
 (1) Scan parquet default.explain_temp1
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 PushedFilters: [IsNotNull(key), GreaterThan(key,0)]
 ReadSchema: struct<key:int,val:int>
 
@@ -115,7 +115,7 @@ struct<plan:string>
 (1) Scan parquet default.explain_temp1
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 PushedFilters: [IsNotNull(key), GreaterThan(key,0)]
 ReadSchema: struct<key:int,val:int>
 
@@ -174,7 +174,7 @@ struct<plan:string>
 (1) Scan parquet default.explain_temp1
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 PushedFilters: [IsNotNull(key), GreaterThan(key,0)]
 ReadSchema: struct<key:int,val:int>
 
@@ -192,7 +192,7 @@ Input : [key#x, val#x]
 (5) Scan parquet default.explain_temp1
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 PushedFilters: [IsNotNull(key), GreaterThan(key,0)]
 ReadSchema: struct<key:int,val:int>
 
@@ -244,7 +244,7 @@ struct<plan:string>
 (1) Scan parquet default.explain_temp1
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 PushedFilters: [IsNotNull(key)]
 ReadSchema: struct<key:int,val:int>
 
@@ -262,7 +262,7 @@ Input : [key#x, val#x]
 (5) Scan parquet default.explain_temp2
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp2]
 PushedFilters: [IsNotNull(key)]
 ReadSchema: struct<key:int,val:int>
 
@@ -309,7 +309,7 @@ struct<plan:string>
 (1) Scan parquet default.explain_temp1
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 ReadSchema: struct<key:int,val:int>
 
 (2) ColumnarToRow [codegen id : 2]

@@ -318,7 +318,7 @@ Input: [key#x, val#x]
 (3) Scan parquet default.explain_temp2
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp2]
 PushedFilters: [IsNotNull(key)]
 ReadSchema: struct<key:int,val:int>
 
@@ -366,7 +366,7 @@ struct<plan:string>
 (1) Scan parquet default.explain_temp1
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 PushedFilters: [IsNotNull(key), IsNotNull(val), GreaterThan(val,3)]
 ReadSchema: struct<key:int,val:int>
 
@@ -396,7 +396,7 @@ Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery
 (5) Scan parquet default.explain_temp2
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp2]
 PushedFilters: [IsNotNull(key), IsNotNull(val), EqualTo(val,2)]
 ReadSchema: struct<key:int,val:int>
 
@@ -433,7 +433,7 @@ Subquery:2 Hosting operator id = 7 Hosting Expression = Subquery scalar-subquery
 (12) Scan parquet default.explain_temp3
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp3]
 PushedFilters: [IsNotNull(val), GreaterThan(val,0)]
 ReadSchema: struct<key:int,val:int>
 
@@ -481,7 +481,7 @@ struct<plan:string>
 (1) Scan parquet default.explain_temp1
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 ReadSchema: struct<key:int,val:int>
 
 (2) ColumnarToRow [codegen id : 1]

@@ -506,7 +506,7 @@ Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery
 (4) Scan parquet default.explain_temp2
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp2]
 PushedFilters: [IsNotNull(val), GreaterThan(val,0)]
 ReadSchema: struct<key:int,val:int>
 
@@ -543,7 +543,7 @@ Subquery:2 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery
 (11) Scan parquet default.explain_temp3
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp3]
 PushedFilters: [IsNotNull(val), GreaterThan(val,0)]
 ReadSchema: struct<key:int,val:int>
 
@@ -584,7 +584,7 @@ struct<plan:string>
 (1) Scan parquet default.explain_temp1
 Output: []
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 ReadSchema: struct<>
 
 (2) ColumnarToRow [codegen id : 1]

@@ -607,7 +607,7 @@ Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery
 (4) Scan parquet default.explain_temp1
 Output: [key#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 ReadSchema: struct<key:int>
 
 (5) ColumnarToRow [codegen id : 1]

@@ -652,7 +652,7 @@ struct<plan:string>
 (1) Scan parquet default.explain_temp1
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 PushedFilters: [IsNotNull(key), GreaterThan(key,10)]
 ReadSchema: struct<key:int,val:int>
 
@@ -670,7 +670,7 @@ Input : [key#x, val#x]
 (5) Scan parquet default.explain_temp1
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 PushedFilters: [IsNotNull(key), GreaterThan(key,10)]
 ReadSchema: struct<key:int,val:int>
 
@@ -723,7 +723,7 @@ struct<plan:string>
 (1) Scan parquet default.explain_temp1
 Output: [key#x, val#x]
 Batched: true
-Location [not included in comparison]
+Location [not included in comparison]sql/core/spark-warehouse/explain_temp1]
 PushedFilters: [IsNotNull(key), GreaterThan(key,10)]
 ReadSchema: struct<key:int,val:int>
 

sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala

Lines changed: 8 additions & 0 deletions
@@ -246,9 +246,17 @@ class ExplainSuite extends QueryTest with SharedSparkSession {
     val expected_pattern2 =
       "PartitionFilters: \\[isnotnull\\(k#xL\\), dynamicpruningexpression\\(k#xL " +
         "IN subquery#x\\)\\]"
+    val expected_pattern3 =
+      "Location: PrunedInMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" +
+        "/df2/.*, ... 99 entries\\]"
+    val expected_pattern4 =
+      "Location: PrunedInMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" +
+        "/df1/.*, ... 999 entries\\]"
     withNormalizedExplain(sqlText) { normalizedOutput =>
       assert(expected_pattern1.r.findAllMatchIn(normalizedOutput).length == 1)
       assert(expected_pattern2.r.findAllMatchIn(normalizedOutput).length == 1)
+      assert(expected_pattern3.r.findAllMatchIn(normalizedOutput).length == 2)
+      assert(expected_pattern4.r.findAllMatchIn(normalizedOutput).length == 1)
     }
   }
 }
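
For orientation, an explain line that expected_pattern3 is written to match would look roughly like the following (the file: prefix and partition directory are illustrative, not taken from the test):

Location: PrunedInMemoryFileIndex [file:/tmp/spark-warehouse/org.apache.spark.sql.ExplainSuite/df2/k=0/..., ... 99 entries]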

sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala

Lines changed: 0 additions & 3 deletions
@@ -436,9 +436,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
       .replaceAll(
         s"Location.*/sql/core/spark-warehouse/$clsName/",
         s"Location ${notIncludedMsg}sql/core/spark-warehouse/")
-      .replaceAll(
-        s"Location.*\\.\\.\\.",
-        s"Location ${notIncludedMsg}")
       .replaceAll("Created By.*", s"Created By $notIncludedMsg")
       .replaceAll("Created Time.*", s"Created Time $notIncludedMsg")
      .replaceAll("Last Access.*", s"Last Access $notIncludedMsg")
