Skip to content

Commit ec5d698

Browse files
dilipbiswalcloud-fan
authored and committed
[SPARK-29092][SQL] Report additional information about DataSourceScanExec in EXPLAIN FORMATTED
# What changes were proposed in this pull request? Currently we report only output attributes of a scan while doing EXPLAIN FORMATTED. This PR implements the ```verboseStringWithOperatorId``` in DataSourceScanExec to report additional information about a scan such as pushed down filters, partition filters, location etc. **SQL** ``` EXPLAIN FORMATTED SELECT key, max(val) FROM explain_temp1 WHERE key > 0 GROUP BY key ORDER BY key ``` **Before** ``` == Physical Plan == * Sort (9) +- Exchange (8) +- * HashAggregate (7) +- Exchange (6) +- * HashAggregate (5) +- * Project (4) +- * Filter (3) +- * ColumnarToRow (2) +- Scan parquet default.explain_temp1 (1) (1) Scan parquet default.explain_temp1 Output: [key#x, val#x] .... .... .... ``` **After** ``` == Physical Plan == * Sort (9) +- Exchange (8) +- * HashAggregate (7) +- Exchange (6) +- * HashAggregate (5) +- * Project (4) +- * Filter (3) +- * ColumnarToRow (2) +- Scan parquet default.explain_temp1 (1) (1) Scan parquet default.explain_temp1 Output: [key#x, val#x] Batched: true DataFilters: [isnotnull(key#x), (key#x > 0)] Format: Parquet Location: InMemoryFileIndex[file:/tmp/apache/spark/spark-warehouse/explain_temp1] PushedFilters: [IsNotNull(key), GreaterThan(key,0)] ReadSchema: struct<key:int,val:int> ... ... ... ``` ### Why are the changes needed? ### Does this PR introduce any user-facing change? ### How was this patch tested? Closes #26042 from dilipbiswal/verbose_string_datasrc_scanexec. Authored-by: Dilip Biswal <dkbiswal@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 8616109 commit ec5d698

File tree

9 files changed

+162
-36
lines changed

9 files changed

+162
-36
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,26 @@ trait DataSourceScanExec extends LeafExecNode {
6565
s"$nodeNamePrefix$nodeName${truncatedString(output, "[", ",", "]", maxFields)}$metadataStr")
6666
}
6767

68+
/**
 * Renders the verbose, operator-id-prefixed description of this scan.
 *
 * The result is a header line (operator id, node name, codegen id), an `Output:` line
 * listing `producedAttributes`, and then one `key: value` line per metadata entry in
 * sorted order. Entries whose value is empty or `[]`, and the `DataFilters` and `Format`
 * entries, are left out; every printed value goes through `redact`.
 */
override def verboseStringWithOperatorId(): String = {
  // Keys that are never printed here, whatever their value.
  val hiddenKeys = Set("DataFilters", "Format")

  val metadataLines = metadata.toSeq.sorted.collect {
    case (key, value) if !value.isEmpty && value != "[]" && !hiddenKeys.contains(key) =>
      s"$key: ${redact(value)}"
  }

  // The closing delimiter sits behind a margin character so the rendered text does not
  // pick up whatever indentation precedes it.
  s"""
     |(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)}
     |Output: ${producedAttributes.mkString("[", ", ", "]")}
     |${metadataLines.mkString("\n")}
     |""".stripMargin
}
}
83+
6884
/**
6985
* Shorthand for calling redactString() without specifying redacting rules
7086
*/
71-
private def redact(text: String): String = {
87+
protected def redact(text: String): String = {
  // Use the session's configured string-redaction pattern rather than asking callers
  // to supply redaction rules.
  val redactionPattern = sqlContext.sessionState.conf.stringRedactionPattern
  Utils.redact(redactionPattern, text)
}
7490

@@ -342,6 +358,31 @@ case class FileSourceScanExec(
342358
withSelectedBucketsCount
343359
}
344360

361+
/**
 * Renders the verbose, operator-id-prefixed description of this file scan.
 *
 * The result is a header line (operator id, node name, codegen id), an `Output:` line
 * listing `producedAttributes`, and then one `key: value` line per metadata entry in
 * sorted order. Entries whose value is empty or `[]`, and the `DataFilters` and `Format`
 * entries, are left out. The `Location` entry is rebuilt from `relation.location`: the
 * simple class name of the location followed by its root paths, abbreviated to the first
 * path plus a count of the remaining ones when there is more than one root path. Every
 * printed value goes through `redact`.
 */
override def verboseStringWithOperatorId(): String = {
  // Keys that are never printed here, whatever their value.
  val hiddenKeys = Set("DataFilters", "Format")

  val metadataLines = metadata.toSeq.sorted.collect {
    case (key, value) if !value.isEmpty && value != "[]" && !hiddenKeys.contains(key) =>
      key match {
        case "Location" =>
          val fileIndex = relation.location
          val rootCount = fileIndex.rootPaths.length
          // Only the first root path is spelled out once there are several of them.
          val abbreviatedLocation =
            if (rootCount > 1) {
              "[" + fileIndex.rootPaths.head + s", ... ${rootCount - 1} entries]"
            } else {
              fileIndex.rootPaths.mkString("[", ", ", "]")
            }
          s"$key: ${fileIndex.getClass.getSimpleName} ${redact(abbreviatedLocation)}"
        case _ =>
          s"$key: ${redact(value)}"
      }
  }

  // The closing delimiter sits behind a margin character so the rendered text does not
  // pick up whatever indentation precedes it.
  s"""
     |(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)}
     |Output: ${producedAttributes.mkString("[", ", ", "]")}
     |${metadataLines.mkString("\n")}
     |""".stripMargin
}
385+
345386
lazy val inputRDD: RDD[InternalRow] = {
346387
val readFile: (PartitionedFile) => Iterator[InternalRow] =
347388
relation.fileFormat.buildReaderWithPartitionValues(

sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] {
193193
)
194194
}
195195
val executedPlan = new QueryExecution(sparkSession, query).executedPlan
196-
InSubqueryExec(expr, SubqueryExec(s"subquery${exprId.id}", executedPlan), exprId)
196+
InSubqueryExec(expr, SubqueryExec(s"subquery#${exprId.id}", executedPlan), exprId)
197197
}
198198
}
199199
}

sql/core/src/test/resources/sql-tests/results/describe-part-after-analyze.sql.out

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,12 @@ hr int
5656
Database default
5757
Table t
5858
Partition Values [ds=2017-08-01, hr=10]
59-
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=10
59+
Location [not included in comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=10
6060
Created Time [not included in comparison]
6161
Last Access [not included in comparison]
6262

6363
# Storage Information
64-
Location [not included in comparison]sql/core/spark-warehouse/t
64+
Location [not included in comparison]/{warehouse_dir}/t
6565

6666

6767
-- !query 5
@@ -90,13 +90,13 @@ hr int
9090
Database default
9191
Table t
9292
Partition Values [ds=2017-08-01, hr=10]
93-
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=10
93+
Location [not included in comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=10
9494
Created Time [not included in comparison]
9595
Last Access [not included in comparison]
9696
Partition Statistics [not included in comparison] bytes, 3 rows
9797

9898
# Storage Information
99-
Location [not included in comparison]sql/core/spark-warehouse/t
99+
Location [not included in comparison]/{warehouse_dir}/t
100100

101101

102102
-- !query 7
@@ -125,13 +125,13 @@ hr int
125125
Database default
126126
Table t
127127
Partition Values [ds=2017-08-01, hr=10]
128-
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=10
128+
Location [not included in comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=10
129129
Created Time [not included in comparison]
130130
Last Access [not included in comparison]
131131
Partition Statistics [not included in comparison] bytes, 3 rows
132132

133133
# Storage Information
134-
Location [not included in comparison]sql/core/spark-warehouse/t
134+
Location [not included in comparison]/{warehouse_dir}/t
135135

136136

137137
-- !query 9
@@ -152,13 +152,13 @@ hr int
152152
Database default
153153
Table t
154154
Partition Values [ds=2017-08-01, hr=11]
155-
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=11
155+
Location [not included in comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=11
156156
Created Time [not included in comparison]
157157
Last Access [not included in comparison]
158158
Partition Statistics [not included in comparison] bytes, 4 rows
159159

160160
# Storage Information
161-
Location [not included in comparison]sql/core/spark-warehouse/t
161+
Location [not included in comparison]/{warehouse_dir}/t
162162

163163

164164
-- !query 10
@@ -187,13 +187,13 @@ hr int
187187
Database default
188188
Table t
189189
Partition Values [ds=2017-08-01, hr=10]
190-
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=10
190+
Location [not included in comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=10
191191
Created Time [not included in comparison]
192192
Last Access [not included in comparison]
193193
Partition Statistics [not included in comparison] bytes, 3 rows
194194

195195
# Storage Information
196-
Location [not included in comparison]sql/core/spark-warehouse/t
196+
Location [not included in comparison]/{warehouse_dir}/t
197197

198198

199199
-- !query 12
@@ -214,13 +214,13 @@ hr int
214214
Database default
215215
Table t
216216
Partition Values [ds=2017-08-01, hr=11]
217-
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=11
217+
Location [not included in comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=11
218218
Created Time [not included in comparison]
219219
Last Access [not included in comparison]
220220
Partition Statistics [not included in comparison] bytes, 4 rows
221221

222222
# Storage Information
223-
Location [not included in comparison]sql/core/spark-warehouse/t
223+
Location [not included in comparison]/{warehouse_dir}/t
224224

225225

226226
-- !query 13
@@ -241,13 +241,13 @@ hr int
241241
Database default
242242
Table t
243243
Partition Values [ds=2017-09-01, hr=5]
244-
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-09-01/hr=5
244+
Location [not included in comparison]/{warehouse_dir}/t/ds=2017-09-01/hr=5
245245
Created Time [not included in comparison]
246246
Last Access [not included in comparison]
247247
Partition Statistics [not included in comparison] bytes, 2 rows
248248

249249
# Storage Information
250-
Location [not included in comparison]sql/core/spark-warehouse/t
250+
Location [not included in comparison]/{warehouse_dir}/t
251251

252252

253253
-- !query 14

sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ Created By [not included in comparison]
2929
Type MANAGED
3030
Provider parquet
3131
Comment added
32-
Location [not included in comparison]sql/core/spark-warehouse/table_with_comment
32+
Location [not included in comparison]/{warehouse_dir}/table_with_comment
3333

3434

3535
-- !query 2
@@ -60,7 +60,7 @@ Type MANAGED
6060
Provider parquet
6161
Comment modified comment
6262
Table Properties [type=parquet]
63-
Location [not included in comparison]sql/core/spark-warehouse/table_with_comment
63+
Location [not included in comparison]/{warehouse_dir}/table_with_comment
6464

6565

6666
-- !query 4
@@ -95,7 +95,7 @@ Last Access [not included in comparison]
9595
Created By [not included in comparison]
9696
Type MANAGED
9797
Provider parquet
98-
Location [not included in comparison]sql/core/spark-warehouse/table_comment
98+
Location [not included in comparison]/{warehouse_dir}/table_comment
9999

100100

101101
-- !query 7
@@ -123,7 +123,7 @@ Created By [not included in comparison]
123123
Type MANAGED
124124
Provider parquet
125125
Comment added comment
126-
Location [not included in comparison]sql/core/spark-warehouse/table_comment
126+
Location [not included in comparison]/{warehouse_dir}/table_comment
127127

128128

129129
-- !query 9
@@ -150,7 +150,7 @@ Last Access [not included in comparison]
150150
Created By [not included in comparison]
151151
Type MANAGED
152152
Provider parquet
153-
Location [not included in comparison]sql/core/spark-warehouse/table_comment
153+
Location [not included in comparison]/{warehouse_dir}/table_comment
154154

155155

156156
-- !query 11

sql/core/src/test/resources/sql-tests/results/describe.sql.out

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ Bucket Columns [`a`]
131131
Sort Columns [`b`]
132132
Comment table_comment
133133
Table Properties [t=test, e=3]
134-
Location [not included in comparison]sql/core/spark-warehouse/t
134+
Location [not included in comparison]/{warehouse_dir}/t
135135
Storage Properties [a=1, b=2]
136136
Partition Provider Catalog
137137

@@ -163,7 +163,7 @@ Bucket Columns [`a`]
163163
Sort Columns [`b`]
164164
Comment table_comment
165165
Table Properties [t=test, e=3]
166-
Location [not included in comparison]sql/core/spark-warehouse/t
166+
Location [not included in comparison]/{warehouse_dir}/t
167167
Storage Properties [a=1, b=2]
168168
Partition Provider Catalog
169169

@@ -203,7 +203,7 @@ Bucket Columns [`a`]
203203
Sort Columns [`b`]
204204
Comment table_comment
205205
Table Properties [t=test]
206-
Location [not included in comparison]sql/core/spark-warehouse/t
206+
Location [not included in comparison]/{warehouse_dir}/t
207207
Storage Properties [a=1, b=2]
208208
Partition Provider Catalog
209209

@@ -242,7 +242,7 @@ Num Buckets 2
242242
Bucket Columns [`a`]
243243
Sort Columns [`b`]
244244
Table Properties [t=test]
245-
Location [not included in comparison]sql/core/spark-warehouse/t
245+
Location [not included in comparison]/{warehouse_dir}/t
246246
Storage Properties [a=1, b=2]
247247
Partition Provider Catalog
248248

@@ -280,7 +280,7 @@ d string
280280
Database default
281281
Table t
282282
Partition Values [c=Us, d=1]
283-
Location [not included in comparison]sql/core/spark-warehouse/t/c=Us/d=1
283+
Location [not included in comparison]/{warehouse_dir}/t/c=Us/d=1
284284
Storage Properties [a=1, b=2]
285285
Created Time [not included in comparison]
286286
Last Access [not included in comparison]
@@ -289,7 +289,7 @@ Last Access [not included in comparison]
289289
Num Buckets 2
290290
Bucket Columns [`a`]
291291
Sort Columns [`b`]
292-
Location [not included in comparison]sql/core/spark-warehouse/t
292+
Location [not included in comparison]/{warehouse_dir}/t
293293
Storage Properties [a=1, b=2]
294294

295295

@@ -311,7 +311,7 @@ d string
311311
Database default
312312
Table t
313313
Partition Values [c=Us, d=1]
314-
Location [not included in comparison]sql/core/spark-warehouse/t/c=Us/d=1
314+
Location [not included in comparison]/{warehouse_dir}/t/c=Us/d=1
315315
Storage Properties [a=1, b=2]
316316
Created Time [not included in comparison]
317317
Last Access [not included in comparison]
@@ -320,7 +320,7 @@ Last Access [not included in comparison]
320320
Num Buckets 2
321321
Bucket Columns [`a`]
322322
Sort Columns [`b`]
323-
Location [not included in comparison]sql/core/spark-warehouse/t
323+
Location [not included in comparison]/{warehouse_dir}/t
324324
Storage Properties [a=1, b=2]
325325

326326

0 commit comments

Comments
 (0)