Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -245,5 +245,14 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) {
println(org.apache.spark.sql.execution.debug.codegenString(executedPlan))
// scalastyle:on println
}

/**
* Get WholeStageCodegenExec subtrees and the codegen in a query plan
*
* @return Sequence of WholeStageCodegen subtrees and corresponding codegen
*/
def codegenToSeq(): Seq[(String, String)] = {
org.apache.spark.sql.execution.debug.codegenStringSeq(executedPlan)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,23 +50,42 @@ package object debug {
// scalastyle:on println
}

/**
* Get WholeStageCodegenExec subtrees and the codegen in a query plan into one String
*
* @param plan the query plan for codegen
* @return single String containing all WholeStageCodegen subtrees and corresponding codegen
*/
def codegenString(plan: SparkPlan): String = {
val codegenSeq = codegenStringSeq(plan)
var output = s"Found ${codegenSeq.size} WholeStageCodegen subtrees.\n"
for (((subtree, code), i) <- codegenSeq.zipWithIndex) {
output += s"== Subtree ${i + 1} / ${codegenSeq.size} ==\n"
output += subtree
output += "\nGenerated code:\n"
output += s"${code}\n"
}
output
}

/**
* Get WholeStageCodegenExec subtrees and the codegen in a query plan
*
* @param plan the query plan for codegen
* @return Sequence of WholeStageCodegen subtrees and corresponding codegen
*/
def codegenStringSeq(plan: SparkPlan): Seq[(String, String)] = {
val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]()
plan transform {
case s: WholeStageCodegenExec =>
codegenSubtrees += s
s
case s => s
}
var output = s"Found ${codegenSubtrees.size} WholeStageCodegen subtrees.\n"
for ((s, i) <- codegenSubtrees.toSeq.zipWithIndex) {
output += s"== Subtree ${i + 1} / ${codegenSubtrees.size} ==\n"
output += s
output += "\nGenerated code:\n"
val (_, source) = s.doCodeGen()
output += s"${CodeFormatter.format(source)}\n"
codegenSubtrees.toSeq.map { subtree =>
val (_, source) = subtree.doCodeGen()
(subtree.toString, CodeFormatter.format(source))
}
output
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,11 @@ class DebuggingSuite extends SparkFunSuite with SharedSQLContext {
assert(res.contains("Subtree 2 / 2"))
assert(res.contains("Object[]"))
}

test("debugCodegenStringSeq") {
val res = codegenStringSeq(spark.range(10).groupBy("id").count().queryExecution.executedPlan)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please post the return value here?

assert(res.length == 2)
assert(res.forall{ case (subtree, code) =>
subtree.contains("Range") && code.contains("Object[]")})
}
}