Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-23407][SQL] add a config to try to inline all mutable states during codegen #20599

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ class CodegenContext {
*/
var currentVars: Seq[ExprCode] = null

// Reads the config here, to make it effective for the entire lifetime of this context.
private val tryInlineAllState = SQLConf.get.getConf(SQLConf.CODEGEN_TRY_INLINE_ALL_STATES)

/**
* Holding expressions' inlined mutable states like `MonotonicallyIncreasingID.count` as a
* 2-tuple: java type, variable name.
Expand Down Expand Up @@ -253,10 +256,11 @@ class CodegenContext {
forceInline: Boolean = false,
useFreshName: Boolean = true): String = {

// want to put a primitive type variable at outerClass for performance
val canInlinePrimitive = isPrimitiveType(javaType) &&
// Puts a primitive type variable at outerClass for performance, or puts all variables at outer
// class if CODEGEN_TRY_INLINE_ALL_STATES is true, if we have't hit the threshold yet.
val canInline = (isPrimitiveType(javaType) || tryInlineAllState) &&
(inlinedMutableStates.length < CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD)
if (forceInline || canInlinePrimitive || javaType.contains("[][]")) {
if (forceInline || javaType.contains("[][]") || canInline) {
val varName = if (useFreshName) freshName(variableName) else variableName
val initCode = initFunc(varName)
inlinedMutableStates += ((javaType, varName))
Expand Down Expand Up @@ -1461,20 +1465,19 @@ object CodeGenerator extends Logging {
CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.update(classBytes.length)
try {
val cf = new ClassFile(new ByteArrayInputStream(classBytes))
val stats = cf.methodInfos.asScala.flatMap { method =>
cf.methodInfos.asScala.flatMap { method =>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these changes related to this PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not, but a small clean up.

method.getAttributes().filter(_.getClass.getName == codeAttr.getName).map { a =>
val byteCodeSize = codeAttrField.get(a).asInstanceOf[Array[Byte]].length
CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.update(byteCodeSize)
byteCodeSize
}
}
Some(stats)
} catch {
case NonFatal(e) =>
logWarning("Error calculating stats of compiled class.", e)
None
Nil
}
}.flatten
}

codeSizes.max
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,16 @@ object SQLConf {
"disable logging or -1 to apply no limit.")
.createWithDefault(1000)

val CODEGEN_TRY_INLINE_ALL_STATES =
buildConf("spark.sql.codegen.tryInlineAllStates")
.internal()
.doc("When adding mutable states during code generation, whether or not we should try to " +
"inline all the states. If this config is false, we only try to inline primitive stats, " +
"so that primitive states are more likely to be inlined. Set this config to true to make " +
"the behavior same as Spark 2.2.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it only behaves the same before we hit the threshold?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea, let me improve it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also watch out for a typo s/stats/states/

.booleanConf
.createWithDefault(false)

val WHOLESTAGE_HUGE_METHOD_LIMIT = buildConf("spark.sql.codegen.hugeMethodLimit")
.internal()
.doc("The maximum bytecode size of a single compiled Java function generated by whole-stage " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.objects._
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.util.ThreadUtils
Expand Down Expand Up @@ -436,4 +437,18 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
ctx.addImmutableStateIfNotExists("String", mutableState2)
assert(ctx.inlinedMutableStates.length == 2)
}

test("SPARK-23407: inline all mutable states if CODEGEN_TRY_INLINE_ALL_STATES is true") {
val conf = SQLConf.get
try {
conf.setConf(SQLConf.CODEGEN_TRY_INLINE_ALL_STATES, true)
val ctx = new CodegenContext
ctx.addMutableState(ctx.JAVA_INT, "i", v => s"$v = 1;")
ctx.addMutableState("String", "s", v => s"$v = null;")
assert(ctx.inlinedMutableStates.size == 2)
assert(ctx.arrayCompactedMutableStates.isEmpty)
} finally {
conf.unsetConf(SQLConf.CODEGEN_TRY_INLINE_ALL_STATES)
}
}
}