Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,8 @@ object SparkSubmit {
if (verbose) {
printStream.println(s"Main class:\n$childMainClass")
printStream.println(s"Arguments:\n${childArgs.mkString("\n")}")
printStream.println(s"System properties:\n${sysProps.mkString("\n")}")
// sysProps may contain sensitive information, so redact before printing
printStream.println(s"System properties:\n${Utils.redact(sysProps).mkString("\n")}")
printStream.println(s"Classpath elements:\n${childClasspath.mkString("\n")}")
printStream.println("\n")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,15 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
// scalastyle:off println
if (verbose) SparkSubmit.printStream.println(s"Using properties file: $propertiesFile")
Option(propertiesFile).foreach { filename =>
Utils.getPropertiesFromFile(filename).foreach { case (k, v) =>
val properties = Utils.getPropertiesFromFile(filename)
properties.foreach { case (k, v) =>
defaultProperties(k) = v
if (verbose) SparkSubmit.printStream.println(s"Adding default property: $k=$v")
}
// Property files may contain sensitive information, so redact before printing
if (verbose) {
Utils.redact(properties).foreach { case (k, v) =>
SparkSubmit.printStream.println(s"Adding default property: $k=$v")
}
}
}
// scalastyle:on println
Expand Down Expand Up @@ -318,7 +324,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
|
|Spark properties used, including those specified through
| --conf and those from the properties file $propertiesFile:
|${sparkProperties.mkString(" ", "\n ", "\n")}
|${Utils.redact(sparkProperties).mkString(" ", "\n ", "\n")}
""".stripMargin
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,13 @@ package object config {
" bigger files.")
.longConf
.createWithDefault(4 * 1024 * 1024)

private[spark] val SECRET_REDACTION_PATTERN =
ConfigBuilder("spark.redaction.regex")
.doc("Regex to decide which Spark configuration properties and environment variables in " +
"driver and executor environments contain sensitive information. When this regex matches " +
"a property, its value is redacted from the environment UI and various logs like YARN " +
"and event logs.")
.stringConf
.createWithDefault("(?i)secret|password")
}
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ private[spark] class EventLoggingListener(

override def onTaskEnd(event: SparkListenerTaskEnd): Unit = logEvent(event)

override def onEnvironmentUpdate(event: SparkListenerEnvironmentUpdate): Unit = logEvent(event)
override def onEnvironmentUpdate(event: SparkListenerEnvironmentUpdate): Unit = {
logEvent(redactEvent(event))
}

// Events that trigger a flush
override def onStageCompleted(event: SparkListenerStageCompleted): Unit = {
Expand Down Expand Up @@ -231,6 +233,15 @@ private[spark] class EventLoggingListener(
}
}

private[spark] def redactEvent(
event: SparkListenerEnvironmentUpdate): SparkListenerEnvironmentUpdate = {
// "Spark Properties" entry will always exist because the map is always populated with it.
val redactedProps = Utils.redact(sparkConf, event.environmentDetails("Spark Properties"))
val redactedEnvironmentDetails = event.environmentDetails +
("Spark Properties" -> redactedProps)
SparkListenerEnvironmentUpdate(redactedEnvironmentDetails)
}

}

private[spark] object EventLoggingListener extends Logging {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,17 @@ import javax.servlet.http.HttpServletRequest
import scala.xml.Node

import org.apache.spark.ui.{UIUtils, WebUIPage}
import org.apache.spark.util.Utils

private[ui] class EnvironmentPage(parent: EnvironmentTab) extends WebUIPage("") {
private val listener = parent.listener

private def removePass(kv: (String, String)): (String, String) = {
if (kv._1.toLowerCase.contains("password") || kv._1.toLowerCase.contains("secret")) {
(kv._1, "******")
} else kv
}

def render(request: HttpServletRequest): Seq[Node] = {
val runtimeInformationTable = UIUtils.listingTable(
propertyHeader, jvmRow, listener.jvmInformation, fixedWidth = true)
val sparkPropertiesTable = UIUtils.listingTable(
propertyHeader, propertyRow, listener.sparkProperties.map(removePass), fixedWidth = true)
val sparkPropertiesTable = UIUtils.listingTable(propertyHeader, propertyRow,
Utils.redact(parent.conf, listener.sparkProperties), fixedWidth = true)

val systemPropertiesTable = UIUtils.listingTable(
propertyHeader, propertyRow, listener.systemProperties, fixedWidth = true)
val classpathEntriesTable = UIUtils.listingTable(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import org.apache.spark.ui._

private[ui] class EnvironmentTab(parent: SparkUI) extends SparkUITab(parent, "environment") {
val listener = parent.environmentListener
val conf = parent.conf
attachPage(new EnvironmentPage(this))
}

Expand Down
33 changes: 32 additions & 1 deletion core/src/main/scala/org/apache/spark/util/Utils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import scala.io.Source
import scala.reflect.ClassTag
import scala.util.Try
import scala.util.control.{ControlThrowable, NonFatal}
import scala.util.matching.Regex

import _root_.io.netty.channel.unix.Errors.NativeIoException
import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}
Expand All @@ -55,7 +56,7 @@ import org.slf4j.Logger
import org.apache.spark._
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config.{DYN_ALLOCATION_INITIAL_EXECUTORS, DYN_ALLOCATION_MIN_EXECUTORS, EXECUTOR_INSTANCES}
import org.apache.spark.internal.config._
import org.apache.spark.network.util.JavaUtils
import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance}

Expand Down Expand Up @@ -2571,6 +2572,36 @@ private[spark] object Utils extends Logging {
sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten
}
}

private[util] val REDACTION_REPLACEMENT_TEXT = "*********(redacted)"

def redact(conf: SparkConf, kvs: Seq[(String, String)]): Seq[(String, String)] = {
val redactionPattern = conf.get(SECRET_REDACTION_PATTERN).r
redact(redactionPattern, kvs)
}

private def redact(redactionPattern: Regex, kvs: Seq[(String, String)]): Seq[(String, String)] = {
kvs.map { kv =>
redactionPattern.findFirstIn(kv._1)
.map { _ => (kv._1, REDACTION_REPLACEMENT_TEXT) }
.getOrElse(kv)
}
}

/**
* Looks up the redaction regex from within the key value pairs and uses it to redact the rest
* of the key value pairs. No care is taken to make sure the redaction property itself is not
* redacted. So theoretically, the property itself could be configured to redact its own value
* when printing.
*/
def redact(kvs: Map[String, String]): Seq[(String, String)] = {
val redactionPattern = kvs.getOrElse(
SECRET_REDACTION_PATTERN.key,
SECRET_REDACTION_PATTERN.defaultValueString
).r
redact(redactionPattern, kvs.toArray)
}

}

private[util] object CallerContext extends Logging {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,18 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit
}
}

test("Event logging with password redaction") {
val key = "spark.executorEnv.HADOOP_CREDSTORE_PASSWORD"
val secretPassword = "secret_password"
val conf = getLoggingConf(testDirPath, None)
.set(key, secretPassword)
val eventLogger = new EventLoggingListener("test", None, testDirPath.toUri(), conf)
val envDetails = SparkEnv.environmentDetails(conf, "FIFO", Seq.empty, Seq.empty)
val event = SparkListenerEnvironmentUpdate(envDetails)
val redactedProps = eventLogger.redactEvent(event).environmentDetails("Spark Properties").toMap
assert(redactedProps(key) == "*********(redacted)")
}

test("Log overwriting") {
val logUri = EventLoggingListener.getLogPath(testDir.toURI, "test", None)
val logPath = new URI(logUri).getPath
Expand Down
20 changes: 20 additions & 0 deletions core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -975,4 +975,24 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {

assert(pValue > threshold)
}

test("redact sensitive information") {
val sparkConf = new SparkConf

// Set some secret keys
val secretKeys = Seq(
"spark.executorEnv.HADOOP_CREDSTORE_PASSWORD",
"spark.my.password",
"spark.my.sECreT")
secretKeys.foreach { key => sparkConf.set(key, "secret_password") }
// Set a non-secret key
sparkConf.set("spark.regular.property", "not_a_secret")

// Redact sensitive information
val redactedConf = Utils.redact(sparkConf, sparkConf.getAll).toMap

// Assert that secret information got redacted while the regular property remained the same
secretKeys.foreach { key => assert(redactedConf(key) === Utils.REDACTION_REPLACEMENT_TEXT) }
assert(redactedConf("spark.regular.property") === "not_a_secret")
}
}
9 changes: 9 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,15 @@ Apart from these, the following properties are also available, and may be useful
process. The user can specify multiple of these to set multiple environment variables.
</td>
</tr>
<tr>
<td><code>spark.redaction.regex</code></td>
<td>(?i)secret|password</td>
<td>
Regex to decide which Spark configuration properties and environment variables in driver and
executor environments contain sensitive information. When this regex matches a property, its
value is redacted from the environment UI and various logs like YARN and event logs.
</td>
</tr>
<tr>
<td><code>spark.python.profile</code></td>
<td>false</td>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.ipc.YarnRPC
import org.apache.hadoop.yarn.util.{ConverterUtils, Records}

import org.apache.spark.{SecurityManager, SparkConf, SparkException}
import org.apache.spark.deploy.yarn.config._
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._
import org.apache.spark.launcher.YarnCommandBuilderUtils
Expand Down Expand Up @@ -75,7 +74,7 @@ private[yarn] class ExecutorRunnable(
|===============================================================================
|YARN executor launch context:
| env:
|${env.map { case (k, v) => s" $k -> $v\n" }.mkString}
|${Utils.redact(sparkConf, env.toSeq).map { case (k, v) => s" $k -> $v\n" }.mkString}
| command:
| ${commands.mkString(" \\ \n ")}
|
Expand Down