diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 2951bdc18bc5..a69a2b5645dd 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -223,4 +223,13 @@ package object config { " bigger files.") .longConf .createWithDefault(4 * 1024 * 1024) + + private[spark] val SECRET_REDACTION_PATTERN = + ConfigBuilder("spark.redaction.regex") + .doc("Regex to decide which Spark configuration properties and environment variables in " + + "driver and executor environments contain sensitive information. When this regex matches " + + "a property, its value is redacted from the environment UI and various logs like YARN " + + "and event logs.") + .stringConf + .createWithDefault("(?i)secret|password") } diff --git a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala index ce7877469f03..f39565edd235 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala @@ -153,7 +153,9 @@ private[spark] class EventLoggingListener( override def onTaskEnd(event: SparkListenerTaskEnd): Unit = logEvent(event) - override def onEnvironmentUpdate(event: SparkListenerEnvironmentUpdate): Unit = logEvent(event) + override def onEnvironmentUpdate(event: SparkListenerEnvironmentUpdate): Unit = { + logEvent(redactEvent(event)) + } // Events that trigger a flush override def onStageCompleted(event: SparkListenerStageCompleted): Unit = { @@ -231,6 +233,15 @@ private[spark] class EventLoggingListener( } } + private[spark] def redactEvent( + event: SparkListenerEnvironmentUpdate): SparkListenerEnvironmentUpdate = { + // "Spark Properties" entry will always exist because the map is always populated with it. + val redactedProps = Utils.redact(sparkConf, event.environmentDetails("Spark Properties")) + val redactedEnvironmentDetails = event.environmentDetails + + ("Spark Properties" -> redactedProps) + SparkListenerEnvironmentUpdate(redactedEnvironmentDetails) + } + } private[spark] object EventLoggingListener extends Logging { diff --git a/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala index 9f6e9a6c9037..b11f8f1555f1 100644 --- a/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala @@ -22,21 +22,17 @@ import javax.servlet.http.HttpServletRequest import scala.xml.Node import org.apache.spark.ui.{UIUtils, WebUIPage} +import org.apache.spark.util.Utils private[ui] class EnvironmentPage(parent: EnvironmentTab) extends WebUIPage("") { private val listener = parent.listener - private def removePass(kv: (String, String)): (String, String) = { - if (kv._1.toLowerCase.contains("password") || kv._1.toLowerCase.contains("secret")) { - (kv._1, "******") - } else kv - } - def render(request: HttpServletRequest): Seq[Node] = { val runtimeInformationTable = UIUtils.listingTable( propertyHeader, jvmRow, listener.jvmInformation, fixedWidth = true) - val sparkPropertiesTable = UIUtils.listingTable( - propertyHeader, propertyRow, listener.sparkProperties.map(removePass), fixedWidth = true) + val sparkPropertiesTable = UIUtils.listingTable(propertyHeader, propertyRow, + Utils.redact(parent.conf, listener.sparkProperties), fixedWidth = true) + val systemPropertiesTable = UIUtils.listingTable( propertyHeader, propertyRow, listener.systemProperties, fixedWidth = true) val classpathEntriesTable = UIUtils.listingTable( diff --git a/core/src/main/scala/org/apache/spark/ui/env/EnvironmentTab.scala b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentTab.scala index f62260c6f6e1..70b3ffd95e60 100644 --- a/core/src/main/scala/org/apache/spark/ui/env/EnvironmentTab.scala +++ b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentTab.scala @@ -23,6 +23,7 @@ import org.apache.spark.ui._ private[ui] class EnvironmentTab(parent: SparkUI) extends SparkUITab(parent, "environment") { val listener = parent.environmentListener + val conf = parent.conf attachPage(new EnvironmentPage(this)) } diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 748d729554fc..fa5c8a2c18d0 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -55,7 +55,7 @@ import org.slf4j.Logger import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.{DYN_ALLOCATION_INITIAL_EXECUTORS, DYN_ALLOCATION_MIN_EXECUTORS, EXECUTOR_INSTANCES} +import org.apache.spark.internal.config._ import org.apache.spark.network.util.JavaUtils import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance} import org.apache.spark.util.logging.RollingFileAppender @@ -2555,6 +2555,18 @@ private[spark] object Utils extends Logging { sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten } } + + private[util] val REDACTION_REPLACEMENT_TEXT = "*********(redacted)" + + def redact(conf: SparkConf, kvs: Seq[(String, String)]): Seq[(String, String)] = { + val redactionPattern = conf.get(SECRET_REDACTION_PATTERN).r + kvs.map { kv => + redactionPattern.findFirstIn(kv._1) + .map { ignore => (kv._1, REDACTION_REPLACEMENT_TEXT) } + .getOrElse(kv) + } + } + } private[util] object CallerContext extends Logging { diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index 8a5ec37eeb66..230e2c34d0d6 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -95,6 +95,18 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit } } + test("Event logging with password redaction") { + val key = "spark.executorEnv.HADOOP_CREDSTORE_PASSWORD" + val secretPassword = "secret_password" + val conf = getLoggingConf(testDirPath, None) + .set(key, secretPassword) + val eventLogger = new EventLoggingListener("test", None, testDirPath.toUri(), conf) + val envDetails = SparkEnv.environmentDetails(conf, "FIFO", Seq.empty, Seq.empty) + val event = SparkListenerEnvironmentUpdate(envDetails) + val redactedProps = eventLogger.redactEvent(event).environmentDetails("Spark Properties").toMap + assert(redactedProps(key) == "*********(redacted)") + } + test("Log overwriting") { val logUri = EventLoggingListener.getLogPath(testDir.toURI, "test", None) val logPath = new URI(logUri).getPath diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index feacfb7642f2..fb7b91222b49 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -974,4 +974,24 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { assert(pValue > threshold) } + + test("redact sensitive information") { + val sparkConf = new SparkConf + + // Set some secret keys + val secretKeys = Seq( + "spark.executorEnv.HADOOP_CREDSTORE_PASSWORD", + "spark.my.password", + "spark.my.sECreT") + secretKeys.foreach { key => sparkConf.set(key, "secret_password") } + // Set a non-secret key + sparkConf.set("spark.regular.property", "not_a_secret") + + // Redact sensitive information + val redactedConf = Utils.redact(sparkConf, sparkConf.getAll).toMap + + // Assert that secret information got redacted while the regular property remained the same + secretKeys.foreach { key => assert(redactedConf(key) === Utils.REDACTION_REPLACEMENT_TEXT) } + assert(redactedConf("spark.regular.property") === "not_a_secret") + } } diff --git a/docs/configuration.md b/docs/configuration.md index a3b4ff01e6d9..aa201c6b6a7e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -356,6 +356,15 @@ Apart from these, the following properties are also available, and may be useful process. The user can specify multiple of these to set multiple environment variables. + + spark.redaction.regex + (?i)secret|password + + Regex to decide which Spark configuration properties and environment variables in driver and + executor environments contain sensitive information. When this regex matches a property, its + value is redacted from the environment UI and various logs like YARN and event logs. + + spark.python.profile false diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala index 8e0533f39ae5..868c2edc5a46 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala @@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.ipc.YarnRPC import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import org.apache.spark.{SecurityManager, SparkConf, SparkException} -import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.launcher.YarnCommandBuilderUtils @@ -75,7 +74,7 @@ private[yarn] class ExecutorRunnable( |=============================================================================== |YARN executor launch context: | env: - |${env.map { case (k, v) => s" $k -> $v\n" }.mkString} + |${Utils.redact(sparkConf, env.toSeq).map { case (k, v) => s" $k -> $v\n" }.mkString} | command: | ${commands.mkString(" \\ \n ")} |