From cc09f86b779f41cfa9f08e7a41198ccc0c4e61c9 Mon Sep 17 00:00:00 2001 From: Srinivasa Reddy Vundela Date: Wed, 14 Oct 2015 12:04:41 -0700 Subject: [PATCH 01/11] [SPARK-11105] Distribute log4j.properties to executors --- .../scala/org/apache/spark/deploy/yarn/Client.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 9fcfe362a3ba2..714fda42f92ae 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -340,6 +340,14 @@ private[spark] class Client( "for alternatives.") } + // Uploading $SPARK_CONF_DIR/log4j.properties file to the distributed cache to make sure that + // the executor's will use the latest configurations instead of the default values. This is + // required when user changes log4j.properties directly or use's UI (e.g., Cloudera Manager) to + // set the log configurations. If configuration file is provided through --files then executors + // will be taking configurations from --files instead of $SPARK_CONF_DIR/log4j.properties. + val log4jConf = + oldLog4jConf.orElse(Option(getClass.getResource("/log4j.properties").toString())) + def addDistributedUri(uri: URI): Boolean = { val uriStr = uri.toString() if (distributedUris.contains(uriStr)) { @@ -415,7 +423,7 @@ private[spark] class Client( List( (SPARK_JAR, sparkJar(sparkConf), CONF_SPARK_JAR), (APP_JAR, args.userJar, CONF_SPARK_USER_JAR), - ("log4j.properties", oldLog4jConf.orNull, null) + ("log4j.properties", log4jConf.orNull, null) ).foreach { case (destName, path, confKey) => if (path != null && !path.trim().isEmpty()) { val (isLocal, localizedPath) = distribute(path, destName = Some(destName)) From 2772c1d392724dfefe8f0be725703b241f9db59f Mon Sep 17 00:00:00 2001 From: Srinivasa Reddy Vundela Date: Wed, 14 Oct 2015 12:47:09 -0700 Subject: [PATCH 02/11] Changing vocabulary --- .../main/scala/org/apache/spark/deploy/yarn/Client.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 714fda42f92ae..94b3c8de2db98 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -341,10 +341,10 @@ private[spark] class Client( } // Uploading $SPARK_CONF_DIR/log4j.properties file to the distributed cache to make sure that - // the executor's will use the latest configurations instead of the default values. This is - // required when user changes log4j.properties directly or use's UI (e.g., Cloudera Manager) to - // set the log configurations. If configuration file is provided through --files then executors - // will be taking configurations from --files instead of $SPARK_CONF_DIR/log4j.properties. + // the executors will use the latest configurations instead of the default values. This is + // required when user changes log4j.properties directly to set the log configurations. If + // configuration file is provided through --files then executors will be taking configurations + // from --files instead of $SPARK_CONF_DIR/log4j.properties. 
val log4jConf = oldLog4jConf.orElse(Option(getClass.getResource("/log4j.properties").toString())) From 4ffcb3acf766fae623f8521eebc5c0dbe9d41fed Mon Sep 17 00:00:00 2001 From: Srinivasa Reddy Vundela Date: Wed, 14 Oct 2015 14:17:47 -0700 Subject: [PATCH 03/11] Fixing the possible NullPointerIssue --- yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 94b3c8de2db98..a65113393475b 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -345,8 +345,9 @@ private[spark] class Client( // required when user changes log4j.properties directly to set the log configurations. If // configuration file is provided through --files then executors will be taking configurations // from --files instead of $SPARK_CONF_DIR/log4j.properties. + val log4jConfUrl = getClass.getResource("/log4j.properties"); val log4jConf = - oldLog4jConf.orElse(Option(getClass.getResource("/log4j.properties").toString())) + oldLog4jConf.orElse(Option(if (log4jConfUrl == null ) null else log4jConfUrl.toString()) def addDistributedUri(uri: URI): Boolean = { val uriStr = uri.toString() From f007acfa44586bd693c805c9d8c59124a7b11fa8 Mon Sep 17 00:00:00 2001 From: Srinivasa Reddy Vundela Date: Wed, 14 Oct 2015 14:22:02 -0700 Subject: [PATCH 04/11] -a --- yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index a65113393475b..24d1364e92fd3 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -345,9 +345,9 @@ private[spark] class Client( // required when user changes log4j.properties directly to set the log configurations. If // configuration file is provided through --files then executors will be taking configurations // from --files instead of $SPARK_CONF_DIR/log4j.properties. - val log4jConfUrl = getClass.getResource("/log4j.properties"); + val log4jConfUrl = getClass.getResource("/log4j.properties") val log4jConf = - oldLog4jConf.orElse(Option(if (log4jConfUrl == null ) null else log4jConfUrl.toString()) + oldLog4jConf.orElse(Option(if (log4jConfUrl == null) null else log4jConfUrl.toString()) def addDistributedUri(uri: URI): Boolean = { val uriStr = uri.toString() From 85aecef5604644c0eb703c054ede58d0a297b404 Mon Sep 17 00:00:00 2001 From: Srinivasa Reddy Vundela Date: Wed, 14 Oct 2015 14:25:59 -0700 Subject: [PATCH 05/11] -a --- yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 24d1364e92fd3..0b18dc3c86607 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -347,7 +347,7 @@ private[spark] class Client( // from --files instead of $SPARK_CONF_DIR/log4j.properties. 
val log4jConfUrl = getClass.getResource("/log4j.properties") val log4jConf = - oldLog4jConf.orElse(Option(if (log4jConfUrl == null) null else log4jConfUrl.toString()) + oldLog4jConf.orElse(Option(if (log4jConfUrl == null) null else log4jConfUrl.toString())) def addDistributedUri(uri: URI): Boolean = { val uriStr = uri.toString() From 9669a32f83c48e4cd1c1b7a7d39aad7737be68a2 Mon Sep 17 00:00:00 2001 From: vundela Date: Wed, 14 Oct 2015 16:24:46 -0700 Subject: [PATCH 06/11] Making changes as suggested by Marcelo --- .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 0b18dc3c86607..e255c458c9244 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -345,9 +345,8 @@ private[spark] class Client( // required when user changes log4j.properties directly to set the log configurations. If // configuration file is provided through --files then executors will be taking configurations // from --files instead of $SPARK_CONF_DIR/log4j.properties. - val log4jConfUrl = getClass.getResource("/log4j.properties") - val log4jConf = - oldLog4jConf.orElse(Option(if (log4jConfUrl == null) null else log4jConfUrl.toString())) + val log4jConf = oldLog4jConf.orElse( + Option(Utils.getContextOrSparkClassLoader.getResource("/log4j.properties")).map(_.toString)) def addDistributedUri(uri: URI): Boolean = { val uriStr = uri.toString() From 2e34edc5be4036cd702731081096bcf8aa1a35d6 Mon Sep 17 00:00:00 2001 From: vundela Date: Mon, 19 Oct 2015 08:54:34 -0700 Subject: [PATCH 07/11] Uploading log4j conf through archived files --- .../org/apache/spark/deploy/yarn/Client.scala | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index e255c458c9244..f7c9cbef3b34a 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -340,14 +340,6 @@ private[spark] class Client( "for alternatives.") } - // Uploading $SPARK_CONF_DIR/log4j.properties file to the distributed cache to make sure that - // the executors will use the latest configurations instead of the default values. This is - // required when user changes log4j.properties directly to set the log configurations. If - // configuration file is provided through --files then executors will be taking configurations - // from --files instead of $SPARK_CONF_DIR/log4j.properties. 
- val log4jConf = oldLog4jConf.orElse( - Option(Utils.getContextOrSparkClassLoader.getResource("/log4j.properties")).map(_.toString)) - def addDistributedUri(uri: URI): Boolean = { val uriStr = uri.toString() if (distributedUris.contains(uriStr)) { @@ -423,7 +415,7 @@ private[spark] class Client( List( (SPARK_JAR, sparkJar(sparkConf), CONF_SPARK_JAR), (APP_JAR, args.userJar, CONF_SPARK_USER_JAR), - ("log4j.properties", log4jConf.orNull, null) + ("log4j.properties", oldLog4jConf.orNull, null) ).foreach { case (destName, path, confKey) => if (path != null && !path.trim().isEmpty()) { val (isLocal, localizedPath) = distribute(path, destName = Some(destName)) @@ -505,6 +497,20 @@ private[spark] class Client( */ private def createConfArchive(): File = { val hadoopConfFiles = new HashMap[String, File]() + + // Uploading $SPARK_CONF_DIR/log4j.properties file to the distributed cache to make sure that + // the executors will use the latest configurations instead of the default values. This is + // required when user changes log4j.properties directly to set the log configurations. If + // configuration file is provided through --files then executors will be taking configurations + // from --files instead of $SPARK_CONF_DIR/log4j.properties. + Option(Utils.getContextOrSparkClassLoader.getResource("log4j.properties")) + .map(_.getPath).map(path => { + val file = new File(path) + if(file.isFile && file.canRead) { + hadoopConfFiles(file.getName) = file + } + }) + Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR").foreach { envKey => sys.env.get(envKey).foreach { path => val dir = new File(path) From 187eb99def45670285047c51ad8269a998fb8ed9 Mon Sep 17 00:00:00 2001 From: vundela Date: Mon, 19 Oct 2015 08:58:59 -0700 Subject: [PATCH 08/11] Indentation --- .../scala/org/apache/spark/deploy/yarn/Client.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index f7c9cbef3b34a..462b3ddb2e5f4 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -505,11 +505,11 @@ private[spark] class Client( // from --files instead of $SPARK_CONF_DIR/log4j.properties. Option(Utils.getContextOrSparkClassLoader.getResource("log4j.properties")) .map(_.getPath).map(path => { - val file = new File(path) - if(file.isFile && file.canRead) { - hadoopConfFiles(file.getName) = file - } - }) + val file = new File(path) + if(file.isFile && file.canRead) { + hadoopConfFiles(file.getName) = file + } + }) Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR").foreach { envKey => sys.env.get(envKey).foreach { path => From 39c75a2c73cc49dda6c2450d1be9b96b2d71023e Mon Sep 17 00:00:00 2001 From: vundela Date: Mon, 19 Oct 2015 15:12:39 -0700 Subject: [PATCH 09/11] Uploading documentation to reflect the new behaviour --- docs/running-on-yarn.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 677c0000440ac..db6bfa69ee0fe 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -81,7 +81,7 @@ all environment variables used for launching each container. This process is use classpath problems in particular. (Note that enabling this requires admin privileges on cluster settings and a restart of all node managers. Thus, this is not applicable to hosted clusters). 
-To use a custom log4j configuration for the application master or executors, there are two options: +To use a custom log4j configuration for the application master or executors, here are the options: - upload a custom `log4j.properties` using `spark-submit`, by adding it to the `--files` list of files to be uploaded with the application. @@ -89,6 +89,9 @@ To use a custom log4j configuration for the application master or executors, the (for the driver) or `spark.executor.extraJavaOptions` (for executors). Note that if using a file, the `file:` protocol should be explicitly provided, and the file needs to exist locally on all the nodes. +- update the `$SPARK_CONF_DIR/log4j.properties` file and it will be automatically uploaded along + with the other configurations. Note that other 2 options has higher priority than this option if + multiple options are specified. Note that for the first option, both executors and the application master will share the same log4j configuration, which may cause issues when they run on the same node (e.g. trying to write From e1e474aff26d2b2b7ba117885f96493e9cb724a0 Mon Sep 17 00:00:00 2001 From: vundela Date: Mon, 19 Oct 2015 16:32:06 -0700 Subject: [PATCH 10/11] Making changes as per suggestion --- .../scala/org/apache/spark/deploy/yarn/Client.scala | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 462b3ddb2e5f4..6672553c6ca4f 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -503,13 +503,12 @@ private[spark] class Client( // required when user changes log4j.properties directly to set the log configurations. If // configuration file is provided through --files then executors will be taking configurations // from --files instead of $SPARK_CONF_DIR/log4j.properties. - Option(Utils.getContextOrSparkClassLoader.getResource("log4j.properties")) - .map(_.getPath).map(path => { - val file = new File(path) - if(file.isFile && file.canRead) { - hadoopConfFiles(file.getName) = file - } - }) + val log4jFileName = "log4j.properties" + Option(Utils.getContextOrSparkClassLoader.getResource(log4jFileName)).foreach { + url => if (url.getProtocol == "file") { + hadoopConfFiles(log4jFileName) = new File(url.getPath) + } + } Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR").foreach { envKey => sys.env.get(envKey).foreach { path => From b97a2b5d8b1c477444d57dddccf6a30657008f96 Mon Sep 17 00:00:00 2001 From: vundela Date: Mon, 19 Oct 2015 19:00:14 -0700 Subject: [PATCH 11/11] Style change --- yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 6672553c6ca4f..824942c42dd86 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -504,8 +504,8 @@ private[spark] class Client( // configuration file is provided through --files then executors will be taking configurations // from --files instead of $SPARK_CONF_DIR/log4j.properties. 
val log4jFileName = "log4j.properties" - Option(Utils.getContextOrSparkClassLoader.getResource(log4jFileName)).foreach { - url => if (url.getProtocol == "file") { + Option(Utils.getContextOrSparkClassLoader.getResource(log4jFileName)).foreach { url => + if (url.getProtocol == "file") { hadoopConfFiles(log4jFileName) = new File(url.getPath) } }