diff --git a/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java b/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java
index bf8adb70637e2..32dd8f1f26b58 100644
--- a/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java
+++ b/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java
@@ -28,6 +28,49 @@
import org.slf4j.Logger;
// checkstyle.on: RegexpSinglelineJava
+// checkstyle.off: RegexpSinglelineJava
+/**
+ * Guidelines for the Structured Logging Framework - Java Logging
+ *
+ *
+ * Use the `org.apache.spark.internal.SparkLoggerFactory` to get the logger instance in Java code:
+ * Getting Logger Instance:
+ * Instead of using `org.slf4j.LoggerFactory`, use `org.apache.spark.internal.SparkLoggerFactory`
+ * to ensure structured logging.
+ *
+ *
+ * import org.apache.spark.internal.SparkLogger;
+ * import org.apache.spark.internal.SparkLoggerFactory;
+ * private static final SparkLogger logger = SparkLoggerFactory.getLogger(JavaUtils.class);
+ *
+ *
+ * Logging Messages with Variables:
+ * When logging messages with variables, wrap all the variables with `MDC`s and they will be
+ * automatically added to the Mapped Diagnostic Context (MDC).
+ *
+ *
+ * import org.apache.spark.internal.LogKeys;
+ * import org.apache.spark.internal.MDC;
+ * logger.error("Unable to delete file for partition {}", MDC.of(LogKeys.PARTITION_ID$.MODULE$, i));
+ *
+ *
+ * Constant String Messages:
+ * For logging constant string messages, use the standard logging methods.
+ *
+ *
+ * logger.error("Failed to abort the writer after failing to write map output.", e);
+ *
+ *
+ * If you want to output logs in Java code through the structured logging framework,
+ * you can define a custom `LogKey` and use it in Java code as follows:
+ *
+ *
+ * // To add a `custom LogKey`, implement `LogKey`
+ * public static class CUSTOM_LOG_KEY implements LogKey { }
+ * import org.apache.spark.internal.MDC;
+ * logger.error("Unable to delete key {} for cache", MDC.of(CUSTOM_LOG_KEY, "key"));
+ */
+// checkstyle.on: RegexpSinglelineJava
public class SparkLogger {
private static final MessageFactory MESSAGE_FACTORY = ParameterizedMessageFactory.INSTANCE;
diff --git a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala
index 1f67a211c01fa..9cc93c4c87985 100644
--- a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala
+++ b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala
@@ -20,9 +20,37 @@ import java.util.Locale
/**
* All structured logging `keys` used in `MDC` must be extends `LogKey`
+ *
+ *
+ * `LogKey`s serve as identifiers for mapped diagnostic contexts (MDC) within logs.
+ * Follow these guidelines when adding a new LogKey:
+ *
+ * -
+ * Define all structured logging keys in `LogKey.scala`, and sort them alphabetically for
+ * ease of search.
+ *
+ * -
+ * Use `UPPER_SNAKE_CASE` for key names.
+ *
+ * -
+ * Key names should be both simple and broad, yet include specific identifiers like `STAGE_ID`,
+ * `TASK_ID`, and `JOB_ID` when needed for clarity. For instance, use `MAX_ATTEMPTS` as a
+ * general key instead of creating separate keys for each scenario such as
+ * `EXECUTOR_STATE_SYNC_MAX_ATTEMPTS` and `MAX_TASK_FAILURES`.
+ * This balances simplicity with the detail needed for effective logging.
+ *
+ * -
+ * Use abbreviations in names if they are widely understood,
+ * such as `APP_ID` for APPLICATION_ID, and `K8S` for KUBERNETES.
+ *
+ * -
+ * For time-related keys, use milliseconds as the unit of time.
+ *
+ *
*/
trait LogKey {
- val name: String = this.toString.toLowerCase(Locale.ROOT)
+ private lazy val _name: String = getClass.getSimpleName.stripSuffix("$").toLowerCase(Locale.ROOT)
+ def name: String = _name
}
/**
diff --git a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
index 2ea61358b6add..72c7cdfa62362 100644
--- a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
+++ b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
@@ -29,6 +29,44 @@ import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.internal.Logging.SparkShellLoggingFilter
import org.apache.spark.util.SparkClassUtils
+/**
+ * Guidelines for the Structured Logging Framework - Scala Logging
+ *
+ *
+ * Use the `org.apache.spark.internal.Logging` trait for logging in Scala code:
+ * Logging Messages with Variables:
+ * When logging a message with variables, wrap all the variables with `MDC`s and they will be
+ * automatically added to the Mapped Diagnostic Context (MDC).
+ * This allows for structured logging and better log analysis.
+ *
+ *
+ * logInfo(log"Trying to recover app: ${MDC(LogKeys.APP_ID, app.id)}")
+ *
+ *
+ * Constant String Messages:
+ * If you are logging a constant string message, use the log methods that accept a constant
+ * string.
+ *
+ *
+ * logInfo("StateStore stopped")
+ *
+ *
+ * Exceptions:
+ * To ensure logs are compatible with Spark SQL and log analysis tools, avoid
+ * `Exception.printStackTrace()`. Use `logError`, `logWarning`, and `logInfo` methods from
+ * the `Logging` trait to log exceptions, maintaining structured and parsable logs.
+ *
+ *
+ * If you want to output logs in Scala code through the structured logging framework,
+ * you can define a custom `LogKey` and use it in Scala code as follows:
+ *
+ *
+ * // To add a `custom LogKey`, extend the `LogKey` trait
+ * case object CUSTOM_LOG_KEY extends LogKey
+ * import org.apache.spark.internal.MDC
+ * logInfo(log"${MDC(CUSTOM_LOG_KEY, "key")}")
+ */
+
/**
* Mapped Diagnostic Context (MDC) that will be used in log messages.
* The values of the MDC will be inline in the log message, while the key-value pairs will be
diff --git a/common/utils/src/main/scala/org/apache/spark/internal/README.md b/common/utils/src/main/scala/org/apache/spark/internal/README.md
deleted file mode 100644
index 28d2794851870..0000000000000
--- a/common/utils/src/main/scala/org/apache/spark/internal/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# Guidelines for the Structured Logging Framework
-
-## Scala Logging
-Use the `org.apache.spark.internal.Logging` trait for logging in Scala code:
-* **Logging Messages with Variables**: When logging a message with variables, wrap all the variables with `MDC`s and they will be automatically added to the Mapped Diagnostic Context (MDC). This allows for structured logging and better log analysis.
-```scala
-logInfo(log"Trying to recover app: ${MDC(LogKeys.APP_ID, app.id)}")
-```
-* **Constant String Messages**: If you are logging a constant string message, use the log methods that accept a constant string.
-```scala
-logInfo("StateStore stopped")
-```
-
-## Java Logging
-Use the `org.apache.spark.internal.SparkLoggerFactory` to get the logger instance in Java code:
-* **Getting Logger Instance**: Instead of using `org.slf4j.LoggerFactory`, use `org.apache.spark.internal.SparkLoggerFactory` to ensure structured logging.
-```java
-import org.apache.spark.internal.SparkLogger;
-import org.apache.spark.internal.SparkLoggerFactory;
-
-private static final SparkLogger logger = SparkLoggerFactory.getLogger(JavaUtils.class);
-```
-* **Logging Messages with Variables**: When logging messages with variables, wrap all the variables with `MDC`s and they will be automatically added to the Mapped Diagnostic Context (MDC).
-```java
-import org.apache.spark.internal.LogKeys;
-import org.apache.spark.internal.MDC;
-
-logger.error("Unable to delete file for partition {}", MDC.of(LogKeys.PARTITION_ID$.MODULE$, i));
-```
-
-* **Constant String Messages**: For logging constant string messages, use the standard logging methods.
-```java
-logger.error("Failed to abort the writer after failing to write map output.", e);
-```
-
-## LogKey
-
-`LogKey`s serve as identifiers for mapped diagnostic contexts (MDC) within logs. Follow these guidelines when adding a new LogKey:
-* Define all structured logging keys in `LogKey.scala`, and sort them alphabetically for ease of search.
-* Use `UPPER_SNAKE_CASE` for key names.
-* Key names should be both simple and broad, yet include specific identifiers like `STAGE_ID`, `TASK_ID`, and `JOB_ID` when needed for clarity. For instance, use `MAX_ATTEMPTS` as a general key instead of creating separate keys for each scenario such as `EXECUTOR_STATE_SYNC_MAX_ATTEMPTS` and `MAX_TASK_FAILURES`. This balances simplicity with the detail needed for effective logging.
-* Use abbreviations in names if they are widely understood, such as `APP_ID` for APPLICATION_ID, and `K8S` for KUBERNETES.
-* For time-related keys, use milliseconds as the unit of time.
-
-## Exceptions
-
-To ensure logs are compatible with Spark SQL and log analysis tools, avoid `Exception.printStackTrace()`. Use `logError`, `logWarning`, and `logInfo` methods from the `Logging` trait to log exceptions, maintaining structured and parsable logs.
diff --git a/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java b/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java
index 2d370bad4cc80..1d2e6d76a7590 100644
--- a/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java
+++ b/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java
@@ -84,7 +84,12 @@ String expectedPatternForMsgWithMDCValueIsNull(Level level) {
}
@Override
- String expectedPatternForExternalSystemCustomLogKey(Level level) {
- return toRegexPattern(level, ".* : External system custom log message.\n");
+ String expectedPatternForScalaCustomLogKey(Level level) {
+ return toRegexPattern(level, ".* : Scala custom log message.\n");
+ }
+
+ @Override
+ String expectedPatternForJavaCustomLogKey(Level level) {
+ return toRegexPattern(level, ".* : Java custom log message.\n");
}
}
diff --git a/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java b/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java
index 0869f9827324d..90677b521640f 100644
--- a/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java
+++ b/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java
@@ -26,9 +26,10 @@
import org.apache.logging.log4j.Level;
import org.junit.jupiter.api.Test;
-import org.apache.spark.internal.SparkLogger;
+import org.apache.spark.internal.LogKey;
import org.apache.spark.internal.LogKeys;
import org.apache.spark.internal.MDC;
+import org.apache.spark.internal.SparkLogger;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -68,8 +69,11 @@ private String basicMsg() {
private final MDC executorIDMDCValueIsNull = MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, null);
- private final MDC externalSystemCustomLog =
- MDC.of(CustomLogKeys.CUSTOM_LOG_KEY$.MODULE$, "External system custom log message.");
+ private final MDC scalaCustomLogMDC =
+ MDC.of(CustomLogKeys.CUSTOM_LOG_KEY$.MODULE$, "Scala custom log message.");
+
+ private final MDC javaCustomLogMDC =
+ MDC.of(JavaCustomLogKeys.CUSTOM_LOG_KEY, "Java custom log message.");
// test for basic message (without any mdc)
abstract String expectedPatternForBasicMsg(Level level);
@@ -89,8 +93,11 @@ private String basicMsg() {
// test for message (with mdc - the value is null)
abstract String expectedPatternForMsgWithMDCValueIsNull(Level level);
- // test for external system custom LogKey
- abstract String expectedPatternForExternalSystemCustomLogKey(Level level);
+ // test for scala custom LogKey
+ abstract String expectedPatternForScalaCustomLogKey(Level level);
+
+ // test for java custom LogKey
+ abstract String expectedPatternForJavaCustomLogKey(Level level);
@Test
public void testBasicMsgLogger() {
@@ -142,8 +149,6 @@ public void testLoggerWithMDC() {
Runnable errorFn = () -> logger().error(msgWithMDC, executorIDMDC);
Runnable warnFn = () -> logger().warn(msgWithMDC, executorIDMDC);
Runnable infoFn = () -> logger().info(msgWithMDC, executorIDMDC);
- Runnable debugFn = () -> logger().debug(msgWithMDC, executorIDMDC);
- Runnable traceFn = () -> logger().trace(msgWithMDC, executorIDMDC);
List.of(
Pair.of(Level.ERROR, errorFn),
Pair.of(Level.WARN, warnFn),
@@ -213,20 +218,46 @@ public void testLoggerWithMDCValueIsNull() {
}
@Test
- public void testLoggerWithExternalSystemCustomLogKey() {
- Runnable errorFn = () -> logger().error("{}", externalSystemCustomLog);
- Runnable warnFn = () -> logger().warn("{}", externalSystemCustomLog);
- Runnable infoFn = () -> logger().info("{}", externalSystemCustomLog);
+ public void testLoggerWithScalaCustomLogKey() {
+ Runnable errorFn = () -> logger().error("{}", scalaCustomLogMDC);
+ Runnable warnFn = () -> logger().warn("{}", scalaCustomLogMDC);
+ Runnable infoFn = () -> logger().info("{}", scalaCustomLogMDC);
+ List.of(
+ Pair.of(Level.ERROR, errorFn),
+ Pair.of(Level.WARN, warnFn),
+ Pair.of(Level.INFO, infoFn)).forEach(pair -> {
+ try {
+ assertTrue(captureLogOutput(pair.getRight()).matches(
+ expectedPatternForScalaCustomLogKey(pair.getLeft())));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ @Test
+ public void testLoggerWithJavaCustomLogKey() {
+ Runnable errorFn = () -> logger().error("{}", javaCustomLogMDC);
+ Runnable warnFn = () -> logger().warn("{}", javaCustomLogMDC);
+ Runnable infoFn = () -> logger().info("{}", javaCustomLogMDC);
List.of(
Pair.of(Level.ERROR, errorFn),
Pair.of(Level.WARN, warnFn),
Pair.of(Level.INFO, infoFn)).forEach(pair -> {
try {
assertTrue(captureLogOutput(pair.getRight()).matches(
- expectedPatternForExternalSystemCustomLogKey(pair.getLeft())));
+ expectedPatternForJavaCustomLogKey(pair.getLeft())));
} catch (IOException e) {
throw new RuntimeException(e);
}
});
}
}
+
+class JavaCustomLogKeys {
+ // Custom `LogKey` must be `implements LogKey`
+ public static class CUSTOM_LOG_KEY implements LogKey { }
+
+ // Singleton
+ public static final CUSTOM_LOG_KEY CUSTOM_LOG_KEY = new CUSTOM_LOG_KEY();
+}
diff --git a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java
index 416f0b6172c00..ec19014e117ce 100644
--- a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java
+++ b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java
@@ -149,14 +149,28 @@ String expectedPatternForMsgWithMDCValueIsNull(Level level) {
}
@Override
- String expectedPatternForExternalSystemCustomLogKey(Level level) {
+ String expectedPatternForScalaCustomLogKey(Level level) {
return compactAndToRegexPattern(level, """
{
"ts": "",
"level": "",
- "msg": "External system custom log message.",
+ "msg": "Scala custom log message.",
"context": {
- "custom_log_key": "External system custom log message."
+ "custom_log_key": "Scala custom log message."
+ },
+ "logger": ""
+ }""");
+ }
+
+ @Override
+ String expectedPatternForJavaCustomLogKey(Level level) {
+ return compactAndToRegexPattern(level, """
+ {
+ "ts": "",
+ "level": "",
+ "msg": "Java custom log message.",
+ "context": {
+ "custom_log_key": "Java custom log message."
},
"logger": ""
}""");
diff --git a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala
index 3baa720f38a90..ab9803d83bf62 100644
--- a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala
+++ b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala
@@ -47,8 +47,8 @@ class PatternLoggingSuite extends LoggingSuiteBase with BeforeAndAfterAll {
override def expectedPatternForMsgWithMDCAndException(level: Level): String =
s""".*$level $className: Error in executor 1.\njava.lang.RuntimeException: OOM\n[\\s\\S]*"""
- override def expectedPatternForExternalSystemCustomLogKey(level: Level): String = {
- s""".*$level $className: External system custom log message.\n"""
+ override def expectedPatternForCustomLogKey(level: Level): String = {
+ s""".*$level $className: Custom log message.\n"""
}
override def verifyMsgWithConcat(level: Level, logOutput: String): Unit = {
diff --git a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala
index 2152b57524d72..694f06706421a 100644
--- a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala
+++ b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala
@@ -78,8 +78,8 @@ trait LoggingSuiteBase
// test for message and exception
def expectedPatternForMsgWithMDCAndException(level: Level): String
- // test for external system custom LogKey
- def expectedPatternForExternalSystemCustomLogKey(level: Level): String
+ // test for custom LogKey
+ def expectedPatternForCustomLogKey(level: Level): String
def verifyMsgWithConcat(level: Level, logOutput: String): Unit
@@ -146,18 +146,17 @@ trait LoggingSuiteBase
}
}
- private val externalSystemCustomLog =
- log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "External system custom log message.")}"
- test("Logging with external system custom LogKey") {
+ private val customLog = log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message.")}"
+ test("Logging with custom LogKey") {
Seq(
- (Level.ERROR, () => logError(externalSystemCustomLog)),
- (Level.WARN, () => logWarning(externalSystemCustomLog)),
- (Level.INFO, () => logInfo(externalSystemCustomLog)),
- (Level.DEBUG, () => logDebug(externalSystemCustomLog)),
- (Level.TRACE, () => logTrace(externalSystemCustomLog))).foreach {
+ (Level.ERROR, () => logError(customLog)),
+ (Level.WARN, () => logWarning(customLog)),
+ (Level.INFO, () => logInfo(customLog)),
+ (Level.DEBUG, () => logDebug(customLog)),
+ (Level.TRACE, () => logTrace(customLog))).foreach {
case (level, logFunc) =>
val logOutput = captureLogOutput(logFunc)
- assert(expectedPatternForExternalSystemCustomLogKey(level).r.matches(logOutput))
+ assert(expectedPatternForCustomLogKey(level).r.matches(logOutput))
}
}
@@ -261,15 +260,15 @@ class StructuredLoggingSuite extends LoggingSuiteBase {
}""")
}
- override def expectedPatternForExternalSystemCustomLogKey(level: Level): String = {
+ override def expectedPatternForCustomLogKey(level: Level): String = {
compactAndToRegexPattern(
s"""
{
"ts": "",
"level": "$level",
- "msg": "External system custom log message.",
+ "msg": "Custom log message.",
"context": {
- "custom_log_key": "External system custom log message."
+ "custom_log_key": "Custom log message."
},
"logger": "$className"
}"""
@@ -307,6 +306,6 @@ class StructuredLoggingSuite extends LoggingSuiteBase {
}
object CustomLogKeys {
- // External system custom LogKey must be `extends LogKey`
+ // Custom `LogKey` must be `extends LogKey`
case object CUSTOM_LOG_KEY extends LogKey
}