diff --git a/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java b/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java index bf8adb70637e2..32dd8f1f26b58 100644 --- a/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java +++ b/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java @@ -28,6 +28,49 @@ import org.slf4j.Logger; // checkstyle.on: RegexpSinglelineJava +// checkstyle.off: RegexpSinglelineJava +/** + * Guidelines for the Structured Logging Framework - Java Logging + *

+ * + * Use the `org.apache.spark.internal.SparkLoggerFactory` to get the logger instance in Java code: + * Getting Logger Instance: + * Instead of using `org.slf4j.LoggerFactory`, use `org.apache.spark.internal.SparkLoggerFactory` + * to ensure structured logging. + *

+ * + * import org.apache.spark.internal.SparkLogger; + * import org.apache.spark.internal.SparkLoggerFactory; + * private static final SparkLogger logger = SparkLoggerFactory.getLogger(JavaUtils.class); + *

+ * + * Logging Messages with Variables: + * When logging messages with variables, wrap all the variables with `MDC`s and they will be + * automatically added to the Mapped Diagnostic Context (MDC). + *

+ * + * import org.apache.spark.internal.LogKeys; + * import org.apache.spark.internal.MDC; + * logger.error("Unable to delete file for partition {}", MDC.of(LogKeys.PARTITION_ID$.MODULE$, i)); + *

+ * + * Constant String Messages: + * For logging constant string messages, use the standard logging methods. + *

+ * + * logger.error("Failed to abort the writer after failing to write map output.", e); + *

+ *
+ * If you want to output structured logs from Java code with your own key, you can
+ * define a custom `LogKey` and use it as follows:
+ *

+ * + * // To add a `custom LogKey`, implement `LogKey` + * public static class CUSTOM_LOG_KEY implements LogKey { } + * import org.apache.spark.internal.MDC; + * logger.error("Unable to delete key {} for cache", MDC.of(CUSTOM_LOG_KEY, "key")); + */ +// checkstyle.on: RegexpSinglelineJava public class SparkLogger { private static final MessageFactory MESSAGE_FACTORY = ParameterizedMessageFactory.INSTANCE; diff --git a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala index 1f67a211c01fa..9cc93c4c87985 100644 --- a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala @@ -20,9 +20,37 @@ import java.util.Locale /** * All structured logging `keys` used in `MDC` must be extends `LogKey` + *

+ * + * `LogKey`s serve as identifiers for mapped diagnostic contexts (MDC) within logs. + * Follow these guidelines when adding a new LogKey: + *

*/ trait LogKey { - val name: String = this.toString.toLowerCase(Locale.ROOT) + private lazy val _name: String = getClass.getSimpleName.stripSuffix("$").toLowerCase(Locale.ROOT) + def name: String = _name } /** diff --git a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala index 2ea61358b6add..72c7cdfa62362 100644 --- a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala @@ -29,6 +29,44 @@ import org.slf4j.{Logger, LoggerFactory} import org.apache.spark.internal.Logging.SparkShellLoggingFilter import org.apache.spark.util.SparkClassUtils +/** + * Guidelines for the Structured Logging Framework - Scala Logging + *

+ * + * Use the `org.apache.spark.internal.Logging` trait for logging in Scala code: + * Logging Messages with Variables: + * When logging a message with variables, wrap all the variables with `MDC`s and they will be + * automatically added to the Mapped Diagnostic Context (MDC). + * This allows for structured logging and better log analysis. + *

+ * + * logInfo(log"Trying to recover app: ${MDC(LogKeys.APP_ID, app.id)}") + *

+ * + * Constant String Messages: + * If you are logging a constant string message, use the log methods that accept a constant + * string. + *

+ * + * logInfo("StateStore stopped") + *

+ * + * Exceptions: + * To ensure logs are compatible with Spark SQL and log analysis tools, avoid + * `Exception.printStackTrace()`. Use `logError`, `logWarning`, and `logInfo` methods from + * the `Logging` trait to log exceptions, maintaining structured and parsable logs. + *

+ *
+ * If you want to output structured logs from Scala code with your own key, you can
+ * define a custom `LogKey` and use it as follows:
+ *

+ * + * // To add a `custom LogKey`, implement `LogKey` + * case object CUSTOM_LOG_KEY extends LogKey + * import org.apache.spark.internal.MDC; + * logInfo(log"${MDC(CUSTOM_LOG_KEY, "key")}") + */ + /** * Mapped Diagnostic Context (MDC) that will be used in log messages. * The values of the MDC will be inline in the log message, while the key-value pairs will be diff --git a/common/utils/src/main/scala/org/apache/spark/internal/README.md b/common/utils/src/main/scala/org/apache/spark/internal/README.md deleted file mode 100644 index 28d2794851870..0000000000000 --- a/common/utils/src/main/scala/org/apache/spark/internal/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# Guidelines for the Structured Logging Framework - -## Scala Logging -Use the `org.apache.spark.internal.Logging` trait for logging in Scala code: -* **Logging Messages with Variables**: When logging a message with variables, wrap all the variables with `MDC`s and they will be automatically added to the Mapped Diagnostic Context (MDC). This allows for structured logging and better log analysis. -```scala -logInfo(log"Trying to recover app: ${MDC(LogKeys.APP_ID, app.id)}") -``` -* **Constant String Messages**: If you are logging a constant string message, use the log methods that accept a constant string. -```scala -logInfo("StateStore stopped") -``` - -## Java Logging -Use the `org.apache.spark.internal.SparkLoggerFactory` to get the logger instance in Java code: -* **Getting Logger Instance**: Instead of using `org.slf4j.LoggerFactory`, use `org.apache.spark.internal.SparkLoggerFactory` to ensure structured logging. -```java -import org.apache.spark.internal.SparkLogger; -import org.apache.spark.internal.SparkLoggerFactory; - -private static final SparkLogger logger = SparkLoggerFactory.getLogger(JavaUtils.class); -``` -* **Logging Messages with Variables**: When logging messages with variables, wrap all the variables with `MDC`s and they will be automatically added to the Mapped Diagnostic Context (MDC). -```java -import org.apache.spark.internal.LogKeys; -import org.apache.spark.internal.MDC; - -logger.error("Unable to delete file for partition {}", MDC.of(LogKeys.PARTITION_ID$.MODULE$, i)); -``` - -* **Constant String Messages**: For logging constant string messages, use the standard logging methods. -```java -logger.error("Failed to abort the writer after failing to write map output.", e); -``` - -## LogKey - -`LogKey`s serve as identifiers for mapped diagnostic contexts (MDC) within logs. Follow these guidelines when adding a new LogKey: -* Define all structured logging keys in `LogKey.scala`, and sort them alphabetically for ease of search. -* Use `UPPER_SNAKE_CASE` for key names. -* Key names should be both simple and broad, yet include specific identifiers like `STAGE_ID`, `TASK_ID`, and `JOB_ID` when needed for clarity. For instance, use `MAX_ATTEMPTS` as a general key instead of creating separate keys for each scenario such as `EXECUTOR_STATE_SYNC_MAX_ATTEMPTS` and `MAX_TASK_FAILURES`. This balances simplicity with the detail needed for effective logging. -* Use abbreviations in names if they are widely understood, such as `APP_ID` for APPLICATION_ID, and `K8S` for KUBERNETES. -* For time-related keys, use milliseconds as the unit of time. - -## Exceptions - -To ensure logs are compatible with Spark SQL and log analysis tools, avoid `Exception.printStackTrace()`. Use `logError`, `logWarning`, and `logInfo` methods from the `Logging` trait to log exceptions, maintaining structured and parsable logs. 
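Putting the Scala guidelines above (and the deleted README they replace) together, a typical call site looks like the sketch below. This is an illustrative sketch rather than code from this patch: `RecoveryService`, its methods, the `RECOVERY_SOURCE` key, and the `"zookeeper"` value are hypothetical, and it assumes the `log` string interpolator and `MDC` helper shown in the guidelines are in scope for classes that mix in `org.apache.spark.internal.Logging`.

```scala
import org.apache.spark.internal.{LogKey, LogKeys, Logging, MDC}

// Custom key, declared the way the guidelines describe: a case object extending LogKey.
// With the new name derivation it renders as "recovery_source".
case object RECOVERY_SOURCE extends LogKey

// Hypothetical component used only to illustrate the logging guidelines.
class RecoveryService extends Logging {

  def recover(appId: String): Unit = {
    // Variable wrapped in MDC: inlined in the message text and, with the
    // structured appender, emitted under the "context" field.
    logInfo(log"Trying to recover app: ${MDC(LogKeys.APP_ID, appId)}")

    // A custom LogKey is used exactly like a predefined one.
    logInfo(log"Recovering state from ${MDC(RECOVERY_SOURCE, "zookeeper")}")

    // Constant string message: plain-string overload, no MDC needed.
    logInfo("StateStore stopped")
  }

  def onWriteFailure(e: Throwable): Unit = {
    // Exceptions go through the Logging methods rather than e.printStackTrace(),
    // so the stack trace stays part of the structured record.
    logError("Failed to abort the writer after failing to write map output.", e)
  }
}
```

With the `LogKey.name` change in this diff, `RECOVERY_SOURCE.name` resolves to `recovery_source`, so that is the key the structured JSON output would carry for the second message; the Java-side pattern in the `SparkLogger` Javadoc above is the direct analogue using `SparkLoggerFactory` and `MDC.of`.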
diff --git a/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java b/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java index 2d370bad4cc80..1d2e6d76a7590 100644 --- a/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java +++ b/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java @@ -84,7 +84,12 @@ String expectedPatternForMsgWithMDCValueIsNull(Level level) { } @Override - String expectedPatternForExternalSystemCustomLogKey(Level level) { - return toRegexPattern(level, ".* : External system custom log message.\n"); + String expectedPatternForScalaCustomLogKey(Level level) { + return toRegexPattern(level, ".* : Scala custom log message.\n"); + } + + @Override + String expectedPatternForJavaCustomLogKey(Level level) { + return toRegexPattern(level, ".* : Java custom log message.\n"); } } diff --git a/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java b/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java index 0869f9827324d..90677b521640f 100644 --- a/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java +++ b/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java @@ -26,9 +26,10 @@ import org.apache.logging.log4j.Level; import org.junit.jupiter.api.Test; -import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.LogKey; import org.apache.spark.internal.LogKeys; import org.apache.spark.internal.MDC; +import org.apache.spark.internal.SparkLogger; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -68,8 +69,11 @@ private String basicMsg() { private final MDC executorIDMDCValueIsNull = MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, null); - private final MDC externalSystemCustomLog = - MDC.of(CustomLogKeys.CUSTOM_LOG_KEY$.MODULE$, "External system custom log message."); + private final MDC scalaCustomLogMDC = + MDC.of(CustomLogKeys.CUSTOM_LOG_KEY$.MODULE$, "Scala custom log message."); + + private final MDC javaCustomLogMDC = + MDC.of(JavaCustomLogKeys.CUSTOM_LOG_KEY, "Java custom log message."); // test for basic message (without any mdc) abstract String expectedPatternForBasicMsg(Level level); @@ -89,8 +93,11 @@ private String basicMsg() { // test for message (with mdc - the value is null) abstract String expectedPatternForMsgWithMDCValueIsNull(Level level); - // test for external system custom LogKey - abstract String expectedPatternForExternalSystemCustomLogKey(Level level); + // test for scala custom LogKey + abstract String expectedPatternForScalaCustomLogKey(Level level); + + // test for java custom LogKey + abstract String expectedPatternForJavaCustomLogKey(Level level); @Test public void testBasicMsgLogger() { @@ -142,8 +149,6 @@ public void testLoggerWithMDC() { Runnable errorFn = () -> logger().error(msgWithMDC, executorIDMDC); Runnable warnFn = () -> logger().warn(msgWithMDC, executorIDMDC); Runnable infoFn = () -> logger().info(msgWithMDC, executorIDMDC); - Runnable debugFn = () -> logger().debug(msgWithMDC, executorIDMDC); - Runnable traceFn = () -> logger().trace(msgWithMDC, executorIDMDC); List.of( Pair.of(Level.ERROR, errorFn), Pair.of(Level.WARN, warnFn), @@ -213,20 +218,46 @@ public void testLoggerWithMDCValueIsNull() { } @Test - public void testLoggerWithExternalSystemCustomLogKey() { - Runnable errorFn = () -> logger().error("{}", externalSystemCustomLog); - Runnable warnFn = () -> logger().warn("{}", externalSystemCustomLog); - Runnable infoFn = () -> 
logger().info("{}", externalSystemCustomLog); + public void testLoggerWithScalaCustomLogKey() { + Runnable errorFn = () -> logger().error("{}", scalaCustomLogMDC); + Runnable warnFn = () -> logger().warn("{}", scalaCustomLogMDC); + Runnable infoFn = () -> logger().info("{}", scalaCustomLogMDC); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn)).forEach(pair -> { + try { + assertTrue(captureLogOutput(pair.getRight()).matches( + expectedPatternForScalaCustomLogKey(pair.getLeft()))); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + + @Test + public void testLoggerWithJavaCustomLogKey() { + Runnable errorFn = () -> logger().error("{}", javaCustomLogMDC); + Runnable warnFn = () -> logger().warn("{}", javaCustomLogMDC); + Runnable infoFn = () -> logger().info("{}", javaCustomLogMDC); List.of( Pair.of(Level.ERROR, errorFn), Pair.of(Level.WARN, warnFn), Pair.of(Level.INFO, infoFn)).forEach(pair -> { try { assertTrue(captureLogOutput(pair.getRight()).matches( - expectedPatternForExternalSystemCustomLogKey(pair.getLeft()))); + expectedPatternForJavaCustomLogKey(pair.getLeft()))); } catch (IOException e) { throw new RuntimeException(e); } }); } } + +class JavaCustomLogKeys { + // Custom `LogKey` must be `implements LogKey` + public static class CUSTOM_LOG_KEY implements LogKey { } + + // Singleton + public static final CUSTOM_LOG_KEY CUSTOM_LOG_KEY = new CUSTOM_LOG_KEY(); +} diff --git a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java index 416f0b6172c00..ec19014e117ce 100644 --- a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java +++ b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java @@ -149,14 +149,28 @@ String expectedPatternForMsgWithMDCValueIsNull(Level level) { } @Override - String expectedPatternForExternalSystemCustomLogKey(Level level) { + String expectedPatternForScalaCustomLogKey(Level level) { return compactAndToRegexPattern(level, """ { "ts": "", "level": "", - "msg": "External system custom log message.", + "msg": "Scala custom log message.", "context": { - "custom_log_key": "External system custom log message." + "custom_log_key": "Scala custom log message." + }, + "logger": "" + }"""); + } + + @Override + String expectedPatternForJavaCustomLogKey(Level level) { + return compactAndToRegexPattern(level, """ + { + "ts": "", + "level": "", + "msg": "Java custom log message.", + "context": { + "custom_log_key": "Java custom log message." 
}, "logger": "" }"""); diff --git a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala index 3baa720f38a90..ab9803d83bf62 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala @@ -47,8 +47,8 @@ class PatternLoggingSuite extends LoggingSuiteBase with BeforeAndAfterAll { override def expectedPatternForMsgWithMDCAndException(level: Level): String = s""".*$level $className: Error in executor 1.\njava.lang.RuntimeException: OOM\n[\\s\\S]*""" - override def expectedPatternForExternalSystemCustomLogKey(level: Level): String = { - s""".*$level $className: External system custom log message.\n""" + override def expectedPatternForCustomLogKey(level: Level): String = { + s""".*$level $className: Custom log message.\n""" } override def verifyMsgWithConcat(level: Level, logOutput: String): Unit = { diff --git a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala index 2152b57524d72..694f06706421a 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala @@ -78,8 +78,8 @@ trait LoggingSuiteBase // test for message and exception def expectedPatternForMsgWithMDCAndException(level: Level): String - // test for external system custom LogKey - def expectedPatternForExternalSystemCustomLogKey(level: Level): String + // test for custom LogKey + def expectedPatternForCustomLogKey(level: Level): String def verifyMsgWithConcat(level: Level, logOutput: String): Unit @@ -146,18 +146,17 @@ trait LoggingSuiteBase } } - private val externalSystemCustomLog = - log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "External system custom log message.")}" - test("Logging with external system custom LogKey") { + private val customLog = log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message.")}" + test("Logging with custom LogKey") { Seq( - (Level.ERROR, () => logError(externalSystemCustomLog)), - (Level.WARN, () => logWarning(externalSystemCustomLog)), - (Level.INFO, () => logInfo(externalSystemCustomLog)), - (Level.DEBUG, () => logDebug(externalSystemCustomLog)), - (Level.TRACE, () => logTrace(externalSystemCustomLog))).foreach { + (Level.ERROR, () => logError(customLog)), + (Level.WARN, () => logWarning(customLog)), + (Level.INFO, () => logInfo(customLog)), + (Level.DEBUG, () => logDebug(customLog)), + (Level.TRACE, () => logTrace(customLog))).foreach { case (level, logFunc) => val logOutput = captureLogOutput(logFunc) - assert(expectedPatternForExternalSystemCustomLogKey(level).r.matches(logOutput)) + assert(expectedPatternForCustomLogKey(level).r.matches(logOutput)) } } @@ -261,15 +260,15 @@ class StructuredLoggingSuite extends LoggingSuiteBase { }""") } - override def expectedPatternForExternalSystemCustomLogKey(level: Level): String = { + override def expectedPatternForCustomLogKey(level: Level): String = { compactAndToRegexPattern( s""" { "ts": "", "level": "$level", - "msg": "External system custom log message.", + "msg": "Custom log message.", "context": { - "custom_log_key": "External system custom log message." + "custom_log_key": "Custom log message." 
}, "logger": "$className" }""" @@ -307,6 +306,6 @@ class StructuredLoggingSuite extends LoggingSuiteBase { } object CustomLogKeys { - // External system custom LogKey must be `extends LogKey` + // Custom `LogKey` must be `extends LogKey` case object CUSTOM_LOG_KEY extends LogKey }