Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,49 @@
import org.slf4j.Logger;
// checkstyle.on: RegexpSinglelineJava

// checkstyle.off: RegexpSinglelineJava
/**
* Guidelines for the Structured Logging Framework - Java Logging
* <p>
*
* Use the `org.apache.spark.internal.SparkLoggerFactory` to get the logger instance in Java code:
* Getting Logger Instance:
* Instead of using `org.slf4j.LoggerFactory`, use `org.apache.spark.internal.SparkLoggerFactory`
* to ensure structured logging.
* <p>
*
* import org.apache.spark.internal.SparkLogger;
* import org.apache.spark.internal.SparkLoggerFactory;
* private static final SparkLogger logger = SparkLoggerFactory.getLogger(JavaUtils.class);
* <p>
*
* Logging Messages with Variables:
* When logging messages with variables, wrap all the variables with `MDC`s and they will be
* automatically added to the Mapped Diagnostic Context (MDC).
* <p>
*
* import org.apache.spark.internal.LogKeys;
* import org.apache.spark.internal.MDC;
* logger.error("Unable to delete file for partition {}", MDC.of(LogKeys.PARTITION_ID$.MODULE$, i));
* <p>
*
* Constant String Messages:
* For logging constant string messages, use the standard logging methods.
* <p>
*
* logger.error("Failed to abort the writer after failing to write map output.", e);
* <p>
*
* If you want to output logs in `java code` through the structured log framework,
* you can define `custom LogKey` and use it in `java` code as follows:
* <p>
*
* // To add a `custom LogKey`, implement `LogKey`
* public static class CUSTOM_LOG_KEY implements LogKey { }
* import org.apache.spark.internal.MDC;
* logger.error("Unable to delete key {} for cache", MDC.of(CUSTOM_LOG_KEY, "key"));
*/
// checkstyle.on: RegexpSinglelineJava
public class SparkLogger {

private static final MessageFactory MESSAGE_FACTORY = ParameterizedMessageFactory.INSTANCE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,37 @@ import java.util.Locale

/**
* All structured logging `keys` used in `MDC` must be extends `LogKey`
* <p>
*
* `LogKey`s serve as identifiers for mapped diagnostic contexts (MDC) within logs.
* Follow these guidelines when adding a new LogKey:
* <ul>
* <li>
* Define all structured logging keys in `LogKey.scala`, and sort them alphabetically for
* ease of search.
* </li>
* <li>
* Use `UPPER_SNAKE_CASE` for key names.
* </li>
* <li>
* Key names should be both simple and broad, yet include specific identifiers like `STAGE_ID`,
* `TASK_ID`, and `JOB_ID` when needed for clarity. For instance, use `MAX_ATTEMPTS` as a
* general key instead of creating separate keys for each scenario such as
* `EXECUTOR_STATE_SYNC_MAX_ATTEMPTS` and `MAX_TASK_FAILURES`.
* This balances simplicity with the detail needed for effective logging.
* </li>
* <li>
* Use abbreviations in names if they are widely understood,
* such as `APP_ID` for APPLICATION_ID, and `K8S` for KUBERNETES.
* </li>
* <li>
* For time-related keys, use milliseconds as the unit of time.
* </li>
* </ul>
*/
trait LogKey {
val name: String = this.toString.toLowerCase(Locale.ROOT)
private lazy val _name: String = getClass.getSimpleName.stripSuffix("$").toLowerCase(Locale.ROOT)
def name: String = _name
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,44 @@ import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.internal.Logging.SparkShellLoggingFilter
import org.apache.spark.util.SparkClassUtils

/**
* Guidelines for the Structured Logging Framework - Scala Logging
* <p>
*
* Use the `org.apache.spark.internal.Logging` trait for logging in Scala code:
* Logging Messages with Variables:
* When logging a message with variables, wrap all the variables with `MDC`s and they will be
* automatically added to the Mapped Diagnostic Context (MDC).
* This allows for structured logging and better log analysis.
* <p>
*
* logInfo(log"Trying to recover app: ${MDC(LogKeys.APP_ID, app.id)}")
* <p>
*
* Constant String Messages:
* If you are logging a constant string message, use the log methods that accept a constant
* string.
* <p>
*
* logInfo("StateStore stopped")
* <p>
*
* Exceptions:
* To ensure logs are compatible with Spark SQL and log analysis tools, avoid
* `Exception.printStackTrace()`. Use `logError`, `logWarning`, and `logInfo` methods from
* the `Logging` trait to log exceptions, maintaining structured and parsable logs.
* <p>
*
* If you want to output logs in `scala code` through the structured log framework,
* you can define `custom LogKey` and use it in `scala` code as follows:
* <p>
*
* // To add a `custom LogKey`, implement `LogKey`
* case object CUSTOM_LOG_KEY extends LogKey
* import org.apache.spark.internal.MDC;
* logInfo(log"${MDC(CUSTOM_LOG_KEY, "key")}")
*/

/**
* Mapped Diagnostic Context (MDC) that will be used in log messages.
* The values of the MDC will be inline in the log message, while the key-value pairs will be
Expand Down
47 changes: 0 additions & 47 deletions common/utils/src/main/scala/org/apache/spark/internal/README.md
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mridulm @panbingkun This README is for Spark developers. As long as it doesn't mention 3rd party supports, can we just keep it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine to it, WDYT @mridulm ?
we can just put the new description in the scala/java doc.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will wait for @mridulm's response until this weekend.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC @panbingkun included the contents into the existing code; which is also a nice addition to the code documentation.
IMO that is a better overall approach given this is internal to spark.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I have adopted the plan of deleting README.md

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ String expectedPatternForMsgWithMDCValueIsNull(Level level) {
}

@Override
String expectedPatternForExternalSystemCustomLogKey(Level level) {
return toRegexPattern(level, ".*<level> <className>: External system custom log message.\n");
String expectedPatternForScalaCustomLogKey(Level level) {
return toRegexPattern(level, ".*<level> <className>: Scala custom log message.\n");
}

@Override
String expectedPatternForJavaCustomLogKey(Level level) {
return toRegexPattern(level, ".*<level> <className>: Java custom log message.\n");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@
import org.apache.logging.log4j.Level;
import org.junit.jupiter.api.Test;

import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.LogKey;
import org.apache.spark.internal.LogKeys;
import org.apache.spark.internal.MDC;
import org.apache.spark.internal.SparkLogger;

import static org.junit.jupiter.api.Assertions.assertTrue;

Expand Down Expand Up @@ -68,8 +69,11 @@ private String basicMsg() {

private final MDC executorIDMDCValueIsNull = MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, null);

private final MDC externalSystemCustomLog =
MDC.of(CustomLogKeys.CUSTOM_LOG_KEY$.MODULE$, "External system custom log message.");
private final MDC scalaCustomLogMDC =
MDC.of(CustomLogKeys.CUSTOM_LOG_KEY$.MODULE$, "Scala custom log message.");

private final MDC javaCustomLogMDC =
MDC.of(JavaCustomLogKeys.CUSTOM_LOG_KEY, "Java custom log message.");

// test for basic message (without any mdc)
abstract String expectedPatternForBasicMsg(Level level);
Expand All @@ -89,8 +93,11 @@ private String basicMsg() {
// test for message (with mdc - the value is null)
abstract String expectedPatternForMsgWithMDCValueIsNull(Level level);

// test for external system custom LogKey
abstract String expectedPatternForExternalSystemCustomLogKey(Level level);
// test for scala custom LogKey
abstract String expectedPatternForScalaCustomLogKey(Level level);

// test for java custom LogKey
abstract String expectedPatternForJavaCustomLogKey(Level level);

@Test
public void testBasicMsgLogger() {
Expand Down Expand Up @@ -142,8 +149,6 @@ public void testLoggerWithMDC() {
Runnable errorFn = () -> logger().error(msgWithMDC, executorIDMDC);
Runnable warnFn = () -> logger().warn(msgWithMDC, executorIDMDC);
Runnable infoFn = () -> logger().info(msgWithMDC, executorIDMDC);
Runnable debugFn = () -> logger().debug(msgWithMDC, executorIDMDC);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After #46405, the following 2 lines of code are no longer needed.

Runnable traceFn = () -> logger().trace(msgWithMDC, executorIDMDC);
List.of(
Pair.of(Level.ERROR, errorFn),
Pair.of(Level.WARN, warnFn),
Expand Down Expand Up @@ -213,20 +218,46 @@ public void testLoggerWithMDCValueIsNull() {
}

@Test
public void testLoggerWithExternalSystemCustomLogKey() {
Runnable errorFn = () -> logger().error("{}", externalSystemCustomLog);
Runnable warnFn = () -> logger().warn("{}", externalSystemCustomLog);
Runnable infoFn = () -> logger().info("{}", externalSystemCustomLog);
public void testLoggerWithScalaCustomLogKey() {
Runnable errorFn = () -> logger().error("{}", scalaCustomLogMDC);
Runnable warnFn = () -> logger().warn("{}", scalaCustomLogMDC);
Runnable infoFn = () -> logger().info("{}", scalaCustomLogMDC);
List.of(
Pair.of(Level.ERROR, errorFn),
Pair.of(Level.WARN, warnFn),
Pair.of(Level.INFO, infoFn)).forEach(pair -> {
try {
assertTrue(captureLogOutput(pair.getRight()).matches(
expectedPatternForScalaCustomLogKey(pair.getLeft())));
} catch (IOException e) {
throw new RuntimeException(e);
}
});
}

@Test
public void testLoggerWithJavaCustomLogKey() {
Runnable errorFn = () -> logger().error("{}", javaCustomLogMDC);
Runnable warnFn = () -> logger().warn("{}", javaCustomLogMDC);
Runnable infoFn = () -> logger().info("{}", javaCustomLogMDC);
List.of(
Pair.of(Level.ERROR, errorFn),
Pair.of(Level.WARN, warnFn),
Pair.of(Level.INFO, infoFn)).forEach(pair -> {
try {
assertTrue(captureLogOutput(pair.getRight()).matches(
expectedPatternForExternalSystemCustomLogKey(pair.getLeft())));
expectedPatternForJavaCustomLogKey(pair.getLeft())));
} catch (IOException e) {
throw new RuntimeException(e);
}
});
}
}

class JavaCustomLogKeys {
// Custom `LogKey` must be `implements LogKey`
public static class CUSTOM_LOG_KEY implements LogKey { }

// Singleton
public static final CUSTOM_LOG_KEY CUSTOM_LOG_KEY = new CUSTOM_LOG_KEY();
}
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,28 @@ String expectedPatternForMsgWithMDCValueIsNull(Level level) {
}

@Override
String expectedPatternForExternalSystemCustomLogKey(Level level) {
String expectedPatternForScalaCustomLogKey(Level level) {
return compactAndToRegexPattern(level, """
{
"ts": "<timestamp>",
"level": "<level>",
"msg": "External system custom log message.",
"msg": "Scala custom log message.",
"context": {
"custom_log_key": "External system custom log message."
"custom_log_key": "Scala custom log message."
},
"logger": "<className>"
}""");
}

@Override
String expectedPatternForJavaCustomLogKey(Level level) {
return compactAndToRegexPattern(level, """
{
"ts": "<timestamp>",
"level": "<level>",
"msg": "Java custom log message.",
"context": {
"custom_log_key": "Java custom log message."
},
"logger": "<className>"
}""");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ class PatternLoggingSuite extends LoggingSuiteBase with BeforeAndAfterAll {
override def expectedPatternForMsgWithMDCAndException(level: Level): String =
s""".*$level $className: Error in executor 1.\njava.lang.RuntimeException: OOM\n[\\s\\S]*"""

override def expectedPatternForExternalSystemCustomLogKey(level: Level): String = {
s""".*$level $className: External system custom log message.\n"""
override def expectedPatternForCustomLogKey(level: Level): String = {
s""".*$level $className: Custom log message.\n"""
}

override def verifyMsgWithConcat(level: Level, logOutput: String): Unit = {
Expand Down
Loading