From fa6a533bc2d0d8b24c69452eb515446131dccf55 Mon Sep 17 00:00:00 2001 From: sanggusti Date: Tue, 14 Oct 2025 11:22:21 +0700 Subject: [PATCH 1/4] Feat: configure logging. Add support for easy logging setup with sensible defaults Fixes #685 --- src/litdata/CHANGELOG.md | 3 +++ src/litdata/__init__.py | 3 +++ src/litdata/debugger.py | 56 ++++++++++++++++++++++++++++++++++++++++ tests/test_debugger.py | 36 +++++++++++++++++++++++++- 4 files changed, 97 insertions(+), 1 deletion(-) diff --git a/src/litdata/CHANGELOG.md b/src/litdata/CHANGELOG.md index fd3928610..0af1eeadc 100644 --- a/src/litdata/CHANGELOG.md +++ b/src/litdata/CHANGELOG.md @@ -11,6 +11,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added - Introduced `CHANGELOG.md` to track changes across releases ([#733](https://github.com/lightning-ai/litdata/pull/733)) +- Added `configure_logging` to `src/litdata/debugger.py` ([#685](https://github.com/Lightning-AI/litData/issues/685)) +- Added necessary test for `configure_logging` at `tests/test_debugger.py` ([#685](https://github.com/Lightning-AI/litData/issues/685)) +- Added `configure_logging` import to `src/litdata/__init__.py` ([#685](https://github.com/Lightning-AI/litData/issues/685)) ### Changed diff --git a/src/litdata/__init__.py b/src/litdata/__init__.py index e46acecda..7a9eedd19 100644 --- a/src/litdata/__init__.py +++ b/src/litdata/__init__.py @@ -25,6 +25,9 @@ from litdata.utilities.breakpoint import breakpoint from litdata.utilities.hf_dataset import index_hf_dataset from litdata.utilities.train_test_split import train_test_split +from litdata.debugger import configure_logging + +configure_logging() warnings.filterwarnings( "ignore", diff --git a/src/litdata/debugger.py b/src/litdata/debugger.py index bc2b1e6c5..321dd0044 100644 --- a/src/litdata/debugger.py +++ b/src/litdata/debugger.py @@ -14,12 +14,17 @@ import logging import os import re +import sys import threading import time from functools import lru_cache +from typing import TYPE_CHECKING, Any, TextIO, Union from litdata.utilities.env import _DistributedEnv, _is_in_dataloader_worker, _WorkerEnv +_DEFAULT_LOG_FORMAT = ( + "%(asctime)s - %(processName)s[%(process)d] - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s" +) class TimedFlushFileHandler(logging.FileHandler): """FileHandler that flushes every N seconds in a background thread.""" @@ -79,6 +84,57 @@ def get_logger_level(level: str) -> int: raise ValueError(f"Invalid log level: {level}") +def _get_default_handler(stream, format): + handler = logging.StreamHandler(stream) + formatter = logging.Formatter(format) + handler.setFormatter(formatter) + return handler + + +def configure_logging( + level: Union[str, int] = logging.INFO, + format: str = _DEFAULT_LOG_FORMAT, + stream: TextIO = sys.stdout, + use_rich: bool = False, +): + """Configure logging for the entire library with sensible defaults. + + Args: + level (int): Logging level (default: logging.INFO) + format (str): Log message format string + stream (file-like): Output stream for logs + use_rich (bool): Makes the logs more readable by using rich, useful for debugging. Defaults to False. + + """ + if isinstance(level, str): + level = level.upper() + level = getattr(logging, level) + + # Clear any existing handlers to prevent duplicates + library_logger = logging.getLogger("litdata") + for handler in library_logger.handlers[:]: + library_logger.removeHandler(handler) + + if use_rich: + try: + from rich.logging import RichHandler + from rich.traceback import install + + install(show_locals=True) + handler = RichHandler(rich_tracebacks=True, show_time=True, show_path=True) + except ImportError: + logging.warning("Rich is not installed, using default logging") + handler = _get_default_handler(stream, format) + else: + handler = _get_default_handler(stream, format) + + # Configure library logger + library_logger.setLevel(level) + library_logger.addHandler(handler) + library_logger.propagate = False + pass + + class LitDataLogger: _instance = None _lock = threading.Lock() diff --git a/tests/test_debugger.py b/tests/test_debugger.py index d90303bde..829a2ef74 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -1,7 +1,13 @@ +import io import logging - import pytest +from unittest import mock +from litdata.debugger import configure_logging + +@pytest.fixture +def log_stream(): + return io.StringIO() def test_get_logger_level(): from litdata.debugger import get_logger_level @@ -13,3 +19,31 @@ def test_get_logger_level(): assert get_logger_level("CRITICAL") == logging.CRITICAL with pytest.raises(ValueError, match="Invalid log level"): get_logger_level("INVALID") + + +def test_configure_logging(log_stream): + # Configure logging with test stream + configure_logging(level=logging.DEBUG, stream=log_stream) + + # Get logger and log a test message + logger = logging.getLogger("litdata") + test_message = "Test debug message" + logger.debug(test_message) + + # Verify log output + log_contents = log_stream.getvalue() + assert test_message in log_contents + assert "DEBUG" in log_contents + assert logger.propagate is False + + +def test_configure_logging_2(): + configure_logging(use_rich=False) + assert logging.getLogger("litdata").handlers[0].__class__.__name__ == "StreamHandler" + + +def test_configure_logging_rich_not_installed(): + # patch builtins.__import__ to raise ImportError + with mock.patch("builtins.__import__", side_effect=ImportError): + configure_logging(use_rich=True) + assert logging.getLogger("litdata").handlers[0].__class__.__name__ == "StreamHandler" From dff4a5fec9a53aeab66d6ab864e35d975c36eac4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Oct 2025 04:26:20 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/litdata/__init__.py | 2 +- tests/test_debugger.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/litdata/__init__.py b/src/litdata/__init__.py index 7a9eedd19..34f7f420f 100644 --- a/src/litdata/__init__.py +++ b/src/litdata/__init__.py @@ -14,6 +14,7 @@ from litdata.__about__ import * # noqa: F403 from litdata.constants import _LIGHTNING_SDK_AVAILABLE +from litdata.debugger import configure_logging from litdata.processing.functions import map, merge_datasets, optimize, walk from litdata.raw.dataset import StreamingRawDataset from litdata.streaming.combined import CombinedStreamingDataset @@ -25,7 +26,6 @@ from litdata.utilities.breakpoint import breakpoint from litdata.utilities.hf_dataset import index_hf_dataset from litdata.utilities.train_test_split import train_test_split -from litdata.debugger import configure_logging configure_logging() diff --git a/tests/test_debugger.py b/tests/test_debugger.py index 829a2ef74..fa43f1cee 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -1,14 +1,17 @@ import io import logging +from unittest import mock + import pytest -from unittest import mock from litdata.debugger import configure_logging + @pytest.fixture def log_stream(): return io.StringIO() + def test_get_logger_level(): from litdata.debugger import get_logger_level From ce95ed09f27a86d1f7da63a0ff856bb3f6ed5462 Mon Sep 17 00:00:00 2001 From: sanggusti Date: Tue, 14 Oct 2025 23:00:26 +0700 Subject: [PATCH 3/4] Remove eager call import-time `configure_logging` from `__init__.py` --- src/litdata/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/litdata/__init__.py b/src/litdata/__init__.py index 34f7f420f..d43c0001e 100644 --- a/src/litdata/__init__.py +++ b/src/litdata/__init__.py @@ -14,7 +14,6 @@ from litdata.__about__ import * # noqa: F403 from litdata.constants import _LIGHTNING_SDK_AVAILABLE -from litdata.debugger import configure_logging from litdata.processing.functions import map, merge_datasets, optimize, walk from litdata.raw.dataset import StreamingRawDataset from litdata.streaming.combined import CombinedStreamingDataset @@ -27,7 +26,6 @@ from litdata.utilities.hf_dataset import index_hf_dataset from litdata.utilities.train_test_split import train_test_split -configure_logging() warnings.filterwarnings( "ignore", From 421cef34e2c463ad181a5cd51d4523ca32bd6f05 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Oct 2025 16:00:41 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/litdata/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/litdata/__init__.py b/src/litdata/__init__.py index d43c0001e..e46acecda 100644 --- a/src/litdata/__init__.py +++ b/src/litdata/__init__.py @@ -26,7 +26,6 @@ from litdata.utilities.hf_dataset import index_hf_dataset from litdata.utilities.train_test_split import train_test_split - warnings.filterwarnings( "ignore", message=r"A newer version of lightning-sdk.*",