Skip to content

Commit

Permalink
fix(ingest): better logging line attribution (#9876)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Feb 21, 2024
1 parent f13ae77 commit b15b352
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 4 deletions.
6 changes: 3 additions & 3 deletions metadata-ingestion/src/datahub/ingestion/source/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,7 @@ def _authenticate(self) -> None:
logger.info("Authenticated to Tableau server")
# Note that we're not catching ConfigurationError, since we want that to throw.
except ValueError as e:
self.report.report_failure(
self.report.failure(
key="tableau-login",
reason=str(e),
)
Expand Down Expand Up @@ -810,7 +810,7 @@ def get_connection_object_page(
error and (error.get(c.EXTENSIONS) or {}).get(c.SEVERITY) == c.WARNING
for error in errors
):
self.report.report_warning(key=connection_type, reason=f"{errors}")
self.report.warning(key=connection_type, reason=f"{errors}")
else:
raise RuntimeError(f"Query {connection_type} error: {errors}")

Expand Down Expand Up @@ -2555,7 +2555,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
if self.database_tables:
yield from self.emit_upstream_tables()
except MetadataQueryException as md_exception:
self.report.report_failure(
self.report.failure(
key="tableau-metadata",
reason=f"Unable to retrieve metadata from tableau. Information: {str(md_exception)}",
)
Expand Down
54 changes: 53 additions & 1 deletion metadata-ingestion/src/datahub/utilities/logging_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import contextlib
import logging
import os
import pathlib
import sys
from typing import Deque, Iterator, Optional

Expand All @@ -23,7 +24,7 @@
from datahub.utilities.tee_io import TeeIO

BASE_LOGGING_FORMAT = (
"[%(asctime)s] %(levelname)-8s {%(filename)s:%(lineno)d} - %(message)s"
"[%(asctime)s] %(levelname)-8s {%(name)s:%(lineno)d} - %(message)s"
)
DATAHUB_PACKAGES = [
"datahub",
Expand All @@ -36,6 +37,56 @@
NO_COLOR = os.environ.get("NO_COLOR", False)


def extract_name_from_filename(filename: str, fallback_name: str) -> str:
"""Guess the module path from the filename.
Because the logger name may not be the same as the package path (e.g. when using stacklevel),
we do a best-effort attempt to extract the module name from the filename.
>>> extract_name_from_filename("/datahub-ingestion/.local/lib/python3.10/site-packages/datahub/configuration/common.py", "bad")
'datahub.configuration.common'
>>> extract_name_from_filename("/home/user/datahub/metadata-ingestion/src/datahub/telemetry/telemetry.py", "bad")
'datahub.telemetry.telemetry'
>>> extract_name_from_filename("/this/is/not/a/normal/path.py", "fallback.package")
'fallback.package'
Args:
filename: The filename of the module.
fallback_name: The name to use if we can't guess the module.
Returns:
The guessed module name.
"""

with contextlib.suppress(Exception):
# Split the path into components
path_parts = list(pathlib.Path(filename).parts)

# Remove the .py extension from the last part
if path_parts[-1].endswith(".py"):
path_parts[-1] = path_parts[-1][:-3]

# If we're in a site-packages directory, we want to use the package name as the top-level module.
if "site-packages" in path_parts:
# Find the index of 'site-packages' in the path
site_packages_index = path_parts.index("site-packages")
# Join the parts from 'site-packages' onwards with '.'
return ".".join(path_parts[site_packages_index + 1 :])

# We're probably in a development environment, so take everything after 'metadata-ingestion'
metadata_ingestion_index = next(
(i for i, part in enumerate(path_parts) if "metadata-ingestion" in part),
None,
)
if metadata_ingestion_index is not None:
# Join the parts from 'metadata-ingestion/src' onwards with '.'
return ".".join(path_parts[metadata_ingestion_index + 2 :])

return fallback_name


class _ColorLogFormatter(logging.Formatter):
# Adapted from https://stackoverflow.com/a/56944256/3638629.

Expand All @@ -51,6 +102,7 @@ def __init__(self) -> None:
super().__init__(BASE_LOGGING_FORMAT)

def formatMessage(self, record: logging.LogRecord) -> str:
record.name = extract_name_from_filename(record.pathname, record.name)
if not NO_COLOR and sys.stderr.isatty():
return self._formatMessageColor(record)
else:
Expand Down
13 changes: 13 additions & 0 deletions metadata-ingestion/tests/unit/test_utilities.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import doctest

from datahub.utilities.delayed_iter import delayed_iter
from datahub.utilities.sql_parser import SqlLineageSQLParser

Expand Down Expand Up @@ -282,3 +284,14 @@ def test_sqllineage_sql_parser_tables_with_special_names():
]
assert sorted(SqlLineageSQLParser(sql_query).get_tables()) == expected_tables
assert sorted(SqlLineageSQLParser(sql_query).get_columns()) == expected_columns


def test_logging_name_extraction():
import datahub.utilities.logging_manager

assert (
doctest.testmod(
datahub.utilities.logging_manager, raise_on_error=True
).attempted
> 0
)

0 comments on commit b15b352

Please sign in to comment.