diff --git a/ingestion/src/metadata/cli/app.py b/ingestion/src/metadata/cli/app.py index 7bd888401944..b42eb136219b 100644 --- a/ingestion/src/metadata/cli/app.py +++ b/ingestion/src/metadata/cli/app.py @@ -32,6 +32,7 @@ def run_app(config_path: Path) -> None: try: config_dict = load_config_file(config_path) + # no logging for config because apps might have custom secrets workflow = ApplicationWorkflow.create(config_dict) except Exception as exc: logger.error(f"Error running the application {exc}") diff --git a/ingestion/src/metadata/cli/dataquality.py b/ingestion/src/metadata/cli/dataquality.py index 4433b274d94b..6b472bee5616 100644 --- a/ingestion/src/metadata/cli/dataquality.py +++ b/ingestion/src/metadata/cli/dataquality.py @@ -20,7 +20,7 @@ from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import ( PipelineType, ) -from metadata.utils.logger import cli_logger +from metadata.utils.logger import cli_logger, redacted_config from metadata.workflow.data_quality import TestSuiteWorkflow from metadata.workflow.workflow_init_error_handler import WorkflowInitErrorHandler @@ -37,6 +37,9 @@ def run_test(config_path: Path) -> None: workflow_config_dict = None try: workflow_config_dict = load_config_file(config_path) + logger.debug( + "Using workflow config:\n%s", redacted_config(workflow_config_dict) + ) workflow = TestSuiteWorkflow.create(workflow_config_dict) except Exception as exc: logger.debug(traceback.format_exc()) diff --git a/ingestion/src/metadata/cli/ingest.py b/ingestion/src/metadata/cli/ingest.py index e52b7354076b..68fbd12c6a91 100644 --- a/ingestion/src/metadata/cli/ingest.py +++ b/ingestion/src/metadata/cli/ingest.py @@ -20,7 +20,7 @@ from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import ( PipelineType, ) -from metadata.utils.logger import cli_logger +from metadata.utils.logger import cli_logger, redacted_config from metadata.workflow.metadata import MetadataWorkflow from metadata.workflow.workflow_init_error_handler import WorkflowInitErrorHandler @@ -37,6 +37,7 @@ def run_ingest(config_path: Path) -> None: config_dict = None try: config_dict = load_config_file(config_path) + logger.debug("Using workflow config:\n%s", redacted_config(config_dict)) workflow = MetadataWorkflow.create(config_dict) except Exception as exc: logger.debug(traceback.format_exc()) diff --git a/ingestion/src/metadata/cli/lineage.py b/ingestion/src/metadata/cli/lineage.py index f2246fa1cfaa..3d79cc98beff 100644 --- a/ingestion/src/metadata/cli/lineage.py +++ b/ingestion/src/metadata/cli/lineage.py @@ -27,7 +27,7 @@ from metadata.generated.schema.metadataIngestion.workflow import WorkflowConfig from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.utils.constants import UTF_8 -from metadata.utils.logger import cli_logger +from metadata.utils.logger import cli_logger, redacted_config from metadata.workflow.workflow_init_error_handler import WorkflowInitErrorHandler logger = cli_logger() @@ -52,6 +52,7 @@ def run_lineage(config_path: Path) -> None: config_dict = None try: config_dict = load_config_file(config_path) + logger.debug("Using workflow config:\n%s", redacted_config(config_dict)) workflow = LineageWorkflow.model_validate(config_dict) except Exception as exc: diff --git a/ingestion/src/metadata/cli/profile.py b/ingestion/src/metadata/cli/profile.py index 51053f8f6311..000fdf96fc99 100644 --- a/ingestion/src/metadata/cli/profile.py +++ b/ingestion/src/metadata/cli/profile.py @@ -20,7 +20,7 @@ from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import ( PipelineType, ) -from metadata.utils.logger import cli_logger +from metadata.utils.logger import cli_logger, redacted_config from metadata.workflow.profiler import ProfilerWorkflow from metadata.workflow.workflow_init_error_handler import WorkflowInitErrorHandler @@ -37,6 +37,9 @@ def run_profiler(config_path: Path) -> None: workflow_config_dict = None try: workflow_config_dict = load_config_file(config_path) + logger.debug( + "Using workflow config:\n%s", redacted_config(workflow_config_dict) + ) workflow = ProfilerWorkflow.create(workflow_config_dict) except Exception as exc: logger.debug(traceback.format_exc()) diff --git a/ingestion/src/metadata/cli/usage.py b/ingestion/src/metadata/cli/usage.py index b4a969939f2c..ebbffe8a5e9b 100644 --- a/ingestion/src/metadata/cli/usage.py +++ b/ingestion/src/metadata/cli/usage.py @@ -20,7 +20,7 @@ from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import ( PipelineType, ) -from metadata.utils.logger import cli_logger +from metadata.utils.logger import cli_logger, redacted_config from metadata.workflow.usage import UsageWorkflow from metadata.workflow.workflow_init_error_handler import WorkflowInitErrorHandler @@ -37,6 +37,7 @@ def run_usage(config_path: Path) -> None: config_dict = None try: config_dict = load_config_file(config_path) + logger.debug("Using workflow config:\n%s", redacted_config(config_dict)) workflow = UsageWorkflow.create(config_dict) except Exception as exc: logger.debug(traceback.format_exc()) diff --git a/ingestion/src/metadata/utils/logger.py b/ingestion/src/metadata/utils/logger.py index 15f724a287bf..aadc0fafb2fe 100644 --- a/ingestion/src/metadata/utils/logger.py +++ b/ingestion/src/metadata/utils/logger.py @@ -13,10 +13,11 @@ """ import logging +from copy import deepcopy from enum import Enum from functools import singledispatch from types import DynamicClassAttribute -from typing import Optional, Union +from typing import Dict, Optional, Union from metadata.data_quality.api.models import ( TableAndTests, @@ -37,6 +38,8 @@ ) logging.basicConfig(format=BASE_LOGGING_FORMAT, datefmt="%Y-%m-%d %H:%M:%S") +REDACTED_KEYS = {"serviceConnection", "securityConfig"} + class Loggers(Enum): """ @@ -260,3 +263,21 @@ def _(record: OMetaPipelineStatus) -> str: def _(record: PatchRequest) -> str: """Get the log of the new entity""" return get_log_name(record.new_entity) + + +def redacted_config(config: Dict[str, Union[str, dict]]) -> Dict[str, Union[str, dict]]: + config_copy = deepcopy(config) + + def traverse_and_modify(obj): + if isinstance(obj, dict): + for key, value in obj.items(): + if key in REDACTED_KEYS: + obj[key] = "REDACTED" + else: + traverse_and_modify(value) + elif isinstance(obj, list): + for item in obj: + traverse_and_modify(item) + + traverse_and_modify(config_copy) + return config_copy diff --git a/ingestion/tests/unit/utils/test_logger.py b/ingestion/tests/unit/utils/test_logger.py new file mode 100644 index 000000000000..432e220161e9 --- /dev/null +++ b/ingestion/tests/unit/utils/test_logger.py @@ -0,0 +1,31 @@ +from metadata.utils.logger import redacted_config + + +def test_safe_config_logger(): + example_obj = { + "serviceConnection": "some_value", + "securityConfig": "another_value", + "nested": { + "serviceConnection": "another_value", + "list": [ + {"serviceConnection": "value_in_list"}, + {"otherField": "other_value"}, + {"securityConfig": "security_value"}, + ], + }, + } + + result = redacted_config(example_obj) + expected = { + "serviceConnection": "REDACTED", + "securityConfig": "REDACTED", + "nested": { + "serviceConnection": "REDACTED", + "list": [ + {"serviceConnection": "REDACTED"}, + {"otherField": "other_value"}, + {"securityConfig": "REDACTED"}, + ], + }, + } + assert result == expected