From 4f6bddcd7436032c7228dce3c62d5e5dd12bf03b Mon Sep 17 00:00:00 2001 From: Kohei Matsumoto Date: Tue, 17 Dec 2024 14:18:38 +0900 Subject: [PATCH] Update mask_literals_with_sqlparse function to handle multiple SQL statements --- .../src/metadata/ingestion/lineage/masker.py | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/ingestion/src/metadata/ingestion/lineage/masker.py b/ingestion/src/metadata/ingestion/lineage/masker.py index 49a5999e72c6..6962aa652624 100644 --- a/ingestion/src/metadata/ingestion/lineage/masker.py +++ b/ingestion/src/metadata/ingestion/lineage/masker.py @@ -38,11 +38,11 @@ def mask_literals_with_sqlparse(query: str): logger = get_logger() try: - parsed = sqlparse.parse(query) # Parse the query + # Parse all statements in the query + parsed_statements = sqlparse.parse(query) - if not parsed: + if not parsed_statements: return query - parsed = parsed[0] def mask_token(token): # Mask all literals: strings, numbers, or other literal values @@ -61,17 +61,21 @@ def mask_token(token): for t in token.tokens: mask_token(t) - # Process all tokens - for token in parsed.tokens: - if isinstance(token, Comparison): - # In comparisons, mask both sides if literals - for t in token.tokens: - mask_token(t) - else: - mask_token(token) + # Process each statement + masked_statements = [] + for statement in parsed_statements: + for token in statement.tokens: + if isinstance(token, Comparison): + # In comparisons, mask both sides if literals + for t in token.tokens: + mask_token(t) + else: + mask_token(token) + masked_statements.append(str(statement)) + + # Reconstruct the query with masked literals + return "".join(masked_statements) - # Return the formatted masked query - return str(parsed) except Exception as exc: logger.debug(f"Failed to mask query with sqlparse: {exc}") logger.debug(traceback.format_exc())