diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py index f387618bfaec12..bf28ab0e7b229b 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py @@ -66,6 +66,7 @@ "SQL_LINEAGE_TIMEOUT_ENABLED", True ) SQL_LINEAGE_TIMEOUT_SECONDS = 10 +SQL_PARSER_TRACE = get_boolean_env_variable("DATAHUB_SQL_PARSER_TRACE", False) # These rules are a subset of the rules in sqlglot.optimizer.optimizer.RULES. @@ -365,10 +366,11 @@ def _sqlglot_force_column_normalizer( return node - # logger.debug( - # "Prior to case normalization sql %s", - # statement.sql(pretty=True, dialect=dialect), - # ) + if SQL_PARSER_TRACE: + logger.debug( + "Prior to case normalization sql %s", + statement.sql(pretty=True, dialect=dialect), + ) statement = statement.transform(_sqlglot_force_column_normalizer, copy=False) # logger.debug( # "Sql after casing normalization %s", @@ -562,7 +564,7 @@ def _select_statement_cll( # noqa: C901 ) ) - # TODO: Also extract referenced columns (aka auxillary / non-SELECT lineage) + # TODO: Also extract referenced columns (aka auxiliary / non-SELECT lineage) except (sqlglot.errors.OptimizeError, ValueError, IndexError) as e: raise SqlUnderstandingError( f"sqlglot failed to compute some lineage: {e}" @@ -1022,6 +1024,14 @@ def _sqlglot_lineage_inner( logger.debug( f"Resolved {total_schemas_resolved} of {total_tables_discovered} table schemas" ) + if SQL_PARSER_TRACE: + for qualified_table, schema_info in table_name_schema_mapping.items(): + logger.debug( + "Table name %s resolved to %s with schema %s", + qualified_table, + table_name_urn_mapping[qualified_table], + schema_info, + ) column_lineage: Optional[List[_ColumnLineageInfo]] = None try: