Skip to content

Commit

Permalink
Fix(optimizer)!: treat LEVEL column in CONNECT BY queries as an ident…
Browse files Browse the repository at this point in the history
…ifier (#4627)

* Fix(optimizer)!: treat LEVEL column in CONNECT BY queries as an identifier

* Generate WHERE before CONNECT clause
  • Loading branch information
georgesittas authored Jan 16, 2025
1 parent 9cbd5ef commit 59d886d
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 18 deletions.
2 changes: 1 addition & 1 deletion sqlglot/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2491,11 +2491,11 @@ def query_modifiers(self, expression: exp.Expression, *sqls: str) -> str:
return csv(
*sqls,
*[self.sql(join) for join in expression.args.get("joins") or []],
self.sql(expression, "connect"),
self.sql(expression, "match"),
*[self.sql(lateral) for lateral in expression.args.get("laterals") or []],
self.sql(expression, "prewhere"),
self.sql(expression, "where"),
self.sql(expression, "connect"),
self.sql(expression, "group"),
self.sql(expression, "having"),
*[gen(self, expression) for gen in self.AFTER_HAVING_MODIFIER_TRANSFORMS.values()],
Expand Down
28 changes: 15 additions & 13 deletions sqlglot/optimizer/qualify_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,21 @@ def qualify_columns(
dialect = Dialect.get_or_raise(schema.dialect)
pseudocolumns = dialect.PSEUDOCOLUMNS

snowflake_or_oracle = dialect in ("oracle", "snowflake")

for scope in traverse_scope(expression):
scope_expression = scope.expression
is_select = isinstance(scope_expression, exp.Select)

if is_select and snowflake_or_oracle and scope_expression.args.get("connect"):
# In Snowflake / Oracle queries that have a CONNECT BY clause, one can use the LEVEL
# pseudocolumn, which doesn't belong to a table, so we change it into an identifier
scope_expression.transform(
lambda n: n.this if isinstance(n, exp.Column) and n.name == "LEVEL" else n,
copy=False,
)
scope.clear_cache()

resolver = Resolver(scope, schema, infer_schema=infer_schema)
_pop_table_column_aliases(scope.ctes)
_pop_table_column_aliases(scope.derived_tables)
Expand All @@ -76,7 +90,7 @@ def qualify_columns(
if not schema.empty and expand_alias_refs:
_expand_alias_refs(scope, resolver, dialect)

if isinstance(scope.expression, exp.Select):
if is_select:
if expand_stars:
_expand_stars(
scope,
Expand Down Expand Up @@ -898,22 +912,10 @@ def get_source_columns(self, name: str, only_visible: bool = False) -> t.Sequenc
for (name, alias) in itertools.zip_longest(columns, column_aliases)
]

pseudocolumns = self._get_source_pseudocolumns(name)
if pseudocolumns:
columns = list(columns)
columns.extend(c for c in pseudocolumns if c not in columns)

self._get_source_columns_cache[cache_key] = columns

return self._get_source_columns_cache[cache_key]

def _get_source_pseudocolumns(self, name: str) -> t.Sequence[str]:
if self.schema.dialect == "snowflake" and self.scope.expression.args.get("connect"):
# When there is a CONNECT BY clause, there is only one table being scanned
# See: https://docs.snowflake.com/en/sql-reference/constructs/connect-by
return ["LEVEL"]
return []

def _get_all_source_columns(self) -> t.Dict[str, t.Sequence[str]]:
if self._source_columns is None:
self._source_columns = {
Expand Down
6 changes: 3 additions & 3 deletions tests/dialects/test_oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,10 +513,10 @@ def test_connect_by(self):
LEVEL,
SYS_CONNECT_BY_PATH(last_name, '/') AS "Path"
FROM employees
START WITH last_name = 'King'
CONNECT BY PRIOR employee_id = manager_id AND LEVEL <= 4
WHERE
level <= 3 AND department_id = 80"""
level <= 3 AND department_id = 80
START WITH last_name = 'King'
CONNECT BY PRIOR employee_id = manager_id AND LEVEL <= 4"""

for query in (f"{body}{start}{connect}", f"{body}{connect}{start}"):
self.validate_identity(query, pretty, pretty=True)
Expand Down
27 changes: 26 additions & 1 deletion tests/fixtures/optimizer/qualify_columns.sql
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,32 @@ START WITH title = 'President'
CONNECT BY manager_ID = PRIOR employee_id
ORDER BY
employee_ID NULLS LAST;
WITH EMPLOYEES AS (SELECT T.TITLE AS TITLE, T.EMPLOYEE_ID AS EMPLOYEE_ID, T.MANAGER_ID AS MANAGER_ID FROM (VALUES ('President', 1, NULL), ('Vice President Engineering', 10, 1), ('Programmer', 100, 10), ('QA Engineer', 101, 10), ('Vice President HR', 20, 1), ('Health Insurance Analyst', 200, 20)) AS T(TITLE, EMPLOYEE_ID, MANAGER_ID)) SELECT EMPLOYEES.EMPLOYEE_ID AS EMPLOYEE_ID, EMPLOYEES.MANAGER_ID AS MANAGER_ID, EMPLOYEES.TITLE AS TITLE, EMPLOYEES.LEVEL AS LEVEL FROM EMPLOYEES AS EMPLOYEES START WITH EMPLOYEES.TITLE = 'President' CONNECT BY EMPLOYEES.MANAGER_ID = PRIOR EMPLOYEES.EMPLOYEE_ID ORDER BY EMPLOYEE_ID;
WITH EMPLOYEES AS (SELECT T.TITLE AS TITLE, T.EMPLOYEE_ID AS EMPLOYEE_ID, T.MANAGER_ID AS MANAGER_ID FROM (VALUES ('President', 1, NULL), ('Vice President Engineering', 10, 1), ('Programmer', 100, 10), ('QA Engineer', 101, 10), ('Vice President HR', 20, 1), ('Health Insurance Analyst', 200, 20)) AS T(TITLE, EMPLOYEE_ID, MANAGER_ID)) SELECT EMPLOYEES.EMPLOYEE_ID AS EMPLOYEE_ID, EMPLOYEES.MANAGER_ID AS MANAGER_ID, EMPLOYEES.TITLE AS TITLE, LEVEL AS LEVEL FROM EMPLOYEES AS EMPLOYEES START WITH EMPLOYEES.TITLE = 'President' CONNECT BY EMPLOYEES.MANAGER_ID = PRIOR EMPLOYEES.EMPLOYEE_ID ORDER BY EMPLOYEE_ID;

# execute: false
# dialect: oracle
WITH
t1 AS (
SELECT
1 AS c1,
1 AS c2,
'Y' AS TOP_PARENT_INDICATOR,
1 AS id
FROM DUAL
),
t2 AS (
SELECT
1 AS c2,
2 AS id
FROM DUAL
)
SELECT t1.c1
FROM t1
LEFT JOIN t2 ON t1.c2 = t2.c2
WHERE (t1.TOP_PARENT_INDICATOR = 'Y' OR LEVEL = 1)
START WITH (t1.id IS NOT NULL)
CONNECT BY PRIOR t1.id = t2.id;
WITH T1 AS (SELECT 1 AS C1, 1 AS C2, 'Y' AS TOP_PARENT_INDICATOR, 1 AS ID FROM DUAL DUAL), T2 AS (SELECT 1 AS C2, 2 AS ID FROM DUAL DUAL) SELECT T1.C1 AS C1 FROM T1 T1 LEFT JOIN T2 T2 ON T1.C2 = T2.C2 WHERE (T1.TOP_PARENT_INDICATOR = 'Y' OR LEVEL = 1) START WITH (NOT T1.ID IS NULL) CONNECT BY PRIOR T1.ID = T2.ID;

--------------------------------------
-- Derived tables
Expand Down

0 comments on commit 59d886d

Please sign in to comment.