From dd9d5b91d7aa30e4a000d5370f09dc99378891dc Mon Sep 17 00:00:00 2001 From: Shikanime Deva Date: Mon, 19 Jul 2021 13:56:30 +0200 Subject: [PATCH] Fix get_type with comments between WITH keyword --- sqlparse/sql.py | 27 ++++++++++++++------------- tests/test_regressions.py | 9 +++++++++ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 586cd216..1ccfbdbe 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -413,27 +413,28 @@ def get_type(self): Whitespaces and comments at the beginning of the statement are ignored. """ - first_token = self.token_first(skip_cm=True) - if first_token is None: + token = self.token_first(skip_cm=True) + if token is None: # An "empty" statement that either has not tokens at all # or only whitespace tokens. return 'UNKNOWN' - elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): - return first_token.normalized + elif token.ttype in (T.Keyword.DML, T.Keyword.DDL): + return token.normalized - elif first_token.ttype == T.Keyword.CTE: + elif token.ttype == T.Keyword.CTE: # The WITH keyword should be followed by either an Identifier or # an IdentifierList containing the CTE definitions; the actual # DML keyword (e.g. SELECT, INSERT) will follow next. - fidx = self.token_index(first_token) - tidx, token = self.token_next(fidx, skip_ws=True) - if isinstance(token, (Identifier, IdentifierList)): - _, dml_keyword = self.token_next(tidx, skip_ws=True) - - if dml_keyword is not None \ - and dml_keyword.ttype == T.Keyword.DML: - return dml_keyword.normalized + tidx = self.token_index(token) + while tidx is not None: + tidx, token = self.token_next(tidx, skip_ws=True) + if isinstance(token, (Identifier, IdentifierList)): + tidx, token = self.token_next(tidx, skip_ws=True) + + if token is not None \ + and token.ttype == T.Keyword.DML: + return token.normalized # Hmm, probably invalid syntax, so return unknown. return 'UNKNOWN' diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 4ffc69f3..bc8b7dd3 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -427,3 +427,12 @@ def test_splitting_at_and_backticks_issue588(): 'grant foo to user1@`myhost`; grant bar to user1@`myhost`;') assert len(splitted) == 2 assert splitted[-1] == 'grant bar to user1@`myhost`;' + + +def test_comment_between_cte_clauses_issue632(): + p, = sqlparse.parse(""" + WITH foo AS (), + -- A comment before baz subquery + baz AS () + SELECT * FROM baz;""") + assert p.get_type() == "SELECT"