Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for semicolon stripping to DbApiHook, PrestoHook, and TrinoHook #41916

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dev/breeze/src/airflow_breeze/global_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ def get_airflow_extras():
# END OF EXTRAS LIST UPDATED BY PRE COMMIT
]

CHICKEN_EGG_PROVIDERS = " ".join(["standard amazon"])
CHICKEN_EGG_PROVIDERS = " ".join(["standard amazon common.sql"])


BASE_PROVIDERS_COMPATIBILITY_CHECKS: list[dict[str, str | list[str]]] = [
Expand Down
4 changes: 2 additions & 2 deletions dev/breeze/tests/test_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def test_get_documentation_package_path():
"postgres",
"beta0",
"""
"apache-airflow-providers-common-sql>=1.17.0b0",
"apache-airflow-providers-common-sql>=1.20.0b0",
"apache-airflow>=2.8.0b0",
"psycopg2-binary>=2.9.4",
""",
Expand All @@ -219,7 +219,7 @@ def test_get_documentation_package_path():
"postgres",
"",
"""
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"psycopg2-binary>=2.9.4",
""",
Expand Down
52 changes: 26 additions & 26 deletions generated/provider_dependencies.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"deps": [
"PyAthena>=3.0.10",
"apache-airflow-providers-common-compat>=1.2.1",
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow-providers-http",
"apache-airflow>=2.8.0",
"asgiref>=2.3.0",
Expand Down Expand Up @@ -102,7 +102,7 @@
},
"apache.drill": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"sqlalchemy-drill>=1.1.0"
],
Expand All @@ -116,7 +116,7 @@
},
"apache.druid": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pydruid>=0.4.1"
],
Expand Down Expand Up @@ -159,7 +159,7 @@
},
"apache.hive": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"hmsclient>=0.1.0",
"jmespath>=0.7.0",
Expand Down Expand Up @@ -201,7 +201,7 @@
},
"apache.impala": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"impyla>=0.18.0,<1.0"
],
Expand Down Expand Up @@ -265,7 +265,7 @@
},
"apache.pinot": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pinotdb>=5.1.0"
],
Expand Down Expand Up @@ -434,7 +434,7 @@
"databricks": {
"deps": [
"aiohttp>=3.9.2, <4",
"apache-airflow-providers-common-sql>=1.10.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0",
"mergedeep>=1.3.4",
Expand Down Expand Up @@ -545,7 +545,7 @@
},
"elasticsearch": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"elasticsearch>=8.10,<9"
],
Expand All @@ -559,7 +559,7 @@
},
"exasol": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pandas>=1.5.3,<2.2;python_version<\"3.9\"",
"pandas>=2.1.2,<2.2;python_version>=\"3.9\"",
Expand Down Expand Up @@ -632,7 +632,7 @@
"deps": [
"PyOpenSSL>=23.0.0",
"apache-airflow-providers-common-compat>=1.2.1",
"apache-airflow-providers-common-sql>=1.7.2",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"asgiref>=3.5.2",
"dill>=0.2.3",
Expand Down Expand Up @@ -787,7 +787,7 @@
},
"jdbc": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"jaydebeapi>=1.1.1"
],
Expand Down Expand Up @@ -855,7 +855,7 @@
},
"microsoft.mssql": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"methodtools>=0.4.7",
"pymssql>=2.3.0"
Expand Down Expand Up @@ -906,7 +906,7 @@
},
"mysql": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"mysql-connector-python>=8.0.29",
"mysqlclient>=1.4.0; sys_platform != 'darwin'"
Expand Down Expand Up @@ -937,7 +937,7 @@
},
"odbc": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pyodbc>=5.0.0"
],
Expand Down Expand Up @@ -973,7 +973,7 @@
"openlineage": {
"deps": [
"apache-airflow-providers-common-compat>=1.2.1",
"apache-airflow-providers-common-sql>=1.6.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"attrs>=22.2",
"openlineage-integration-common>=1.24.2",
Expand Down Expand Up @@ -1017,7 +1017,7 @@
},
"oracle": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"oracledb>=2.0.0"
],
Expand Down Expand Up @@ -1083,7 +1083,7 @@
},
"postgres": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"psycopg2-binary>=2.9.4"
],
Expand All @@ -1099,7 +1099,7 @@
},
"presto": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pandas>=1.5.3,<2.2;python_version<\"3.9\"",
"pandas>=2.1.2,<2.2;python_version>=\"3.9\"",
Expand Down Expand Up @@ -1214,7 +1214,7 @@
},
"slack": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"slack_sdk>=3.19.0"
],
Expand All @@ -1239,7 +1239,7 @@
"snowflake": {
"deps": [
"apache-airflow-providers-common-compat>=1.1.0",
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pandas>=1.5.3,<2.2;python_version<\"3.9\"",
"pandas>=2.1.2,<2.2;python_version>=\"3.9\"",
Expand All @@ -1260,7 +1260,7 @@
},
"sqlite": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0"
],
"devel-deps": [],
Expand All @@ -1285,7 +1285,7 @@
},
"standard": {
"deps": [
"apache-airflow-providers-common-sql>=1.18.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0"
],
"devel-deps": [],
Expand Down Expand Up @@ -1318,7 +1318,7 @@
},
"teradata": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"teradatasql>=17.20.0.28",
"teradatasqlalchemy>=17.20.0.0"
Expand All @@ -1335,7 +1335,7 @@
},
"trino": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pandas>=1.5.3,<2.2;python_version<\"3.9\"",
"pandas>=2.1.2,<2.2;python_version>=\"3.9\"",
Expand All @@ -1353,7 +1353,7 @@
},
"vertica": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"vertica-python>=0.6.0"
],
Expand Down Expand Up @@ -1393,7 +1393,7 @@
},
"ydb": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"ydb>=3.12.1"
],
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/amazon/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ versions:
dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-compat>=1.2.1
- apache-airflow-providers-common-sql>=1.3.1
- apache-airflow-providers-common-sql>=1.20.0
- apache-airflow-providers-http
# We should regularly update the minimum version of boto3 here to limit `pip` backtracking caused by the number
# of candidates to consider. Make sure to configure boto3 version here as well as in all the tools below
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/apache/drill/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ versions:

dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-sql>=1.14.1
- apache-airflow-providers-common-sql>=1.20.0
- sqlalchemy-drill>=1.1.0

integrations:
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/apache/druid/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ versions:

dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-sql>=1.14.1
- apache-airflow-providers-common-sql>=1.20.0
- pydruid>=0.4.1

integrations:
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/apache/hive/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ versions:

dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-sql>=1.3.1
- apache-airflow-providers-common-sql>=1.20.0
- hmsclient>=0.1.0
# In pandas 2.2 minimal version of the sqlalchemy is 2.0
# https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ versions:

dependencies:
- impyla>=0.18.0,<1.0
- apache-airflow-providers-common-sql>=1.14.1
- apache-airflow-providers-common-sql>=1.20.0
- apache-airflow>=2.8.0

additional-extras:
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/apache/pinot/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ versions:

dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-sql>=1.14.1
- apache-airflow-providers-common-sql>=1.20.0
- pinotdb>=5.1.0

integrations:
Expand Down
15 changes: 12 additions & 3 deletions providers/src/airflow/providers/common/sql/hooks/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ class DbApiHook(BaseHook):
conn_name_attr: str
# Override to have a default connection id for a particular dbHook
default_conn_name = "default_conn_id"
# Override to True if this db rejects trailing semicolons; applied when `run` splits SQL into statements
strip_semicolon = False
# Override if this db supports autocommit.
supports_autocommit = False
# Override if this db supports executemany.
Expand Down Expand Up @@ -369,14 +371,18 @@ def strip_sql_string(sql: str) -> str:
return sql.strip().rstrip(";")

@staticmethod
def split_sql_string(sql: str) -> list[str]:
def split_sql_string(sql: str, strip_semicolon: bool = False) -> list[str]:
"""
Split string into multiple SQL expressions.

:param sql: SQL string potentially consisting of multiple expressions
:param strip_semicolon: whether to strip the trailing semicolon from each split expression
:return: list of individual expressions
"""
splits = sqlparse.split(sqlparse.format(sql, strip_comments=True))
splits = sqlparse.split(
sql=sqlparse.format(sql, strip_comments=True),
strip_semicolon=strip_semicolon,
)
return [s for s in splits if s]

@property
Expand Down Expand Up @@ -471,7 +477,10 @@ def run(

if isinstance(sql, str):
if split_statements:
sql_list: Iterable[str] = self.split_sql_string(sql)
sql_list: Iterable[str] = self.split_sql_string(
sql=sql,
strip_semicolon=self.strip_semicolon,
)
else:
sql_list = [sql] if sql.strip() else []
else:
Expand Down
3 changes: 2 additions & 1 deletion providers/src/airflow/providers/common/sql/hooks/sql.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class ConnectorProtocol(Protocol):
class DbApiHook(BaseHook):
conn_name_attr: str
default_conn_name: str
strip_semicolon: bool
supports_autocommit: bool
supports_executemany: bool
connector: ConnectorProtocol | None
Expand Down Expand Up @@ -93,7 +94,7 @@ class DbApiHook(BaseHook):
@staticmethod
def strip_sql_string(sql: str) -> str: ...
@staticmethod
def split_sql_string(sql: str) -> list[str]: ...
def split_sql_string(sql: str, strip_semicolon: bool = False) -> list[str]: ...
@property
def last_description(self) -> Sequence[Sequence] | None: ...
@overload
Expand Down
1 change: 1 addition & 0 deletions providers/src/airflow/providers/common/sql/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ state: ready
source-date-epoch: 1730012422
# note that those versions are maintained by release manager - do not update them manually
versions:
- 1.20.0
- 1.19.0
- 1.18.0
- 1.17.1
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/databricks/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ versions:

dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-sql>=1.10.0
- apache-airflow-providers-common-sql>=1.20.0
- requests>=2.27.0,<3
# The connector 2.9.0 released on Aug 10, 2023 has a bug that it does not properly declare urllib3 and
# it needs to be excluded. See https://github.com/databricks/databricks-sql-python/issues/190
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ versions:

dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-sql>=1.17.0
- apache-airflow-providers-common-sql>=1.20.0
- elasticsearch>=8.10,<9

integrations:
Expand Down
Loading
Loading