diff --git a/requirements/base.txt b/requirements/base.txt index e6e19adadbb9d..ac968d9405359 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -68,6 +68,19 @@ dnspython==2.0.0 # via email-validator email-validator==1.1.1 # via flask-appbuilder +flask==1.1.2 + # via + # apache-superset + # flask-appbuilder + # flask-babel + # flask-caching + # flask-compress + # flask-jwt-extended + # flask-login + # flask-migrate + # flask-openid + # flask-sqlalchemy + # flask-wtf flask-appbuilder==3.3.0 # via apache-superset flask-babel==1.0.0 @@ -94,19 +107,6 @@ flask-wtf==0.14.3 # via # apache-superset # flask-appbuilder -flask==1.1.2 - # via - # apache-superset - # flask-appbuilder - # flask-babel - # flask-caching - # flask-compress - # flask-jwt-extended - # flask-login - # flask-migrate - # flask-openid - # flask-sqlalchemy - # flask-wtf geographiclib==1.50 # via geopy geopy==2.0.0 @@ -123,11 +123,6 @@ idna==2.10 # via # email-validator # yarl -importlib-metadata==2.1.1 - # via - # jsonschema - # kombu - # markdown isodate==0.6.0 # via apache-superset itsdangerous==1.1.0 @@ -154,15 +149,15 @@ markupsafe==1.1.1 # jinja2 # mako # wtforms -marshmallow-enum==1.5.1 - # via flask-appbuilder -marshmallow-sqlalchemy==0.23.1 - # via flask-appbuilder marshmallow==3.9.0 # via # flask-appbuilder # marshmallow-enum # marshmallow-sqlalchemy +marshmallow-enum==1.5.1 + # via flask-appbuilder +marshmallow-sqlalchemy==0.23.1 + # via flask-appbuilder msgpack==1.0.0 # via apache-superset multidict==5.0.0 @@ -176,7 +171,9 @@ numpy==1.19.4 # pandas # pyarrow packaging==20.4 - # via bleach + # via + # bleach + # deprecation pandas==1.2.2 # via apache-superset parsedatetime==2.6 @@ -264,10 +261,6 @@ six==1.15.0 # wtforms-json slackclient==2.5.0 # via apache-superset -sqlalchemy-utils==0.36.8 - # via - # apache-superset - # flask-appbuilder sqlalchemy==1.3.20 # via # alembic @@ -276,13 +269,16 @@ sqlalchemy==1.3.20 # flask-sqlalchemy # marshmallow-sqlalchemy # sqlalchemy-utils +sqlalchemy-utils==0.36.8 + # via + # apache-superset + # flask-appbuilder sqlparse==0.3.0 # via apache-superset typing-extensions==3.7.4.3 # via # aiohttp # apache-superset - # yarl urllib3==1.25.11 # via selenium vine==1.3.0 @@ -295,18 +291,16 @@ werkzeug==1.0.1 # via # flask # flask-jwt-extended -wtforms-json==0.3.3 - # via apache-superset wtforms==2.3.3 # via # flask-wtf # wtforms-json +wtforms-json==0.3.3 + # via apache-superset yarl==1.6.2 # via aiohttp zipp==3.4.1 - # via - # -r requirements/base.in - # importlib-metadata + # via -r requirements/base.in # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/requirements/development.txt b/requirements/development.txt index 7b6ca3ffb7a53..c065721174d3b 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -70,13 +70,13 @@ tableschema==1.20.0 # via -r requirements/development.in tabulator==1.52.5 # via tableschema -thrift-sasl==0.4.2 - # via pyhive thrift==0.13.0 # via # -r requirements/development.in # pyhive # thrift-sasl +thrift-sasl==0.4.2 + # via pyhive unicodecsv==0.14.1 # via # tableschema diff --git a/requirements/integration.txt b/requirements/integration.txt index e1cb0edb3e2a9..b9ec99d133268 100644 --- a/requirements/integration.txt +++ b/requirements/integration.txt @@ -21,12 +21,6 @@ filelock==3.0.12 # virtualenv identify==1.5.9 # via pre-commit -importlib-metadata==2.1.1 - # via - # pluggy - # pre-commit - # tox - # virtualenv nodeenv==1.5.0 # via pre-commit packaging==20.4 @@ -63,8 +57,6 @@ virtualenv==20.1.0 # via # pre-commit # tox -zipp==3.4.1 - # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements/testing.in b/requirements/testing.in index e95cab68f04c7..9c67f790a73e8 100644 --- a/requirements/testing.in +++ b/requirements/testing.in @@ -32,3 +32,4 @@ pylint pytest pytest-cov statsd +pytest-mock diff --git a/requirements/testing.txt b/requirements/testing.txt index 4d116c53dbddd..f9fe540c8cc9e 100644 --- a/requirements/testing.txt +++ b/requirements/testing.txt @@ -1,4 +1,4 @@ -# SHA1:dba8c39bbdcb85b86e83af855d023b55a2674e87 +# SHA1:d39180c0eb498d1a7dd73b8428e6ab304b728484 # # This file is autogenerated by pip-compile-multi # To update, run: @@ -9,6 +9,8 @@ -r integration.txt -e file:. # via -r requirements/base.in +appnope==0.1.2 + # via ipython astroid==2.4.2 # via pylint backcall==0.2.0 @@ -25,12 +27,12 @@ iniconfig==1.1.1 # via pytest ipdb==0.13.4 # via -r requirements/testing.in -ipython-genutils==0.2.0 - # via traitlets ipython==7.16.1 # via # -r requirements/testing.in # ipdb +ipython-genutils==0.2.0 + # via traitlets isort==5.6.4 # via pylint jedi==0.17.2 @@ -63,18 +65,19 @@ pyhive[hive,presto]==0.6.3 # -r requirements/testing.in pylint==2.6.0 # via -r requirements/testing.in -pytest-cov==2.10.1 - # via -r requirements/testing.in pytest==6.1.2 # via # -r requirements/testing.in # pytest-cov + # pytest-mock +pytest-cov==2.10.1 + # via -r requirements/testing.in +pytest-mock==3.6.1 + # via -r requirements/testing.in statsd==3.3.0 # via -r requirements/testing.in traitlets==5.0.5 # via ipython -typed-ast==1.4.3 - # via astroid wcwidth==0.2.5 # via prompt-toolkit websocket-client==0.57.0 diff --git a/superset/db_engine_specs/gsheets.py b/superset/db_engine_specs/gsheets.py index 0f5f6ef3b1d96..0becf050647e2 100644 --- a/superset/db_engine_specs/gsheets.py +++ b/superset/db_engine_specs/gsheets.py @@ -17,14 +17,17 @@ import json import re from contextlib import closing -from typing import Any, Dict, Optional, Pattern, Tuple, TYPE_CHECKING +from typing import Any, Dict, List, Optional, Pattern, Tuple, TYPE_CHECKING +from flask import g from flask_babel import gettext as __ +from sqlalchemy.engine import create_engine from sqlalchemy.engine.url import URL +from typing_extensions import TypedDict from superset import security_manager from superset.db_engine_specs.sqlite import SqliteEngineSpec -from superset.errors import SupersetErrorType +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType if TYPE_CHECKING: from superset.models.core import Database @@ -33,6 +36,12 @@ SYNTAX_ERROR_REGEX = re.compile('SQLError: near "(?P.*?)": syntax error') +class GSheetsParametersType(TypedDict): + credentials_info: Dict[str, Any] + query: Dict[str, Any] + table_catalog: Dict[str, str] + + class GSheetsEngineSpec(SqliteEngineSpec): """Engine for Google spreadsheets""" @@ -45,7 +54,7 @@ class GSheetsEngineSpec(SqliteEngineSpec): SYNTAX_ERROR_REGEX: ( __( 'Please check your query for syntax errors near "%(server_error)s". ' - "Then, try running your query again." + "Then, try running your query again.", ), SupersetErrorType.SYNTAX_ERROR, {}, @@ -54,7 +63,7 @@ class GSheetsEngineSpec(SqliteEngineSpec): @classmethod def modify_url_for_impersonation( - cls, url: URL, impersonate_user: bool, username: Optional[str] + cls, url: URL, impersonate_user: bool, username: Optional[str], ) -> None: if impersonate_user and username is not None: user = security_manager.find_user(username=username) @@ -77,3 +86,41 @@ def extra_table_metadata( metadata = {} return {"metadata": metadata["extra"]} + + @classmethod + def validate_parameters( + cls, parameters: GSheetsParametersType, + ) -> List[SupersetError]: + errors: List[SupersetError] = [] + + credentials_info = parameters.get("credentials_info") + table_catalog = parameters.get("table_catalog", {}) + + if not table_catalog: + return errors + + # We need a subject in case domain wide delegation is set, otherwise the + # check will fail. This means that the admin will be able to add sheets + # that only they have access, even if later users are not able to access + # them. + subject = g.user.email if g.user else None + + engine = create_engine( + "gsheets://", service_account_info=credentials_info, subject=subject, + ) + conn = engine.connect() + for name, url in table_catalog.items(): + try: + results = conn.execute(f'SELECT * FROM "{url}" LIMIT 1') + results.fetchall() + except Exception: # pylint: disable=broad-except + errors.append( + SupersetError( + message=f"Unable to connect to spreadsheet {name} at {url}", + error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR, + level=ErrorLevel.WARNING, + extra={"name": name, "url": url}, + ), + ) + + return errors diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py new file mode 100644 index 0000000000000..04d793a860e4a --- /dev/null +++ b/tests/unit_tests/conftest.py @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest + + +@pytest.fixture +def app_context(): + """ + A fixture for running the test inside an app context. + """ + from superset.app import create_app + + app = create_app() + with app.app_context(): + yield diff --git a/tests/unit_tests/db_engine_specs/test_gsheets.py b/tests/unit_tests/db_engine_specs/test_gsheets.py new file mode 100644 index 0000000000000..1ced5969726bf --- /dev/null +++ b/tests/unit_tests/db_engine_specs/test_gsheets.py @@ -0,0 +1,177 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=unused-argument, invalid-name +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType + + +class ProgrammingError(Exception): + """ + Dummy ProgrammingError so we don't need to import the optional gsheets. + """ + + +def test_validate_parameters_simple(mocker, app_context): + from superset.db_engine_specs.gsheets import GSheetsEngineSpec + + parameters = {} + errors = GSheetsEngineSpec.validate_parameters(parameters) + assert errors == [] + + +def test_validate_parameters_catalog(mocker, app_context): + from superset.db_engine_specs.gsheets import GSheetsEngineSpec + + g = mocker.patch("superset.db_engine_specs.gsheets.g") + g.user.email = "admin@example.com" + + create_engine = mocker.patch("superset.db_engine_specs.gsheets.create_engine") + conn = create_engine.return_value.connect.return_value + results = conn.execute.return_value + results.fetchall.side_effect = [ + ProgrammingError("The caller does not have permission"), + [(1,)], + ProgrammingError("Unsupported table: https://www.google.com/"), + ] + + parameters = { + "table_catalog": { + "private_sheet": "https://docs.google.com/spreadsheets/d/1/edit", + "public_sheet": "https://docs.google.com/spreadsheets/d/1/edit#gid=1", + "not_a_sheet": "https://www.google.com/", + }, + } + errors = GSheetsEngineSpec.validate_parameters(parameters) + assert errors == [ + SupersetError( + message=( + "Unable to connect to spreadsheet private_sheet at " + "https://docs.google.com/spreadsheets/d/1/edit" + ), + error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR, + level=ErrorLevel.WARNING, + extra={ + "name": "private_sheet", + "url": "https://docs.google.com/spreadsheets/d/1/edit", + "issue_codes": [ + { + "code": 1003, + "message": ( + "Issue 1003 - There is a syntax error in the SQL query. " + "Perhaps there was a misspelling or a typo." + ), + }, + { + "code": 1005, + "message": ( + "Issue 1005 - The table was deleted or renamed in the " + "database." + ), + }, + ], + }, + ), + SupersetError( + message=( + "Unable to connect to spreadsheet not_a_sheet at " + "https://www.google.com/" + ), + error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR, + level=ErrorLevel.WARNING, + extra={ + "name": "not_a_sheet", + "url": "https://www.google.com/", + "issue_codes": [ + { + "code": 1003, + "message": ( + "Issue 1003 - There is a syntax error in the SQL query. " + "Perhaps there was a misspelling or a typo." + ), + }, + { + "code": 1005, + "message": ( + "Issue 1005 - The table was deleted or renamed in the " + "database.", + ), + }, + ], + }, + ), + ] + create_engine.assert_called_with( + "gsheets://", service_account_info=None, subject="admin@example.com", + ) + + +def test_validate_parameters_catalog_and_credentials(mocker, app_context): + from superset.db_engine_specs.gsheets import GSheetsEngineSpec + + g = mocker.patch("superset.db_engine_specs.gsheets.g") + g.user.email = "admin@example.com" + + create_engine = mocker.patch("superset.db_engine_specs.gsheets.create_engine") + conn = create_engine.return_value.connect.return_value + results = conn.execute.return_value + results.fetchall.side_effect = [ + [(2,)], + [(1,)], + ProgrammingError("Unsupported table: https://www.google.com/"), + ] + + parameters = { + "table_catalog": { + "private_sheet": "https://docs.google.com/spreadsheets/d/1/edit", + "public_sheet": "https://docs.google.com/spreadsheets/d/1/edit#gid=1", + "not_a_sheet": "https://www.google.com/", + }, + "credentials_info": "SECRET", + } + errors = GSheetsEngineSpec.validate_parameters(parameters) + assert errors == [ + SupersetError( + message=( + "Unable to connect to spreadsheet not_a_sheet at " + "https://www.google.com/" + ), + error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR, + level=ErrorLevel.WARNING, + extra={ + "name": "not_a_sheet", + "url": "https://www.google.com/", + "issue_codes": [ + { + "code": 1003, + "message": ( + "Issue 1003 - There is a syntax error in the SQL query. " + "Perhaps there was a misspelling or a typo." + ), + }, + { + "code": 1005, + "message": ( + "Issue 1005 - The table was deleted or renamed in the " + "database.", + ), + }, + ], + }, + ), + ] + create_engine.assert_called_with( + "gsheets://", service_account_info="SECRET", subject="admin@example.com", + )