Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Create BigQuery Parameters for DatabaseModal #14721

Merged
merged 25 commits into from
May 23, 2021
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions superset/databases/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
)
from superset.databases.utils import get_table_metadata
from superset.db_engine_specs import get_available_engine_specs
from superset.db_engine_specs.base import BasicParametersMixin
from superset.exceptions import InvalidPayloadFormatError, InvalidPayloadSchemaError
from superset.extensions import security_manager
from superset.models.core import Database
Expand Down Expand Up @@ -909,11 +908,15 @@ def available(self) -> Response:
"preferred": engine_spec.engine in preferred_databases,
}

if issubclass(engine_spec, BasicParametersMixin):
payload["parameters"] = engine_spec.parameters_json_schema()
if hasattr(engine_spec, "parameters_json_schema") or hasattr(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@betodealmeida let me know if this is good enough of a guard statement moving forward to make sure we don't call parameters_json_schema for engines we have implemented yet

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks great!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Though I think ideally we'd have all these methods in BaseEngineSpec and we'd check if parameters_schema is not None.

hughhhh marked this conversation as resolved.
Show resolved Hide resolved
engine_spec, "sqlalchemy_uri_placeholder"
):
payload[
"parameters"
] = engine_spec.parameters_json_schema() # type: ignore
payload[
"sqlalchemy_uri_placeholder"
] = engine_spec.sqlalchemy_uri_placeholder
] = engine_spec.sqlalchemy_uri_placeholder # type: ignore

available_databases.append(payload)

Expand Down
3 changes: 2 additions & 1 deletion superset/databases/commands/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ def run(self) -> None:

# try to connect
sqlalchemy_uri = engine_spec.build_sqlalchemy_uri(
self._properties["parameters"] # type: ignore
self._properties["parameters"], # type: ignore
self._properties.get("encrypted_extra", "{}"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to deserialize this:

serialized_encrypted_extra = self._properties.get("encrypted_extra", "{}")
try:
    encrypted_extra = json.loads(serialized_encrypted_extra)
except json.decoder.JSONDecodeError:
    encrypted_extra = {}

Then:

sqlalchemy_uri = engine_spec.build_sqlalchemy_uri(
    self._properties["parameters"],
    encrypted_extra,
)

You can then pass serialized_encrypted_extra on line 95 below.

)
if self._model and sqlalchemy_uri == self._model.safe_sqlalchemy_uri():
sqlalchemy_uri = self._model.sqlalchemy_uri_decrypted
Expand Down
6 changes: 5 additions & 1 deletion superset/databases/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@ def build_sqlalchemy_uri(
the constructed SQLAlchemy URI to be passed.
"""
parameters = data.pop("parameters", None)
encrypted_extra = data.get("encrypted_extra", None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, we need to try to deserialize from JSON like above.


if parameters:
if "engine" not in parameters:
raise ValidationError(
Expand Down Expand Up @@ -275,7 +277,9 @@ def build_sqlalchemy_uri(
]
)

data["sqlalchemy_uri"] = engine_spec.build_sqlalchemy_uri(parameters)
data["sqlalchemy_uri"] = engine_spec.build_sqlalchemy_uri(
parameters, encrypted_extra
)
return data


Expand Down
6 changes: 5 additions & 1 deletion superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,7 +1348,11 @@ class BasicParametersMixin:
encryption_parameters: Dict[str, str] = {}

@classmethod
def build_sqlalchemy_uri(cls, parameters: BasicParametersType) -> str:
def build_sqlalchemy_uri(
cls,
parameters: BasicParametersType,
encryted_extra: Optional[Dict[str, str]] = None,
) -> str:
query = parameters.get("query", {})
if parameters.get("encryption"):
if not cls.encryption_parameters:
Expand Down
68 changes: 68 additions & 0 deletions superset/db_engine_specs/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@
from typing import Any, Dict, List, Optional, Pattern, Tuple, TYPE_CHECKING

import pandas as pd
from apispec import APISpec
from apispec.ext.marshmallow import MarshmallowPlugin
from flask_babel import gettext as __
from marshmallow import fields, Schema
from sqlalchemy import literal_column
from sqlalchemy.sql.expression import ColumnClause
from typing_extensions import TypedDict

from superset.db_engine_specs.base import BaseEngineSpec
from superset.errors import SupersetErrorType
from superset.exceptions import SupersetGenericDBErrorException
from superset.sql_parse import Table
from superset.utils import core as utils
from superset.utils.hashing import md5_sha_from_str
Expand All @@ -38,6 +43,28 @@
+ "permission in project (?P<project>.+?)"
)

ma_plugin = MarshmallowPlugin()


class EncryptedField(fields.String):
pass


class BigQueryParametersSchema(Schema):
credentials_info = EncryptedField(description="credentials.json file for BigQuery")
hughhhh marked this conversation as resolved.
Show resolved Hide resolved


def encrypted_field_properties(self, field: Any, **_) -> Dict[str, Any]: # type: ignore
ret = {}
if isinstance(field, EncryptedField):
if self.openapi_version.major > 2:
ret["x-encrypted-extra"] = True
return ret
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's move this and EncryptedField outside, since other specs will also use them. We can have them in superset/databases/schemas.py.



class BigQueryParametersType(TypedDict):
pass
hughhhh marked this conversation as resolved.
Show resolved Hide resolved


class BigQueryEngineSpec(BaseEngineSpec):
"""Engine spec for Google's BigQuery
Expand All @@ -48,6 +75,10 @@ class BigQueryEngineSpec(BaseEngineSpec):
engine_name = "Google BigQuery"
max_column_name_length = 128

parameters_schema = BigQueryParametersSchema()
drivername = engine
sqlalchemy_uri_placeholder = "bigquery://{project_id}"

# BigQuery doesn't maintain context when running multiple statements in the
# same cursor, so we need to run all statements at once
run_multiple_statements_as_one = True
Expand Down Expand Up @@ -282,3 +313,40 @@ def df_to_sql(
to_gbq_kwargs[key] = to_sql_kwargs[key]

pandas_gbq.to_gbq(df, **to_gbq_kwargs)

@classmethod
def build_sqlalchemy_uri(
cls, _: BigQueryParametersType, encrypted_extra: Optional[Dict[str, str]] = None
) -> str:
if encrypted_extra:
project_id = encrypted_extra.get("project_id")
return f"{cls.drivername}://{project_id}"

raise SupersetGenericDBErrorException(
message="Big Query encrypted_extra is not available",
hughhhh marked this conversation as resolved.
Show resolved Hide resolved
)

@classmethod
def get_parameters_from_uri(cls, _: str) -> Any:
# BigQuery doesn't have parameters
return None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally:

get_parameters_from_uri(build_sqlalchemy_uri(parameters)) == parameters

Ie, this should be the opposite of build_sqlalchemy_uri.

We need to return credentials_info here so that the edit form can show it to the user. This means that we need to modify get_parameters_from_uri to also receive encrypted_extra.


@classmethod
def parameters_json_schema(cls) -> Any:
"""
Return configuration parameters as OpenAPI.
"""
if not cls.parameters_schema:
return None

spec = APISpec(
title="Database Parameters",
version="1.0.0",
openapi_version="3.0.0",
plugins=[ma_plugin],
)

ma_plugin.init_spec(spec)
ma_plugin.converter.add_attribute_function(encrypted_field_properties)
spec.components.schema(cls.__name__, schema=cls.parameters_schema)
return spec.to_dict()["components"]["schemas"][cls.__name__]
1 change: 1 addition & 0 deletions superset/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ def backend(self) -> str:
def parameters(self) -> Dict[str, Any]:
# Build parameters if db_engine_spec is a subclass of BasicParametersMixin
parameters = {"engine": self.backend}

if hasattr(self.db_engine_spec, "parameters_schema") and hasattr(
self.db_engine_spec, "get_parameters_from_uri"
):
Expand Down
18 changes: 18 additions & 0 deletions tests/databases/api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from superset.connectors.sqla.models import SqlaTable
from superset.db_engine_specs.mysql import MySQLEngineSpec
from superset.db_engine_specs.postgres import PostgresEngineSpec
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
from superset.db_engine_specs.hana import HanaEngineSpec
from superset.errors import SupersetError
from superset.models.core import Database, ConfigurationMethod
Expand Down Expand Up @@ -1373,6 +1374,7 @@ def test_available(self, app, get_available_engine_specs):
app.config = {"PREFERRED_DATABASES": ["postgresql"]}
get_available_engine_specs.return_value = [
PostgresEngineSpec,
BigQueryEngineSpec,
HanaEngineSpec,
]

Expand Down Expand Up @@ -1428,6 +1430,22 @@ def test_available(self, app, get_available_engine_specs):
"preferred": True,
"sqlalchemy_uri_placeholder": "postgresql+psycopg2://user:password@host:port/dbname[?key=value&key=value...]",
},
{
"engine": "bigquery",
"name": "Google BigQuery",
"parameters": {
"properties": {
"credentials_info": {
"description": "credentials.json file for BigQuery",
"type": "string",
"x-encrypted-extra": True,
}
},
"type": "object",
},
"preferred": False,
"sqlalchemy_uri_placeholder": "bigquery://{project_id}",
},
{"engine": "hana", "name": "SAP HANA", "preferred": False},
]
}
Expand Down
5 changes: 4 additions & 1 deletion tests/db_engine_specs/postgres_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,10 @@ def test_base_parameters_mixin():
"query": {"foo": "bar"},
"encryption": True,
}
sqlalchemy_uri = PostgresEngineSpec.build_sqlalchemy_uri(parameters)
encrypted_extra = None
sqlalchemy_uri = PostgresEngineSpec.build_sqlalchemy_uri(
parameters, encrypted_extra
)
assert sqlalchemy_uri == (
"postgresql+psycopg2://username:password@localhost:5432/dbname?"
"foo=bar&sslmode=verify-ca"
Expand Down