Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(SIP-95): new endpoint for extra table metadata #28063

Merged
merged 6 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion superset-frontend/src/SqlLab/actions/sqlLab.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ describe('async actions', () => {
const getTableMetadataEndpoint = 'glob:**/api/v1/database/*/table/*/*/';
fetchMock.get(getTableMetadataEndpoint, {});
const getExtraTableMetadataEndpoint =
'glob:**/api/v1/database/*/table_extra/*/*/';
'glob:**/api/v1/database/*/table_metadata/extra/';
fetchMock.get(getExtraTableMetadataEndpoint, {});

let isFeatureEnabledMock;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ beforeEach(() => {
columns: table.columns,
},
});
fetchMock.get('glob:*/api/v1/database/*/table_extra/*/*', {
fetchMock.get('glob:*/api/v1/database/*/table_metadata/extra/', {
status: 200,
body: {},
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jest.mock(
);
const getTableMetadataEndpoint = 'glob:**/api/v1/database/*/table/*/*/';
const getExtraTableMetadataEndpoint =
'glob:**/api/v1/database/*/table_extra/*/*/';
'glob:**/api/v1/database/*/table_metadata/extra/*';
const updateTableSchemaEndpoint = 'glob:*/tableschemaview/*/expanded';

beforeEach(() => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ const DndFilterSelect = (props: DndFilterSelectProps) => {

if (!isSqllabView && dbId && name && schema) {
SupersetClient.get({
endpoint: `/api/v1/database/${dbId}/table_extra/${name}/${schema}/`,
endpoint: `/api/v1/database/${dbId}/table_metadata/extra/?name=${name}&schema=${schema}`,
})
.then(({ json }: { json: Record<string, any> }) => {
if (json?.partitions) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ class AdhocFilterControl extends React.Component {

if (!isSqllabView && dbId && name && schema) {
SupersetClient.get({
endpoint: `/api/v1/database/${dbId}/table_extra/${name}/${schema}/`,
endpoint: `/api/v1/database/${dbId}/table_metadata/extra/?name=${name}&schema=${schema}`,
})
.then(({ json }) => {
if (json && json.partitions) {
Expand Down
6 changes: 3 additions & 3 deletions superset-frontend/src/hooks/apiResources/tables.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ const tableApi = api.injectEndpoints({
FetchTableMetadataQueryParams
>({
query: ({ dbId, schema, table }) => ({
endpoint: `/api/v1/database/${dbId}/table_extra/${encodeURIComponent(
table,
)}/${encodeURIComponent(schema)}/`,
endpoint: schema
? `/api/v1/database/${dbId}/table_metadata/extra/?name=${table}&schema=${schema}`
: `/api/v1/database/${dbId}/table_metadata/extra/?name=${table}`,
transformResponse: ({ json }: JsonResponse) => json,
}),
}),
Expand Down
1 change: 1 addition & 0 deletions superset/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ class RouteMethod: # pylint: disable=too-few-public-methods
"select_star": "read",
"table_metadata": "read",
"table_extra_metadata": "read",
"table_extra_metadata_deprecated": "read",
"test_connection": "write",
"validate_parameters": "write",
"favorite_status": "read",
Expand Down
116 changes: 107 additions & 9 deletions superset/databases/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@
# specific language governing permissions and limitations
# under the License.
# pylint: disable=too-many-lines

from __future__ import annotations

import json
import logging
from datetime import datetime, timedelta
from io import BytesIO
from typing import Any, cast, Optional
from typing import Any, cast
from zipfile import is_zipfile, ZipFile

from deprecation import deprecated
Expand Down Expand Up @@ -82,6 +85,7 @@
get_export_ids_schema,
OAuth2ProviderResponseSchema,
openapi_spec_methods_override,
QualifiedTableSchema,
SchemasResponseSchema,
SelectStarResponseSchema,
TableExtraMetadataResponseSchema,
Expand All @@ -92,9 +96,18 @@
from superset.databases.utils import get_table_metadata
from superset.db_engine_specs import get_available_engine_specs
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import OAuth2Error, SupersetErrorsException, SupersetException
from superset.exceptions import (
DatabaseNotFoundException,
InvalidPayloadSchemaError,
OAuth2Error,
SupersetErrorsException,
SupersetException,
SupersetSecurityException,
TableNotFoundException,
)
from superset.extensions import security_manager
from superset.models.core import Database
from superset.sql_parse import Table
from superset.superset_typing import FlaskResponse
from superset.utils.core import error_msg_from_exception, parse_js_uri_path_item
from superset.utils.oauth2 import decode_oauth2_state
Expand All @@ -121,6 +134,7 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
"tables",
"table_metadata",
"table_extra_metadata",
"table_extra_metadata_deprecated",
"select_star",
"schemas",
"test_connection",
Expand Down Expand Up @@ -764,15 +778,20 @@ def table_metadata(
@check_table_access
@safe
@statsd_metrics
@deprecated(deprecated_in="4.0", removed_in="5.0")
@event_logger.log_this_with_context(
action=lambda self, *args, **kwargs: f"{self.__class__.__name__}"
f".table_extra_metadata",
f".table_extra_metadata_deprecated",
log_to_statsd=False,
)
def table_extra_metadata(
def table_extra_metadata_deprecated(
self, database: Database, table_name: str, schema_name: str
) -> FlaskResponse:
"""Get table extra metadata.

A newer API was introduced between 4.0 and 5.0, with support for catalogs for
SIP-95. This method was kept to prevent breaking API integrations, but will be
removed in 5.0.
---
get:
summary: Get table extra metadata
Expand Down Expand Up @@ -812,13 +831,92 @@ def table_extra_metadata(
500:
$ref: '#/components/responses/500'
"""
self.incr_stats("init", self.table_metadata.__name__)
self.incr_stats("init", self.table_extra_metadata_deprecated.__name__)

parsed_schema = parse_js_uri_path_item(schema_name, eval_undefined=True)
table_name = cast(str, parse_js_uri_path_item(table_name))
payload = database.db_engine_spec.extra_table_metadata(
database, table_name, parsed_schema
)
table = Table(table_name, parsed_schema)
payload = database.db_engine_spec.get_extra_table_metadata(database, table)
return self.response(200, **payload)

@expose("/<int:pk>/table_metadata/extra/", methods=("GET",))
@protect()
@statsd_metrics
@event_logger.log_this_with_context(
action=lambda self, *args, **kwargs: f"{self.__class__.__name__}"
f".table_extra_metadata",
log_to_statsd=False,
)
def table_extra_metadata(self, pk: int) -> FlaskResponse:
"""
Get extra metadata for a given table.

Optionally, a schema and a catalog can be passed, if different from the default
ones.
---
get:
summary: Get table extra metadata
description: >-
Extra metadata associated with the table (partitions, description, etc.)
parameters:
- in: path
schema:
type: integer
name: pk
description: The database id
- in: query
schema:
type: string
name: name
required: true
description: Table name
- in: query
schema:
type: string
name: schema
description: >-
Optional table schema, if not passed the schema configured in the database
will be used
- in: query
schema:
type: string
name: catalog
description: >-
Optional table catalog, if not passed the catalog configured in the
database will be used
responses:
200:
description: Table extra metadata information
content:
application/json:
schema:
$ref: "#/components/schemas/TableExtraMetadataResponseSchema"
401:
$ref: '#/components/responses/401'
404:
$ref: '#/components/responses/404'
500:
$ref: '#/components/responses/500'
"""
self.incr_stats("init", self.table_extra_metadata.__name__)

if not (database := DatabaseDAO.find_by_id(pk)):
raise DatabaseNotFoundException("No such database")

try:
parameters = QualifiedTableSchema().load(request.args)
except ValidationError as ex:
raise InvalidPayloadSchemaError(ex) from ex

table = Table(parameters["name"], parameters["schema"], parameters["catalog"])
try:
security_manager.raise_for_access(database=database, table=table)
except SupersetSecurityException as ex:
# instead of raising 403, raise 404 to hide table existence
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice!

raise TableNotFoundException("No such table") from ex

payload = database.db_engine_spec.get_extra_table_metadata(database, table)

return self.response(200, **payload)

@expose("/<int:pk>/select_star/<path:table_name>/", methods=("GET",))
Expand All @@ -832,7 +930,7 @@ def table_extra_metadata(
log_to_statsd=False,
)
def select_star(
self, database: Database, table_name: str, schema_name: Optional[str] = None
self, database: Database, table_name: str, schema_name: str | None = None
) -> FlaskResponse:
"""Get database select star for table.
---
Expand Down
24 changes: 24 additions & 0 deletions superset/databases/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1195,3 +1195,27 @@ class OAuth2ProviderResponseSchema(Schema):
class Meta: # pylint: disable=too-few-public-methods
# Ignore unknown fields that might be sent by the OAuth2 provider
unknown = EXCLUDE


class QualifiedTableSchema(Schema):
"""
Schema for a qualified table reference.

Catalog and schema can be ommited, to fallback to default values. Table name must be
present.
"""

name = fields.String(
required=True,
metadata={"description": "The table name"},
)
schema = fields.String(
required=False,
load_default=None,
metadata={"description": "The table schema"},
)
catalog = fields.String(
required=False,
load_default=None,
metadata={"description": "The table catalog"},
)
2 changes: 1 addition & 1 deletion superset/db_engine_specs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ For some databases the `df_to_sql` classmethod needs to be implemented. For exam

### Extra table metadata

DB engine specs can return additional metadata associated with a table. This is done via the `extra_table_metadata` class method. Trino uses this to return information about the latest partition, for example, and Bigquery returns clustering information. This information is then surfaced in the SQL Lab UI, when browsing tables in the metadata explorer (on the left panel).
DB engine specs can return additional metadata associated with a table. This is done via the `get_extra_table_metadata` class method. Trino uses this to return information about the latest partition, for example, and Bigquery returns clustering information. This information is then surfaced in the SQL Lab UI, when browsing tables in the metadata explorer (on the left panel).

### DB API exception mapping

Expand Down
25 changes: 19 additions & 6 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import json
import logging
import re
import warnings
from datetime import datetime
from re import Match, Pattern
from typing import (
Expand Down Expand Up @@ -1034,21 +1035,33 @@ def normalize_indexes(cls, indexes: list[dict[str, Any]]) -> list[dict[str, Any]
return indexes

@classmethod
def extra_table_metadata( # pylint: disable=unused-argument
def get_extra_table_metadata( # pylint: disable=unused-argument
cls,
database: Database,
table_name: str,
schema_name: str | None,
table: Table,
betodealmeida marked this conversation as resolved.
Show resolved Hide resolved
) -> dict[str, Any]:
"""
Returns engine-specific table metadata

:param database: Database instance
:param table_name: Table name
:param schema_name: Schema name
:param table: A Table instance
:return: Engine-specific table metadata
"""
# TODO: Fix circular import caused by importing Database
# old method that doesn't work with catalogs
if hasattr(cls, "extra_table_metadata"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

optional:
do:

if not hasattr(cls, "extra_table_metadata"):
    return {}
...

to reduce nesting

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, I'll fix it in the next PR!

warnings.warn(
"The `extra_table_metadata` method is deprecated, please implement "
"the `get_extra_table_metadata` method in the DB engine spec.",
DeprecationWarning,
)

# If a catalog is passed, return nothing, since we don't know the exact
# table that is being requested.
if table.catalog:
return {}

return cls.extra_table_metadata(database, table.table, table.schema)

return {}

@classmethod
Expand Down
8 changes: 5 additions & 3 deletions superset/db_engine_specs/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,12 @@ def get_indexes(
return cls.normalize_indexes(inspector.get_indexes(table_name, schema))

@classmethod
def extra_table_metadata(
cls, database: "Database", table_name: str, schema_name: Optional[str]
def get_extra_table_metadata(
cls,
database: "Database",
table: Table,
) -> dict[str, Any]:
indexes = database.get_indexes(table_name, schema_name)
indexes = database.get_indexes(table.table, table.schema)
if not indexes:
return {}
partitions_columns = [
Expand Down
9 changes: 4 additions & 5 deletions superset/db_engine_specs/gsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,14 @@ def get_url_for_impersonation(
return url

@classmethod
def extra_table_metadata(
def get_extra_table_metadata(
cls,
database: Database,
table_name: str,
schema_name: str | None,
table: Table,
) -> dict[str, Any]:
with database.get_raw_connection(schema=schema_name) as conn:
with database.get_raw_connection(schema=table.schema) as conn:
cursor = conn.cursor()
cursor.execute(f'SELECT GET_METADATA("{table_name}")')
cursor.execute(f'SELECT GET_METADATA("{table.table}")')
results = cursor.fetchone()[0]
try:
metadata = json.loads(results)
Expand Down
Loading
Loading