Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(sqla): avoid unnecessary type check on adhoc column #23491

Merged
merged 3 commits into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 26 additions & 18 deletions superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1006,15 +1006,19 @@ def adhoc_metric_to_sqla(

return self.make_sqla_column_compatible(sqla_metric, label)

def adhoc_column_to_sqla(
def adhoc_column_to_sqla( # pylint: disable=too-many-locals
self,
col: AdhocColumn,
force_type_check: bool = False,
template_processor: Optional[BaseTemplateProcessor] = None,
) -> ColumnElement:
"""
Turn an adhoc column into a sqlalchemy column.

:param col: Adhoc column definition
:param force_type_check: Should the column type be checked in the db.
This is needed to validate if a filter with an adhoc column
is applicable.
:param template_processor: template_processor instance
:returns: The metric defined as a sqlalchemy column
:rtype: sqlalchemy.sql.column
Expand All @@ -1027,28 +1031,28 @@ def adhoc_column_to_sqla(
template_processor=template_processor,
)
col_in_metadata = self.get_column(expression)
time_grain = col.get("timeGrain")
has_timegrain = col.get("columnType") == "BASE_AXIS" and time_grain
is_dttm = False
if col_in_metadata:
sqla_column = col_in_metadata.get_sqla_col(
template_processor=template_processor
)
is_dttm = col_in_metadata.is_temporal
else:
try:
sqla_column = literal_column(expression)
# probe adhoc column type
tbl, _ = self.get_from_clause(template_processor)
qry = sa.select([sqla_column]).limit(1).select_from(tbl)
sql = self.database.compile_sqla_query(qry)
col_desc = get_columns_description(self.database, sql)
is_dttm = col_desc[0]["is_dttm"]
except SupersetGenericDBErrorException as ex:
raise ColumnNotFoundException(message=str(ex)) from ex

if (
is_dttm
and col.get("columnType") == "BASE_AXIS"
and (time_grain := col.get("timeGrain"))
):
sqla_column = literal_column(expression)
if has_timegrain or force_type_check:
try:
# probe adhoc column type
tbl, _ = self.get_from_clause(template_processor)
qry = sa.select([sqla_column]).limit(1).select_from(tbl)
sql = self.database.compile_sqla_query(qry)
col_desc = get_columns_description(self.database, sql)
is_dttm = col_desc[0]["is_dttm"]
except SupersetGenericDBErrorException as ex:
raise ColumnNotFoundException(message=str(ex)) from ex

if is_dttm and has_timegrain:
sqla_column = self.db_engine_spec.get_timestamp_expr(
col=sqla_column,
pdf=None,
Expand Down Expand Up @@ -1458,7 +1462,11 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
col_obj = dttm_col
elif is_adhoc_column(flt_col):
try:
sqla_col = self.adhoc_column_to_sqla(flt_col)
sqla_col = self.adhoc_column_to_sqla(
col=flt_col,
force_type_check=True,
template_processor=template_processor,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bycatch - the template processor was missing here

)
applied_adhoc_filters_columns.append(flt_col)
except ColumnNotFoundException:
rejected_adhoc_filters_columns.append(flt_col)
Expand Down
2 changes: 1 addition & 1 deletion superset/utils/pandas_postprocessing/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@


@validate_column_args("index", "columns")
def pivot( # pylint: disable=too-many-arguments,too-many-locals
def pivot( # pylint: disable=too-many-arguments
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bycatch: a warning that pylint picked up but wasn't regarded as an error

df: DataFrame,
index: List[str],
aggregates: Dict[str, Dict[str, Any]],
Expand Down