-
Notifications
You must be signed in to change notification settings - Fork 14.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(embedded+async queries): support async queries to work with embedded guest user #26332
Changes from all commits
e76a4fa
48ed6c9
5743dea
6891185
1df0373
59d8703
c681637
2a6379b
65fc1f1
9e6227e
822c537
9b3c559
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -600,7 +600,15 @@ def get_payload( | |
set_and_log_cache( | ||
cache_manager.cache, | ||
cache_key, | ||
{"data": self._query_context.cache_values}, | ||
{ | ||
"data": { | ||
# setting form_data into query context cache value as well | ||
# so that it can be used to reconstruct form_data field | ||
# for query context object when reading from cache | ||
"form_data": self._query_context.form_data, | ||
Comment on lines
+605
to
+608
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have concerns here.. We've discussed this at length previously, and I feel we should always consider |
||
**self._query_context.cache_values, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @villebro do you have any thoughts on this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @zephyring from where does the raise_for_access get called when it is passing in the query context from cache? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The first call to |
||
}, | ||
}, | ||
self.get_cache_timeout(), | ||
) | ||
return_value["cache_key"] = cache_key # type: ignore | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ | |
|
||
from celery.exceptions import SoftTimeLimitExceeded | ||
from flask import current_app, g | ||
from flask_appbuilder.security.sqla.models import User | ||
from marshmallow import ValidationError | ||
|
||
from superset.charts.schemas import ChartDataQueryContextSchema | ||
|
@@ -58,6 +59,20 @@ | |
raise error | ||
|
||
|
||
def _load_user_from_job_metadata(job_metadata: dict[str, Any]) -> User: | ||
if user_id := job_metadata.get("user_id"): | ||
# logged in user | ||
user = security_manager.get_user_by_id(user_id) | ||
elif guest_token := job_metadata.get("guest_token"): | ||
# embedded guest user | ||
user = security_manager.get_guest_user_from_token(guest_token) | ||
del job_metadata["guest_token"] | ||
else: | ||
# default to anonymous user if no user is found | ||
user = security_manager.get_anonymous_user() | ||
return user | ||
|
||
|
||
@celery_app.task(name="load_chart_data_into_cache", soft_time_limit=query_timeout) | ||
def load_chart_data_into_cache( | ||
job_metadata: dict[str, Any], | ||
|
@@ -66,12 +81,7 @@ | |
# pylint: disable=import-outside-toplevel | ||
from superset.commands.chart.data.get_data_command import ChartDataCommand | ||
|
||
user = ( | ||
security_manager.get_user_by_id(job_metadata.get("user_id")) | ||
or security_manager.get_anonymous_user() | ||
) | ||
|
||
with override_user(user, force=False): | ||
with override_user(_load_user_from_job_metadata(job_metadata), force=False): | ||
try: | ||
set_form_data(form_data) | ||
query_context = _create_query_context_from_form(form_data) | ||
|
@@ -106,12 +116,7 @@ | |
) -> None: | ||
cache_key_prefix = "ejr-" # ejr: explore_json request | ||
|
||
user = ( | ||
security_manager.get_user_by_id(job_metadata.get("user_id")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if user_id is None, get_user_by_id() will crash the celery task |
||
or security_manager.get_anonymous_user() | ||
) | ||
|
||
with override_user(user, force=False): | ||
with override_user(_load_user_from_job_metadata(job_metadata), force=False): | ||
try: | ||
set_form_data(form_data) | ||
datasource_id, datasource_type = get_datasource_info(None, None, form_data) | ||
|
@@ -140,7 +145,13 @@ | |
"response_type": response_type, | ||
} | ||
cache_key = generate_cache_key(cache_value, cache_key_prefix) | ||
set_and_log_cache(cache_manager.cache, cache_key, cache_value) | ||
cache_instance = cache_manager.cache | ||
cache_timeout = ( | ||
cache_instance.cache.default_timeout if cache_instance.cache else None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. using cache_instance default_timeout if found. Otherwise it will fallback to config["CACHE_DEFAULT_TIMEOUT"] |
||
) | ||
set_and_log_cache( | ||
cache_instance, cache_key, cache_value, cache_timeout=cache_timeout | ||
) | ||
result_url = f"/superset/explore_json/data/{cache_key}" | ||
async_query_manager.update_job( | ||
job_metadata, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As we have the same logic in two places, could we DRY this out into a simple local util
_get_job_metadata(job_metadata)
to avoid divergence down the line?